u32 flags; /* VXLAN_F_* below */
struct work_struct sock_work;
- struct work_struct igmp_work;
+ struct work_struct igmp_join;
+ struct work_struct igmp_leave;
unsigned long age_interval;
struct timer_list age_timer;
/* First remote destination for a forwarding entry.
* Guaranteed to be non-NULL because remotes are never deleted.
*/
-static inline struct vxlan_rdst *first_remote(struct vxlan_fdb *fdb)
+static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
{
- return list_first_or_null_rcu(&fdb->remotes, struct vxlan_rdst, list);
+ return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list); /* no empty-list result any more; relies on the non-NULL guarantee stated above */
+}
+/* Same first-entry lookup through the plain (non-RCU) list accessor.
+ * NOTE(review): the _rtnl suffix suggests callers hold RTNL -- confirm.
+ */
+static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
+{
+ return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
}
/* Find VXLAN socket based on network namespace and UDP port */
-static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port)
+static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
{
struct vxlan_sock *vs;
struct vxlan_sock *vs;
struct vxlan_dev *vxlan;
- vs = vxlan_find_port(net, port);
+ vs = vxlan_find_sock(net, port);
if (!vs)
return NULL;
if (skb == NULL)
goto errout;
- err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, first_remote(fdb));
+ err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0,
+ first_remote_rtnl(fdb));
if (err < 0) {
/* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
return NULL;
}
+/* Replace destination of unicast mac.
+ *
+ * Returns 0 when nothing was changed (vxlan_fdb_find_rdst() already finds
+ * the requested destination, or the remote list is empty) and 1 when the
+ * first remote on the list was overwritten with the new destination.
+ */
+static int vxlan_fdb_replace(struct vxlan_fdb *f,
+			    __be32 ip, __be16 port, __u32 vni, __u32 ifindex)
+{
+	struct vxlan_rdst *first;
+
+	/* Requested destination is already on the list: no change. */
+	if (vxlan_fdb_find_rdst(f, ip, port, vni, ifindex))
+		return 0;
+
+	first = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
+	if (first) {
+		/* Overwrite the first remote in place. */
+		first->remote_ip = ip;
+		first->remote_port = port;
+		first->remote_vni = vni;
+		first->remote_ifindex = ifindex;
+		return 1;
+	}
+
+	return 0;
+}
+
/* Add/update destinations for multicast */
static int vxlan_fdb_append(struct vxlan_fdb *f,
__be32 ip, __be16 port, __u32 vni, __u32 ifindex)
f->updated = jiffies;
notify = 1;
}
+ if ((flags & NLM_F_REPLACE)) {
+ /* Only change unicasts */
+ if (!(is_multicast_ether_addr(f->eth_addr) ||
+ is_zero_ether_addr(f->eth_addr))) {
+ int rc = vxlan_fdb_replace(f, ip, port, vni,
+ ifindex);
+
+ if (rc < 0)
+ return rc;
+ notify |= rc;
+ } else
+ return -EOPNOTSUPP;
+ }
if ((flags & NLM_F_APPEND) &&
(is_multicast_ether_addr(f->eth_addr) ||
is_zero_ether_addr(f->eth_addr))) {
if (vxlan->addrmax && vxlan->addrcnt >= vxlan->addrmax)
return -ENOSPC;
+ /* Disallow replace to add a multicast entry */
+ if ((flags & NLM_F_REPLACE) &&
+ (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
+ return -EOPNOTSUPP;
+
netdev_dbg(vxlan->dev, "add %pM -> %pI4\n", mac, &ip);
f = kmalloc(sizeof(*f), GFP_ATOMIC);
if (!f)
f = vxlan_find_mac(vxlan, src_mac);
if (likely(f)) {
- struct vxlan_rdst *rdst = first_remote(f);
+ struct vxlan_rdst *rdst = first_remote_rcu(f);
if (likely(rdst->remote_ip == src_ip))
return false;
return false;
}
-
/* See if multicast group is already in use by other ID */
static bool vxlan_group_used(struct vxlan_net *vn, __be32 remote_ip)
{
queue_work(vxlan_wq, &vs->del_work);
}
-/* Callback to update multicast group membership.
- * Scheduled when vxlan goes up/down.
+/* Callback to update multicast group membership when first VNI on
+ * multicast address is brought up.
+ * Done as workqueue because ip_mc_join_group acquires RTNL.
*/
-static void vxlan_igmp_work(struct work_struct *work)
+static void vxlan_igmp_join(struct work_struct *work)
{
- struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_work);
+ struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id);
struct vxlan_sock *vs = vxlan->vn_sock;
struct sock *sk = vs->sock->sk;
};
lock_sock(sk);
- if (vxlan_group_used(vn, vxlan->default_dst.remote_ip))
- ip_mc_join_group(sk, &mreq);
- else
- ip_mc_leave_group(sk, &mreq);
+ ip_mc_join_group(sk, &mreq);
+ release_sock(sk);
+
+ vxlan_sock_release(vn, vs);
+ dev_put(vxlan->dev);
+}
+
+/* Inverse of vxlan_igmp_join when last VNI is brought down */
+static void vxlan_igmp_leave(struct work_struct *work)
+{
+ struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
+ struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id);
+ struct vxlan_sock *vs = vxlan->vn_sock;
+ struct sock *sk = vs->sock->sk;
+ struct ip_mreqn mreq = {
+ .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip,
+ .imr_ifindex = vxlan->default_dst.remote_ifindex,
+ };
+
+ lock_sock(sk);
+ ip_mc_leave_group(sk, &mreq);
release_sock(sk);
vxlan_sock_release(vn, vs);
}
f = vxlan_find_mac(vxlan, n->ha);
- if (f && first_remote(f)->remote_ip == htonl(INADDR_ANY)) {
+ if (f && first_remote_rcu(f)->remote_ip == htonl(INADDR_ANY)) {
/* bridge-local neighbor */
neigh_release(n);
goto out;
mod_timer(&vxlan->age_timer, next_timer);
}
+/* Attach @vxlan to socket @vs: record the socket on the device, then hash
+ * the device into the socket's VNI table under its default destination VNI.
+ */
+static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
+{
+	struct hlist_head *bucket;
+
+	vxlan->vn_sock = vs;
+
+	bucket = vni_head(vs, vxlan->default_dst.remote_vni);
+	hlist_add_head_rcu(&vxlan->hlist, bucket);
+}
+
/* Setup stats when device is created */
static int vxlan_init(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_sock *vs;
- __u32 vni = vxlan->default_dst.remote_vni;
dev->tstats = alloc_percpu(struct pcpu_tstats);
if (!dev->tstats)
return -ENOMEM;
spin_lock(&vn->sock_lock);
- vs = vxlan_find_port(dev_net(dev), vxlan->dst_port);
+ vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port);
if (vs) {
/* If we have a socket with same port already, reuse it */
atomic_inc(&vs->refcnt);
- vxlan->vn_sock = vs;
- hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
+ vxlan_vs_add_dev(vs, vxlan);
} else {
/* otherwise make new socket outside of RTNL */
dev_hold(dev);
/* Start ageing timer and join group when device is brought up */
static int vxlan_open(struct net_device *dev)
{
+ struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_sock *vs = vxlan->vn_sock;
if (!vs)
return -ENOTCONN;
- if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
+ if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip)) &&
+ vxlan_group_used(vn, vxlan->default_dst.remote_ip)) {
vxlan_sock_hold(vs);
dev_hold(dev);
- queue_work(vxlan_wq, &vxlan->igmp_work);
+ queue_work(vxlan_wq, &vxlan->igmp_join);
}
if (vxlan->age_interval)
/* Cleanup timer and forwarding table on shutdown */
static int vxlan_stop(struct net_device *dev)
{
+ struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_sock *vs = vxlan->vn_sock;
- if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
+ if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip)) &&
+ ! vxlan_group_used(vn, vxlan->default_dst.remote_ip)) {
vxlan_sock_hold(vs);
dev_hold(dev);
- queue_work(vxlan_wq, &vxlan->igmp_work);
+ queue_work(vxlan_wq, &vxlan->igmp_leave);
}
del_timer_sync(&vxlan->age_timer);
INIT_LIST_HEAD(&vxlan->next);
spin_lock_init(&vxlan->hash_lock);
- INIT_WORK(&vxlan->igmp_work, vxlan_igmp_work);
+ INIT_WORK(&vxlan->igmp_join, vxlan_igmp_join);
+ INIT_WORK(&vxlan->igmp_leave, vxlan_igmp_leave);
INIT_WORK(&vxlan->sock_work, vxlan_sock_work);
init_timer_deferrable(&vxlan->age_timer);
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
{
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
struct sock *sk;
struct sockaddr_in vxlan_addr = {
unsigned int h;
vs = kmalloc(sizeof(*vs), GFP_KERNEL);
- if (!vs)
+ if (!vs) {
+ pr_debug("memory alocation failure\n");
return ERR_PTR(-ENOMEM);
+ }
for (h = 0; h < VNI_HASH_SIZE; ++h)
INIT_HLIST_HEAD(&vs->vni_list[h]);
kfree(vs);
return ERR_PTR(rc);
}
+ atomic_set(&vs->refcnt, 1);
/* Disable multicast loopback */
inet_sk(sk)->mc_loop = 0;
+ spin_lock(&vn->sock_lock);
+ hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
+ spin_unlock(&vn->sock_lock);
/* Mark socket as an encapsulation socket. */
udp_sk(sk)->encap_type = 1;
udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
udp_encap_enable();
- atomic_set(&vs->refcnt, 1);
+ return vs;
+}
+
+static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port)
+{
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+ struct vxlan_sock *vs;
+ /* Get a socket for @port: create a new one, or if creation fails take a
+  * reference on an existing socket found for the same net/port.
+  * Returns the socket or an ERR_PTR().
+  */
+ vs = vxlan_socket_create(net, port);
+ if (!IS_ERR(vs))
+ return vs; /* freshly created socket */
+ spin_lock(&vn->sock_lock); /* creation failed: look for a socket to share */
+ vs = vxlan_find_sock(net, port);
+ if (vs)
+ atomic_inc(&vs->refcnt); /* extra reference for the caller */
+ else
+ vs = ERR_PTR(-EINVAL); /* the error from vxlan_socket_create() is not propagated */
+
+ spin_unlock(&vn->sock_lock);
return vs;
}
/* Scheduled at device creation to bind to a socket */
static void vxlan_sock_work(struct work_struct *work)
{
- struct vxlan_dev *vxlan
- = container_of(work, struct vxlan_dev, sock_work);
- struct net_device *dev = vxlan->dev;
- struct net *net = dev_net(dev);
- __u32 vni = vxlan->default_dst.remote_vni;
- __be16 port = vxlan->dst_port;
+ struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work);
+ struct net *net = dev_net(vxlan->dev);
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- struct vxlan_sock *nvs, *ovs;
-
- nvs = vxlan_socket_create(net, port);
- if (IS_ERR(nvs)) {
- netdev_err(vxlan->dev, "Can not create UDP socket, %ld\n",
- PTR_ERR(nvs));
- goto out;
- }
+ __be16 port = vxlan->dst_port;
+ struct vxlan_sock *nvs;
+ nvs = vxlan_sock_add(net, port); /* new or shared socket, or an ERR_PTR() */
spin_lock(&vn->sock_lock);
- /* Look again to see if can reuse socket */
- ovs = vxlan_find_port(net, port);
- if (ovs) {
- atomic_inc(&ovs->refcnt);
- vxlan->vn_sock = ovs;
- hlist_add_head_rcu(&vxlan->hlist, vni_head(ovs, vni));
- spin_unlock(&vn->sock_lock);
-
- sk_release_kernel(nvs->sock->sk);
- kfree(nvs);
- } else {
- vxlan->vn_sock = nvs;
- hlist_add_head_rcu(&nvs->hlist, vs_head(net, port));
- hlist_add_head_rcu(&vxlan->hlist, vni_head(nvs, vni));
- spin_unlock(&vn->sock_lock);
- }
-out:
- dev_put(dev);
+ if (!IS_ERR(nvs)) /* on failure vn_sock is not set here and, unlike the removed code, nothing is logged */
+ vxlan_vs_add_dev(nvs, vxlan);
+ spin_unlock(&vn->sock_lock);
+
+ dev_put(vxlan->dev); /* NOTE(review): presumably pairs with the dev_hold() taken before this work was queued -- confirm */
}
static int vxlan_newlink(struct net *net, struct net_device *dev,
struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
- flush_workqueue(vxlan_wq);
-
spin_lock(&vn->sock_lock);
- hlist_del_rcu(&vxlan->hlist);
+ if (!hlist_unhashed(&vxlan->hlist))
+ hlist_del_rcu(&vxlan->hlist);
spin_unlock(&vn->sock_lock);
list_del(&vxlan->next);
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan;
+ LIST_HEAD(list);
rtnl_lock();
list_for_each_entry(vxlan, &vn->vxlan_list, next)
- dev_close(vxlan->dev);
+ unregister_netdevice_queue(vxlan->dev, &list);
+ unregister_netdevice_many(&list);
rtnl_unlock();
}