#define VXLAN_N_VID (1u << 24)
#define VXLAN_VID_MASK (VXLAN_N_VID - 1)
-/* IP header + UDP + VXLAN + Ethernet header */
-#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
-/* IPv6 header + UDP + VXLAN + Ethernet header */
-#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
}
/* See if multicast group is already in use by other ID */
-static bool vxlan_group_used(struct vxlan_net *vn, union vxlan_addr *remote_ip)
+static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
{
struct vxlan_dev *vxlan;
+ /* The vxlan_sock is only used by dev, so leaving the group has
+ * no effect on other vxlan devices.
+ */
+ if (atomic_read(&dev->vn_sock->refcnt) == 1)
+ return false;
+
list_for_each_entry(vxlan, &vn->vxlan_list, next) {
- if (!netif_running(vxlan->dev))
+ if (!netif_running(vxlan->dev) || vxlan == dev)
continue;
- if (vxlan_addr_equal(&vxlan->default_dst.remote_ip,
- remote_ip))
- return true;
+ if (vxlan->vn_sock != dev->vn_sock)
+ continue;
+
+ if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
+ &dev->default_dst.remote_ip))
+ continue;
+
+ if (vxlan->default_dst.remote_ifindex !=
+ dev->default_dst.remote_ifindex)
+ continue;
+
+ return true;
}
return false;
struct iphdr *oip = NULL;
struct ipv6hdr *oip6 = NULL;
struct vxlan_dev *vxlan;
- struct pcpu_tstats *stats;
+ struct pcpu_sw_netstats *stats;
union vxlan_addr saddr;
__u32 vni;
int err = 0;
return false;
}
-static void vxlan_sock_put(struct sk_buff *skb)
-{
- sock_put(skb->sk);
-}
-
-/* On transmit, associate with the tunnel socket */
-static void vxlan_set_owner(struct sock *sk, struct sk_buff *skb)
-{
- skb_orphan(skb);
- sock_hold(sk);
- skb->sk = sk;
- skb->destructor = vxlan_sock_put;
-}
-
/* Compute source port for outgoing packet
* first choice to use L4 flow hash since it will spread
* better and maybe available from hardware
unsigned int range = (port_max - port_min) + 1;
u32 hash;
- hash = skb_get_rxhash(skb);
+ hash = skb_get_hash(skb);
if (!hash)
hash = jhash(skb->data, 2 * ETH_ALEN,
(__force u32) skb->protocol);
ip6h->daddr = *daddr;
ip6h->saddr = *saddr;
- vxlan_set_owner(vs->sock->sk, skb);
-
err = handle_offloads(skb);
if (err)
return err;
uh->len = htons(skb->len);
uh->check = 0;
- vxlan_set_owner(vs->sock->sk, skb);
-
err = handle_offloads(skb);
if (err)
return err;
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
struct vxlan_dev *dst_vxlan)
{
- struct pcpu_tstats *tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
- struct pcpu_tstats *rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
+ struct pcpu_sw_netstats *tx_stats, *rx_stats;
union vxlan_addr loopback;
union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
+ tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
+ rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
skb->pkt_type = PACKET_HOST;
skb->encapsulation = 0;
skb->dev = dst_vxlan->dev;
netdev_dbg(dev, "circular route to %pI4\n",
&dst->sin.sin_addr.s_addr);
dev->stats.collisions++;
- goto tx_error;
+ goto rt_tx_error;
}
/* Bypass encapsulation if the destination is local */
struct vxlan_dev *vxlan = netdev_priv(dev);
struct ethhdr *eth;
bool did_rsc = false;
- struct vxlan_rdst *rdst;
+ struct vxlan_rdst *rdst, *fdst = NULL;
struct vxlan_fdb *f;
skb_reset_mac_header(skb);
vxlan_fdb_miss(vxlan, eth->h_dest);
dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
}
list_for_each_entry_rcu(rdst, &f->remotes, list) {
struct sk_buff *skb1;
+ if (!fdst) {
+ fdst = rdst;
+ continue;
+ }
skb1 = skb_clone(skb, GFP_ATOMIC);
if (skb1)
vxlan_xmit_one(skb1, dev, rdst, did_rsc);
}
- dev_kfree_skb(skb);
+ if (fdst)
+ vxlan_xmit_one(skb, dev, fdst, did_rsc);
+ else
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_sock *vs;
+ int i;
- dev->tstats = alloc_percpu(struct pcpu_tstats);
+ dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+ for_each_possible_cpu(i) {
+ struct pcpu_sw_netstats *vxlan_stats;
+ vxlan_stats = per_cpu_ptr(dev->tstats, i);
+ u64_stats_init(&vxlan_stats->syncp);
+ }
+
+
spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port);
if (vs) {
/* Start ageing timer and join group when device is brought up */
static int vxlan_open(struct net_device *dev)
{
- struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_sock *vs = vxlan->vn_sock;
if (!vs)
return -ENOTCONN;
- if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
- vxlan_group_used(vn, &vxlan->default_dst.remote_ip)) {
+ if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
vxlan_sock_hold(vs);
dev_hold(dev);
queue_work(vxlan_wq, &vxlan->igmp_join);
struct vxlan_sock *vs = vxlan->vn_sock;
if (vs && vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
- ! vxlan_group_used(vn, &vxlan->default_dst.remote_ip)) {
+ !vxlan_group_used(vn, vxlan)) {
vxlan_sock_hold(vs);
dev_hold(dev);
queue_work(vxlan_wq, &vxlan->igmp_leave);
{
}
+static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
+{ /* ndo_change_mtu handler for vxlan devices: store new_mtu in dev->mtu
+   * after bounding it by the MTU of the device whose ifindex is
+   * default_dst.remote_ifindex.
+   *
+   * Returns 0 on success, or -EINVAL when new_mtu is below 68 or above
+   * the lower device's MTU minus the encapsulation headroom. When no
+   * lower device is found, the result of eth_change_mtu() is returned.
+   */
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+ struct net_device *lowerdev;
+ int max_mtu;
+
+ lowerdev = __dev_get_by_index(dev_net(dev), dst->remote_ifindex);
+ if (lowerdev == NULL) /* no lower device found: use the generic check */
+ return eth_change_mtu(dev, new_mtu);
+
+ if (dst->remote_ip.sa.sa_family == AF_INET6) /* headroom depends on outer IP family */
+ max_mtu = lowerdev->mtu - VXLAN6_HEADROOM;
+ else
+ max_mtu = lowerdev->mtu - VXLAN_HEADROOM;
+
+ if (new_mtu < 68 || new_mtu > max_mtu) /* NOTE(review): 68 presumably mirrors eth_change_mtu's lower bound - confirm */
+ return -EINVAL;
+
+ dev->mtu = new_mtu;
+ return 0;
+}
+
static const struct net_device_ops vxlan_netdev_ops = {
.ndo_init = vxlan_init,
.ndo_uninit = vxlan_uninit,
.ndo_start_xmit = vxlan_xmit,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_set_rx_mode = vxlan_set_multicast_list,
- .ndo_change_mtu = eth_change_mtu,
+ .ndo_change_mtu = vxlan_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
.ndo_fdb_add = vxlan_fdb_add,
vxlan->age_timer.function = vxlan_cleanup;
vxlan->age_timer.data = (unsigned long) vxlan;
- inet_get_local_port_range(&low, &high);
+ inet_get_local_port_range(dev_net(dev), &low, &high);
vxlan->port_min = low;
vxlan->port_max = high;
vxlan->dst_port = htons(vxlan_port);
* could be used for both IPv4 and IPv6 communications, but
* users may set bindv6only=1.
*/
-static int create_v6_sock(struct net *net, __be16 port, struct socket **psock)
+static struct socket *create_v6_sock(struct net *net, __be16 port)
{
struct sock *sk;
struct socket *sock;
rc = sock_create_kern(AF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (rc < 0) {
pr_debug("UDPv6 socket create failed\n");
- return rc;
+ return ERR_PTR(rc);
}
/* Put in proper namespace */
pr_debug("bind for UDPv6 socket %pI6:%u (%d)\n",
&vxlan_addr.sin6_addr, ntohs(vxlan_addr.sin6_port), rc);
sk_release_kernel(sk);
- return rc;
+ return ERR_PTR(rc);
}
/* At this point, IPv6 module should have been loaded in
* sock_create_kern().
*/
BUG_ON(!ipv6_stub);
- *psock = sock;
/* Disable multicast loopback */
inet_sk(sk)->mc_loop = 0;
- return 0;
+ return sock;
}
#else
-static int create_v6_sock(struct net *net, __be16 port, struct socket **psock)
+static struct socket *create_v6_sock(struct net *net, __be16 port)
{
- return -EPFNOSUPPORT;
+ return ERR_PTR(-EPFNOSUPPORT);
}
#endif
-static int create_v4_sock(struct net *net, __be16 port, struct socket **psock)
+static struct socket *create_v4_sock(struct net *net, __be16 port)
{
struct sock *sk;
struct socket *sock;
rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (rc < 0) {
pr_debug("UDP socket create failed\n");
- return rc;
+ return ERR_PTR(rc);
}
/* Put in proper namespace */
pr_debug("bind for UDP socket %pI4:%u (%d)\n",
&vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc);
sk_release_kernel(sk);
- return rc;
+ return ERR_PTR(rc);
}
- *psock = sock;
/* Disable multicast loopback */
inet_sk(sk)->mc_loop = 0;
- return 0;
+ return sock;
}
/* Create new listen socket if needed */
struct vxlan_sock *vs;
struct socket *sock;
struct sock *sk;
- int rc = 0;
unsigned int h;
vs = kmalloc(sizeof(*vs), GFP_KERNEL);
INIT_WORK(&vs->del_work, vxlan_del_work);
if (ipv6)
- rc = create_v6_sock(net, port, &sock);
+ sock = create_v6_sock(net, port);
else
- rc = create_v4_sock(net, port, &sock);
- if (rc < 0) {
+ sock = create_v4_sock(net, port);
+ if (IS_ERR(sock)) {
kfree(vs);
- return ERR_PTR(rc);
+ return ERR_CAST(sock);
}
vs->sock = sock;
/* update header length based on lower device */
dev->hard_header_len = lowerdev->hard_header_len +
(use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
- }
+ } else if (use_ipv6)
+ vxlan->flags |= VXLAN_F_IPV6;
if (data[IFLA_VXLAN_TOS])
vxlan->tos = nla_get_u8(data[IFLA_VXLAN_TOS]);