#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/vxlan.h>
#define VXLAN_VERSION "0.1"
static const u8 all_zeros_mac[ETH_ALEN];
-/* per UDP socket information */
-struct vxlan_sock {
- struct hlist_node hlist;
- struct rcu_head rcu;
- struct work_struct del_work;
- atomic_t refcnt;
- struct socket *sock;
- struct hlist_head vni_list[VNI_HASH_SIZE];
-};
-
/* per-network namespace private data for this module */
struct vxlan_net {
struct list_head vxlan_list;
return NULL;
}
-/* Look up VNI in a per net namespace table */
-static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port)
+static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
{
- struct vxlan_sock *vs;
struct vxlan_dev *vxlan;
- vs = vxlan_find_sock(net, port);
- if (!vs)
- return NULL;
-
hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) {
if (vxlan->default_dst.remote_vni == id)
return vxlan;
return NULL;
}
+/* Look up VNI in a per net namespace table.
+ *
+ * Finds the vxlan socket for @port in @net with vxlan_find_sock() and
+ * then looks @id up on that socket with vxlan_vs_find_vni().
+ *
+ * Returns NULL when no socket is found for @port; otherwise returns
+ * whatever vxlan_vs_find_vni() returns for @id on that socket.
+ */
+static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port)
+{
+ struct vxlan_sock *vs;
+
+ vs = vxlan_find_sock(net, port);
+ if (!vs)
+ return NULL;
+
+ return vxlan_vs_find_vni(vs, id);
+}
+
/* Fill in neighbour message in skbuff. */
static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
const struct vxlan_fdb *fdb,
atomic_inc(&vs->refcnt);
}
-static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)
+void vxlan_sock_release(struct vxlan_sock *vs)
{
+ struct vxlan_net *vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
+
if (!atomic_dec_and_test(&vs->refcnt))
return;
queue_work(vxlan_wq, &vs->del_work);
}
+EXPORT_SYMBOL_GPL(vxlan_sock_release);
/* Callback to update multicast group membership when first VNI on
 * multicast address is brought up
static void vxlan_igmp_join(struct work_struct *work)
{
struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
- struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id);
struct vxlan_sock *vs = vxlan->vn_sock;
struct sock *sk = vs->sock->sk;
struct ip_mreqn mreq = {
ip_mc_join_group(sk, &mreq);
release_sock(sk);
- vxlan_sock_release(vn, vs);
+ vxlan_sock_release(vs);
dev_put(vxlan->dev);
}
static void vxlan_igmp_leave(struct work_struct *work)
{
struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
- struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id);
struct vxlan_sock *vs = vxlan->vn_sock;
struct sock *sk = vs->sock->sk;
struct ip_mreqn mreq = {
ip_mc_leave_group(sk, &mreq);
release_sock(sk);
- vxlan_sock_release(vn, vs);
+ vxlan_sock_release(vs);
dev_put(vxlan->dev);
}
/* Callback from net/ipv4/udp.c to receive packets */
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
- struct iphdr *oip;
+ struct vxlan_sock *vs;
struct vxlanhdr *vxh;
- struct vxlan_dev *vxlan;
- struct pcpu_tstats *stats;
__be16 port;
- __u32 vni;
- int err;
/* Need Vxlan and inner Ethernet header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN))
goto error;
}
- /* Is this VNI defined? */
- vni = ntohl(vxh->vx_vni) >> 8;
+ if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
+ goto drop;
+
port = inet_sk(sk)->inet_sport;
- vxlan = vxlan_find_vni(sock_net(sk), vni, port);
- if (!vxlan) {
- netdev_dbg(skb->dev, "unknown vni %d port %u\n",
- vni, ntohs(port));
+
+ vs = vxlan_find_sock(sock_net(sk), port);
+ if (!vs)
goto drop;
- }
- if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) {
- vxlan->dev->stats.rx_length_errors++;
- vxlan->dev->stats.rx_errors++;
+ vs->rcv(vs, skb, vxh->vx_vni);
+ return 0;
+
+drop:
+ /* Consume bad packet */
+ kfree_skb(skb);
+ return 0;
+
+error:
+ /* Return non vxlan pkt */
+ return 1;
+}
+
+static void vxlan_rcv(struct vxlan_sock *vs,
+ struct sk_buff *skb, __be32 vx_vni)
+{
+ struct iphdr *oip;
+ struct vxlan_dev *vxlan;
+ struct pcpu_tstats *stats;
+ __u32 vni;
+ int err;
+
+ vni = ntohl(vx_vni) >> 8;
+ /* Is this VNI defined? */
+ vxlan = vxlan_vs_find_vni(vs, vni);
+ if (!vxlan)
goto drop;
- }
skb_reset_mac_header(skb);
-
skb->protocol = eth_type_trans(skb, vxlan->dev);
/* Ignore packet loops (and multicast echo) */
netif_rx(skb);
- return 0;
-error:
- return 1;
+ return;
drop:
/* Consume bad packet */
kfree_skb(skb);
- return 0;
}
static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
}
/* On transmit, associate with the tunnel socket */
-static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb)
+static void vxlan_set_owner(struct sock *sk, struct sk_buff *skb)
{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- struct sock *sk = vxlan->vn_sock->sock->sk;
-
skb_orphan(skb);
sock_hold(sk);
skb->sk = sk;
* better and maybe available from hardware
* secondary choice is to use jhash on the Ethernet header
*/
-static __be16 vxlan_src_port(const struct vxlan_dev *vxlan, struct sk_buff *skb)
+__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb)
{
- unsigned int range = (vxlan->port_max - vxlan->port_min) + 1;
+ unsigned int range = (port_max - port_min) + 1;
u32 hash;
hash = skb_get_rxhash(skb);
hash = jhash(skb->data, 2 * ETH_ALEN,
(__force u32) skb->protocol);
- return htons((((u64) hash * range) >> 32) + vxlan->port_min);
+ return htons((((u64) hash * range) >> 32) + port_min);
}
+EXPORT_SYMBOL_GPL(vxlan_src_port);
static int handle_offloads(struct sk_buff *skb)
{
return 0;
}
/*
 * Prepend VXLAN and UDP headers to @skb and pass it to iptunnel_xmit().
 *
 * Steps, in order:
 *  - if the skb is not yet flagged as encapsulated, reset its inner header
 *    offsets and set skb->encapsulation;
 *  - push a VXLAN header with flags htonl(VXLAN_FLAGS) and @vni;
 *  - push a UDP header using @src_port / @dst_port, with the length taken
 *    from skb->len after both pushes and the checksum field set to 0;
 *  - attach the skb to @vs's socket via vxlan_set_owner();
 *  - run handle_offloads(), then iptunnel_xmit() with IPPROTO_UDP.
 *
 * @vni is written into the header exactly as passed; the caller visible in
 * this file passes htonl(vni << 8).
 *
 * Both headers are added with __skb_push().
 * NOTE(review): presumably the caller must have reserved headroom already -
 * the caller in this file calls skb_cow_head(skb, VXLAN_HEADROOM) first;
 * confirm for any new external caller.
 *
 * Returns the handle_offloads() error if that call fails, otherwise the
 * return value of iptunnel_xmit().  On the handle_offloads() failure path
 * this function releases neither @rt nor @skb; the caller in this file
 * reacts to a negative return by jumping to a label that calls
 * ip_rt_put(rt).
 */
+int vxlan_xmit_skb(struct net *net, struct vxlan_sock *vs,
+ struct rtable *rt, struct sk_buff *skb,
+ __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
+ __be16 src_port, __be16 dst_port, __be32 vni)
+{
+ struct vxlanhdr *vxh;
+ struct udphdr *uh;
+ int err;
+
+ if (!skb->encapsulation) {
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
+
+ vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
+ vxh->vx_flags = htonl(VXLAN_FLAGS);
+ vxh->vx_vni = vni; /* stored as passed; no conversion is done here */
+
+ __skb_push(skb, sizeof(*uh));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+
+ uh->dest = dst_port;
+ uh->source = src_port;
+
+ uh->len = htons(skb->len);
+ uh->check = 0;
+
+ vxlan_set_owner(vs->sock->sk, skb);
+
+ err = handle_offloads(skb);
+ if (err)
+ return err; /* rt and skb are left for the caller to release */
+
+ return iptunnel_xmit(net, rt, skb, src, dst,
+ IPPROTO_UDP, tos, ttl, df);
+}
+EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
+
/* Bypass encapsulation if the destination is local */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
struct vxlan_dev *dst_vxlan)
struct vxlan_dev *vxlan = netdev_priv(dev);
struct rtable *rt;
const struct iphdr *old_iph;
- struct vxlanhdr *vxh;
- struct udphdr *uh;
struct flowi4 fl4;
__be32 dst;
__be16 src_port, dst_port;
goto drop;
}
- if (!skb->encapsulation) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
-
/* Need space for new headers (invalidates iph ptr) */
if (skb_cow_head(skb, VXLAN_HEADROOM))
goto drop;
if (tos == 1)
tos = ip_tunnel_get_dsfield(old_iph, skb);
- src_port = vxlan_src_port(vxlan, skb);
+ src_port = vxlan_src_port(vxlan->port_min, vxlan->port_max, skb);
memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_oif = rdst->remote_ifindex;
if (rt->dst.dev == dev) {
netdev_dbg(dev, "circular route to %pI4\n", &dst);
- ip_rt_put(rt);
dev->stats.collisions++;
- goto tx_error;
+ goto rt_tx_error;
}
/* Bypass encapsulation if the destination is local */
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
return;
}
- vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
- vxh->vx_flags = htonl(VXLAN_FLAGS);
- vxh->vx_vni = htonl(vni << 8);
-
- __skb_push(skb, sizeof(*uh));
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- uh->dest = dst_port;
- uh->source = src_port;
-
- uh->len = htons(skb->len);
- uh->check = 0;
-
- vxlan_set_owner(dev, skb);
-
- if (handle_offloads(skb))
- goto drop;
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
- err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, dst,
- IPPROTO_UDP, tos, ttl, df);
+ err = vxlan_xmit_skb(dev_net(dev), vxlan->vn_sock, rt, skb,
+ fl4.saddr, dst, tos, ttl, df,
+ src_port, dst_port, htonl(vni << 8));
+
+ if (err < 0)
+ goto rt_tx_error;
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return;
dev->stats.tx_dropped++;
goto tx_free;
+rt_tx_error:
+ ip_rt_put(rt);
tx_error:
dev->stats.tx_errors++;
tx_free:
/*
 * Release the resources held by a vxlan device: calls
 * vxlan_fdb_delete_default(), releases the device's vxlan socket if one is
 * attached (vxlan->vn_sock may be NULL, hence the check), and frees the
 * per-cpu dev->tstats allocation.
 */
static void vxlan_uninit(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
struct vxlan_sock *vs = vxlan->vn_sock;
vxlan_fdb_delete_default(vxlan);
if (vs)
- vxlan_sock_release(vn, vs);
+ vxlan_sock_release(vs);
free_percpu(dev->tstats);
}
kfree_rcu(vs, rcu);
}
-static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
+static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
+ vxlan_rcv_t *rcv, void *data)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
return ERR_PTR(rc);
}
atomic_set(&vs->refcnt, 1);
+ vs->rcv = rcv;
+ vs->data = data;
/* Disable multicast loopback */
inet_sk(sk)->mc_loop = 0;
return vs;
}
/*
 * Obtain a vxlan socket for @port in @net.
 *
 * A new socket is first attempted with vxlan_socket_create(); if that
 * succeeds it is returned directly.  If creation fails:
 *  - with @no_share set, the creation error is returned as is;
 *  - otherwise an existing socket for @port is looked up under
 *    vn->sock_lock.  If one exists and its receive handler equals @rcv, a
 *    reference is taken on it and it is returned; if one exists with a
 *    different handler, ERR_PTR(-EBUSY) is returned; if none exists,
 *    ERR_PTR(-EINVAL) is returned.
 *
 * @data is only forwarded to vxlan_socket_create().  The caller in this
 * file checks the result with IS_ERR().
 */
-static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port)
+struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
+ vxlan_rcv_t *rcv, void *data,
+ bool no_share)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
- vs = vxlan_socket_create(net, port);
+ vs = vxlan_socket_create(net, port, rcv, data);
if (!IS_ERR(vs))
return vs;
+ if (no_share) /* Return error if sharing is not allowed. */
+ return vs;
+
spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(net, port);
- if (vs)
- atomic_inc(&vs->refcnt);
- else
+ if (vs) {
+ if (vs->rcv == rcv)
+ atomic_inc(&vs->refcnt);
+ else
+ vs = ERR_PTR(-EBUSY);
+ }
+ spin_unlock(&vn->sock_lock);
+
+ if (!vs)
vs = ERR_PTR(-EINVAL);
- spin_unlock(&vn->sock_lock);
return vs;
}
+EXPORT_SYMBOL_GPL(vxlan_sock_add);
/* Scheduled at device creation to bind to a socket */
static void vxlan_sock_work(struct work_struct *work)
__be16 port = vxlan->dst_port;
struct vxlan_sock *nvs;
- nvs = vxlan_sock_add(net, port);
+ nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false);
spin_lock(&vn->sock_lock);
if (!IS_ERR(nvs))
vxlan_vs_add_dev(nvs, vxlan);