macvtap: Add support for packet capture on macvtap devices.

[~andy/linux] / drivers / net / macvtap.c
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c

index 9dccb1edfd2aba2070023f4ae874bac0cc432293..85ecfccf9c60d62f90bfdadd0d8a9c07b1fec7a7 100644 (file)
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -70,6 +70,11 @@ static const struct proto_ops macvtap_socket_ops;
  #define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)
  #define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG)
  
+static struct macvlan_dev *macvtap_get_vlan_rcu(const struct net_device *dev)
+{
+       return rcu_dereference(dev->rx_handler_data);
+}
+
  /*
   * RCU usage:
   * The macvtap_queue and the macvlan_dev are loosely coupled, the
@@ -271,24 +276,27 @@ static void macvtap_del_queues(struct net_device *dev)
                 sock_put(&qlist[j]->sk);
  }
  
-/*
- * Forward happens for data that gets sent from one macvlan
- * endpoint to another one in bridge mode. We just take
- * the skb and put it into the receive queue.
- */
-static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
+static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
  {
-       struct macvlan_dev *vlan = netdev_priv(dev);
-       struct macvtap_queue *q = macvtap_get_queue(dev, skb);
+       struct sk_buff *skb = *pskb;
+       struct net_device *dev = skb->dev;
+       struct macvlan_dev *vlan;
+       struct macvtap_queue *q;
         netdev_features_t features = TAP_FEATURES;
  
+       vlan = macvtap_get_vlan_rcu(dev);
+       if (!vlan)
+               return RX_HANDLER_PASS;
+
+       q = macvtap_get_queue(dev, skb);
         if (!q)
-               goto drop;
+               return RX_HANDLER_PASS;
  
         if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len)
                 goto drop;
  
-       skb->dev = dev;
+       skb_push(skb, ETH_HLEN);
+
         /* Apply the forward feature mask so that we perform segmentation
          * according to users wishes.  This only works if VNET_HDR is
          * enabled.
@@ -320,22 +328,13 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
  
  wake_up:
         wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
-       return NET_RX_SUCCESS;
+       return RX_HANDLER_CONSUMED;
  
  drop:
+       /* Count errors/drops only here, thus don't care about args. */
+       macvlan_count_rx(vlan, 0, 0, 0);
         kfree_skb(skb);
-       return NET_RX_DROP;
-}
-
-/*
- * Receive is for data from the external interface (lowerdev),
- * in case of macvtap, we can treat that the same way as
- * forward, which macvlan cannot.
- */
-static int macvtap_receive(struct sk_buff *skb)
-{
-       skb_push(skb, ETH_HLEN);
-       return macvtap_forward(skb->dev, skb);
+       return RX_HANDLER_CONSUMED;
  }
  
  static int macvtap_get_minor(struct macvlan_dev *vlan)
@@ -385,6 +384,8 @@ static int macvtap_newlink(struct net *src_net,
                            struct nlattr *data[])
  {
         struct macvlan_dev *vlan = netdev_priv(dev);
+       int err;
+
         INIT_LIST_HEAD(&vlan->queue_list);
  
         /* Since macvlan supports all offloads by default, make
@@ -392,16 +393,21 @@ static int macvtap_newlink(struct net *src_net,
          */
         vlan->tap_features = TUN_OFFLOADS;
  
+       err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan);
+       if (err)
+               return err;
+
         /* Don't put anything that may fail after macvlan_common_newlink
          * because we can't undo what it does.
          */
         return macvlan_common_newlink(src_net, dev, tb, data,
-                                     macvtap_receive, macvtap_forward);
+                                     netif_rx, dev_forward_skb);
  }
  
  static void macvtap_dellink(struct net_device *dev,
                             struct list_head *head)
  {
+       netdev_rx_handler_unregister(dev);
         macvtap_del_queues(dev);
         macvlan_dellink(dev, head);
  }
@@ -588,7 +594,7 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb,
         return 0;
  }
  
-static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
+static void macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
                                    struct virtio_net_hdr *vnet_hdr)
  {
         memset(vnet_hdr, 0, sizeof(*vnet_hdr));
@@ -619,8 +625,6 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
         } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
                 vnet_hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID;
         } /* else everything is zero */
-
-       return 0;
  }
  
  /* Get packet from user space buffer */
@@ -628,6 +632,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
                                 const struct iovec *iv, unsigned long total_len,
                                 size_t count, int noblock)
  {
+       int good_linear = SKB_MAX_HEAD(NET_IP_ALIGN);
         struct sk_buff *skb;
         struct macvlan_dev *vlan;
         unsigned long len = total_len;
@@ -670,6 +675,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
  
         if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) {
                 copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN;
+               if (copylen > good_linear)
+                       copylen = good_linear;
                 linear = copylen;
                 if (iov_pages(iv, vnet_hdr_len + copylen, count)
                     <= MAX_SKB_FRAGS)
@@ -678,7 +685,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
  
         if (!zerocopy) {
                 copylen = len;
-               linear = vnet_hdr.hdr_len;
+               if (vnet_hdr.hdr_len > good_linear)
+                       linear = good_linear;
+               else
+                       linear = vnet_hdr.hdr_len;
         }
  
         skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen,
@@ -721,9 +731,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
                 skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
         }
         if (vlan) {
-               local_bh_disable();
-               macvlan_start_xmit(skb, vlan->dev);
-               local_bh_enable();
+               skb->dev = vlan->dev;
+               dev_queue_xmit(skb);
         } else {
                 kfree_skb(skb);
         }
@@ -738,7 +747,7 @@ err:
         rcu_read_lock();
         vlan = rcu_dereference(q->vlan);
         if (vlan)
-               vlan->dev->stats.tx_dropped++;
+               this_cpu_inc(vlan->pcpu_stats->tx_dropped);
         rcu_read_unlock();
  
         return err;
@@ -761,7 +770,6 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
                                 const struct sk_buff *skb,
                                 const struct iovec *iv, int len)
  {
-       struct macvlan_dev *vlan;
         int ret;
         int vnet_hdr_len = 0;
         int vlan_offset = 0;
@@ -773,9 +781,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
                 if ((len -= vnet_hdr_len) < 0)
                         return -EINVAL;
  
-               ret = macvtap_skb_to_vnet_hdr(skb, &vnet_hdr);
-               if (ret)
-                       return ret;
+               macvtap_skb_to_vnet_hdr(skb, &vnet_hdr);
  
                 if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, sizeof(vnet_hdr)))
                         return -EFAULT;
@@ -815,19 +821,10 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
         copied += len;
  
  done:
-       rcu_read_lock();
-       vlan = rcu_dereference(q->vlan);
-       if (vlan) {
-               preempt_disable();
-               macvlan_count_rx(vlan, copied - vnet_hdr_len, ret == 0, 0);
-               preempt_enable();
-       }
-       rcu_read_unlock();
-
         return ret ? ret : copied;
  }
  
-static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
+static ssize_t macvtap_do_read(struct macvtap_queue *q,
                                const struct iovec *iv, unsigned long len,
                                int noblock)
  {
@@ -878,8 +875,10 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
                 goto out;
         }
  
-       ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
+       ret = macvtap_do_read(q, iv, len, file->f_flags & O_NONBLOCK);
         ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
+       if (ret > 0)
+               iocb->ki_pos = ret;
  out:
         return ret;
  }
@@ -1110,7 +1109,7 @@ static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,
         int ret;
         if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
                 return -EINVAL;
-       ret = macvtap_do_read(q, iocb, m->msg_iov, total_len,
+       ret = macvtap_do_read(q, m->msg_iov, total_len,
                           flags & MSG_DONTWAIT);
         if (ret > total_len) {
                 m->msg_flags |= MSG_TRUNC;