tun: fix carrier on/off status
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index fbd106edbe59bdcae871e29d6fd70c993634cdea..ffdb84474c47a52ffa393966b5430fcf35fc7137 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -109,11 +109,11 @@ struct tap_filter {
        unsigned char   addr[FLT_EXACT_COUNT][ETH_ALEN];
 };
 
-/* 1024 is probably a high enough limit: modern hypervisors seem to support on
- * the order of 100-200 CPUs so this leaves us some breathing space if we want
- * to match a queue per guest CPU.
- */
-#define MAX_TAP_QUEUES 1024
+/* DEFAULT_MAX_NUM_RSS_QUEUES was chosen so that the rx/tx queues allocated
+ * for the netdevice fit in one page, guaranteeing that the memory allocation
+ * succeeds. TODO: increase the limit. */
+#define MAX_TAP_QUEUES DEFAULT_MAX_NUM_RSS_QUEUES
+#define MAX_TAP_FLOWS  4096
 
 #define TUN_FLOW_EXPIRE (3 * HZ)
 
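Note on the new limits: MAX_TAP_QUEUES now tracks DEFAULT_MAX_NUM_RSS_QUEUES from include/linux/netdevice.h (8 at the time of this change), keeping the rx/tx queue arrays passed to alloc_netdev_mqs() comfortably inside one page, while MAX_TAP_FLOWS separately caps the flow entries counted by the new tun->flow_count. For context, the definition being reused is roughly:

/* Paraphrased from include/linux/netdevice.h; value current as of this series. */
#define DEFAULT_MAX_NUM_RSS_QUEUES	(8)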
@@ -185,6 +185,8 @@ struct tun_struct {
        unsigned long ageing_time;
        unsigned int numdisabled;
        struct list_head disabled;
+       void *security;
+       u32 flow_count;
 };
 
 static inline u32 tun_hashfn(u32 rxhash)
@@ -218,6 +220,7 @@ static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
                e->queue_index = queue_index;
                e->tun = tun;
                hlist_add_head_rcu(&e->hash_link, head);
+               ++tun->flow_count;
        }
        return e;
 }
@@ -228,6 +231,7 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
                  e->rxhash, e->queue_index);
        hlist_del_rcu(&e->hash_link);
        kfree_rcu(e, rcu);
+       --tun->flow_count;
 }
 
 static void tun_flow_flush(struct tun_struct *tun)
@@ -317,7 +321,8 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
                e->updated = jiffies;
        } else {
                spin_lock_bh(&tun->lock);
-               if (!tun_flow_find(head, rxhash))
+               if (!tun_flow_find(head, rxhash) &&
+                   tun->flow_count < MAX_TAP_FLOWS)
                        tun_flow_create(tun, head, rxhash, queue_index);
 
                if (!timer_pending(&tun->flow_gc_timer))
@@ -404,8 +409,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
        struct tun_struct *tun;
        struct net_device *dev;
 
-       tun = rcu_dereference_protected(tfile->tun,
-                                       lockdep_rtnl_is_held());
+       tun = rtnl_dereference(tfile->tun);
+
        if (tun) {
                u16 index = tfile->queue_index;
                BUG_ON(index >= tun->numqueues);
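Several hunks in this diff replace the open-coded rcu_dereference_protected(..., lockdep_rtnl_is_held()) pattern with rtnl_dereference(); the two are interchangeable here since every caller holds the RTNL lock. The helper, paraphrased from include/linux/rtnetlink.h:

/* Shorthand: the RTNL mutex is the update-side protection for this pointer. */
#define rtnl_dereference(p) \
	rcu_dereference_protected(p, lockdep_rtnl_is_held())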
@@ -414,8 +419,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
                rcu_assign_pointer(tun->tfiles[index],
                                   tun->tfiles[tun->numqueues - 1]);
                rcu_assign_pointer(tfile->tun, NULL);
-               ntfile = rcu_dereference_protected(tun->tfiles[index],
-                                                  lockdep_rtnl_is_held());
+               ntfile = rtnl_dereference(tun->tfiles[index]);
                ntfile->queue_index = index;
 
                --tun->numqueues;
@@ -429,14 +433,19 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
                /* Drop read queue */
                skb_queue_purge(&tfile->sk.sk_receive_queue);
                tun_set_real_num_queues(tun);
-       } else if (tfile->detached && clean)
+       } else if (tfile->detached && clean) {
                tun = tun_enable_queue(tfile);
+               sock_put(&tfile->sk);
+       }
 
        if (clean) {
-               if (tun && tun->numqueues == 0 && tun->numdisabled == 0 &&
-                   !(tun->flags & TUN_PERSIST))
-                       if (tun->dev->reg_state == NETREG_REGISTERED)
+               if (tun && tun->numqueues == 0 && tun->numdisabled == 0) {
+                       netif_carrier_off(tun->dev);
+
+                       if (!(tun->flags & TUN_PERSIST) &&
+                           tun->dev->reg_state == NETREG_REGISTERED)
                                unregister_netdevice(tun->dev);
+               }
 
                BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED,
                                 &tfile->socket.flags));
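This is half of the carrier fix named in the title: when the last queue goes away (numqueues and numdisabled both zero), __tun_detach() now turns the carrier off rather than leaving a persistent device looking link-up with nothing attached. A minimal userspace sketch, not part of the patch, to observe the state being toggled (the name "tap0" is an assumption for illustration; the attribute is only readable while the interface is administratively up):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/class/net/tap0/carrier", "r");
	char state;

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, " %c", &state) == 1)
		printf("carrier: %s\n", state == '1' ? "on" : "off");
	fclose(f);
	return 0;
}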
@@ -458,8 +467,7 @@ static void tun_detach_all(struct net_device *dev)
        int i, n = tun->numqueues;
 
        for (i = 0; i < n; i++) {
-               tfile = rcu_dereference_protected(tun->tfiles[i],
-                                                 lockdep_rtnl_is_held());
+               tfile = rtnl_dereference(tun->tfiles[i]);
                BUG_ON(!tfile);
                wake_up_all(&tfile->wq.wait);
                rcu_assign_pointer(tfile->tun, NULL);
@@ -469,8 +477,7 @@ static void tun_detach_all(struct net_device *dev)
 
        synchronize_net();
        for (i = 0; i < n; i++) {
-               tfile = rcu_dereference_protected(tun->tfiles[i],
-                                                 lockdep_rtnl_is_held());
+               tfile = rtnl_dereference(tun->tfiles[i]);
                /* Drop read queue */
                skb_queue_purge(&tfile->sk.sk_receive_queue);
                sock_put(&tfile->sk);
@@ -481,6 +488,9 @@ static void tun_detach_all(struct net_device *dev)
                sock_put(&tfile->sk);
        }
        BUG_ON(tun->numdisabled != 0);
+
+       if (tun->flags & TUN_PERSIST)
+               module_put(THIS_MODULE);
 }
 
 static int tun_attach(struct tun_struct *tun, struct file *file)
@@ -488,8 +498,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
        struct tun_file *tfile = file->private_data;
        int err;
 
+       err = security_tun_dev_attach(tfile->socket.sk, tun->security);
+       if (err < 0)
+               goto out;
+
        err = -EINVAL;
-       if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held()))
+       if (rtnl_dereference(tfile->tun))
                goto out;
 
        err = -EBUSY;
@@ -1371,6 +1385,7 @@ static void tun_free_netdev(struct net_device *dev)
 
        BUG_ON(!(list_empty(&tun->disabled)));
        tun_flow_uninit(tun);
+       security_tun_dev_free_security(tun->security);
        free_netdev(dev);
 }
 
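The void *security field added to struct tun_struct is managed by the LSM hook family used across this diff. Their shapes, inferred from the call sites here (a paraphrase, not a quote of include/linux/security.h):

struct sock;

int  security_tun_dev_alloc_security(void **security);
void security_tun_dev_free_security(void *security);
int  security_tun_dev_open(void *security);
int  security_tun_dev_attach(struct sock *sk, void *security);
int  security_tun_dev_attach_queue(void *security);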
@@ -1544,6 +1559,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
        struct net_device *dev;
        int err;
 
+       if (tfile->detached)
+               return -EINVAL;
+
        dev = __dev_get_by_name(net, ifr->ifr_name);
        if (dev) {
                if (ifr->ifr_flags & IFF_TUN_EXCL)
@@ -1557,7 +1575,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
                if (tun_not_capable(tun))
                        return -EPERM;
-               err = security_tun_dev_attach(tfile->socket.sk);
+               err = security_tun_dev_open(tun->security);
                if (err < 0)
                        return err;
 
@@ -1572,6 +1590,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
        else {
                char *name;
                unsigned long flags = 0;
+               int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
+                            MAX_TAP_QUEUES : 1;
 
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EPERM;
@@ -1595,8 +1615,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                        name = ifr->ifr_name;
 
                dev = alloc_netdev_mqs(sizeof(struct tun_struct), name,
-                                      tun_setup,
-                                      MAX_TAP_QUEUES, MAX_TAP_QUEUES);
+                                      tun_setup, queues, queues);
+
                if (!dev)
                        return -ENOMEM;
 
@@ -1614,7 +1634,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
                spin_lock_init(&tun->lock);
 
-               security_tun_dev_post_create(&tfile->sk);
+               err = security_tun_dev_alloc_security(&tun->security);
+               if (err < 0)
+                       goto err_free_dev;
 
                tun_net_init(dev);
 
@@ -1639,10 +1661,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
                    device_create_file(&tun->dev->dev, &dev_attr_owner) ||
                    device_create_file(&tun->dev->dev, &dev_attr_group))
                        pr_err("Failed to create tun sysfs files\n");
-
-               netif_carrier_on(tun->dev);
        }
 
+       netif_carrier_on(tun->dev);
+
        tun_debug(KERN_INFO, tun, "tun_set_iff\n");
 
        if (ifr->ifr_flags & IFF_NO_PI)
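Two changes meet in the tun_set_iff() hunks: the netdevice is sized for MAX_TAP_QUEUES tx/rx queues only when the caller requests IFF_MULTI_QUEUE, and netif_carrier_on() now runs for reattached and persistent devices as well, not only at first creation, completing the carrier fix. A hedged userspace sketch of taking the multi-queue path (the helper name and the device name passed in are illustrative assumptions):

/* Sketch only: open one queue of a multi-queue tap.  Calling it again with
 * the same name attaches an additional queue to the same device. */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/if.h>
#include <linux/if_tun.h>

static int open_tap_queue(const char *name)
{
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	if (fd < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);

	if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}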
@@ -1738,8 +1760,7 @@ static void tun_detach_filter(struct tun_struct *tun, int n)
        struct tun_file *tfile;
 
        for (i = 0; i < n; i++) {
-               tfile = rcu_dereference_protected(tun->tfiles[i],
-                                                 lockdep_rtnl_is_held());
+               tfile = rtnl_dereference(tun->tfiles[i]);
                sk_detach_filter(tfile->socket.sk);
        }
 
@@ -1752,8 +1773,7 @@ static int tun_attach_filter(struct tun_struct *tun)
        struct tun_file *tfile;
 
        for (i = 0; i < tun->numqueues; i++) {
-               tfile = rcu_dereference_protected(tun->tfiles[i],
-                                                 lockdep_rtnl_is_held());
+               tfile = rtnl_dereference(tun->tfiles[i]);
                ret = sk_attach_filter(&tun->fprog, tfile->socket.sk);
                if (ret) {
                        tun_detach_filter(tun, i);
@@ -1771,8 +1791,7 @@ static void tun_set_sndbuf(struct tun_struct *tun)
        int i;
 
        for (i = 0; i < tun->numqueues; i++) {
-               tfile = rcu_dereference_protected(tun->tfiles[i],
-                                               lockdep_rtnl_is_held());
+               tfile = rtnl_dereference(tun->tfiles[i]);
                tfile->socket.sk->sk_sndbuf = tun->sndbuf;
        }
 }
@@ -1787,15 +1806,16 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
 
        if (ifr->ifr_flags & IFF_ATTACH_QUEUE) {
                tun = tfile->detached;
-               if (!tun)
+               if (!tun) {
                        ret = -EINVAL;
-               else if (tun_not_capable(tun))
-                       ret = -EPERM;
-               else
-                       ret = tun_attach(tun, file);
+                       goto unlock;
+               }
+               ret = security_tun_dev_attach_queue(tun->security);
+               if (ret < 0)
+                       goto unlock;
+               ret = tun_attach(tun, file);
        } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
-               tun = rcu_dereference_protected(tfile->tun,
-                                               lockdep_rtnl_is_held());
+               tun = rtnl_dereference(tfile->tun);
                if (!tun || !(tun->flags & TUN_TAP_MQ))
                        ret = -EINVAL;
                else
@@ -1803,6 +1823,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
        } else
                ret = -EINVAL;
 
+unlock:
        rtnl_unlock();
        return ret;
 }
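tun_set_queue() services TUNSETQUEUE; the rewrite routes queue re-attachment through the new security_tun_dev_attach_queue() hook and adds the unlock label shared by the error paths. A hedged sketch of the userspace side it serves (the helper name is an assumption):

/* Sketch only: park (detach) or re-attach a queue fd that was opened with
 * IFF_MULTI_QUEUE, exercising tun_set_queue() above. */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>

static int tap_set_queue(int queue_fd, int attach)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = attach ? IFF_ATTACH_QUEUE : IFF_DETACH_QUEUE;
	return ioctl(queue_fd, TUNSETQUEUE, &ifr);
}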
@@ -1880,10 +1901,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
                /* Disable/Enable persist mode. Keep an extra reference to the
                 * module to prevent the module being unprobed.
                 */
-               if (arg) {
+               if (arg && !(tun->flags & TUN_PERSIST)) {
                        tun->flags |= TUN_PERSIST;
                        __module_get(THIS_MODULE);
-               } else {
+               }
+               if (!arg && (tun->flags & TUN_PERSIST)) {
                        tun->flags &= ~TUN_PERSIST;
                        module_put(THIS_MODULE);
                }
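The final hunk makes TUNSETPERSIST idempotent with respect to the module reference count: __module_get()/module_put() now fire only when the persist flag actually changes, and the tun_detach_all() hunk above drops the reference when a persistent device is finally torn down. A hedged sketch of the ioctl being guarded (the helper name is an assumption):

/* Sketch only: toggle persistence on a tun/tap fd.  After this change,
 * repeating the call with the same value no longer skews the module
 * reference count. */
#include <sys/ioctl.h>
#include <linux/if_tun.h>

static int tap_set_persist(int tun_fd, int on)
{
	return ioctl(tun_fd, TUNSETPERSIST, on ? 1 : 0);
}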