dev: introduce skb_scrub_packet()
net/core/dev.c
index 13e6447f03987b49b32695bec2f4e50684e78115..370354a9c5f6926e977ce4374541bc4b6e2e5dac 100644
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
 #include <linux/static_key.h>
+#include <linux/hashtable.h>
+#include <linux/vmalloc.h>
 
 #include "net-sysfs.h"
 
@@ -166,6 +168,12 @@ static struct list_head offload_base __read_mostly;
 DEFINE_RWLOCK(dev_base_lock);
 EXPORT_SYMBOL(dev_base_lock);
 
+/* protects napi_hash addition/deletion and napi_gen_id */
+static DEFINE_SPINLOCK(napi_hash_lock);
+
+static unsigned int napi_gen_id;
+static DEFINE_HASHTABLE(napi_hash, 8);
+
 seqcount_t devnet_rename_seq;
 
 static inline void dev_base_seq_inc(struct net *net)
@@ -200,7 +208,7 @@ static inline void rps_unlock(struct softnet_data *sd)
 }
 
 /* Device list insertion */
-static int list_netdevice(struct net_device *dev)
+static void list_netdevice(struct net_device *dev)
 {
        struct net *net = dev_net(dev);
 
@@ -214,8 +222,6 @@ static int list_netdevice(struct net_device *dev)
        write_unlock_bh(&dev_base_lock);
 
        dev_base_seq_inc(net);
-
-       return 0;
 }
 
 /* Device list removal
@@ -1200,9 +1206,7 @@ static int __dev_open(struct net_device *dev)
         * If we don't do this there is a chance ndo_poll_controller
         * or ndo_poll may be running while we open the device
         */
-       ret = netpoll_rx_disable(dev);
-       if (ret)
-               return ret;
+       netpoll_rx_disable(dev);
 
        ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
        ret = notifier_to_errno(ret);
@@ -1311,9 +1315,7 @@ static int __dev_close(struct net_device *dev)
        LIST_HEAD(single);
 
        /* Temporarily disable netpoll until the interface is down */
-       retval = netpoll_rx_disable(dev);
-       if (retval)
-               return retval;
+       netpoll_rx_disable(dev);
 
        list_add(&dev->unreg_list, &single);
        retval = __dev_close_many(&single);
@@ -1355,14 +1357,11 @@ static int dev_close_many(struct list_head *head)
  */
 int dev_close(struct net_device *dev)
 {
-       int ret = 0;
        if (dev->flags & IFF_UP) {
                LIST_HEAD(single);
 
                /* Block netpoll rx while the interface is going down */
-               ret = netpoll_rx_disable(dev);
-               if (ret)
-                       return ret;
+               netpoll_rx_disable(dev);
 
                list_add(&dev->unreg_list, &single);
                dev_close_many(&single);
@@ -1370,7 +1369,7 @@ int dev_close(struct net_device *dev)
 
                netpoll_rx_enable(dev);
        }
-       return ret;
+       return 0;
 }
 EXPORT_SYMBOL(dev_close);
 
@@ -1400,6 +1399,14 @@ void dev_disable_lro(struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_disable_lro);
 
+static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
+                                  struct net_device *dev)
+{
+       struct netdev_notifier_info info;
+
+       netdev_notifier_info_init(&info, dev);
+       return nb->notifier_call(nb, val, &info);
+}
 
 static int dev_boot_phase = 1;
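The call_netdevice_notifier() wrapper above packs the net_device into a
struct netdev_notifier_info before invoking the callback, so notifier chains
no longer receive a bare net_device pointer. A minimal sketch of a callback
written against that convention follows; it assumes the
netdev_notifier_info_to_dev() accessor introduced alongside this change (not
visible in this diff), and the callback itself is a hypothetical example, not
part of the patch:

	static int example_netdev_event(struct notifier_block *nb,
					unsigned long event, void *ptr)
	{
		/* recover the device from the info struct instead of casting ptr */
		struct net_device *dev = netdev_notifier_info_to_dev(ptr);

		if (event == NETDEV_UP)
			pr_info("%s is up\n", dev->name);

		return NOTIFY_DONE;
	}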
 
@@ -1432,7 +1439,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
                goto unlock;
        for_each_net(net) {
                for_each_netdev(net, dev) {
-                       err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
+                       err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
                        err = notifier_to_errno(err);
                        if (err)
                                goto rollback;
@@ -1440,7 +1447,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
                        if (!(dev->flags & IFF_UP))
                                continue;
 
-                       nb->notifier_call(nb, NETDEV_UP, dev);
+                       call_netdevice_notifier(nb, NETDEV_UP, dev);
                }
        }
 
@@ -1456,10 +1463,11 @@ rollback:
                                goto outroll;
 
                        if (dev->flags & IFF_UP) {
-                               nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
-                               nb->notifier_call(nb, NETDEV_DOWN, dev);
+                               call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
+                                                       dev);
+                               call_netdevice_notifier(nb, NETDEV_DOWN, dev);
                        }
-                       nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+                       call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
                }
        }
 
@@ -1497,10 +1505,11 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
        for_each_net(net) {
                for_each_netdev(net, dev) {
                        if (dev->flags & IFF_UP) {
-                               nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
-                               nb->notifier_call(nb, NETDEV_DOWN, dev);
+                               call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
+                                                       dev);
+                               call_netdevice_notifier(nb, NETDEV_DOWN, dev);
                        }
-                       nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+                       call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
                }
        }
 unlock:
@@ -1509,6 +1518,25 @@ unlock:
 }
 EXPORT_SYMBOL(unregister_netdevice_notifier);
 
+/**
+ *     call_netdevice_notifiers_info - call all network notifier blocks
+ *     @val: value passed unmodified to notifier function
+ *     @dev: net_device pointer passed unmodified to notifier function
+ *     @info: notifier information data
+ *
+ *     Call all network notifier blocks.  Parameters and return value
+ *     are as for raw_notifier_call_chain().
+ */
+
+int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
+                                 struct netdev_notifier_info *info)
+{
+       ASSERT_RTNL();
+       netdev_notifier_info_init(info, dev);
+       return raw_notifier_call_chain(&netdev_chain, val, info);
+}
+EXPORT_SYMBOL(call_netdevice_notifiers_info);
+
 /**
  *     call_netdevice_notifiers - call all network notifier blocks
  *      @val: value passed unmodified to notifier function
@@ -1520,8 +1548,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
 
 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 {
-       ASSERT_RTNL();
-       return raw_notifier_call_chain(&netdev_chain, val, dev);
+       struct netdev_notifier_info info;
+
+       return call_netdevice_notifiers_info(val, dev, &info);
 }
 EXPORT_SYMBOL(call_netdevice_notifiers);
 
@@ -1623,22 +1652,13 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
                }
        }
 
-       skb_orphan(skb);
-
        if (unlikely(!is_skb_forwardable(dev, skb))) {
                atomic_long_inc(&dev->rx_dropped);
                kfree_skb(skb);
                return NET_RX_DROP;
        }
-       skb->skb_iif = 0;
-       skb->dev = dev;
-       skb_dst_drop(skb);
-       skb->tstamp.tv64 = 0;
-       skb->pkt_type = PACKET_HOST;
+       skb_scrub_packet(skb);
        skb->protocol = eth_type_trans(skb, dev);
-       skb->mark = 0;
-       secpath_reset(skb);
-       nf_reset(skb);
        return netif_rx(skb);
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
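dev_forward_skb() now delegates the per-packet state resets to
skb_scrub_packet(), the helper this commit introduces in net/core/skbuff.c
(not shown in this blobdiff). Judging from the lines removed above, its body
presumably consolidates roughly the following resets; treat this as a sketch
rather than the exact implementation. skb->dev itself is still assigned by
the eth_type_trans() call that remains in dev_forward_skb(), so it is not
part of the scrub:

	void skb_scrub_packet(struct sk_buff *skb)
	{
		skb_orphan(skb);		/* drop the owning socket reference */
		skb->tstamp.tv64 = 0;
		skb->pkt_type = PACKET_HOST;
		skb->skb_iif = 0;
		skb->mark = 0;
		skb_dst_drop(skb);
		secpath_reset(skb);
		nf_reset(skb);
	}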
@@ -1703,7 +1723,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
                        skb_reset_mac_header(skb2);
 
                        if (skb_network_header(skb2) < skb2->data ||
-                           skb2->network_header > skb2->tail) {
+                           skb_network_header(skb2) > skb_tail_pointer(skb2)) {
                                net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
                                                     ntohs(skb2->protocol),
                                                     dev->name);
@@ -2147,6 +2167,9 @@ static void skb_warn_bad_offload(const struct sk_buff *skb)
        struct net_device *dev = skb->dev;
        const char *driver = "";
 
+       if (!net_ratelimit())
+               return;
+
        if (dev && dev->dev.parent)
                driver = dev_driver_string(dev->dev.parent);
 
@@ -2206,30 +2229,51 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
-/**
- *     skb_mac_gso_segment - mac layer segmentation handler.
- *     @skb: buffer to segment
- *     @features: features for the output path (see dev->features)
- */
-struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
-                                   netdev_features_t features)
+__be16 skb_network_protocol(struct sk_buff *skb)
 {
-       struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
-       struct packet_offload *ptype;
        __be16 type = skb->protocol;
        int vlan_depth = ETH_HLEN;
 
-       while (type == htons(ETH_P_8021Q)) {
+       /* Tunnel gso handlers can set protocol to ethernet. */
+       if (type == htons(ETH_P_TEB)) {
+               struct ethhdr *eth;
+
+               if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
+                       return 0;
+
+               eth = (struct ethhdr *)skb_mac_header(skb);
+               type = eth->h_proto;
+       }
+
+       while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vh;
 
                if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
-                       return ERR_PTR(-EINVAL);
+                       return 0;
 
                vh = (struct vlan_hdr *)(skb->data + vlan_depth);
                type = vh->h_vlan_encapsulated_proto;
                vlan_depth += VLAN_HLEN;
        }
 
+       return type;
+}
+
+/**
+ *     skb_mac_gso_segment - mac layer segmentation handler.
+ *     @skb: buffer to segment
+ *     @features: features for the output path (see dev->features)
+ */
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+                                   netdev_features_t features)
+{
+       struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+       struct packet_offload *ptype;
+       __be16 type = skb_network_protocol(skb);
+
+       if (unlikely(!type))
+               return ERR_PTR(-EINVAL);
+
        __skb_pull(skb, skb->mac_len);
 
        rcu_read_lock();
@@ -2396,24 +2440,12 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
        return 0;
 }
 
-static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
-{
-       return ((features & NETIF_F_GEN_CSUM) ||
-               ((features & NETIF_F_V4_CSUM) &&
-                protocol == htons(ETH_P_IP)) ||
-               ((features & NETIF_F_V6_CSUM) &&
-                protocol == htons(ETH_P_IPV6)) ||
-               ((features & NETIF_F_FCOE_CRC) &&
-                protocol == htons(ETH_P_FCOE)));
-}
-
 static netdev_features_t harmonize_features(struct sk_buff *skb,
        __be16 protocol, netdev_features_t features)
 {
        if (skb->ip_summed != CHECKSUM_NONE &&
            !can_checksum_protocol(features, protocol)) {
                features &= ~NETIF_F_ALL_CSUM;
-               features &= ~NETIF_F_SG;
        } else if (illegal_highdma(skb->dev, skb)) {
                features &= ~NETIF_F_SG;
        }
@@ -2429,20 +2461,22 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
        if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
                features &= ~NETIF_F_GSO_MASK;
 
-       if (protocol == htons(ETH_P_8021Q)) {
+       if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
                struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
                protocol = veh->h_vlan_encapsulated_proto;
        } else if (!vlan_tx_tag_present(skb)) {
                return harmonize_features(skb, protocol, features);
        }
 
-       features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
+       features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+                                              NETIF_F_HW_VLAN_STAG_TX);
 
-       if (protocol != htons(ETH_P_8021Q)) {
+       if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) {
                return harmonize_features(skb, protocol, features);
        } else {
                features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
-                               NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
+                               NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
+                               NETIF_F_HW_VLAN_STAG_TX;
                return harmonize_features(skb, protocol, features);
        }
 }
@@ -2454,7 +2488,7 @@ EXPORT_SYMBOL(netif_skb_features);
  *     2. skb is fragmented and the device does not support SG.
  */
 static inline int skb_needs_linearize(struct sk_buff *skb,
-                                     int features)
+                                     netdev_features_t features)
 {
        return skb_is_nonlinear(skb) &&
                        ((skb_has_frag_list(skb) &&
@@ -2483,8 +2517,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                features = netif_skb_features(skb);
 
                if (vlan_tx_tag_present(skb) &&
-                   !(features & NETIF_F_HW_VLAN_TX)) {
-                       skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+                   !vlan_hw_offload_capable(features, skb->vlan_proto)) {
+                       skb = __vlan_put_tag(skb, skb->vlan_proto,
+                                            vlan_tx_tag_get(skb));
                        if (unlikely(!skb))
                                goto out;
 
@@ -2543,13 +2578,6 @@ gso:
                skb->next = nskb->next;
                nskb->next = NULL;
 
-               /*
-                * If device doesn't need nskb->dst, release it right now while
-                * its hot in this cpu cache
-                */
-               if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
-                       skb_dst_drop(nskb);
-
                if (!list_empty(&ptype_all))
                        dev_queue_xmit_nit(nskb, dev);
 
@@ -2569,8 +2597,11 @@ gso:
        } while (skb->next);
 
 out_kfree_gso_skb:
-       if (likely(skb->next == NULL))
+       if (likely(skb->next == NULL)) {
                skb->destructor = DEV_GSO_CB(skb)->destructor;
+               consume_skb(skb);
+               return rc;
+       }
 out_kfree_skb:
        kfree_skb(skb);
 out:
@@ -2588,6 +2619,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
         */
        if (shinfo->gso_size)  {
                unsigned int hdr_len;
+               u16 gso_segs = shinfo->gso_segs;
 
                /* mac layer + network layer */
                hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
@@ -2597,7 +2629,12 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
                        hdr_len += tcp_hdrlen(skb);
                else
                        hdr_len += sizeof(struct udphdr);
-               qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len;
+
+               if (shinfo->gso_type & SKB_GSO_DODGY)
+                       gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
+                                               shinfo->gso_size);
+
+               qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
        }
 }
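For untrusted GSO packets (SKB_GSO_DODGY, e.g. frames handed in by a virtio
guest) gso_segs cannot be trusted, so qdisc_pkt_len_init() now recomputes it
from the payload length. A worked example with assumed numbers:

	/* Assumed values, for illustration only:
	 *   skb->len = 65226, hdr_len = 66 (MAC + IP + TCP), gso_size = 1448
	 *
	 *   gso_segs = DIV_ROUND_UP(65226 - 66, 1448) = 45
	 *   pkt_len += (45 - 1) * 66 = 2904 bytes of replicated headers
	 */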
 
@@ -3049,6 +3086,46 @@ static int rps_ipi_queued(struct softnet_data *sd)
        return 0;
 }
 
+#ifdef CONFIG_NET_FLOW_LIMIT
+int netdev_flow_limit_table_len __read_mostly = (1 << 12);
+#endif
+
+static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
+{
+#ifdef CONFIG_NET_FLOW_LIMIT
+       struct sd_flow_limit *fl;
+       struct softnet_data *sd;
+       unsigned int old_flow, new_flow;
+
+       if (qlen < (netdev_max_backlog >> 1))
+               return false;
+
+       sd = &__get_cpu_var(softnet_data);
+
+       rcu_read_lock();
+       fl = rcu_dereference(sd->flow_limit);
+       if (fl) {
+               new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
+               old_flow = fl->history[fl->history_head];
+               fl->history[fl->history_head] = new_flow;
+
+               fl->history_head++;
+               fl->history_head &= FLOW_LIMIT_HISTORY - 1;
+
+               if (likely(fl->buckets[old_flow]))
+                       fl->buckets[old_flow]--;
+
+               if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
+                       fl->count++;
+                       rcu_read_unlock();
+                       return true;
+               }
+       }
+       rcu_read_unlock();
+#endif
+       return false;
+}
+
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
  * queue (may be a remote CPU queue).
@@ -3058,13 +3135,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 {
        struct softnet_data *sd;
        unsigned long flags;
+       unsigned int qlen;
 
        sd = &per_cpu(softnet_data, cpu);
 
        local_irq_save(flags);
 
        rps_lock(sd);
-       if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+       qlen = skb_queue_len(&sd->input_pkt_queue);
+       if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
                if (skb_queue_len(&sd->input_pkt_queue)) {
 enqueue:
                        __skb_queue_tail(&sd->input_pkt_queue, skb);
@@ -3325,7 +3404,7 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
  *     netdev_rx_handler_unregister - unregister receive handler
  *     @dev: device to unregister a handler from
  *
- *     Unregister a receive hander from a device.
+ *     Unregister a receive handler from a device.
  *
  *     The caller must hold the rtnl_mutex.
  */
@@ -3354,6 +3433,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
        case __constant_htons(ETH_P_IP):
        case __constant_htons(ETH_P_IPV6):
        case __constant_htons(ETH_P_8021Q):
+       case __constant_htons(ETH_P_8021AD):
                return true;
        default:
                return false;
@@ -3394,7 +3474,8 @@ another_round:
 
        __this_cpu_inc(softnet_data.processed);
 
-       if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+       if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
+           skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
                skb = vlan_untag(skb);
                if (unlikely(!skb))
                        goto unlock;
@@ -3810,7 +3891,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
        NAPI_GRO_CB(skb)->frag0 = NULL;
        NAPI_GRO_CB(skb)->frag0_len = 0;
 
-       if (skb->mac_header == skb->tail &&
+       if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
            pinfo->nr_frags &&
            !PageHighMem(skb_frag_page(frag0))) {
                NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
@@ -4054,6 +4135,58 @@ void napi_complete(struct napi_struct *n)
 }
 EXPORT_SYMBOL(napi_complete);
 
+/* must be called under rcu_read_lock(), as we dont take a reference */
+struct napi_struct *napi_by_id(unsigned int napi_id)
+{
+       unsigned int hash = napi_id % HASH_SIZE(napi_hash);
+       struct napi_struct *napi;
+
+       hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
+               if (napi->napi_id == napi_id)
+                       return napi;
+
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(napi_by_id);
+
+void napi_hash_add(struct napi_struct *napi)
+{
+       if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
+
+               spin_lock(&napi_hash_lock);
+
+               /* 0 is not a valid id, we also skip an id that is taken
+                * we expect both events to be extremely rare
+                */
+               napi->napi_id = 0;
+               while (!napi->napi_id) {
+                       napi->napi_id = ++napi_gen_id;
+                       if (napi_by_id(napi->napi_id))
+                               napi->napi_id = 0;
+               }
+
+               hlist_add_head_rcu(&napi->napi_hash_node,
+                       &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+
+               spin_unlock(&napi_hash_lock);
+       }
+}
+EXPORT_SYMBOL_GPL(napi_hash_add);
+
+/* Warning : caller is responsible to make sure rcu grace period
+ * is respected before freeing memory containing @napi
+ */
+void napi_hash_del(struct napi_struct *napi)
+{
+       spin_lock(&napi_hash_lock);
+
+       if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
+               hlist_del_rcu(&napi->napi_hash_node);
+
+       spin_unlock(&napi_hash_lock);
+}
+EXPORT_SYMBOL_GPL(napi_hash_del);
+
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
                    int (*poll)(struct napi_struct *, int), int weight)
 {
@@ -4062,6 +4195,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
        napi->gro_list = NULL;
        napi->skb = NULL;
        napi->poll = poll;
+       if (weight > NAPI_POLL_WEIGHT)
+               pr_err_once("netif_napi_add() called with weight %d on device %s\n",
+                           weight, dev->name);
        napi->weight = weight;
        list_add(&napi->dev_list, &dev->napi_list);
        napi->dev = dev;
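napi_hash_add()/napi_hash_del() give each NAPI context a system-wide napi_id
that napi_by_id() can resolve under RCU, groundwork for busy polling on a
socket's last napi_id; netif_napi_add() also now warns once about weights
above NAPI_POLL_WEIGHT. A hypothetical driver would be expected to use the
hash pair roughly as below; the surrounding structure is illustrative, the
only hard requirement stated above being an RCU grace period between
napi_hash_del() and freeing the memory that contains the napi_struct:

	/* hypothetical driver setup/teardown, for illustration only */
	netif_napi_add(netdev, &priv->napi, my_poll, NAPI_POLL_WEIGHT);
	napi_hash_add(&priv->napi);	/* now visible to napi_by_id() */
	...
	napi_hash_del(&priv->napi);	/* unhash first */
	netif_napi_del(&priv->napi);
	synchronize_rcu();		/* let napi_by_id() users finish */
	kfree(priv);			/* memory containing the napi_struct */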
@@ -4349,7 +4485,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
        else
                list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
        dev_hold(upper_dev);
-
+       call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
        return 0;
 }
 
@@ -4409,6 +4545,7 @@ void netdev_upper_dev_unlink(struct net_device *dev,
        list_del_rcu(&upper->list);
        dev_put(upper_dev);
        kfree_rcu(upper, rcu);
+       call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
@@ -4679,8 +4816,13 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
        }
 
        if (dev->flags & IFF_UP &&
-           (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
-               call_netdevice_notifiers(NETDEV_CHANGE, dev);
+           (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
+               struct netdev_notifier_change_info change_info;
+
+               change_info.flags_changed = changes;
+               call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+                                             &change_info.info);
+       }
 }
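NETDEV_CHANGE is now raised through call_netdevice_notifiers_info() with a
struct netdev_notifier_change_info, so listeners can see which flags actually
changed instead of re-reading dev->flags. A sketch of the consumer side,
assuming the change_info layout introduced in the same series (the generic
info as first member, followed by flags_changed); the callback is a
hypothetical example:

	static int example_change_event(struct notifier_block *nb,
					unsigned long event, void *ptr)
	{
		if (event == NETDEV_CHANGE) {
			struct netdev_notifier_change_info *ci = ptr;

			pr_info("%s: flags 0x%x changed\n",
				ci->info.dev->name, ci->flags_changed);
		}

		return NOTIFY_DONE;
	}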
 
 /**
@@ -4923,20 +5065,25 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
                features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
        }
 
-       /* Fix illegal SG+CSUM combinations. */
-       if ((features & NETIF_F_SG) &&
-           !(features & NETIF_F_ALL_CSUM)) {
-               netdev_dbg(dev,
-                       "Dropping NETIF_F_SG since no checksum feature.\n");
-               features &= ~NETIF_F_SG;
-       }
-
        /* TSO requires that SG is present as well. */
        if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
                netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
                features &= ~NETIF_F_ALL_TSO;
        }
 
+       if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
+                                       !(features & NETIF_F_IP_CSUM)) {
+               netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
+               features &= ~NETIF_F_TSO;
+               features &= ~NETIF_F_TSO_ECN;
+       }
+
+       if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
+                                        !(features & NETIF_F_IPV6_CSUM)) {
+               netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
+               features &= ~NETIF_F_TSO6;
+       }
+
        /* TSO ECN requires that TSO is present as well. */
        if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
                features &= ~NETIF_F_TSO_ECN;
@@ -5098,17 +5245,28 @@ static void netdev_init_one_queue(struct net_device *dev,
 #endif
 }
 
+static void netif_free_tx_queues(struct net_device *dev)
+{
+       if (is_vmalloc_addr(dev->_tx))
+               vfree(dev->_tx);
+       else
+               kfree(dev->_tx);
+}
+
 static int netif_alloc_netdev_queues(struct net_device *dev)
 {
        unsigned int count = dev->num_tx_queues;
        struct netdev_queue *tx;
+       size_t sz = count * sizeof(*tx);
 
-       BUG_ON(count < 1);
-
-       tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
-       if (!tx)
-               return -ENOMEM;
+       BUG_ON(count < 1 || count > 0xffff);
 
+       tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
+       if (!tx) {
+               tx = vzalloc(sz);
+               if (!tx)
+                       return -ENOMEM;
+       }
        dev->_tx = tx;
 
        netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
@@ -5167,7 +5325,8 @@ int register_netdevice(struct net_device *dev)
                }
        }
 
-       if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) &&
+       if (((dev->hw_features | dev->features) &
+            NETIF_F_HW_VLAN_CTAG_FILTER) &&
            (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
             !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
                netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
@@ -5204,6 +5363,14 @@ int register_netdevice(struct net_device *dev)
         */
        dev->vlan_features |= NETIF_F_HIGHDMA;
 
+       /* Make NETIF_F_SG inheritable to tunnel devices.
+        */
+       dev->hw_enc_features |= NETIF_F_SG;
+
+       /* Make NETIF_F_SG inheritable to MPLS.
+        */
+       dev->mpls_features |= NETIF_F_SG;
+
        ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
        ret = notifier_to_errno(ret);
        if (ret)
@@ -5647,7 +5814,7 @@ free_all:
 
 free_pcpu:
        free_percpu(dev->pcpu_refcnt);
-       kfree(dev->_tx);
+       netif_free_tx_queues(dev);
 #ifdef CONFIG_RPS
        kfree(dev->_rx);
 #endif
@@ -5672,7 +5839,7 @@ void free_netdev(struct net_device *dev)
 
        release_net(dev_net(dev));
 
-       kfree(dev->_tx);
+       netif_free_tx_queues(dev);
 #ifdef CONFIG_RPS
        kfree(dev->_rx);
 #endif
@@ -5983,7 +6150,7 @@ netdev_features_t netdev_increment_features(netdev_features_t all,
 }
 EXPORT_SYMBOL(netdev_increment_features);
 
-static struct hlist_head *netdev_create_hash(void)
+static struct hlist_head * __net_init netdev_create_hash(void)
 {
        int i;
        struct hlist_head *hash;
@@ -6239,6 +6406,10 @@ static int __init net_dev_init(void)
                sd->backlog.weight = weight_p;
                sd->backlog.gro_list = NULL;
                sd->backlog.gro_count = 0;
+
+#ifdef CONFIG_NET_FLOW_LIMIT
+               sd->flow_limit = NULL;
+#endif
        }
 
        dev_boot_phase = 0;