Pileus Git - ~andy/linux/blobdiff - drivers/net/bonding/bond_main.c
bonding: add bond_time_in_interval() and use it for time comparison
[~andy/linux] / drivers / net / bonding / bond_main.c
index ac60b697ab7ebc4c51744d223f7dd0a61ddc5c0b..d58237b3dd98d4c45d2f4b08796a6a3dfb24eb67 100644 (file)
@@ -77,6 +77,7 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/pkt_sched.h>
+#include <linux/rculist.h>
 #include "bonding.h"
 #include "bond_3ad.h"
 #include "bond_alb.h"
@@ -1037,7 +1038,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
                if (new_active)
                        bond_set_slave_active_flags(new_active);
        } else {
-               bond->curr_active_slave = new_active;
+               rcu_assign_pointer(bond->curr_active_slave, new_active);
        }
 
        if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
@@ -1127,7 +1128,7 @@ void bond_select_active_slave(struct bonding *bond)
  */
 static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
 {
-       list_add_tail(&new_slave->list, &bond->slave_list);
+       list_add_tail_rcu(&new_slave->list, &bond->slave_list);
        bond->slave_cnt++;
 }
 
@@ -1143,7 +1144,7 @@ static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
  */
 static void bond_detach_slave(struct bonding *bond, struct slave *slave)
 {
-       list_del(&slave->list);
+       list_del_rcu(&slave->list);
        bond->slave_cnt--;
 }
 
@@ -1751,7 +1752,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
                 * so we can change it without calling change_active_interface()
                 */
                if (!bond->curr_active_slave && new_slave->link == BOND_LINK_UP)
-                       bond->curr_active_slave = new_slave;
+                       rcu_assign_pointer(bond->curr_active_slave, new_slave);
 
                break;
        } /* switch(bond_mode) */
@@ -1951,7 +1952,7 @@ static int __bond_release_one(struct net_device *bond_dev,
        }
 
        if (all) {
-               bond->curr_active_slave = NULL;
+               rcu_assign_pointer(bond->curr_active_slave, NULL);
        } else if (oldcurrent == slave) {
                /*
                 * Note that we hold RTNL over this sequence, so there
@@ -1983,6 +1984,7 @@ static int __bond_release_one(struct net_device *bond_dev,
 
        write_unlock_bh(&bond->lock);
        unblock_netpoll_tx();
+       synchronize_rcu();
 
        if (list_empty(&bond->slave_list)) {
                call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev);
@@ -2633,6 +2635,20 @@ out_unlock:
        return RX_HANDLER_ANOTHER;
 }
 
+/* Return true if jiffies lies within the arp_interval time slice of last_act:
+ * (last_act - arp_interval) <= jiffies <= (last_act + mod * arp_interval +
+ * arp_interval/2). The extra arp_interval/2 is needed for really fast networks.
+ */
+static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
+                                 int mod)
+{
+       int delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
+
+       return time_in_range(jiffies,
+                            last_act - delta_in_ticks,
+                            last_act + mod * delta_in_ticks + delta_in_ticks/2);
+}
+
 /*
  * this function is called regularly to monitor each slave's link
  * ensuring that traffic is being sent and received when arp monitoring
@@ -2646,13 +2662,9 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
                                            arp_work.work);
        struct slave *slave, *oldcurrent;
        int do_failover = 0;
-       int delta_in_ticks, extra_ticks;
 
        read_lock(&bond->lock);
 
-       delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
-       extra_ticks = delta_in_ticks / 2;
-
        if (list_empty(&bond->slave_list))
                goto re_arm;
 
@@ -2669,12 +2681,8 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
                unsigned long trans_start = dev_trans_start(slave->dev);
 
                if (slave->link != BOND_LINK_UP) {
-                       if (time_in_range(jiffies,
-                               trans_start - delta_in_ticks,
-                               trans_start + delta_in_ticks + extra_ticks) &&
-                           time_in_range(jiffies,
-                               slave->dev->last_rx - delta_in_ticks,
-                               slave->dev->last_rx + delta_in_ticks + extra_ticks)) {
+                       if (bond_time_in_interval(bond, trans_start, 1) &&
+                           bond_time_in_interval(bond, slave->dev->last_rx, 1)) {
 
                                slave->link  = BOND_LINK_UP;
                                bond_set_active_slave(slave);
@@ -2702,12 +2710,8 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
                         * when the source ip is 0, so don't take the link down
                         * if we don't know our ip yet
                         */
-                       if (!time_in_range(jiffies,
-                               trans_start - delta_in_ticks,
-                               trans_start + 2 * delta_in_ticks + extra_ticks) ||
-                           !time_in_range(jiffies,
-                               slave->dev->last_rx - delta_in_ticks,
-                               slave->dev->last_rx + 2 * delta_in_ticks + extra_ticks)) {
+                       if (!bond_time_in_interval(bond, trans_start, 2) ||
+                           !bond_time_in_interval(bond, slave->dev->last_rx, 2)) {
 
                                slave->link  = BOND_LINK_DOWN;
                                bond_set_backup_slave(slave);
@@ -2747,7 +2751,8 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
 
 re_arm:
        if (bond->params.arp_interval)
-               queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
+               queue_delayed_work(bond->wq, &bond->arp_work,
+                                  msecs_to_jiffies(bond->params.arp_interval));
 
        read_unlock(&bond->lock);
 }
@@ -2760,32 +2765,21 @@ re_arm:
  *
  * Called with bond->lock held for read.
  */
-static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
+static int bond_ab_arp_inspect(struct bonding *bond)
 {
-       unsigned long trans_start;
+       unsigned long trans_start, last_rx;
        struct slave *slave;
-       int extra_ticks;
        int commit = 0;
 
-       /* All the time comparisons below need some extra time. Otherwise, on
-        * fast networks the ARP probe/reply may arrive within the same jiffy
-        * as it was sent.  Then, the next time the ARP monitor is run, one
-        * arp_interval will already have passed in the comparisons.
-        */
-       extra_ticks = delta_in_ticks / 2;
-
        bond_for_each_slave(bond, slave) {
                slave->new_link = BOND_LINK_NOCHANGE;
+               last_rx = slave_last_rx(bond, slave);
 
                if (slave->link != BOND_LINK_UP) {
-                       if (time_in_range(jiffies,
-                               slave_last_rx(bond, slave) - delta_in_ticks,
-                               slave_last_rx(bond, slave) + delta_in_ticks + extra_ticks)) {
-
+                       if (bond_time_in_interval(bond, last_rx, 1)) {
                                slave->new_link = BOND_LINK_UP;
                                commit++;
                        }
-
                        continue;
                }
 
@@ -2794,9 +2788,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
                 * active.  This avoids bouncing, as the last receive
                 * times need a full ARP monitor cycle to be updated.
                 */
-               if (time_in_range(jiffies,
-                                 slave->jiffies - delta_in_ticks,
-                                 slave->jiffies + 2 * delta_in_ticks + extra_ticks))
+               if (bond_time_in_interval(bond, slave->jiffies, 2))
                        continue;
 
                /*
@@ -2814,10 +2806,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
                 */
                if (!bond_is_active_slave(slave) &&
                    !bond->current_arp_slave &&
-                   !time_in_range(jiffies,
-                       slave_last_rx(bond, slave) - delta_in_ticks,
-                       slave_last_rx(bond, slave) + 3 * delta_in_ticks + extra_ticks)) {
-
+                   !bond_time_in_interval(bond, last_rx, 3)) {
                        slave->new_link = BOND_LINK_DOWN;
                        commit++;
                }
@@ -2830,13 +2819,8 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
                 */
                trans_start = dev_trans_start(slave->dev);
                if (bond_is_active_slave(slave) &&
-                   (!time_in_range(jiffies,
-                       trans_start - delta_in_ticks,
-                       trans_start + 2 * delta_in_ticks + extra_ticks) ||
-                    !time_in_range(jiffies,
-                       slave_last_rx(bond, slave) - delta_in_ticks,
-                       slave_last_rx(bond, slave) + 2 * delta_in_ticks + extra_ticks))) {
-
+                   (!bond_time_in_interval(bond, trans_start, 2) ||
+                    !bond_time_in_interval(bond, last_rx, 2))) {
                        slave->new_link = BOND_LINK_DOWN;
                        commit++;
                }
@@ -2851,7 +2835,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
  *
  * Called with RTNL and bond->lock for read.
  */
-static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
+static void bond_ab_arp_commit(struct bonding *bond)
 {
        unsigned long trans_start;
        struct slave *slave;
@@ -2863,11 +2847,9 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
 
                case BOND_LINK_UP:
                        trans_start = dev_trans_start(slave->dev);
-                       if ((!bond->curr_active_slave &&
-                            time_in_range(jiffies,
-                                          trans_start - delta_in_ticks,
-                                          trans_start + delta_in_ticks + delta_in_ticks / 2)) ||
-                           bond->curr_active_slave != slave) {
+                       if (bond->curr_active_slave != slave ||
+                           (!bond->curr_active_slave &&
+                            bond_time_in_interval(bond, trans_start, 1))) {
                                slave->link = BOND_LINK_UP;
                                if (bond->current_arp_slave) {
                                        bond_set_slave_inactive_flags(
@@ -3008,7 +2990,7 @@ void bond_activebackup_arp_mon(struct work_struct *work)
 
        should_notify_peers = bond_should_notify_peers(bond);
 
-       if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
+       if (bond_ab_arp_inspect(bond)) {
                read_unlock(&bond->lock);
 
                /* Race avoidance with bond_close flush of workqueue */
@@ -3021,7 +3003,7 @@ void bond_activebackup_arp_mon(struct work_struct *work)
 
                read_lock(&bond->lock);
 
-               bond_ab_arp_commit(bond, delta_in_ticks);
+               bond_ab_arp_commit(bond);
 
                read_unlock(&bond->lock);
                rtnl_unlock();
@@ -3628,11 +3610,17 @@ static int bond_neigh_init(struct neighbour *n)
  * The bonding ndo_neigh_setup is called at init time beofre any
  * slave exists. So we must declare proxy setup function which will
  * be used at run time to resolve the actual slave neigh param setup.
+ *
+ * It's also called by master devices (such as vlans) to set up their
+ * underlying devices. In that case do nothing; we're already set up from
+ * our init.
  */
 static int bond_neigh_setup(struct net_device *dev,
                            struct neigh_parms *parms)
 {
-       parms->neigh_setup   = bond_neigh_init;
+       /* modify only our neigh_parms */
+       if (parms->dev == dev)
+               parms->neigh_setup = bond_neigh_init;
 
        return 0;
 }
@@ -3795,12 +3783,50 @@ unwind:
        return res;
 }
 
+/**
+ * bond_xmit_slave_id - transmit skb through slave with slave_id
+ * @bond: bonding device that is transmitting
+ * @skb: buffer to transmit
+ * @slave_id: zero-based position (0..slave_cnt-1) of the preferred slave
+ *
+ * Tries the slave at position @slave_id first; if it cannot transmit, the
+ * following slaves are tried in list order and then the ones before it.
+ * The skb is consumed in all cases, thus the function is void.
+ */
+void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)
+{
+       struct slave *slave;
+       int i = slave_id;
+
+       /* First pass: skip slave_id slaves, then try each remaining one */
+       bond_for_each_slave_rcu(bond, slave) {
+               if (--i < 0) {
+                       if (slave_can_tx(slave)) {
+                               bond_dev_queue_xmit(bond, skb, slave->dev);
+                               return;
+                       }
+               }
+       }
+
+       /* Second pass: wrap around and try the first slave_id slaves */
+       i = slave_id;
+       bond_for_each_slave_rcu(bond, slave) {
+               if (--i < 0)
+                       break;
+               if (slave_can_tx(slave)) {
+                       bond_dev_queue_xmit(bond, skb, slave->dev);
+                       return;
+               }
+       }
+       /* no slave that can tx has been found, so drop the skb */
+       kfree_skb(skb);
+}
+
 static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev)
 {
        struct bonding *bond = netdev_priv(bond_dev);
-       struct slave *slave, *start_at;
-       int i, slave_no, res = 1;
        struct iphdr *iph = ip_hdr(skb);
+       struct slave *slave;
 
        /*
         * Start with the curr_active_slave that joined the bond as the
@@ -3809,46 +3835,20 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
         * send the join/membership reports.  The curr_active_slave found
         * will send all of this type of traffic.
         */
-       if ((iph->protocol == IPPROTO_IGMP) &&
-           (skb->protocol == htons(ETH_P_IP))) {
-               slave = bond->curr_active_slave;
-               if (!slave)
-                       goto out;
+       if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) {
+               slave = rcu_dereference(bond->curr_active_slave);
+               if (slave && slave_can_tx(slave))
+                       bond_dev_queue_xmit(bond, skb, slave->dev);
+               else
+                       bond_xmit_slave_id(bond, skb, 0);
        } else {
-               /*
-                * Concurrent TX may collide on rr_tx_counter; we accept
-                * that as being rare enough not to justify using an
-                * atomic op here.
-                */
-               slave_no = bond->rr_tx_counter++ % bond->slave_cnt;
-
-               bond_for_each_slave(bond, slave) {
-                       slave_no--;
-                       if (slave_no < 0)
-                               break;
-               }
-       }
-
-       start_at = slave;
-       bond_for_each_slave_from(bond, slave, i, start_at) {
-               if (IS_UP(slave->dev) &&
-                   (slave->link == BOND_LINK_UP) &&
-                   bond_is_active_slave(slave)) {
-                       res = bond_dev_queue_xmit(bond, skb, slave->dev);
-                       break;
-               }
-       }
-
-out:
-       if (res) {
-               /* no suitable interface, frame not sent */
-               kfree_skb(skb);
+               bond_xmit_slave_id(bond, skb,
+                                  bond->rr_tx_counter++ % bond->slave_cnt);
        }
 
        return NETDEV_TX_OK;
 }
 
-
 /*
  * in active-backup mode, we know that bond->curr_active_slave is always valid if
  * the bond has a usable interface.
@@ -3857,14 +3857,11 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
 {
        struct bonding *bond = netdev_priv(bond_dev);
        struct slave *slave;
-       int res = 1;
 
-       slave = bond->curr_active_slave;
+       slave = rcu_dereference(bond->curr_active_slave);
        if (slave)
-               res = bond_dev_queue_xmit(bond, skb, slave->dev);
-
-       if (res)
-               /* no suitable interface, frame not sent */
+               bond_dev_queue_xmit(bond, skb, slave->dev);
+       else
                kfree_skb(skb);
 
        return NETDEV_TX_OK;
@@ -3878,84 +3875,39 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
 static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
 {
        struct bonding *bond = netdev_priv(bond_dev);
-       struct slave *slave, *start_at;
-       int slave_no;
-       int i;
-       int res = 1;
 
-       slave_no = bond->xmit_hash_policy(skb, bond->slave_cnt);
-
-       bond_for_each_slave(bond, slave) {
-               slave_no--;
-               if (slave_no < 0)
-                       break;
-       }
-
-       start_at = slave;
-
-       bond_for_each_slave_from(bond, slave, i, start_at) {
-               if (IS_UP(slave->dev) &&
-                   (slave->link == BOND_LINK_UP) &&
-                   bond_is_active_slave(slave)) {
-                       res = bond_dev_queue_xmit(bond, skb, slave->dev);
-                       break;
-               }
-       }
-
-       if (res) {
-               /* no suitable interface, frame not sent */
-               kfree_skb(skb);
-       }
+       bond_xmit_slave_id(bond, skb,
+                          bond->xmit_hash_policy(skb, bond->slave_cnt));
 
        return NETDEV_TX_OK;
 }
 
-/*
- * in broadcast mode, we send everything to all usable interfaces.
- */
+/* in broadcast mode, we send everything to all usable interfaces. */
 static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
 {
        struct bonding *bond = netdev_priv(bond_dev);
-       struct slave *slave, *start_at;
-       struct net_device *tx_dev = NULL;
-       int i;
-       int res = 1;
-
-       start_at = bond->curr_active_slave;
-       if (!start_at)
-               goto out;
+       struct slave *slave = NULL;
 
-       bond_for_each_slave_from(bond, slave, i, start_at) {
-               if (IS_UP(slave->dev) &&
-                   (slave->link == BOND_LINK_UP) &&
-                   bond_is_active_slave(slave)) {
-                       if (tx_dev) {
-                               struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
-                               if (!skb2) {
-                                       pr_err("%s: Error: bond_xmit_broadcast(): skb_clone() failed\n",
-                                              bond_dev->name);
-                                       continue;
-                               }
+       bond_for_each_slave_rcu(bond, slave) {
+               if (bond_is_last_slave(bond, slave))
+                       break;
+               if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP) {
+                       struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
-                               res = bond_dev_queue_xmit(bond, skb2, tx_dev);
-                               if (res) {
-                                       kfree_skb(skb2);
-                                       continue;
-                               }
+                       if (!skb2) {
+                               pr_err("%s: Error: bond_xmit_broadcast(): skb_clone() failed\n",
+                                      bond_dev->name);
+                               continue;
                        }
-                       tx_dev = slave->dev;
+                       /* bond_dev_queue_xmit always returns 0 */
+                       bond_dev_queue_xmit(bond, skb2, slave->dev);
                }
        }
-
-       if (tx_dev)
-               res = bond_dev_queue_xmit(bond, skb, tx_dev);
-
-out:
-       if (res)
-               /* no suitable interface, frame not sent */
+       if (slave && IS_UP(slave->dev) && slave->link == BOND_LINK_UP)
+               bond_dev_queue_xmit(bond, skb, slave->dev);
+       else
                kfree_skb(skb);
 
-       /* frame sent to all suitable interfaces */
        return NETDEV_TX_OK;
 }
 
@@ -3991,7 +3943,7 @@ static inline int bond_slave_override(struct bonding *bond,
                return 1;
 
        /* Find out if any slaves have the same mapping as this skb. */
-       bond_for_each_slave(bond, check_slave) {
+       bond_for_each_slave_rcu(bond, check_slave) {
                if (check_slave->queue_id == skb->queue_mapping) {
                        slave = check_slave;
                        break;
@@ -4076,14 +4028,12 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
        if (is_netpoll_tx_blocked(dev))
                return NETDEV_TX_BUSY;
 
-       read_lock(&bond->lock);
-
+       rcu_read_lock();
        if (!list_empty(&bond->slave_list))
                ret = __bond_start_xmit(skb, dev);
        else
                kfree_skb(skb);
-
-       read_unlock(&bond->lock);
+       rcu_read_unlock();
 
        return ret;
 }