#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/pkt_sched.h>
+#include <linux/rculist.h>
#include "bonding.h"
#include "bond_3ad.h"
#include "bond_alb.h"
if (new_active)
bond_set_slave_active_flags(new_active);
} else {
- bond->curr_active_slave = new_active;
+ rcu_assign_pointer(bond->curr_active_slave, new_active);
}
if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
*/
static void bond_attach_slave(struct bonding *bond, struct slave *new_slave)
{
- list_add_tail(&new_slave->list, &bond->slave_list);
+ list_add_tail_rcu(&new_slave->list, &bond->slave_list);
bond->slave_cnt++;
}
*/
static void bond_detach_slave(struct bonding *bond, struct slave *slave)
{
- list_del(&slave->list);
+ list_del_rcu(&slave->list);
bond->slave_cnt--;
}
* so we can change it without calling change_active_interface()
*/
if (!bond->curr_active_slave && new_slave->link == BOND_LINK_UP)
- bond->curr_active_slave = new_slave;
+ rcu_assign_pointer(bond->curr_active_slave, new_slave);
break;
} /* switch(bond_mode) */
}
if (all) {
- bond->curr_active_slave = NULL;
+ rcu_assign_pointer(bond->curr_active_slave, NULL);
} else if (oldcurrent == slave) {
/*
* Note that we hold RTNL over this sequence, so there
write_unlock_bh(&bond->lock);
unblock_netpoll_tx();
+ synchronize_rcu();
if (list_empty(&bond->slave_list)) {
call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev);
return RX_HANDLER_ANOTHER;
}
+/* function to verify if we're in the arp_interval timeslice, returns true if
+ * (last_act - arp_interval) <= jiffies <= (last_act + mod * arp_interval +
+ * arp_interval/2) . the arp_interval/2 is needed for really fast networks.
+ */
+static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
+ int mod)
+{
+ int delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
+
+ return time_in_range(jiffies,
+ last_act - delta_in_ticks,
+ last_act + mod * delta_in_ticks + delta_in_ticks/2);
+}
+
/*
* this function is called regularly to monitor each slave's link
* ensuring that traffic is being sent and received when arp monitoring
arp_work.work);
struct slave *slave, *oldcurrent;
int do_failover = 0;
- int delta_in_ticks, extra_ticks;
read_lock(&bond->lock);
- delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
- extra_ticks = delta_in_ticks / 2;
-
if (list_empty(&bond->slave_list))
goto re_arm;
unsigned long trans_start = dev_trans_start(slave->dev);
if (slave->link != BOND_LINK_UP) {
- if (time_in_range(jiffies,
- trans_start - delta_in_ticks,
- trans_start + delta_in_ticks + extra_ticks) &&
- time_in_range(jiffies,
- slave->dev->last_rx - delta_in_ticks,
- slave->dev->last_rx + delta_in_ticks + extra_ticks)) {
+ if (bond_time_in_interval(bond, trans_start, 1) &&
+ bond_time_in_interval(bond, slave->dev->last_rx, 1)) {
slave->link = BOND_LINK_UP;
bond_set_active_slave(slave);
* when the source ip is 0, so don't take the link down
* if we don't know our ip yet
*/
- if (!time_in_range(jiffies,
- trans_start - delta_in_ticks,
- trans_start + 2 * delta_in_ticks + extra_ticks) ||
- !time_in_range(jiffies,
- slave->dev->last_rx - delta_in_ticks,
- slave->dev->last_rx + 2 * delta_in_ticks + extra_ticks)) {
+ if (!bond_time_in_interval(bond, trans_start, 2) ||
+ !bond_time_in_interval(bond, slave->dev->last_rx, 2)) {
slave->link = BOND_LINK_DOWN;
bond_set_backup_slave(slave);
re_arm:
if (bond->params.arp_interval)
- queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
+ queue_delayed_work(bond->wq, &bond->arp_work,
+ msecs_to_jiffies(bond->params.arp_interval));
read_unlock(&bond->lock);
}
*
* Called with bond->lock held for read.
*/
-static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
+static int bond_ab_arp_inspect(struct bonding *bond)
{
- unsigned long trans_start;
+ unsigned long trans_start, last_rx;
struct slave *slave;
- int extra_ticks;
int commit = 0;
- /* All the time comparisons below need some extra time. Otherwise, on
- * fast networks the ARP probe/reply may arrive within the same jiffy
- * as it was sent. Then, the next time the ARP monitor is run, one
- * arp_interval will already have passed in the comparisons.
- */
- extra_ticks = delta_in_ticks / 2;
-
bond_for_each_slave(bond, slave) {
slave->new_link = BOND_LINK_NOCHANGE;
+ last_rx = slave_last_rx(bond, slave);
if (slave->link != BOND_LINK_UP) {
- if (time_in_range(jiffies,
- slave_last_rx(bond, slave) - delta_in_ticks,
- slave_last_rx(bond, slave) + delta_in_ticks + extra_ticks)) {
-
+ if (bond_time_in_interval(bond, last_rx, 1)) {
slave->new_link = BOND_LINK_UP;
commit++;
}
-
continue;
}
* active. This avoids bouncing, as the last receive
* times need a full ARP monitor cycle to be updated.
*/
- if (time_in_range(jiffies,
- slave->jiffies - delta_in_ticks,
- slave->jiffies + 2 * delta_in_ticks + extra_ticks))
+ if (bond_time_in_interval(bond, slave->jiffies, 2))
continue;
/*
*/
if (!bond_is_active_slave(slave) &&
!bond->current_arp_slave &&
- !time_in_range(jiffies,
- slave_last_rx(bond, slave) - delta_in_ticks,
- slave_last_rx(bond, slave) + 3 * delta_in_ticks + extra_ticks)) {
-
+ !bond_time_in_interval(bond, last_rx, 3)) {
slave->new_link = BOND_LINK_DOWN;
commit++;
}
*/
trans_start = dev_trans_start(slave->dev);
if (bond_is_active_slave(slave) &&
- (!time_in_range(jiffies,
- trans_start - delta_in_ticks,
- trans_start + 2 * delta_in_ticks + extra_ticks) ||
- !time_in_range(jiffies,
- slave_last_rx(bond, slave) - delta_in_ticks,
- slave_last_rx(bond, slave) + 2 * delta_in_ticks + extra_ticks))) {
-
+ (!bond_time_in_interval(bond, trans_start, 2) ||
+ !bond_time_in_interval(bond, last_rx, 2))) {
slave->new_link = BOND_LINK_DOWN;
commit++;
}
*
* Called with RTNL and bond->lock for read.
*/
-static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
+static void bond_ab_arp_commit(struct bonding *bond)
{
unsigned long trans_start;
struct slave *slave;
case BOND_LINK_UP:
trans_start = dev_trans_start(slave->dev);
- if ((!bond->curr_active_slave &&
- time_in_range(jiffies,
- trans_start - delta_in_ticks,
- trans_start + delta_in_ticks + delta_in_ticks / 2)) ||
- bond->curr_active_slave != slave) {
+ if (bond->curr_active_slave != slave ||
+ (!bond->curr_active_slave &&
+ bond_time_in_interval(bond, trans_start, 1))) {
slave->link = BOND_LINK_UP;
if (bond->current_arp_slave) {
bond_set_slave_inactive_flags(
should_notify_peers = bond_should_notify_peers(bond);
- if (bond_ab_arp_inspect(bond, delta_in_ticks)) {
+ if (bond_ab_arp_inspect(bond)) {
read_unlock(&bond->lock);
/* Race avoidance with bond_close flush of workqueue */
read_lock(&bond->lock);
- bond_ab_arp_commit(bond, delta_in_ticks);
+ bond_ab_arp_commit(bond);
read_unlock(&bond->lock);
rtnl_unlock();
* The bonding ndo_neigh_setup is called at init time beofre any
* slave exists. So we must declare proxy setup function which will
* be used at run time to resolve the actual slave neigh param setup.
+ *
+ * It's also called by master devices (such as vlans) to setup their
+ * underlying devices. In that case - do nothing, we're already set up from
+ * our init.
*/
static int bond_neigh_setup(struct net_device *dev,
struct neigh_parms *parms)
{
- parms->neigh_setup = bond_neigh_init;
+ /* modify only our neigh_parms */
+ if (parms->dev == dev)
+ parms->neigh_setup = bond_neigh_init;
return 0;
}
return res;
}
+/**
+ * bond_xmit_slave_id - transmit skb through slave with slave_id
+ * @bond: bonding device that is transmitting
+ * @skb: buffer to transmit
+ * @slave_id: slave id up to slave_cnt-1 through which to transmit
+ *
+ * This function tries to transmit through slave with slave_id but in case
+ * it fails, it tries to find the first available slave for transmission.
+ * The skb is consumed in all cases, thus the function is void.
+ */
+void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)
+{
+ struct slave *slave;
+ int i = slave_id;
+
+ /* Here we start from the slave with slave_id */
+ bond_for_each_slave_rcu(bond, slave) {
+ if (--i < 0) {
+ if (slave_can_tx(slave)) {
+ bond_dev_queue_xmit(bond, skb, slave->dev);
+ return;
+ }
+ }
+ }
+
+ /* Here we start from the first slave up to slave_id */
+ i = slave_id;
+ bond_for_each_slave_rcu(bond, slave) {
+ if (--i < 0)
+ break;
+ if (slave_can_tx(slave)) {
+ bond_dev_queue_xmit(bond, skb, slave->dev);
+ return;
+ }
+ }
+ /* no slave that can tx has been found */
+ kfree_skb(skb);
+}
+
static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
- struct slave *slave, *start_at;
- int i, slave_no, res = 1;
struct iphdr *iph = ip_hdr(skb);
+ struct slave *slave;
/*
* Start with the curr_active_slave that joined the bond as the
* send the join/membership reports. The curr_active_slave found
* will send all of this type of traffic.
*/
- if ((iph->protocol == IPPROTO_IGMP) &&
- (skb->protocol == htons(ETH_P_IP))) {
- slave = bond->curr_active_slave;
- if (!slave)
- goto out;
+ if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) {
+ slave = rcu_dereference(bond->curr_active_slave);
+ if (slave && slave_can_tx(slave))
+ bond_dev_queue_xmit(bond, skb, slave->dev);
+ else
+ bond_xmit_slave_id(bond, skb, 0);
} else {
- /*
- * Concurrent TX may collide on rr_tx_counter; we accept
- * that as being rare enough not to justify using an
- * atomic op here.
- */
- slave_no = bond->rr_tx_counter++ % bond->slave_cnt;
-
- bond_for_each_slave(bond, slave) {
- slave_no--;
- if (slave_no < 0)
- break;
- }
- }
-
- start_at = slave;
- bond_for_each_slave_from(bond, slave, i, start_at) {
- if (IS_UP(slave->dev) &&
- (slave->link == BOND_LINK_UP) &&
- bond_is_active_slave(slave)) {
- res = bond_dev_queue_xmit(bond, skb, slave->dev);
- break;
- }
- }
-
-out:
- if (res) {
- /* no suitable interface, frame not sent */
- kfree_skb(skb);
+ bond_xmit_slave_id(bond, skb,
+ bond->rr_tx_counter++ % bond->slave_cnt);
}
return NETDEV_TX_OK;
}
-
/*
* in active-backup mode, we know that bond->curr_active_slave is always valid if
* the bond has a usable interface.
{
struct bonding *bond = netdev_priv(bond_dev);
struct slave *slave;
- int res = 1;
- slave = bond->curr_active_slave;
+ slave = rcu_dereference(bond->curr_active_slave);
if (slave)
- res = bond_dev_queue_xmit(bond, skb, slave->dev);
-
- if (res)
- /* no suitable interface, frame not sent */
+ bond_dev_queue_xmit(bond, skb, slave->dev);
+ else
kfree_skb(skb);
return NETDEV_TX_OK;
static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
- struct slave *slave, *start_at;
- int slave_no;
- int i;
- int res = 1;
- slave_no = bond->xmit_hash_policy(skb, bond->slave_cnt);
-
- bond_for_each_slave(bond, slave) {
- slave_no--;
- if (slave_no < 0)
- break;
- }
-
- start_at = slave;
-
- bond_for_each_slave_from(bond, slave, i, start_at) {
- if (IS_UP(slave->dev) &&
- (slave->link == BOND_LINK_UP) &&
- bond_is_active_slave(slave)) {
- res = bond_dev_queue_xmit(bond, skb, slave->dev);
- break;
- }
- }
-
- if (res) {
- /* no suitable interface, frame not sent */
- kfree_skb(skb);
- }
+ bond_xmit_slave_id(bond, skb,
+ bond->xmit_hash_policy(skb, bond->slave_cnt));
return NETDEV_TX_OK;
}
-/*
- * in broadcast mode, we send everything to all usable interfaces.
- */
+/* in broadcast mode, we send everything to all usable interfaces. */
static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
- struct slave *slave, *start_at;
- struct net_device *tx_dev = NULL;
- int i;
- int res = 1;
-
- start_at = bond->curr_active_slave;
- if (!start_at)
- goto out;
+ struct slave *slave = NULL;
- bond_for_each_slave_from(bond, slave, i, start_at) {
- if (IS_UP(slave->dev) &&
- (slave->link == BOND_LINK_UP) &&
- bond_is_active_slave(slave)) {
- if (tx_dev) {
- struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2) {
- pr_err("%s: Error: bond_xmit_broadcast(): skb_clone() failed\n",
- bond_dev->name);
- continue;
- }
+ bond_for_each_slave_rcu(bond, slave) {
+ if (bond_is_last_slave(bond, slave))
+ break;
+ if (IS_UP(slave->dev) && slave->link == BOND_LINK_UP) {
+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
- res = bond_dev_queue_xmit(bond, skb2, tx_dev);
- if (res) {
- kfree_skb(skb2);
- continue;
- }
+ if (!skb2) {
+ pr_err("%s: Error: bond_xmit_broadcast(): skb_clone() failed\n",
+ bond_dev->name);
+ continue;
}
- tx_dev = slave->dev;
+ /* bond_dev_queue_xmit always returns 0 */
+ bond_dev_queue_xmit(bond, skb2, slave->dev);
}
}
-
- if (tx_dev)
- res = bond_dev_queue_xmit(bond, skb, tx_dev);
-
-out:
- if (res)
- /* no suitable interface, frame not sent */
+ if (slave && IS_UP(slave->dev) && slave->link == BOND_LINK_UP)
+ bond_dev_queue_xmit(bond, skb, slave->dev);
+ else
kfree_skb(skb);
- /* frame sent to all suitable interfaces */
return NETDEV_TX_OK;
}
return 1;
/* Find out if any slaves have the same mapping as this skb. */
- bond_for_each_slave(bond, check_slave) {
+ bond_for_each_slave_rcu(bond, check_slave) {
if (check_slave->queue_id == skb->queue_mapping) {
slave = check_slave;
break;
if (is_netpoll_tx_blocked(dev))
return NETDEV_TX_BUSY;
- read_lock(&bond->lock);
-
+ rcu_read_lock();
if (!list_empty(&bond->slave_list))
ret = __bond_start_xmit(skb, dev);
else
kfree_skb(skb);
-
- read_unlock(&bond->lock);
+ rcu_read_unlock();
return ret;
}