From: Jay Vosburgh Date: Sat, 23 Sep 2006 04:54:53 +0000 (-0700) Subject: [PATCH] bonding: Validate probe replies in ARP monitor X-Git-Tag: v2.6.19-rc1~897^2~11 X-Git-Url: http://pileus.org/git/?p=~andy%2Flinux;a=commitdiff_plain;h=f5b2b966f032f22d3a289045a5afd4afa09f09c6 [PATCH] bonding: Validate probe replies in ARP monitor Add logic to check ARP request / reply packets used for ARP monitor link integrity checking. The current method simply examines the slave device to see if it has sent and received traffic; this can be fooled by extraneous traffic. For example, if multiple hosts running bonding are behind a common switch, the probe traffic from the multiple instances of bonding will update the tx/rx times on each other's slave devices. Signed-off-by: Jay Vosburgh Signed-off-by: Jeff Garzik --- diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index afac780445c..dc942eaf490 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -192,6 +192,17 @@ or, for backwards compatibility, the option value. E.g., arp_interval Specifies the ARP link monitoring frequency in milliseconds. + + The ARP monitor works by periodically checking the slave + devices to determine whether they have sent or received + traffic recently (the precise criteria depends upon the + bonding mode, and the state of the slave). Regular traffic is + generated via ARP probes issued for the addresses specified by + the arp_ip_target option. + + This behavior can be modified by the arp_validate option, + below. + If ARP monitoring is used in an etherchannel compatible mode (modes 0 and 2), the switch should be configured in a mode that evenly distributes packets across all links. If the @@ -213,6 +224,54 @@ arp_ip_target maximum number of targets that can be specified is 16. The default value is no IP addresses. +arp_validate + + Specifies whether or not ARP probes and replies should be + validated in the active-backup mode. This causes the ARP + monitor to examine the incoming ARP requests and replies, and + only consider a slave to be up if it is receiving the + appropriate ARP traffic. + + Possible values are: + + none or 0 + + No validation is performed. This is the default. + + active or 1 + + Validation is performed only for the active slave. + + backup or 2 + + Validation is performed only for backup slaves. + + all or 3 + + Validation is performed for all slaves. + + For the active slave, the validation checks ARP replies to + confirm that they were generated by an arp_ip_target. Since + backup slaves do not typically receive these replies, the + validation performed for backup slaves is on the ARP request + sent out via the active slave. It is possible that some + switch or network configurations may result in situations + wherein the backup slaves do not receive the ARP requests; in + such a situation, validation of backup slaves must be + disabled. + + This option is useful in network configurations in which + multiple bonding hosts are concurrently issuing ARPs to one or + more targets beyond a common switch. Should the link between + the switch and target fail (but not the switch itself), the + probe traffic generated by the multiple bonding instances will + fool the standard ARP monitor into considering the links as + still up. Use of the arp_validate option can resolve this, as + the ARP monitor will only consider ARP requests and replies + associated with its own instance of bonding. + + This option was added in bonding version 3.1.0. + downdelay Specifies the time, in milliseconds, to wait before disabling diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index bafe62f7c9b..fd521b05db8 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -96,6 +96,7 @@ static char *lacp_rate = NULL; static char *xmit_hash_policy = NULL; static int arp_interval = BOND_LINK_ARP_INTERV; static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; +static char *arp_validate = NULL; struct bond_params bonding_defaults; module_param(max_bonds, int, 0); @@ -127,6 +128,8 @@ module_param(arp_interval, int, 0); MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); module_param_array(arp_ip_target, charp, NULL, 0); MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); +module_param(arp_validate, charp, 0); +MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all"); /*----------------------------- Global variables ----------------------------*/ @@ -170,6 +173,14 @@ struct bond_parm_tbl xmit_hashtype_tbl[] = { { NULL, -1}, }; +struct bond_parm_tbl arp_validate_tbl[] = { +{ "none", BOND_ARP_VALIDATE_NONE}, +{ "active", BOND_ARP_VALIDATE_ACTIVE}, +{ "backup", BOND_ARP_VALIDATE_BACKUP}, +{ "all", BOND_ARP_VALIDATE_ALL}, +{ NULL, -1}, +}; + /*-------------------------- Forward declarations ---------------------------*/ static void bond_send_gratuitous_arp(struct bonding *bond); @@ -1424,6 +1435,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) bond_compute_features(bond); + new_slave->last_arp_rx = jiffies; + if (bond->params.miimon && !bond->params.use_carrier) { link_reporting = bond_check_dev_link(bond, slave_dev, 1); @@ -1785,7 +1798,8 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) dev_set_mac_address(slave_dev, &addr); slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | - IFF_SLAVE_INACTIVE | IFF_BONDING); + IFF_SLAVE_INACTIVE | IFF_BONDING | + IFF_SLAVE_NEEDARP); kfree(slave); @@ -2298,6 +2312,25 @@ static int bond_has_ip(struct bonding *bond) return 0; } +static int bond_has_this_ip(struct bonding *bond, u32 ip) +{ + struct vlan_entry *vlan, *vlan_next; + + if (ip == bond->master_ip) + return 1; + + if (list_empty(&bond->vlan_list)) + return 0; + + list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, + vlan_list) { + if (ip == vlan->vlan_ip) + return 1; + } + + return 0; +} + /* * We go to the (large) trouble of VLAN tagging ARP frames because * switches in VLAN mode (especially if ports are configured as @@ -2436,6 +2469,93 @@ static void bond_send_gratuitous_arp(struct bonding *bond) } } +static void bond_validate_arp(struct bonding *bond, struct slave *slave, u32 sip, u32 tip) +{ + int i; + u32 *targets = bond->params.arp_targets; + + targets = bond->params.arp_targets; + for (i = 0; (i < BOND_MAX_ARP_TARGETS) && targets[i]; i++) { + dprintk("bva: sip %u.%u.%u.%u tip %u.%u.%u.%u t[%d] " + "%u.%u.%u.%u bhti(tip) %d\n", + NIPQUAD(sip), NIPQUAD(tip), i, NIPQUAD(targets[i]), + bond_has_this_ip(bond, tip)); + if (sip == targets[i]) { + if (bond_has_this_ip(bond, tip)) + slave->last_arp_rx = jiffies; + return; + } + } +} + +static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +{ + struct arphdr *arp; + struct slave *slave; + struct bonding *bond; + unsigned char *arp_ptr; + u32 sip, tip; + + if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) + goto out; + + bond = dev->priv; + read_lock(&bond->lock); + + dprintk("bond_arp_rcv: bond %s skb->dev %s orig_dev %s\n", + bond->dev->name, skb->dev ? skb->dev->name : "NULL", + orig_dev ? orig_dev->name : "NULL"); + + slave = bond_get_slave_by_dev(bond, orig_dev); + if (!slave || !slave_do_arp_validate(bond, slave)) + goto out_unlock; + + /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ + if (!pskb_may_pull(skb, (sizeof(struct arphdr) + + (2 * dev->addr_len) + + (2 * sizeof(u32))))) + goto out_unlock; + + arp = skb->nh.arph; + if (arp->ar_hln != dev->addr_len || + skb->pkt_type == PACKET_OTHERHOST || + skb->pkt_type == PACKET_LOOPBACK || + arp->ar_hrd != htons(ARPHRD_ETHER) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_pln != 4) + goto out_unlock; + + arp_ptr = (unsigned char *)(arp + 1); + arp_ptr += dev->addr_len; + memcpy(&sip, arp_ptr, 4); + arp_ptr += 4 + dev->addr_len; + memcpy(&tip, arp_ptr, 4); + + dprintk("bond_arp_rcv: %s %s/%d av %d sv %d sip %u.%u.%u.%u" + " tip %u.%u.%u.%u\n", bond->dev->name, slave->dev->name, + slave->state, bond->params.arp_validate, + slave_do_arp_validate(bond, slave), NIPQUAD(sip), NIPQUAD(tip)); + + /* + * Backup slaves won't see the ARP reply, but do come through + * here for each ARP probe (so we swap the sip/tip to validate + * the probe). In a "redundant switch, common router" type of + * configuration, the ARP probe will (hopefully) travel from + * the active, through one switch, the router, then the other + * switch before reaching the backup. + */ + if (slave->state == BOND_STATE_ACTIVE) + bond_validate_arp(bond, slave, sip, tip); + else + bond_validate_arp(bond, slave, tip, sip); + +out_unlock: + read_unlock(&bond->lock); +out: + dev_kfree_skb(skb); + return NET_RX_SUCCESS; +} + /* * this function is called regularly to monitor each slave's link * ensuring that traffic is being sent and received when arp monitoring @@ -2600,7 +2720,8 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) */ bond_for_each_slave(bond, slave, i) { if (slave->link != BOND_LINK_UP) { - if ((jiffies - slave->dev->last_rx) <= delta_in_ticks) { + if ((jiffies - slave_last_rx(bond, slave)) <= + delta_in_ticks) { slave->link = BOND_LINK_UP; @@ -2645,7 +2766,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) if ((slave != bond->curr_active_slave) && (!bond->current_arp_slave) && - (((jiffies - slave->dev->last_rx) >= 3*delta_in_ticks) && + (((jiffies - slave_last_rx(bond, slave)) >= 3*delta_in_ticks) && bond_has_ip(bond))) { /* a backup slave has gone down; three times * the delta allows the current slave to be @@ -2692,7 +2813,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev) * if it is up and needs to take over as the curr_active_slave */ if ((((jiffies - slave->dev->trans_start) >= (2*delta_in_ticks)) || - (((jiffies - slave->dev->last_rx) >= (2*delta_in_ticks)) && + (((jiffies - slave_last_rx(bond, slave)) >= (2*delta_in_ticks)) && bond_has_ip(bond))) && ((jiffies - slave->jiffies) >= 2*delta_in_ticks)) { @@ -3315,6 +3436,21 @@ static void bond_unregister_lacpdu(struct bonding *bond) dev_remove_pack(&(BOND_AD_INFO(bond).ad_pkt_type)); } +void bond_register_arp(struct bonding *bond) +{ + struct packet_type *pt = &bond->arp_mon_pt; + + pt->type = htons(ETH_P_ARP); + pt->dev = NULL; /*bond->dev;XXX*/ + pt->func = bond_arp_rcv; + dev_add_pack(pt); +} + +void bond_unregister_arp(struct bonding *bond) +{ + dev_remove_pack(&bond->arp_mon_pt); +} + /*---------------------------- Hashing Policies -----------------------------*/ /* @@ -3401,6 +3537,9 @@ static int bond_open(struct net_device *bond_dev) } else { arp_timer->function = (void *)&bond_loadbalance_arp_mon; } + if (bond->params.arp_validate) + bond_register_arp(bond); + add_timer(arp_timer); } @@ -3428,6 +3567,9 @@ static int bond_close(struct net_device *bond_dev) bond_unregister_lacpdu(bond); } + if (bond->params.arp_validate) + bond_unregister_arp(bond); + write_lock_bh(&bond->lock); @@ -4281,6 +4423,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl) static int bond_check_params(struct bond_params *params) { + int arp_validate_value; + /* * Convert string parameters. */ @@ -4484,6 +4628,29 @@ static int bond_check_params(struct bond_params *params) arp_interval = 0; } + if (arp_validate) { + if (bond_mode != BOND_MODE_ACTIVEBACKUP) { + printk(KERN_ERR DRV_NAME + ": arp_validate only supported in active-backup mode\n"); + return -EINVAL; + } + if (!arp_interval) { + printk(KERN_ERR DRV_NAME + ": arp_validate requires arp_interval\n"); + return -EINVAL; + } + + arp_validate_value = bond_parse_parm(arp_validate, + arp_validate_tbl); + if (arp_validate_value == -1) { + printk(KERN_ERR DRV_NAME + ": Error: invalid arp_validate \"%s\"\n", + arp_validate == NULL ? "NULL" : arp_validate); + return -EINVAL; + } + } else + arp_validate_value = 0; + if (miimon) { printk(KERN_INFO DRV_NAME ": MII link monitoring set to %d ms\n", @@ -4492,8 +4659,10 @@ static int bond_check_params(struct bond_params *params) int i; printk(KERN_INFO DRV_NAME - ": ARP monitoring set to %d ms with %d target(s):", - arp_interval, arp_ip_count); + ": ARP monitoring set to %d ms, validate %s, with %d target(s):", + arp_interval, + arp_validate_tbl[arp_validate_value].modename, + arp_ip_count); for (i = 0; i < arp_ip_count; i++) printk (" %s", arp_ip_target[i]); @@ -4527,6 +4696,7 @@ static int bond_check_params(struct bond_params *params) params->xmit_policy = xmit_hashtype; params->miimon = miimon; params->arp_interval = arp_interval; + params->arp_validate = arp_validate_value; params->updelay = updelay; params->downdelay = downdelay; params->use_carrier = use_carrier; diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 15b6a29bb4d..ced9ed8f995 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -51,6 +51,7 @@ extern struct bond_params bonding_defaults; extern struct bond_parm_tbl bond_mode_tbl[]; extern struct bond_parm_tbl bond_lacp_tbl[]; extern struct bond_parm_tbl xmit_hashtype_tbl[]; +extern struct bond_parm_tbl arp_validate_tbl[]; static int expected_refcount = -1; static struct class *netdev_class; @@ -502,6 +503,53 @@ out: } static CLASS_DEVICE_ATTR(xmit_hash_policy, S_IRUGO | S_IWUSR, bonding_show_xmit_hash, bonding_store_xmit_hash); +/* + * Show and set arp_validate. + */ +static ssize_t bonding_show_arp_validate(struct class_device *cd, char *buf) +{ + struct bonding *bond = to_bond(cd); + + return sprintf(buf, "%s %d\n", + arp_validate_tbl[bond->params.arp_validate].modename, + bond->params.arp_validate) + 1; +} + +static ssize_t bonding_store_arp_validate(struct class_device *cd, const char *buf, size_t count) +{ + int new_value; + struct bonding *bond = to_bond(cd); + + new_value = bond_parse_parm((char *)buf, arp_validate_tbl); + if (new_value < 0) { + printk(KERN_ERR DRV_NAME + ": %s: Ignoring invalid arp_validate value %s\n", + bond->dev->name, buf); + return -EINVAL; + } + if (new_value && (bond->params.mode != BOND_MODE_ACTIVEBACKUP)) { + printk(KERN_ERR DRV_NAME + ": %s: arp_validate only supported in active-backup mode.\n", + bond->dev->name); + return -EINVAL; + } + printk(KERN_INFO DRV_NAME ": %s: setting arp_validate to %s (%d).\n", + bond->dev->name, arp_validate_tbl[new_value].modename, + new_value); + + if (!bond->params.arp_validate && new_value) { + bond_register_arp(bond); + } else if (bond->params.arp_validate && !new_value) { + bond_unregister_arp(bond); + } + + bond->params.arp_validate = new_value; + + return count; +} + +static CLASS_DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate, bonding_store_arp_validate); + /* * Show and set the arp timer interval. There are two tricky bits * here. First, if ARP monitoring is activated, then we must disable @@ -914,6 +962,11 @@ static ssize_t bonding_store_miimon(struct class_device *cd, const char *buf, si "ARP monitoring. Disabling ARP monitoring...\n", bond->dev->name); bond->params.arp_interval = 0; + if (bond->params.arp_validate) { + bond_unregister_arp(bond); + bond->params.arp_validate = + BOND_ARP_VALIDATE_NONE; + } /* Kill ARP timer, else it brings bond's link down */ if (bond->mii_timer.function) { printk(KERN_INFO DRV_NAME @@ -1273,6 +1326,7 @@ static CLASS_DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, N static struct attribute *per_bond_attrs[] = { &class_device_attr_slaves.attr, &class_device_attr_mode.attr, + &class_device_attr_arp_validate.attr, &class_device_attr_arp_interval.attr, &class_device_attr_arp_ip_target.attr, &class_device_attr_downdelay.attr, diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 17caafe5824..db16fee40a5 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -22,8 +22,8 @@ #include "bond_3ad.h" #include "bond_alb.h" -#define DRV_VERSION "3.0.3" -#define DRV_RELDATE "March 23, 2006" +#define DRV_VERSION "3.1.0-test" +#define DRV_RELDATE "September 9, 2006" #define DRV_NAME "bonding" #define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" @@ -126,6 +126,7 @@ struct bond_params { int xmit_policy; int miimon; int arp_interval; + int arp_validate; int use_carrier; int updelay; int downdelay; @@ -151,6 +152,7 @@ struct slave { struct slave *prev; int delay; u32 jiffies; + u32 last_arp_rx; s8 link; /* one of BOND_LINK_XXXX */ s8 state; /* one of BOND_STATE_XXXX */ u32 original_flags; @@ -198,6 +200,7 @@ struct bonding { struct bond_params params; struct list_head vlan_list; struct vlan_group *vlgrp; + struct packet_type arp_mon_pt; }; /** @@ -228,6 +231,25 @@ static inline struct bonding *bond_get_bond_by_slave(struct slave *slave) return (struct bonding *)slave->dev->master->priv; } +#define BOND_ARP_VALIDATE_NONE 0 +#define BOND_ARP_VALIDATE_ACTIVE (1 << BOND_STATE_ACTIVE) +#define BOND_ARP_VALIDATE_BACKUP (1 << BOND_STATE_BACKUP) +#define BOND_ARP_VALIDATE_ALL (BOND_ARP_VALIDATE_ACTIVE | \ + BOND_ARP_VALIDATE_BACKUP) + +extern inline int slave_do_arp_validate(struct bonding *bond, struct slave *slave) +{ + return bond->params.arp_validate & (1 << slave->state); +} + +extern inline u32 slave_last_rx(struct bonding *bond, struct slave *slave) +{ + if (slave_do_arp_validate(bond, slave)) + return slave->last_arp_rx; + + return slave->dev->last_rx; +} + static inline void bond_set_slave_inactive_flags(struct slave *slave) { struct bonding *bond = slave->dev->master->priv; @@ -235,12 +257,14 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave) bond->params.mode != BOND_MODE_ALB) slave->state = BOND_STATE_BACKUP; slave->dev->priv_flags |= IFF_SLAVE_INACTIVE; + if (slave_do_arp_validate(bond, slave)) + slave->dev->priv_flags |= IFF_SLAVE_NEEDARP; } static inline void bond_set_slave_active_flags(struct slave *slave) { slave->state = BOND_STATE_ACTIVE; - slave->dev->priv_flags &= ~IFF_SLAVE_INACTIVE; + slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP); } static inline void bond_set_master_3ad_flags(struct bonding *bond) @@ -284,6 +308,8 @@ int bond_parse_parm(char *mode_arg, struct bond_parm_tbl *tbl); const char *bond_mode_name(int mode); void bond_select_active_slave(struct bonding *bond); void bond_change_active_slave(struct bonding *bond, struct slave *new_active); +void bond_register_arp(struct bonding *); +void bond_unregister_arp(struct bonding *); #endif /* _LINUX_BONDING_H */ diff --git a/include/linux/if.h b/include/linux/if.h index a023ec1274f..8018c2e22c0 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -60,6 +60,7 @@ #define IFF_MASTER_8023AD 0x8 /* bonding master, 802.3ad. */ #define IFF_MASTER_ALB 0x10 /* bonding master, balance-alb. */ #define IFF_BONDING 0x20 /* bonding master or slave */ +#define IFF_SLAVE_NEEDARP 0x40 /* need ARPs for validation */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 43289127b45..afd80eff272 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1016,7 +1016,8 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) } /* On bonding slaves other than the currently active slave, suppress - * duplicates except for 802.3ad ETH_P_SLOW and alb non-mcast/bcast. + * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and + * ARP on active-backup slaves with arp_validate enabled. */ static inline int skb_bond_should_drop(struct sk_buff *skb) { @@ -1025,6 +1026,10 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) if (master && (dev->priv_flags & IFF_SLAVE_INACTIVE)) { + if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && + skb->protocol == __constant_htons(ETH_P_ARP)) + return 0; + if (master->priv_flags & IFF_MASTER_ALB) { if (skb->pkt_type != PACKET_BROADCAST && skb->pkt_type != PACKET_MULTICAST)