]> Pileus Git - ~andy/linux/blob - net/ipv4/devinet.c
Merge tag 'for-linus-20140225' of git://git.infradead.org/linux-mtd
[~andy/linux] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78         },
79 };
80
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82         .data = {
83                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90         },
91 };
92
/*
 * Read attribute @attr from the devconf_dflt table that @net points to.
 * @net is parenthesized so that any pointer-valued expression (not only a
 * plain identifier) can be passed safely.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97         [IFA_LOCAL]             = { .type = NLA_U32 },
98         [IFA_ADDRESS]           = { .type = NLA_U32 },
99         [IFA_BROADCAST]         = { .type = NLA_U32 },
100         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
102         [IFA_FLAGS]             = { .type = NLA_U32 },
103 };
104
105 #define IN4_ADDR_HSIZE_SHIFT    8
106 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
107
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 static DEFINE_SPINLOCK(inet_addr_hash_lock);
110
111 static u32 inet_addr_hash(struct net *net, __be32 addr)
112 {
113         u32 val = (__force u32) addr ^ net_hash_mix(net);
114
115         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
116 }
117
118 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 {
120         u32 hash = inet_addr_hash(net, ifa->ifa_local);
121
122         spin_lock(&inet_addr_hash_lock);
123         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
124         spin_unlock(&inet_addr_hash_lock);
125 }
126
127 static void inet_hash_remove(struct in_ifaddr *ifa)
128 {
129         spin_lock(&inet_addr_hash_lock);
130         hlist_del_init_rcu(&ifa->hash);
131         spin_unlock(&inet_addr_hash_lock);
132 }
133
134 /**
135  * __ip_dev_find - find the first device with a given source address.
136  * @net: the net namespace
137  * @addr: the source address
138  * @devref: if true, take a reference on the found device
139  *
140  * If a caller uses devref=false, it should be protected by RCU, or RTNL
141  */
142 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
143 {
144         u32 hash = inet_addr_hash(net, addr);
145         struct net_device *result = NULL;
146         struct in_ifaddr *ifa;
147
148         rcu_read_lock();
149         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
150                 if (ifa->ifa_local == addr) {
151                         struct net_device *dev = ifa->ifa_dev->dev;
152
153                         if (!net_eq(dev_net(dev), net))
154                                 continue;
155                         result = dev;
156                         break;
157                 }
158         }
159         if (!result) {
160                 struct flowi4 fl4 = { .daddr = addr };
161                 struct fib_result res = { 0 };
162                 struct fib_table *local;
163
164                 /* Fallback to FIB local table so that communication
165                  * over loopback subnets work.
166                  */
167                 local = fib_get_table(net, RT_TABLE_LOCAL);
168                 if (local &&
169                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
170                     res.type == RTN_LOCAL)
171                         result = FIB_RES_DEV(res);
172         }
173         if (result && devref)
174                 dev_hold(result);
175         rcu_read_unlock();
176         return result;
177 }
178 EXPORT_SYMBOL(__ip_dev_find);
179
180 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
181
182 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
183 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
184                          int destroy);
185 #ifdef CONFIG_SYSCTL
186 static void devinet_sysctl_register(struct in_device *idev);
187 static void devinet_sysctl_unregister(struct in_device *idev);
188 #else
189 static void devinet_sysctl_register(struct in_device *idev)
190 {
191 }
192 static void devinet_sysctl_unregister(struct in_device *idev)
193 {
194 }
195 #endif
196
197 /* Locks all the inet devices. */
198
199 static struct in_ifaddr *inet_alloc_ifa(void)
200 {
201         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
202 }
203
204 static void inet_rcu_free_ifa(struct rcu_head *head)
205 {
206         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
207         if (ifa->ifa_dev)
208                 in_dev_put(ifa->ifa_dev);
209         kfree(ifa);
210 }
211
212 static void inet_free_ifa(struct in_ifaddr *ifa)
213 {
214         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
215 }
216
217 void in_dev_finish_destroy(struct in_device *idev)
218 {
219         struct net_device *dev = idev->dev;
220
221         WARN_ON(idev->ifa_list);
222         WARN_ON(idev->mc_list);
223         kfree(rcu_dereference_protected(idev->mc_hash, 1));
224 #ifdef NET_REFCNT_DEBUG
225         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
226 #endif
227         dev_put(dev);
228         if (!idev->dead)
229                 pr_err("Freeing alive in_device %p\n", idev);
230         else
231                 kfree(idev);
232 }
233 EXPORT_SYMBOL(in_dev_finish_destroy);
234
235 static struct in_device *inetdev_init(struct net_device *dev)
236 {
237         struct in_device *in_dev;
238
239         ASSERT_RTNL();
240
241         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
242         if (!in_dev)
243                 goto out;
244         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
245                         sizeof(in_dev->cnf));
246         in_dev->cnf.sysctl = NULL;
247         in_dev->dev = dev;
248         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
249         if (!in_dev->arp_parms)
250                 goto out_kfree;
251         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
252                 dev_disable_lro(dev);
253         /* Reference in_dev->dev */
254         dev_hold(dev);
255         /* Account for reference dev->ip_ptr (below) */
256         in_dev_hold(in_dev);
257
258         devinet_sysctl_register(in_dev);
259         ip_mc_init_dev(in_dev);
260         if (dev->flags & IFF_UP)
261                 ip_mc_up(in_dev);
262
263         /* we can receive as soon as ip_ptr is set -- do this last */
264         rcu_assign_pointer(dev->ip_ptr, in_dev);
265 out:
266         return in_dev;
267 out_kfree:
268         kfree(in_dev);
269         in_dev = NULL;
270         goto out;
271 }
272
273 static void in_dev_rcu_put(struct rcu_head *head)
274 {
275         struct in_device *idev = container_of(head, struct in_device, rcu_head);
276         in_dev_put(idev);
277 }
278
279 static void inetdev_destroy(struct in_device *in_dev)
280 {
281         struct in_ifaddr *ifa;
282         struct net_device *dev;
283
284         ASSERT_RTNL();
285
286         dev = in_dev->dev;
287
288         in_dev->dead = 1;
289
290         ip_mc_destroy_dev(in_dev);
291
292         while ((ifa = in_dev->ifa_list) != NULL) {
293                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
294                 inet_free_ifa(ifa);
295         }
296
297         RCU_INIT_POINTER(dev->ip_ptr, NULL);
298
299         devinet_sysctl_unregister(in_dev);
300         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
301         arp_ifdown(dev);
302
303         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
304 }
305
306 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
307 {
308         rcu_read_lock();
309         for_primary_ifa(in_dev) {
310                 if (inet_ifa_match(a, ifa)) {
311                         if (!b || inet_ifa_match(b, ifa)) {
312                                 rcu_read_unlock();
313                                 return 1;
314                         }
315                 }
316         } endfor_ifa(in_dev);
317         rcu_read_unlock();
318         return 0;
319 }
320
321 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
322                          int destroy, struct nlmsghdr *nlh, u32 portid)
323 {
324         struct in_ifaddr *promote = NULL;
325         struct in_ifaddr *ifa, *ifa1 = *ifap;
326         struct in_ifaddr *last_prim = in_dev->ifa_list;
327         struct in_ifaddr *prev_prom = NULL;
328         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
329
330         ASSERT_RTNL();
331
332         /* 1. Deleting primary ifaddr forces deletion all secondaries
333          * unless alias promotion is set
334          **/
335
336         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
337                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
338
339                 while ((ifa = *ifap1) != NULL) {
340                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
341                             ifa1->ifa_scope <= ifa->ifa_scope)
342                                 last_prim = ifa;
343
344                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
345                             ifa1->ifa_mask != ifa->ifa_mask ||
346                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
347                                 ifap1 = &ifa->ifa_next;
348                                 prev_prom = ifa;
349                                 continue;
350                         }
351
352                         if (!do_promote) {
353                                 inet_hash_remove(ifa);
354                                 *ifap1 = ifa->ifa_next;
355
356                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
357                                 blocking_notifier_call_chain(&inetaddr_chain,
358                                                 NETDEV_DOWN, ifa);
359                                 inet_free_ifa(ifa);
360                         } else {
361                                 promote = ifa;
362                                 break;
363                         }
364                 }
365         }
366
367         /* On promotion all secondaries from subnet are changing
368          * the primary IP, we must remove all their routes silently
369          * and later to add them back with new prefsrc. Do this
370          * while all addresses are on the device list.
371          */
372         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
373                 if (ifa1->ifa_mask == ifa->ifa_mask &&
374                     inet_ifa_match(ifa1->ifa_address, ifa))
375                         fib_del_ifaddr(ifa, ifa1);
376         }
377
378         /* 2. Unlink it */
379
380         *ifap = ifa1->ifa_next;
381         inet_hash_remove(ifa1);
382
383         /* 3. Announce address deletion */
384
385         /* Send message first, then call notifier.
386            At first sight, FIB update triggered by notifier
387            will refer to already deleted ifaddr, that could confuse
388            netlink listeners. It is not true: look, gated sees
389            that route deleted and if it still thinks that ifaddr
390            is valid, it will try to restore deleted routes... Grr.
391            So that, this order is correct.
392          */
393         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
394         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
395
396         if (promote) {
397                 struct in_ifaddr *next_sec = promote->ifa_next;
398
399                 if (prev_prom) {
400                         prev_prom->ifa_next = promote->ifa_next;
401                         promote->ifa_next = last_prim->ifa_next;
402                         last_prim->ifa_next = promote;
403                 }
404
405                 promote->ifa_flags &= ~IFA_F_SECONDARY;
406                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
407                 blocking_notifier_call_chain(&inetaddr_chain,
408                                 NETDEV_UP, promote);
409                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
410                         if (ifa1->ifa_mask != ifa->ifa_mask ||
411                             !inet_ifa_match(ifa1->ifa_address, ifa))
412                                         continue;
413                         fib_add_ifaddr(ifa);
414                 }
415
416         }
417         if (destroy)
418                 inet_free_ifa(ifa1);
419 }
420
421 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
422                          int destroy)
423 {
424         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
425 }
426
/* Delayed work item that runs check_lifetime(), which is defined below. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
430
431 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
432                              u32 portid)
433 {
434         struct in_device *in_dev = ifa->ifa_dev;
435         struct in_ifaddr *ifa1, **ifap, **last_primary;
436
437         ASSERT_RTNL();
438
439         if (!ifa->ifa_local) {
440                 inet_free_ifa(ifa);
441                 return 0;
442         }
443
444         ifa->ifa_flags &= ~IFA_F_SECONDARY;
445         last_primary = &in_dev->ifa_list;
446
447         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
448              ifap = &ifa1->ifa_next) {
449                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
450                     ifa->ifa_scope <= ifa1->ifa_scope)
451                         last_primary = &ifa1->ifa_next;
452                 if (ifa1->ifa_mask == ifa->ifa_mask &&
453                     inet_ifa_match(ifa1->ifa_address, ifa)) {
454                         if (ifa1->ifa_local == ifa->ifa_local) {
455                                 inet_free_ifa(ifa);
456                                 return -EEXIST;
457                         }
458                         if (ifa1->ifa_scope != ifa->ifa_scope) {
459                                 inet_free_ifa(ifa);
460                                 return -EINVAL;
461                         }
462                         ifa->ifa_flags |= IFA_F_SECONDARY;
463                 }
464         }
465
466         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
467                 prandom_seed((__force u32) ifa->ifa_local);
468                 ifap = last_primary;
469         }
470
471         ifa->ifa_next = *ifap;
472         *ifap = ifa;
473
474         inet_hash_insert(dev_net(in_dev->dev), ifa);
475
476         cancel_delayed_work(&check_lifetime_work);
477         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
478
479         /* Send message first, then call notifier.
480            Notifier will trigger FIB update, so that
481            listeners of netlink will know about new ifaddr */
482         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
483         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
484
485         return 0;
486 }
487
488 static int inet_insert_ifa(struct in_ifaddr *ifa)
489 {
490         return __inet_insert_ifa(ifa, NULL, 0);
491 }
492
493 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
494 {
495         struct in_device *in_dev = __in_dev_get_rtnl(dev);
496
497         ASSERT_RTNL();
498
499         if (!in_dev) {
500                 inet_free_ifa(ifa);
501                 return -ENOBUFS;
502         }
503         ipv4_devconf_setall(in_dev);
504         neigh_parms_data_state_setall(in_dev->arp_parms);
505         if (ifa->ifa_dev != in_dev) {
506                 WARN_ON(ifa->ifa_dev);
507                 in_dev_hold(in_dev);
508                 ifa->ifa_dev = in_dev;
509         }
510         if (ipv4_is_loopback(ifa->ifa_local))
511                 ifa->ifa_scope = RT_SCOPE_HOST;
512         return inet_insert_ifa(ifa);
513 }
514
515 /* Caller must hold RCU or RTNL :
516  * We dont take a reference on found in_device
517  */
518 struct in_device *inetdev_by_index(struct net *net, int ifindex)
519 {
520         struct net_device *dev;
521         struct in_device *in_dev = NULL;
522
523         rcu_read_lock();
524         dev = dev_get_by_index_rcu(net, ifindex);
525         if (dev)
526                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
527         rcu_read_unlock();
528         return in_dev;
529 }
530 EXPORT_SYMBOL(inetdev_by_index);
531
532 /* Called only from RTNL semaphored context. No locks. */
533
534 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
535                                     __be32 mask)
536 {
537         ASSERT_RTNL();
538
539         for_primary_ifa(in_dev) {
540                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
541                         return ifa;
542         } endfor_ifa(in_dev);
543         return NULL;
544 }
545
546 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
547 {
548         struct net *net = sock_net(skb->sk);
549         struct nlattr *tb[IFA_MAX+1];
550         struct in_device *in_dev;
551         struct ifaddrmsg *ifm;
552         struct in_ifaddr *ifa, **ifap;
553         int err = -EINVAL;
554
555         ASSERT_RTNL();
556
557         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
558         if (err < 0)
559                 goto errout;
560
561         ifm = nlmsg_data(nlh);
562         in_dev = inetdev_by_index(net, ifm->ifa_index);
563         if (in_dev == NULL) {
564                 err = -ENODEV;
565                 goto errout;
566         }
567
568         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
569              ifap = &ifa->ifa_next) {
570                 if (tb[IFA_LOCAL] &&
571                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
572                         continue;
573
574                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
575                         continue;
576
577                 if (tb[IFA_ADDRESS] &&
578                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
579                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
580                         continue;
581
582                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
583                 return 0;
584         }
585
586         err = -EADDRNOTAVAIL;
587 errout:
588         return err;
589 }
590
/* Lifetime value meaning "never expires" (all 32 bits set). */
#define INFINITY_LIFE_TIME      0xFFFFFFFFU
592
593 static void check_lifetime(struct work_struct *work)
594 {
595         unsigned long now, next, next_sec, next_sched;
596         struct in_ifaddr *ifa;
597         struct hlist_node *n;
598         int i;
599
600         now = jiffies;
601         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
602
603         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
604                 bool change_needed = false;
605
606                 rcu_read_lock();
607                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
608                         unsigned long age;
609
610                         if (ifa->ifa_flags & IFA_F_PERMANENT)
611                                 continue;
612
613                         /* We try to batch several events at once. */
614                         age = (now - ifa->ifa_tstamp +
615                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
616
617                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
618                             age >= ifa->ifa_valid_lft) {
619                                 change_needed = true;
620                         } else if (ifa->ifa_preferred_lft ==
621                                    INFINITY_LIFE_TIME) {
622                                 continue;
623                         } else if (age >= ifa->ifa_preferred_lft) {
624                                 if (time_before(ifa->ifa_tstamp +
625                                                 ifa->ifa_valid_lft * HZ, next))
626                                         next = ifa->ifa_tstamp +
627                                                ifa->ifa_valid_lft * HZ;
628
629                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
630                                         change_needed = true;
631                         } else if (time_before(ifa->ifa_tstamp +
632                                                ifa->ifa_preferred_lft * HZ,
633                                                next)) {
634                                 next = ifa->ifa_tstamp +
635                                        ifa->ifa_preferred_lft * HZ;
636                         }
637                 }
638                 rcu_read_unlock();
639                 if (!change_needed)
640                         continue;
641                 rtnl_lock();
642                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
643                         unsigned long age;
644
645                         if (ifa->ifa_flags & IFA_F_PERMANENT)
646                                 continue;
647
648                         /* We try to batch several events at once. */
649                         age = (now - ifa->ifa_tstamp +
650                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
651
652                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
653                             age >= ifa->ifa_valid_lft) {
654                                 struct in_ifaddr **ifap;
655
656                                 for (ifap = &ifa->ifa_dev->ifa_list;
657                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
658                                         if (*ifap == ifa) {
659                                                 inet_del_ifa(ifa->ifa_dev,
660                                                              ifap, 1);
661                                                 break;
662                                         }
663                                 }
664                         } else if (ifa->ifa_preferred_lft !=
665                                    INFINITY_LIFE_TIME &&
666                                    age >= ifa->ifa_preferred_lft &&
667                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
668                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
669                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
670                         }
671                 }
672                 rtnl_unlock();
673         }
674
675         next_sec = round_jiffies_up(next);
676         next_sched = next;
677
678         /* If rounded timeout is accurate enough, accept it. */
679         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
680                 next_sched = next_sec;
681
682         now = jiffies;
683         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
684         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
685                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
686
687         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
688                         next_sched - now);
689 }
690
691 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
692                              __u32 prefered_lft)
693 {
694         unsigned long timeout;
695
696         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
697
698         timeout = addrconf_timeout_fixup(valid_lft, HZ);
699         if (addrconf_finite_timeout(timeout))
700                 ifa->ifa_valid_lft = timeout;
701         else
702                 ifa->ifa_flags |= IFA_F_PERMANENT;
703
704         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
705         if (addrconf_finite_timeout(timeout)) {
706                 if (timeout == 0)
707                         ifa->ifa_flags |= IFA_F_DEPRECATED;
708                 ifa->ifa_preferred_lft = timeout;
709         }
710         ifa->ifa_tstamp = jiffies;
711         if (!ifa->ifa_cstamp)
712                 ifa->ifa_cstamp = ifa->ifa_tstamp;
713 }
714
715 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
716                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
717 {
718         struct nlattr *tb[IFA_MAX+1];
719         struct in_ifaddr *ifa;
720         struct ifaddrmsg *ifm;
721         struct net_device *dev;
722         struct in_device *in_dev;
723         int err;
724
725         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
726         if (err < 0)
727                 goto errout;
728
729         ifm = nlmsg_data(nlh);
730         err = -EINVAL;
731         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
732                 goto errout;
733
734         dev = __dev_get_by_index(net, ifm->ifa_index);
735         err = -ENODEV;
736         if (dev == NULL)
737                 goto errout;
738
739         in_dev = __in_dev_get_rtnl(dev);
740         err = -ENOBUFS;
741         if (in_dev == NULL)
742                 goto errout;
743
744         ifa = inet_alloc_ifa();
745         if (ifa == NULL)
746                 /*
747                  * A potential indev allocation can be left alive, it stays
748                  * assigned to its device and is destroy with it.
749                  */
750                 goto errout;
751
752         ipv4_devconf_setall(in_dev);
753         neigh_parms_data_state_setall(in_dev->arp_parms);
754         in_dev_hold(in_dev);
755
756         if (tb[IFA_ADDRESS] == NULL)
757                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
758
759         INIT_HLIST_NODE(&ifa->hash);
760         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
761         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
762         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
763                                          ifm->ifa_flags;
764         ifa->ifa_scope = ifm->ifa_scope;
765         ifa->ifa_dev = in_dev;
766
767         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
768         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
769
770         if (tb[IFA_BROADCAST])
771                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
772
773         if (tb[IFA_LABEL])
774                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
775         else
776                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
777
778         if (tb[IFA_CACHEINFO]) {
779                 struct ifa_cacheinfo *ci;
780
781                 ci = nla_data(tb[IFA_CACHEINFO]);
782                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
783                         err = -EINVAL;
784                         goto errout_free;
785                 }
786                 *pvalid_lft = ci->ifa_valid;
787                 *pprefered_lft = ci->ifa_prefered;
788         }
789
790         return ifa;
791
792 errout_free:
793         inet_free_ifa(ifa);
794 errout:
795         return ERR_PTR(err);
796 }
797
798 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
799 {
800         struct in_device *in_dev = ifa->ifa_dev;
801         struct in_ifaddr *ifa1, **ifap;
802
803         if (!ifa->ifa_local)
804                 return NULL;
805
806         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
807              ifap = &ifa1->ifa_next) {
808                 if (ifa1->ifa_mask == ifa->ifa_mask &&
809                     inet_ifa_match(ifa1->ifa_address, ifa) &&
810                     ifa1->ifa_local == ifa->ifa_local)
811                         return ifa1;
812         }
813         return NULL;
814 }
815
816 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
817 {
818         struct net *net = sock_net(skb->sk);
819         struct in_ifaddr *ifa;
820         struct in_ifaddr *ifa_existing;
821         __u32 valid_lft = INFINITY_LIFE_TIME;
822         __u32 prefered_lft = INFINITY_LIFE_TIME;
823
824         ASSERT_RTNL();
825
826         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
827         if (IS_ERR(ifa))
828                 return PTR_ERR(ifa);
829
830         ifa_existing = find_matching_ifa(ifa);
831         if (!ifa_existing) {
832                 /* It would be best to check for !NLM_F_CREATE here but
833                  * userspace alreay relies on not having to provide this.
834                  */
835                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
836                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
837         } else {
838                 inet_free_ifa(ifa);
839
840                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
841                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
842                         return -EEXIST;
843                 ifa = ifa_existing;
844                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
845                 cancel_delayed_work(&check_lifetime_work);
846                 queue_delayed_work(system_power_efficient_wq,
847                                 &check_lifetime_work, 0);
848                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
849                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
850         }
851         return 0;
852 }
853
854 /*
855  *      Determine a default network mask, based on the IP address.
856  */
857
858 static int inet_abc_len(__be32 addr)
859 {
860         int rc = -1;    /* Something else, probably a multicast. */
861
862         if (ipv4_is_zeronet(addr))
863                 rc = 0;
864         else {
865                 __u32 haddr = ntohl(addr);
866
867                 if (IN_CLASSA(haddr))
868                         rc = 8;
869                 else if (IN_CLASSB(haddr))
870                         rc = 16;
871                 else if (IN_CLASSC(haddr))
872                         rc = 24;
873         }
874
875         return rc;
876 }
877
878
/*
 * devinet_ioctl - handle the IPv4 interface-address ioctls.
 *
 * @net: network namespace the request applies to
 * @cmd: SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFFLAGS, SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR or
 *       SIOCSIFNETMASK; any other value yields -EINVAL
 * @arg: user pointer to a struct ifreq; copied in on entry and, for the
 *       SIOCG* commands, copied back out with the requested address
 *
 * All SIOCS* commands require CAP_NET_ADMIN in net->user_ns (-EPERM
 * otherwise); all of them except SIOCSIFFLAGS also require the supplied
 * sockaddr to be AF_INET (-EINVAL).
 *
 * ifr_name may carry an alias suffix ("name:label"): the part before the
 * colon selects the device, the complete string is compared with ifa_label.
 *
 * Returns 0 on success or a negative errno, among them -EFAULT (user copy
 * failed), -ENODEV (no such device), -EADDRNOTAVAIL (no matching address),
 * -ENOBUFS (address allocation failed), or the result of the helper that
 * performed the change.
 */
879 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
880 {
881         struct ifreq ifr;
882         struct sockaddr_in sin_orig;
883         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
884         struct in_device *in_dev;
885         struct in_ifaddr **ifap = NULL;
886         struct in_ifaddr *ifa = NULL;
887         struct net_device *dev;
888         char *colon;
889         int ret = -EFAULT;
890         int tryaddrmatch = 0;
891
892         /*
893          *      Fetch the caller's info block into kernel space
894          */
895
896         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
897                 goto out;
            /* The name comes from user space: force NUL termination. */
898         ifr.ifr_name[IFNAMSIZ - 1] = 0;
899
900         /* save original address for comparison */
901         memcpy(&sin_orig, sin, sizeof(*sin));
902
            /* Cut the name at ':' so that dev_load() and __dev_get_by_name()
             * see only the device part; the colon is put back once the
             * device has been found.
             */
903         colon = strchr(ifr.ifr_name, ':');
904         if (colon)
905                 *colon = 0;
906
907         dev_load(net, ifr.ifr_name);
908
            /* First pass over cmd: permission and argument checks only. */
909         switch (cmd) {
910         case SIOCGIFADDR:       /* Get interface address */
911         case SIOCGIFBRDADDR:    /* Get the broadcast address */
912         case SIOCGIFDSTADDR:    /* Get the destination address */
913         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
914                 /* Note that these ioctls will not sleep,
915                    so that we do not impose a lock.
916                    One day we will be forced to put shlock here (I mean SMP)
917                  */
918                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
919                 memset(sin, 0, sizeof(*sin));
920                 sin->sin_family = AF_INET;
921                 break;
922
923         case SIOCSIFFLAGS:
924                 ret = -EPERM;
925                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
926                         goto out;
927                 break;
928         case SIOCSIFADDR:       /* Set interface address (and family) */
929         case SIOCSIFBRDADDR:    /* Set the broadcast address */
930         case SIOCSIFDSTADDR:    /* Set the destination address */
931         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
932                 ret = -EPERM;
933                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
934                         goto out;
935                 ret = -EINVAL;
936                 if (sin->sin_family != AF_INET)
937                         goto out;
938                 break;
939         default:
940                 ret = -EINVAL;
941                 goto out;
942         }
943
            /* From here on every exit must go through done: or rarok:,
             * both of which drop RTNL.
             */
944         rtnl_lock();
945
946         ret = -ENODEV;
947         dev = __dev_get_by_name(net, ifr.ifr_name);
948         if (!dev)
949                 goto done;
950
951         if (colon)
952                 *colon = ':';
953
            /* Locate the address the request refers to.  ifap is left
             * pointing at the list link that holds ifa, which is what
             * inet_del_ifa() is handed below.
             */
954         in_dev = __in_dev_get_rtnl(dev);
955         if (in_dev) {
956                 if (tryaddrmatch) {
957                         /* Matthias Andree */
958                         /* compare label and address (4.4BSD style) */
959                         /* note: we only do this for a limited set of ioctls
960                            and only if the original address family was AF_INET.
961                            This is checked above. */
962                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
963                              ifap = &ifa->ifa_next) {
964                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
965                                     sin_orig.sin_addr.s_addr ==
966                                                         ifa->ifa_local) {
967                                         break; /* found */
968                                 }
969                         }
970                 }
971                 /* we didn't get a match, maybe the application is
972                    4.3BSD-style and passed in junk so we fall back to
973                    comparing just the label */
974                 if (!ifa) {
975                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
976                              ifap = &ifa->ifa_next)
977                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
978                                         break;
979                 }
980         }
981
            /* Only SIOCSIFADDR (which can create the address) and
             * SIOCSIFFLAGS may continue without an existing address.
             */
982         ret = -EADDRNOTAVAIL;
983         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
984                 goto done;
985
            /* Second pass over cmd: perform the operation. */
986         switch (cmd) {
987         case SIOCGIFADDR:       /* Get interface address */
988                 sin->sin_addr.s_addr = ifa->ifa_local;
989                 goto rarok;
990
991         case SIOCGIFBRDADDR:    /* Get the broadcast address */
992                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
993                 goto rarok;
994
995         case SIOCGIFDSTADDR:    /* Get the destination address */
996                 sin->sin_addr.s_addr = ifa->ifa_address;
997                 goto rarok;
998
999         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
1000                 sin->sin_addr.s_addr = ifa->ifa_mask;
1001                 goto rarok;
1002
1003         case SIOCSIFFLAGS:
                     /* With an alias name the flags apply to that one
                      * address: clearing IFF_UP removes it, anything else
                      * is accepted without further action.  Without an
                      * alias the device flags themselves are changed.
                      */
1004                 if (colon) {
1005                         ret = -EADDRNOTAVAIL;
1006                         if (!ifa)
1007                                 break;
1008                         ret = 0;
1009                         if (!(ifr.ifr_flags & IFF_UP))
1010                                 inet_del_ifa(in_dev, ifap, 1);
1011                         break;
1012                 }
1013                 ret = dev_change_flags(dev, ifr.ifr_flags);
1014                 break;
1015
1016         case SIOCSIFADDR:       /* Set interface address (and family) */
1017                 ret = -EINVAL;
1018                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1019                         break;
1020
                     /* Either allocate a new address labelled with the
                      * alias (or the device name), or unlink the existing
                      * one so it can be modified and set again below.
                      */
1021                 if (!ifa) {
1022                         ret = -ENOBUFS;
1023                         ifa = inet_alloc_ifa();
1024                         if (!ifa)
1025                                 break;
1026                         INIT_HLIST_NODE(&ifa->hash);
1027                         if (colon)
1028                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1029                         else
1030                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1031                 } else {
1032                         ret = 0;
1033                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1034                                 break;
1035                         inet_del_ifa(in_dev, ifap, 0);
1036                         ifa->ifa_broadcast = 0;
1037                         ifa->ifa_scope = 0;
1038                 }
1039
1040                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1041
                     /* Non point-to-point devices get the classful default
                      * mask (and a derived broadcast address for prefixes
                      * shorter than /31); point-to-point devices get /32.
                      */
1042                 if (!(dev->flags & IFF_POINTOPOINT)) {
1043                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1044                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1045                         if ((dev->flags & IFF_BROADCAST) &&
1046                             ifa->ifa_prefixlen < 31)
1047                                 ifa->ifa_broadcast = ifa->ifa_address |
1048                                                      ~ifa->ifa_mask;
1049                 } else {
1050                         ifa->ifa_prefixlen = 32;
1051                         ifa->ifa_mask = inet_make_mask(32);
1052                 }
1053                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1054                 ret = inet_set_ifa(dev, ifa);
1055                 break;
1056
1057         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1058                 ret = 0;
1059                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1060                         inet_del_ifa(in_dev, ifap, 0);
1061                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1062                         inet_insert_ifa(ifa);
1063                 }
1064                 break;
1065
1066         case SIOCSIFDSTADDR:    /* Set the destination address */
1067                 ret = 0;
1068                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1069                         break;
1070                 ret = -EINVAL;
1071                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1072                         break;
1073                 ret = 0;
1074                 inet_del_ifa(in_dev, ifap, 0);
1075                 ifa->ifa_address = sin->sin_addr.s_addr;
1076                 inet_insert_ifa(ifa);
1077                 break;
1078
1079         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1080
1081                 /*
1082                  *      The mask we set must be legal.
1083                  */
1084                 ret = -EINVAL;
1085                 if (bad_mask(sin->sin_addr.s_addr, 0))
1086                         break;
1087                 ret = 0;
1088                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1089                         __be32 old_mask = ifa->ifa_mask;
1090                         inet_del_ifa(in_dev, ifap, 0);
1091                         ifa->ifa_mask = sin->sin_addr.s_addr;
1092                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1093
1094                         /* See if current broadcast address matches
1095                          * with current netmask, then recalculate
1096                          * the broadcast address. Otherwise it's a
1097                          * funny address, so don't touch it since
1098                          * the user seems to know what (s)he's doing...
1099                          */
1100                         if ((dev->flags & IFF_BROADCAST) &&
1101                             (ifa->ifa_prefixlen < 31) &&
1102                             (ifa->ifa_broadcast ==
1103                              (ifa->ifa_local|~old_mask))) {
1104                                 ifa->ifa_broadcast = (ifa->ifa_local |
1105                                                       ~sin->sin_addr.s_addr);
1106                         }
1107                         inet_insert_ifa(ifa);
1108                 }
1109                 break;
1110         }
1111 done:
1112         rtnl_unlock();
1113 out:
1114         return ret;
             /* Reply path of the SIOCG* commands: drop RTNL first, then copy
              * the filled-in ifreq back to the caller.
              */
1115 rarok:
1116         rtnl_unlock();
1117         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1118         goto out;
1119 }
1120
1121 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1122 {
1123         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1124         struct in_ifaddr *ifa;
1125         struct ifreq ifr;
1126         int done = 0;
1127
1128         if (!in_dev)
1129                 goto out;
1130
1131         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1132                 if (!buf) {
1133                         done += sizeof(ifr);
1134                         continue;
1135                 }
1136                 if (len < (int) sizeof(ifr))
1137                         break;
1138                 memset(&ifr, 0, sizeof(struct ifreq));
1139                 strcpy(ifr.ifr_name, ifa->ifa_label);
1140
1141                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1142                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1143                                                                 ifa->ifa_local;
1144
1145                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1146                         done = -EFAULT;
1147                         break;
1148                 }
1149                 buf  += sizeof(struct ifreq);
1150                 len  -= sizeof(struct ifreq);
1151                 done += sizeof(struct ifreq);
1152         }
1153 out:
1154         return done;
1155 }
1156
1157 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1158 {
1159         __be32 addr = 0;
1160         struct in_device *in_dev;
1161         struct net *net = dev_net(dev);
1162
1163         rcu_read_lock();
1164         in_dev = __in_dev_get_rcu(dev);
1165         if (!in_dev)
1166                 goto no_in_dev;
1167
1168         for_primary_ifa(in_dev) {
1169                 if (ifa->ifa_scope > scope)
1170                         continue;
1171                 if (!dst || inet_ifa_match(dst, ifa)) {
1172                         addr = ifa->ifa_local;
1173                         break;
1174                 }
1175                 if (!addr)
1176                         addr = ifa->ifa_local;
1177         } endfor_ifa(in_dev);
1178
1179         if (addr)
1180                 goto out_unlock;
1181 no_in_dev:
1182
1183         /* Not loopback addresses on loopback should be preferred
1184            in this case. It is importnat that lo is the first interface
1185            in dev_base list.
1186          */
1187         for_each_netdev_rcu(net, dev) {
1188                 in_dev = __in_dev_get_rcu(dev);
1189                 if (!in_dev)
1190                         continue;
1191
1192                 for_primary_ifa(in_dev) {
1193                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1194                             ifa->ifa_scope <= scope) {
1195                                 addr = ifa->ifa_local;
1196                                 goto out_unlock;
1197                         }
1198                 } endfor_ifa(in_dev);
1199         }
1200 out_unlock:
1201         rcu_read_unlock();
1202         return addr;
1203 }
1204 EXPORT_SYMBOL(inet_select_addr);
1205
1206 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1207                               __be32 local, int scope)
1208 {
1209         int same = 0;
1210         __be32 addr = 0;
1211
1212         for_ifa(in_dev) {
1213                 if (!addr &&
1214                     (local == ifa->ifa_local || !local) &&
1215                     ifa->ifa_scope <= scope) {
1216                         addr = ifa->ifa_local;
1217                         if (same)
1218                                 break;
1219                 }
1220                 if (!same) {
1221                         same = (!local || inet_ifa_match(local, ifa)) &&
1222                                 (!dst || inet_ifa_match(dst, ifa));
1223                         if (same && addr) {
1224                                 if (local || !dst)
1225                                         break;
1226                                 /* Is the selected addr into dst subnet? */
1227                                 if (inet_ifa_match(addr, ifa))
1228                                         break;
1229                                 /* No, then can we use new local src? */
1230                                 if (ifa->ifa_scope <= scope) {
1231                                         addr = ifa->ifa_local;
1232                                         break;
1233                                 }
1234                                 /* search for large dst subnet for addr */
1235                                 same = 0;
1236                         }
1237                 }
1238         } endfor_ifa(in_dev);
1239
1240         return same ? addr : 0;
1241 }
1242
1243 /*
1244  * Confirm that local IP address exists using wildcards:
1245  * - net: netns to check, cannot be NULL
1246  * - in_dev: only on this interface, NULL=any interface
1247  * - dst: only in the same subnet as dst, 0=any dst
1248  * - local: address, 0=autoselect the local address
1249  * - scope: maximum allowed scope value for the local address
1250  */
1251 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1252                          __be32 dst, __be32 local, int scope)
1253 {
1254         __be32 addr = 0;
1255         struct net_device *dev;
1256
1257         if (in_dev != NULL)
1258                 return confirm_addr_indev(in_dev, dst, local, scope);
1259
1260         rcu_read_lock();
1261         for_each_netdev_rcu(net, dev) {
1262                 in_dev = __in_dev_get_rcu(dev);
1263                 if (in_dev) {
1264                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1265                         if (addr)
1266                                 break;
1267                 }
1268         }
1269         rcu_read_unlock();
1270
1271         return addr;
1272 }
1273 EXPORT_SYMBOL(inet_confirm_addr);
1274
1275 /*
1276  *      Device notifier
1277  */
1278
/*
 * Add @nb to inetaddr_chain, the blocking notifier chain this file invokes
 * for IPv4 address events (e.g. with NETDEV_UP and the in_ifaddr when an
 * address is replaced via RTM_NEWADDR).
 *
 * Returns the result of blocking_notifier_chain_register().
 */
1279 int register_inetaddr_notifier(struct notifier_block *nb)
1280 {
1281         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1282 }
1283 EXPORT_SYMBOL(register_inetaddr_notifier);
1284
/*
 * Remove @nb from inetaddr_chain, undoing register_inetaddr_notifier().
 *
 * Returns the result of blocking_notifier_chain_unregister().
 */
1285 int unregister_inetaddr_notifier(struct notifier_block *nb)
1286 {
1287         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1288 }
1289 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1290
1291 /* Rename ifa_labels for a device name change. Make some effort to preserve
1292  * existing alias numbering and to create unique labels if possible.
1293 */
1294 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1295 {
1296         struct in_ifaddr *ifa;
1297         int named = 0;
1298
1299         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1300                 char old[IFNAMSIZ], *dot;
1301
1302                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1303                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1304                 if (named++ == 0)
1305                         goto skip;
1306                 dot = strchr(old, ':');
1307                 if (dot == NULL) {
1308                         sprintf(old, ":%d", named);
1309                         dot = old;
1310                 }
1311                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1312                         strcat(ifa->ifa_label, dot);
1313                 else
1314                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1315 skip:
1316                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1317         }
1318 }
1319
/* An MTU is usable for IPv4 only if it is at least 68 octets, the smallest
 * datagram size every internet module must be able to forward (RFC 791).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_ipv4_mtu = 68;

	return !(mtu < min_ipv4_mtu);
}
1324
1325 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1326                                         struct in_device *in_dev)
1327
1328 {
1329         struct in_ifaddr *ifa;
1330
1331         for (ifa = in_dev->ifa_list; ifa;
1332              ifa = ifa->ifa_next) {
1333                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1334                          ifa->ifa_local, dev,
1335                          ifa->ifa_local, NULL,
1336                          dev->dev_addr, NULL);
1337         }
1338 }
1339
1340 /* Called only under RTNL semaphore */
1341
1342 static int inetdev_event(struct notifier_block *this, unsigned long event,
1343                          void *ptr)
1344 {
1345         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1346         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1347
1348         ASSERT_RTNL();
1349
1350         if (!in_dev) {
1351                 if (event == NETDEV_REGISTER) {
1352                         in_dev = inetdev_init(dev);
1353                         if (!in_dev)
1354                                 return notifier_from_errno(-ENOMEM);
1355                         if (dev->flags & IFF_LOOPBACK) {
1356                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1357                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1358                         }
1359                 } else if (event == NETDEV_CHANGEMTU) {
1360                         /* Re-enabling IP */
1361                         if (inetdev_valid_mtu(dev->mtu))
1362                                 in_dev = inetdev_init(dev);
1363                 }
1364                 goto out;
1365         }
1366
1367         switch (event) {
1368         case NETDEV_REGISTER:
1369                 pr_debug("%s: bug\n", __func__);
1370                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1371                 break;
1372         case NETDEV_UP:
1373                 if (!inetdev_valid_mtu(dev->mtu))
1374                         break;
1375                 if (dev->flags & IFF_LOOPBACK) {
1376                         struct in_ifaddr *ifa = inet_alloc_ifa();
1377
1378                         if (ifa) {
1379                                 INIT_HLIST_NODE(&ifa->hash);
1380                                 ifa->ifa_local =
1381                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1382                                 ifa->ifa_prefixlen = 8;
1383                                 ifa->ifa_mask = inet_make_mask(8);
1384                                 in_dev_hold(in_dev);
1385                                 ifa->ifa_dev = in_dev;
1386                                 ifa->ifa_scope = RT_SCOPE_HOST;
1387                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1388                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1389                                                  INFINITY_LIFE_TIME);
1390                                 ipv4_devconf_setall(in_dev);
1391                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1392                                 inet_insert_ifa(ifa);
1393                         }
1394                 }
1395                 ip_mc_up(in_dev);
1396                 /* fall through */
1397         case NETDEV_CHANGEADDR:
1398                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1399                         break;
1400                 /* fall through */
1401         case NETDEV_NOTIFY_PEERS:
1402                 /* Send gratuitous ARP to notify of link change */
1403                 inetdev_send_gratuitous_arp(dev, in_dev);
1404                 break;
1405         case NETDEV_DOWN:
1406                 ip_mc_down(in_dev);
1407                 break;
1408         case NETDEV_PRE_TYPE_CHANGE:
1409                 ip_mc_unmap(in_dev);
1410                 break;
1411         case NETDEV_POST_TYPE_CHANGE:
1412                 ip_mc_remap(in_dev);
1413                 break;
1414         case NETDEV_CHANGEMTU:
1415                 if (inetdev_valid_mtu(dev->mtu))
1416                         break;
1417                 /* disable IP when MTU is not enough */
1418         case NETDEV_UNREGISTER:
1419                 inetdev_destroy(in_dev);
1420                 break;
1421         case NETDEV_CHANGENAME:
1422                 /* Do not notify about label change, this event is
1423                  * not interesting to applications using netlink.
1424                  */
1425                 inetdev_changename(dev, in_dev);
1426
1427                 devinet_sysctl_unregister(in_dev);
1428                 devinet_sysctl_register(in_dev);
1429                 break;
1430         }
1431 out:
1432         return NOTIFY_DONE;
1433 }
1434
/* Notifier block whose callback is inetdev_event(); where it gets registered
 * is not visible in this part of the file.
 */
1435 static struct notifier_block ip_netdev_notifier = {
1436         .notifier_call = inetdev_event,
1437 };
1438
1439 static size_t inet_nlmsg_size(void)
1440 {
1441         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1442                + nla_total_size(4) /* IFA_ADDRESS */
1443                + nla_total_size(4) /* IFA_LOCAL */
1444                + nla_total_size(4) /* IFA_BROADCAST */
1445                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1446                + nla_total_size(4)  /* IFA_FLAGS */
1447                + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1448 }
1449
1450 static inline u32 cstamp_delta(unsigned long cstamp)
1451 {
1452         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1453 }
1454
1455 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1456                          unsigned long tstamp, u32 preferred, u32 valid)
1457 {
1458         struct ifa_cacheinfo ci;
1459
1460         ci.cstamp = cstamp_delta(cstamp);
1461         ci.tstamp = cstamp_delta(tstamp);
1462         ci.ifa_prefered = preferred;
1463         ci.ifa_valid = valid;
1464
1465         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1466 }
1467
1468 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1469                             u32 portid, u32 seq, int event, unsigned int flags)
1470 {
1471         struct ifaddrmsg *ifm;
1472         struct nlmsghdr  *nlh;
1473         u32 preferred, valid;
1474
1475         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1476         if (nlh == NULL)
1477                 return -EMSGSIZE;
1478
1479         ifm = nlmsg_data(nlh);
1480         ifm->ifa_family = AF_INET;
1481         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1482         ifm->ifa_flags = ifa->ifa_flags;
1483         ifm->ifa_scope = ifa->ifa_scope;
1484         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1485
1486         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1487                 preferred = ifa->ifa_preferred_lft;
1488                 valid = ifa->ifa_valid_lft;
1489                 if (preferred != INFINITY_LIFE_TIME) {
1490                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1491
1492                         if (preferred > tval)
1493                                 preferred -= tval;
1494                         else
1495                                 preferred = 0;
1496                         if (valid != INFINITY_LIFE_TIME) {
1497                                 if (valid > tval)
1498                                         valid -= tval;
1499                                 else
1500                                         valid = 0;
1501                         }
1502                 }
1503         } else {
1504                 preferred = INFINITY_LIFE_TIME;
1505                 valid = INFINITY_LIFE_TIME;
1506         }
1507         if ((ifa->ifa_address &&
1508              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1509             (ifa->ifa_local &&
1510              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1511             (ifa->ifa_broadcast &&
1512              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1513             (ifa->ifa_label[0] &&
1514              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1515             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1516             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1517                           preferred, valid))
1518                 goto nla_put_failure;
1519
1520         return nlmsg_end(skb, nlh);
1521
1522 nla_put_failure:
1523         nlmsg_cancel(skb, nlh);
1524         return -EMSGSIZE;
1525 }
1526
1527 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1528 {
1529         struct net *net = sock_net(skb->sk);
1530         int h, s_h;
1531         int idx, s_idx;
1532         int ip_idx, s_ip_idx;
1533         struct net_device *dev;
1534         struct in_device *in_dev;
1535         struct in_ifaddr *ifa;
1536         struct hlist_head *head;
1537
1538         s_h = cb->args[0];
1539         s_idx = idx = cb->args[1];
1540         s_ip_idx = ip_idx = cb->args[2];
1541
1542         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1543                 idx = 0;
1544                 head = &net->dev_index_head[h];
1545                 rcu_read_lock();
1546                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1547                           net->dev_base_seq;
1548                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1549                         if (idx < s_idx)
1550                                 goto cont;
1551                         if (h > s_h || idx > s_idx)
1552                                 s_ip_idx = 0;
1553                         in_dev = __in_dev_get_rcu(dev);
1554                         if (!in_dev)
1555                                 goto cont;
1556
1557                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1558                              ifa = ifa->ifa_next, ip_idx++) {
1559                                 if (ip_idx < s_ip_idx)
1560                                         continue;
1561                                 if (inet_fill_ifaddr(skb, ifa,
1562                                              NETLINK_CB(cb->skb).portid,
1563                                              cb->nlh->nlmsg_seq,
1564                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1565                                         rcu_read_unlock();
1566                                         goto done;
1567                                 }
1568                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1569                         }
1570 cont:
1571                         idx++;
1572                 }
1573                 rcu_read_unlock();
1574         }
1575
1576 done:
1577         cb->args[0] = h;
1578         cb->args[1] = idx;
1579         cb->args[2] = ip_idx;
1580
1581         return skb->len;
1582 }
1583
1584 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1585                       u32 portid)
1586 {
1587         struct sk_buff *skb;
1588         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1589         int err = -ENOBUFS;
1590         struct net *net;
1591
1592         net = dev_net(ifa->ifa_dev->dev);
1593         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1594         if (skb == NULL)
1595                 goto errout;
1596
1597         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1598         if (err < 0) {
1599                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1600                 WARN_ON(err == -EMSGSIZE);
1601                 kfree_skb(skb);
1602                 goto errout;
1603         }
1604         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1605         return;
1606 errout:
1607         if (err < 0)
1608                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1609 }
1610
1611 static size_t inet_get_link_af_size(const struct net_device *dev)
1612 {
1613         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1614
1615         if (!in_dev)
1616                 return 0;
1617
1618         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1619 }
1620
1621 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1622 {
1623         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1624         struct nlattr *nla;
1625         int i;
1626
1627         if (!in_dev)
1628                 return -ENODATA;
1629
1630         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1631         if (nla == NULL)
1632                 return -EMSGSIZE;
1633
1634         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1635                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1636
1637         return 0;
1638 }
1639
1640 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1641         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1642 };
1643
1644 static int inet_validate_link_af(const struct net_device *dev,
1645                                  const struct nlattr *nla)
1646 {
1647         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1648         int err, rem;
1649
1650         if (dev && !__in_dev_get_rtnl(dev))
1651                 return -EAFNOSUPPORT;
1652
1653         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1654         if (err < 0)
1655                 return err;
1656
1657         if (tb[IFLA_INET_CONF]) {
1658                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1659                         int cfgid = nla_type(a);
1660
1661                         if (nla_len(a) < 4)
1662                                 return -EINVAL;
1663
1664                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1665                                 return -EINVAL;
1666                 }
1667         }
1668
1669         return 0;
1670 }
1671
1672 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1673 {
1674         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1675         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1676         int rem;
1677
1678         if (!in_dev)
1679                 return -EAFNOSUPPORT;
1680
1681         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1682                 BUG();
1683
1684         if (tb[IFLA_INET_CONF]) {
1685                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1686                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1687         }
1688
1689         return 0;
1690 }
1691
1692 static int inet_netconf_msgsize_devconf(int type)
1693 {
1694         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1695                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1696
1697         /* type -1 is used for ALL */
1698         if (type == -1 || type == NETCONFA_FORWARDING)
1699                 size += nla_total_size(4);
1700         if (type == -1 || type == NETCONFA_RP_FILTER)
1701                 size += nla_total_size(4);
1702         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1703                 size += nla_total_size(4);
1704         if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1705                 size += nla_total_size(4);
1706
1707         return size;
1708 }
1709
1710 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1711                                      struct ipv4_devconf *devconf, u32 portid,
1712                                      u32 seq, int event, unsigned int flags,
1713                                      int type)
1714 {
1715         struct nlmsghdr  *nlh;
1716         struct netconfmsg *ncm;
1717
1718         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1719                         flags);
1720         if (nlh == NULL)
1721                 return -EMSGSIZE;
1722
1723         ncm = nlmsg_data(nlh);
1724         ncm->ncm_family = AF_INET;
1725
1726         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1727                 goto nla_put_failure;
1728
1729         /* type -1 is used for ALL */
1730         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1731             nla_put_s32(skb, NETCONFA_FORWARDING,
1732                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1733                 goto nla_put_failure;
1734         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1735             nla_put_s32(skb, NETCONFA_RP_FILTER,
1736                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1737                 goto nla_put_failure;
1738         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1739             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1740                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1741                 goto nla_put_failure;
1742         if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1743             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1744                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1745                 goto nla_put_failure;
1746
1747         return nlmsg_end(skb, nlh);
1748
1749 nla_put_failure:
1750         nlmsg_cancel(skb, nlh);
1751         return -EMSGSIZE;
1752 }
1753
1754 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1755                                  struct ipv4_devconf *devconf)
1756 {
1757         struct sk_buff *skb;
1758         int err = -ENOBUFS;
1759
1760         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1761         if (skb == NULL)
1762                 goto errout;
1763
1764         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1765                                         RTM_NEWNETCONF, 0, type);
1766         if (err < 0) {
1767                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1768                 WARN_ON(err == -EMSGSIZE);
1769                 kfree_skb(skb);
1770                 goto errout;
1771         }
1772         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1773         return;
1774 errout:
1775         if (err < 0)
1776                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1777 }
1778
1779 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1780         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1781         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1782         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1783         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1784 };
1785
1786 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1787                                     struct nlmsghdr *nlh)
1788 {
1789         struct net *net = sock_net(in_skb->sk);
1790         struct nlattr *tb[NETCONFA_MAX+1];
1791         struct netconfmsg *ncm;
1792         struct sk_buff *skb;
1793         struct ipv4_devconf *devconf;
1794         struct in_device *in_dev;
1795         struct net_device *dev;
1796         int ifindex;
1797         int err;
1798
1799         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1800                           devconf_ipv4_policy);
1801         if (err < 0)
1802                 goto errout;
1803
1804         err = EINVAL;
1805         if (!tb[NETCONFA_IFINDEX])
1806                 goto errout;
1807
1808         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1809         switch (ifindex) {
1810         case NETCONFA_IFINDEX_ALL:
1811                 devconf = net->ipv4.devconf_all;
1812                 break;
1813         case NETCONFA_IFINDEX_DEFAULT:
1814                 devconf = net->ipv4.devconf_dflt;
1815                 break;
1816         default:
1817                 dev = __dev_get_by_index(net, ifindex);
1818                 if (dev == NULL)
1819                         goto errout;
1820                 in_dev = __in_dev_get_rtnl(dev);
1821                 if (in_dev == NULL)
1822                         goto errout;
1823                 devconf = &in_dev->cnf;
1824                 break;
1825         }
1826
1827         err = -ENOBUFS;
1828         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1829         if (skb == NULL)
1830                 goto errout;
1831
1832         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1833                                         NETLINK_CB(in_skb).portid,
1834                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1835                                         -1);
1836         if (err < 0) {
1837                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1838                 WARN_ON(err == -EMSGSIZE);
1839                 kfree_skb(skb);
1840                 goto errout;
1841         }
1842         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1843 errout:
1844         return err;
1845 }
1846
1847 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1848                                      struct netlink_callback *cb)
1849 {
1850         struct net *net = sock_net(skb->sk);
1851         int h, s_h;
1852         int idx, s_idx;
1853         struct net_device *dev;
1854         struct in_device *in_dev;
1855         struct hlist_head *head;
1856
1857         s_h = cb->args[0];
1858         s_idx = idx = cb->args[1];
1859
1860         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1861                 idx = 0;
1862                 head = &net->dev_index_head[h];
1863                 rcu_read_lock();
1864                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1865                           net->dev_base_seq;
1866                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1867                         if (idx < s_idx)
1868                                 goto cont;
1869                         in_dev = __in_dev_get_rcu(dev);
1870                         if (!in_dev)
1871                                 goto cont;
1872
1873                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1874                                                       &in_dev->cnf,
1875                                                       NETLINK_CB(cb->skb).portid,
1876                                                       cb->nlh->nlmsg_seq,
1877                                                       RTM_NEWNETCONF,
1878                                                       NLM_F_MULTI,
1879                                                       -1) <= 0) {
1880                                 rcu_read_unlock();
1881                                 goto done;
1882                         }
1883                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1884 cont:
1885                         idx++;
1886                 }
1887                 rcu_read_unlock();
1888         }
1889         if (h == NETDEV_HASHENTRIES) {
1890                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1891                                               net->ipv4.devconf_all,
1892                                               NETLINK_CB(cb->skb).portid,
1893                                               cb->nlh->nlmsg_seq,
1894                                               RTM_NEWNETCONF, NLM_F_MULTI,
1895                                               -1) <= 0)
1896                         goto done;
1897                 else
1898                         h++;
1899         }
1900         if (h == NETDEV_HASHENTRIES + 1) {
1901                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1902                                               net->ipv4.devconf_dflt,
1903                                               NETLINK_CB(cb->skb).portid,
1904                                               cb->nlh->nlmsg_seq,
1905                                               RTM_NEWNETCONF, NLM_F_MULTI,
1906                                               -1) <= 0)
1907                         goto done;
1908                 else
1909                         h++;
1910         }
1911 done:
1912         cb->args[0] = h;
1913         cb->args[1] = idx;
1914
1915         return skb->len;
1916 }
1917
1918 #ifdef CONFIG_SYSCTL
1919
1920 static void devinet_copy_dflt_conf(struct net *net, int i)
1921 {
1922         struct net_device *dev;
1923
1924         rcu_read_lock();
1925         for_each_netdev_rcu(net, dev) {
1926                 struct in_device *in_dev;
1927
1928                 in_dev = __in_dev_get_rcu(dev);
1929                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1930                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1931         }
1932         rcu_read_unlock();
1933 }
1934
1935 /* called with RTNL locked */
1936 static void inet_forward_change(struct net *net)
1937 {
1938         struct net_device *dev;
1939         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1940
1941         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1942         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1943         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1944                                     NETCONFA_IFINDEX_ALL,
1945                                     net->ipv4.devconf_all);
1946         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1947                                     NETCONFA_IFINDEX_DEFAULT,
1948                                     net->ipv4.devconf_dflt);
1949
1950         for_each_netdev(net, dev) {
1951                 struct in_device *in_dev;
1952                 if (on)
1953                         dev_disable_lro(dev);
1954                 rcu_read_lock();
1955                 in_dev = __in_dev_get_rcu(dev);
1956                 if (in_dev) {
1957                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1958                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1959                                                     dev->ifindex, &in_dev->cnf);
1960                 }
1961                 rcu_read_unlock();
1962         }
1963 }
1964
1965 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1966 {
1967         if (cnf == net->ipv4.devconf_dflt)
1968                 return NETCONFA_IFINDEX_DEFAULT;
1969         else if (cnf == net->ipv4.devconf_all)
1970                 return NETCONFA_IFINDEX_ALL;
1971         else {
1972                 struct in_device *idev
1973                         = container_of(cnf, struct in_device, cnf);
1974                 return idev->dev->ifindex;
1975         }
1976 }
1977
1978 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1979                              void __user *buffer,
1980                              size_t *lenp, loff_t *ppos)
1981 {
1982         int old_value = *(int *)ctl->data;
1983         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1984         int new_value = *(int *)ctl->data;
1985
1986         if (write) {
1987                 struct ipv4_devconf *cnf = ctl->extra1;
1988                 struct net *net = ctl->extra2;
1989                 int i = (int *)ctl->data - cnf->data;
1990                 int ifindex;
1991
1992                 set_bit(i, cnf->state);
1993
1994                 if (cnf == net->ipv4.devconf_dflt)
1995                         devinet_copy_dflt_conf(net, i);
1996                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1997                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1998                         if ((new_value == 0) && (old_value != 0))
1999                                 rt_cache_flush(net);
2000
2001                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2002                     new_value != old_value) {
2003                         ifindex = devinet_conf_ifindex(net, cnf);
2004                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2005                                                     ifindex, cnf);
2006                 }
2007                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2008                     new_value != old_value) {
2009                         ifindex = devinet_conf_ifindex(net, cnf);
2010                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2011                                                     ifindex, cnf);
2012                 }
2013         }
2014
2015         return ret;
2016 }
2017
2018 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2019                                   void __user *buffer,
2020                                   size_t *lenp, loff_t *ppos)
2021 {
2022         int *valp = ctl->data;
2023         int val = *valp;
2024         loff_t pos = *ppos;
2025         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2026
2027         if (write && *valp != val) {
2028                 struct net *net = ctl->extra2;
2029
2030                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2031                         if (!rtnl_trylock()) {
2032                                 /* Restore the original values before restarting */
2033                                 *valp = val;
2034                                 *ppos = pos;
2035                                 return restart_syscall();
2036                         }
2037                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2038                                 inet_forward_change(net);
2039                         } else {
2040                                 struct ipv4_devconf *cnf = ctl->extra1;
2041                                 struct in_device *idev =
2042                                         container_of(cnf, struct in_device, cnf);
2043                                 if (*valp)
2044                                         dev_disable_lro(idev->dev);
2045                                 inet_netconf_notify_devconf(net,
2046                                                             NETCONFA_FORWARDING,
2047                                                             idev->dev->ifindex,
2048                                                             cnf);
2049                         }
2050                         rtnl_unlock();
2051                         rt_cache_flush(net);
2052                 } else
2053                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2054                                                     NETCONFA_IFINDEX_DEFAULT,
2055                                                     net->ipv4.devconf_dflt);
2056         }
2057
2058         return ret;
2059 }
2060
2061 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2062                                 void __user *buffer,
2063                                 size_t *lenp, loff_t *ppos)
2064 {
2065         int *valp = ctl->data;
2066         int val = *valp;
2067         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2068         struct net *net = ctl->extra2;
2069
2070         if (write && *valp != val)
2071                 rt_cache_flush(net);
2072
2073         return ret;
2074 }
2075
/*
 * Helpers for building the devinet sysctl template.
 *
 * DEVINET_SYSCTL_ENTRY() emits one ctl_table initializer whose data
 * pointer refers to slot IPV4_DEVCONF_<id> - 1 of the global
 * ipv4_devconf and whose extra1 points at ipv4_devconf itself.  The
 * remaining macros only fix the mode and/or the handler.
 */
#define DEVINET_SYSCTL_ENTRY(id, fname, perm, handler) \
	{ \
		.procname	= fname, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## id - 1], \
		.maxlen		= sizeof(int), \
		.mode		= perm, \
		.proc_handler	= handler, \
		.extra1		= &ipv4_devconf, \
	}

/* read-write entry served by devinet_conf_proc */
#define DEVINET_SYSCTL_RW_ENTRY(id, fname) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0644, devinet_conf_proc)

/* read-only entry served by devinet_conf_proc */
#define DEVINET_SYSCTL_RO_ENTRY(id, fname) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0444, devinet_conf_proc)

/* read-write entry with a caller-supplied handler */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(id, fname, handler) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0644, handler)

/* read-write entry served by ipv4_doint_and_flush */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(id, fname) \
	DEVINET_SYSCTL_ENTRY(id, fname, 0644, ipv4_doint_and_flush)
2098
2099 static struct devinet_sysctl_table {
2100         struct ctl_table_header *sysctl_header;
2101         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2102 } devinet_sysctl = {
2103         .devinet_vars = {
2104                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2105                                              devinet_sysctl_forward),
2106                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2107
2108                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2109                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2110                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2111                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2112                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2113                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2114                                         "accept_source_route"),
2115                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2116                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2117                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2118                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2119                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2120                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2121                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2122                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2123                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2124                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2125                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2126                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2127                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2128                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2129                                         "force_igmp_version"),
2130                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2131                                         "igmpv2_unsolicited_report_interval"),
2132                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2133                                         "igmpv3_unsolicited_report_interval"),
2134
2135                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2136                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2137                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2138                                               "promote_secondaries"),
2139                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2140                                               "route_localnet"),
2141         },
2142 };
2143
2144 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2145                                         struct ipv4_devconf *p)
2146 {
2147         int i;
2148         struct devinet_sysctl_table *t;
2149         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2150
2151         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2152         if (!t)
2153                 goto out;
2154
2155         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2156                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2157                 t->devinet_vars[i].extra1 = p;
2158                 t->devinet_vars[i].extra2 = net;
2159         }
2160
2161         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2162
2163         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2164         if (!t->sysctl_header)
2165                 goto free;
2166
2167         p->sysctl = t;
2168         return 0;
2169
2170 free:
2171         kfree(t);
2172 out:
2173         return -ENOBUFS;
2174 }
2175
2176 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2177 {
2178         struct devinet_sysctl_table *t = cnf->sysctl;
2179
2180         if (t == NULL)
2181                 return;
2182
2183         cnf->sysctl = NULL;
2184         unregister_net_sysctl_table(t->sysctl_header);
2185         kfree(t);
2186 }
2187
2188 static void devinet_sysctl_register(struct in_device *idev)
2189 {
2190         neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2191         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2192                                         &idev->cnf);
2193 }
2194
2195 static void devinet_sysctl_unregister(struct in_device *idev)
2196 {
2197         __devinet_sysctl_unregister(&idev->cnf);
2198         neigh_sysctl_unregister(idev->arp_parms);
2199 }
2200
2201 static struct ctl_table ctl_forward_entry[] = {
2202         {
2203                 .procname       = "ip_forward",
2204                 .data           = &ipv4_devconf.data[
2205                                         IPV4_DEVCONF_FORWARDING - 1],
2206                 .maxlen         = sizeof(int),
2207                 .mode           = 0644,
2208                 .proc_handler   = devinet_sysctl_forward,
2209                 .extra1         = &ipv4_devconf,
2210                 .extra2         = &init_net,
2211         },
2212         { },
2213 };
2214 #endif
2215
2216 static __net_init int devinet_init_net(struct net *net)
2217 {
2218         int err;
2219         struct ipv4_devconf *all, *dflt;
2220 #ifdef CONFIG_SYSCTL
2221         struct ctl_table *tbl = ctl_forward_entry;
2222         struct ctl_table_header *forw_hdr;
2223 #endif
2224
2225         err = -ENOMEM;
2226         all = &ipv4_devconf;
2227         dflt = &ipv4_devconf_dflt;
2228
2229         if (!net_eq(net, &init_net)) {
2230                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2231                 if (all == NULL)
2232                         goto err_alloc_all;
2233
2234                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2235                 if (dflt == NULL)
2236                         goto err_alloc_dflt;
2237
2238 #ifdef CONFIG_SYSCTL
2239                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2240                 if (tbl == NULL)
2241                         goto err_alloc_ctl;
2242
2243                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2244                 tbl[0].extra1 = all;
2245                 tbl[0].extra2 = net;
2246 #endif
2247         }
2248
2249 #ifdef CONFIG_SYSCTL
2250         err = __devinet_sysctl_register(net, "all", all);
2251         if (err < 0)
2252                 goto err_reg_all;
2253
2254         err = __devinet_sysctl_register(net, "default", dflt);
2255         if (err < 0)
2256                 goto err_reg_dflt;
2257
2258         err = -ENOMEM;
2259         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2260         if (forw_hdr == NULL)
2261                 goto err_reg_ctl;
2262         net->ipv4.forw_hdr = forw_hdr;
2263 #endif
2264
2265         net->ipv4.devconf_all = all;
2266         net->ipv4.devconf_dflt = dflt;
2267         return 0;
2268
2269 #ifdef CONFIG_SYSCTL
2270 err_reg_ctl:
2271         __devinet_sysctl_unregister(dflt);
2272 err_reg_dflt:
2273         __devinet_sysctl_unregister(all);
2274 err_reg_all:
2275         if (tbl != ctl_forward_entry)
2276                 kfree(tbl);
2277 err_alloc_ctl:
2278 #endif
2279         if (dflt != &ipv4_devconf_dflt)
2280                 kfree(dflt);
2281 err_alloc_dflt:
2282         if (all != &ipv4_devconf)
2283                 kfree(all);
2284 err_alloc_all:
2285         return err;
2286 }
2287
2288 static __net_exit void devinet_exit_net(struct net *net)
2289 {
2290 #ifdef CONFIG_SYSCTL
2291         struct ctl_table *tbl;
2292
2293         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2294         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2295         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2296         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2297         kfree(tbl);
2298 #endif
2299         kfree(net->ipv4.devconf_dflt);
2300         kfree(net->ipv4.devconf_all);
2301 }
2302
2303 static __net_initdata struct pernet_operations devinet_ops = {
2304         .init = devinet_init_net,
2305         .exit = devinet_exit_net,
2306 };
2307
2308 static struct rtnl_af_ops inet_af_ops = {
2309         .family           = AF_INET,
2310         .fill_link_af     = inet_fill_link_af,
2311         .get_link_af_size = inet_get_link_af_size,
2312         .validate_link_af = inet_validate_link_af,
2313         .set_link_af      = inet_set_link_af,
2314 };
2315
2316 void __init devinet_init(void)
2317 {
2318         int i;
2319
2320         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2321                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2322
2323         register_pernet_subsys(&devinet_ops);
2324
2325         register_gifconf(PF_INET, inet_gifconf);
2326         register_netdevice_notifier(&ip_netdev_notifier);
2327
2328         queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2329
2330         rtnl_af_register(&inet_af_ops);
2331
2332         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2333         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2334         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2335         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2336                       inet_netconf_dump_devconf, NULL);
2337 }
2338