]> Pileus Git - ~andy/linux/blob - net/ipv4/devinet.c
9809f7b69728cb335e0963c92ea4c9522842849f
[~andy/linux] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78         },
79 };
80
81 static struct ipv4_devconf ipv4_devconf_dflt = {
82         .data = {
83                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
85                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
86                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
87                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
88                 [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
89                 [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
90         },
91 };
92
/*
 * Expands to IPV4_DEVCONF() applied to the configuration block that
 * net->ipv4.devconf_dflt points at.
 *
 * @net:  pointer-valued expression naming the namespace; it is
 *        parenthesized in the expansion so that compound expressions
 *        (e.g. "p + 1" or "a ? b : c") bind correctly before "->".
 * @attr: attribute name, passed through to IPV4_DEVCONF() unchanged.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
95
96 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
97         [IFA_LOCAL]             = { .type = NLA_U32 },
98         [IFA_ADDRESS]           = { .type = NLA_U32 },
99         [IFA_BROADCAST]         = { .type = NLA_U32 },
100         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
101         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
102         [IFA_FLAGS]             = { .type = NLA_U32 },
103 };
104
105 #define IN4_ADDR_HSIZE_SHIFT    8
106 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
107
108 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
109 static DEFINE_SPINLOCK(inet_addr_hash_lock);
110
111 static u32 inet_addr_hash(struct net *net, __be32 addr)
112 {
113         u32 val = (__force u32) addr ^ net_hash_mix(net);
114
115         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
116 }
117
118 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
119 {
120         u32 hash = inet_addr_hash(net, ifa->ifa_local);
121
122         spin_lock(&inet_addr_hash_lock);
123         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
124         spin_unlock(&inet_addr_hash_lock);
125 }
126
127 static void inet_hash_remove(struct in_ifaddr *ifa)
128 {
129         spin_lock(&inet_addr_hash_lock);
130         hlist_del_init_rcu(&ifa->hash);
131         spin_unlock(&inet_addr_hash_lock);
132 }
133
134 /**
135  * __ip_dev_find - find the first device with a given source address.
136  * @net: the net namespace
137  * @addr: the source address
138  * @devref: if true, take a reference on the found device
139  *
140  * If a caller uses devref=false, it should be protected by RCU, or RTNL
141  */
142 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
143 {
144         u32 hash = inet_addr_hash(net, addr);
145         struct net_device *result = NULL;
146         struct in_ifaddr *ifa;
147
148         rcu_read_lock();
149         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
150                 if (ifa->ifa_local == addr) {
151                         struct net_device *dev = ifa->ifa_dev->dev;
152
153                         if (!net_eq(dev_net(dev), net))
154                                 continue;
155                         result = dev;
156                         break;
157                 }
158         }
159         if (!result) {
160                 struct flowi4 fl4 = { .daddr = addr };
161                 struct fib_result res = { 0 };
162                 struct fib_table *local;
163
164                 /* Fallback to FIB local table so that communication
165                  * over loopback subnets work.
166                  */
167                 local = fib_get_table(net, RT_TABLE_LOCAL);
168                 if (local &&
169                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
170                     res.type == RTN_LOCAL)
171                         result = FIB_RES_DEV(res);
172         }
173         if (result && devref)
174                 dev_hold(result);
175         rcu_read_unlock();
176         return result;
177 }
178 EXPORT_SYMBOL(__ip_dev_find);
179
180 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
181
182 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
183 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
184                          int destroy);
185 #ifdef CONFIG_SYSCTL
186 static void devinet_sysctl_register(struct in_device *idev);
187 static void devinet_sysctl_unregister(struct in_device *idev);
188 #else
189 static void devinet_sysctl_register(struct in_device *idev)
190 {
191 }
192 static void devinet_sysctl_unregister(struct in_device *idev)
193 {
194 }
195 #endif
196
197 /* Locks all the inet devices. */
198
199 static struct in_ifaddr *inet_alloc_ifa(void)
200 {
201         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
202 }
203
204 static void inet_rcu_free_ifa(struct rcu_head *head)
205 {
206         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
207         if (ifa->ifa_dev)
208                 in_dev_put(ifa->ifa_dev);
209         kfree(ifa);
210 }
211
212 static void inet_free_ifa(struct in_ifaddr *ifa)
213 {
214         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
215 }
216
217 void in_dev_finish_destroy(struct in_device *idev)
218 {
219         struct net_device *dev = idev->dev;
220
221         WARN_ON(idev->ifa_list);
222         WARN_ON(idev->mc_list);
223         kfree(rcu_dereference_protected(idev->mc_hash, 1));
224 #ifdef NET_REFCNT_DEBUG
225         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
226 #endif
227         dev_put(dev);
228         if (!idev->dead)
229                 pr_err("Freeing alive in_device %p\n", idev);
230         else
231                 kfree(idev);
232 }
233 EXPORT_SYMBOL(in_dev_finish_destroy);
234
235 static struct in_device *inetdev_init(struct net_device *dev)
236 {
237         struct in_device *in_dev;
238
239         ASSERT_RTNL();
240
241         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
242         if (!in_dev)
243                 goto out;
244         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
245                         sizeof(in_dev->cnf));
246         in_dev->cnf.sysctl = NULL;
247         in_dev->dev = dev;
248         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
249         if (!in_dev->arp_parms)
250                 goto out_kfree;
251         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
252                 dev_disable_lro(dev);
253         /* Reference in_dev->dev */
254         dev_hold(dev);
255         /* Account for reference dev->ip_ptr (below) */
256         in_dev_hold(in_dev);
257
258         devinet_sysctl_register(in_dev);
259         ip_mc_init_dev(in_dev);
260         if (dev->flags & IFF_UP)
261                 ip_mc_up(in_dev);
262
263         /* we can receive as soon as ip_ptr is set -- do this last */
264         rcu_assign_pointer(dev->ip_ptr, in_dev);
265 out:
266         return in_dev;
267 out_kfree:
268         kfree(in_dev);
269         in_dev = NULL;
270         goto out;
271 }
272
273 static void in_dev_rcu_put(struct rcu_head *head)
274 {
275         struct in_device *idev = container_of(head, struct in_device, rcu_head);
276         in_dev_put(idev);
277 }
278
279 static void inetdev_destroy(struct in_device *in_dev)
280 {
281         struct in_ifaddr *ifa;
282         struct net_device *dev;
283
284         ASSERT_RTNL();
285
286         dev = in_dev->dev;
287
288         in_dev->dead = 1;
289
290         ip_mc_destroy_dev(in_dev);
291
292         while ((ifa = in_dev->ifa_list) != NULL) {
293                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
294                 inet_free_ifa(ifa);
295         }
296
297         RCU_INIT_POINTER(dev->ip_ptr, NULL);
298
299         devinet_sysctl_unregister(in_dev);
300         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
301         arp_ifdown(dev);
302
303         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
304 }
305
306 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
307 {
308         rcu_read_lock();
309         for_primary_ifa(in_dev) {
310                 if (inet_ifa_match(a, ifa)) {
311                         if (!b || inet_ifa_match(b, ifa)) {
312                                 rcu_read_unlock();
313                                 return 1;
314                         }
315                 }
316         } endfor_ifa(in_dev);
317         rcu_read_unlock();
318         return 0;
319 }
320
321 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
322                          int destroy, struct nlmsghdr *nlh, u32 portid)
323 {
324         struct in_ifaddr *promote = NULL;
325         struct in_ifaddr *ifa, *ifa1 = *ifap;
326         struct in_ifaddr *last_prim = in_dev->ifa_list;
327         struct in_ifaddr *prev_prom = NULL;
328         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
329
330         ASSERT_RTNL();
331
332         /* 1. Deleting primary ifaddr forces deletion all secondaries
333          * unless alias promotion is set
334          **/
335
336         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
337                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
338
339                 while ((ifa = *ifap1) != NULL) {
340                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
341                             ifa1->ifa_scope <= ifa->ifa_scope)
342                                 last_prim = ifa;
343
344                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
345                             ifa1->ifa_mask != ifa->ifa_mask ||
346                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
347                                 ifap1 = &ifa->ifa_next;
348                                 prev_prom = ifa;
349                                 continue;
350                         }
351
352                         if (!do_promote) {
353                                 inet_hash_remove(ifa);
354                                 *ifap1 = ifa->ifa_next;
355
356                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
357                                 blocking_notifier_call_chain(&inetaddr_chain,
358                                                 NETDEV_DOWN, ifa);
359                                 inet_free_ifa(ifa);
360                         } else {
361                                 promote = ifa;
362                                 break;
363                         }
364                 }
365         }
366
367         /* On promotion all secondaries from subnet are changing
368          * the primary IP, we must remove all their routes silently
369          * and later to add them back with new prefsrc. Do this
370          * while all addresses are on the device list.
371          */
372         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
373                 if (ifa1->ifa_mask == ifa->ifa_mask &&
374                     inet_ifa_match(ifa1->ifa_address, ifa))
375                         fib_del_ifaddr(ifa, ifa1);
376         }
377
378         /* 2. Unlink it */
379
380         *ifap = ifa1->ifa_next;
381         inet_hash_remove(ifa1);
382
383         /* 3. Announce address deletion */
384
385         /* Send message first, then call notifier.
386            At first sight, FIB update triggered by notifier
387            will refer to already deleted ifaddr, that could confuse
388            netlink listeners. It is not true: look, gated sees
389            that route deleted and if it still thinks that ifaddr
390            is valid, it will try to restore deleted routes... Grr.
391            So that, this order is correct.
392          */
393         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
394         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
395
396         if (promote) {
397                 struct in_ifaddr *next_sec = promote->ifa_next;
398
399                 if (prev_prom) {
400                         prev_prom->ifa_next = promote->ifa_next;
401                         promote->ifa_next = last_prim->ifa_next;
402                         last_prim->ifa_next = promote;
403                 }
404
405                 promote->ifa_flags &= ~IFA_F_SECONDARY;
406                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
407                 blocking_notifier_call_chain(&inetaddr_chain,
408                                 NETDEV_UP, promote);
409                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
410                         if (ifa1->ifa_mask != ifa->ifa_mask ||
411                             !inet_ifa_match(ifa1->ifa_address, ifa))
412                                         continue;
413                         fib_add_ifaddr(ifa);
414                 }
415
416         }
417         if (destroy)
418                 inet_free_ifa(ifa1);
419 }
420
421 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
422                          int destroy)
423 {
424         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
425 }
426
/* Delayed work item that runs check_lifetime(), defined further down. */
static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
430
431 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
432                              u32 portid)
433 {
434         struct in_device *in_dev = ifa->ifa_dev;
435         struct in_ifaddr *ifa1, **ifap, **last_primary;
436
437         ASSERT_RTNL();
438
439         if (!ifa->ifa_local) {
440                 inet_free_ifa(ifa);
441                 return 0;
442         }
443
444         ifa->ifa_flags &= ~IFA_F_SECONDARY;
445         last_primary = &in_dev->ifa_list;
446
447         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
448              ifap = &ifa1->ifa_next) {
449                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
450                     ifa->ifa_scope <= ifa1->ifa_scope)
451                         last_primary = &ifa1->ifa_next;
452                 if (ifa1->ifa_mask == ifa->ifa_mask &&
453                     inet_ifa_match(ifa1->ifa_address, ifa)) {
454                         if (ifa1->ifa_local == ifa->ifa_local) {
455                                 inet_free_ifa(ifa);
456                                 return -EEXIST;
457                         }
458                         if (ifa1->ifa_scope != ifa->ifa_scope) {
459                                 inet_free_ifa(ifa);
460                                 return -EINVAL;
461                         }
462                         ifa->ifa_flags |= IFA_F_SECONDARY;
463                 }
464         }
465
466         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
467                 net_srandom(ifa->ifa_local);
468                 ifap = last_primary;
469         }
470
471         ifa->ifa_next = *ifap;
472         *ifap = ifa;
473
474         inet_hash_insert(dev_net(in_dev->dev), ifa);
475
476         cancel_delayed_work(&check_lifetime_work);
477         schedule_delayed_work(&check_lifetime_work, 0);
478
479         /* Send message first, then call notifier.
480            Notifier will trigger FIB update, so that
481            listeners of netlink will know about new ifaddr */
482         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
483         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
484
485         return 0;
486 }
487
488 static int inet_insert_ifa(struct in_ifaddr *ifa)
489 {
490         return __inet_insert_ifa(ifa, NULL, 0);
491 }
492
493 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
494 {
495         struct in_device *in_dev = __in_dev_get_rtnl(dev);
496
497         ASSERT_RTNL();
498
499         if (!in_dev) {
500                 inet_free_ifa(ifa);
501                 return -ENOBUFS;
502         }
503         ipv4_devconf_setall(in_dev);
504         neigh_parms_data_state_setall(in_dev->arp_parms);
505         if (ifa->ifa_dev != in_dev) {
506                 WARN_ON(ifa->ifa_dev);
507                 in_dev_hold(in_dev);
508                 ifa->ifa_dev = in_dev;
509         }
510         if (ipv4_is_loopback(ifa->ifa_local))
511                 ifa->ifa_scope = RT_SCOPE_HOST;
512         return inet_insert_ifa(ifa);
513 }
514
515 /* Caller must hold RCU or RTNL :
516  * We dont take a reference on found in_device
517  */
518 struct in_device *inetdev_by_index(struct net *net, int ifindex)
519 {
520         struct net_device *dev;
521         struct in_device *in_dev = NULL;
522
523         rcu_read_lock();
524         dev = dev_get_by_index_rcu(net, ifindex);
525         if (dev)
526                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
527         rcu_read_unlock();
528         return in_dev;
529 }
530 EXPORT_SYMBOL(inetdev_by_index);
531
532 /* Called only from RTNL semaphored context. No locks. */
533
534 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
535                                     __be32 mask)
536 {
537         ASSERT_RTNL();
538
539         for_primary_ifa(in_dev) {
540                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
541                         return ifa;
542         } endfor_ifa(in_dev);
543         return NULL;
544 }
545
546 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
547 {
548         struct net *net = sock_net(skb->sk);
549         struct nlattr *tb[IFA_MAX+1];
550         struct in_device *in_dev;
551         struct ifaddrmsg *ifm;
552         struct in_ifaddr *ifa, **ifap;
553         int err = -EINVAL;
554
555         ASSERT_RTNL();
556
557         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
558         if (err < 0)
559                 goto errout;
560
561         ifm = nlmsg_data(nlh);
562         in_dev = inetdev_by_index(net, ifm->ifa_index);
563         if (in_dev == NULL) {
564                 err = -ENODEV;
565                 goto errout;
566         }
567
568         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
569              ifap = &ifa->ifa_next) {
570                 if (tb[IFA_LOCAL] &&
571                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
572                         continue;
573
574                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
575                         continue;
576
577                 if (tb[IFA_ADDRESS] &&
578                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
579                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
580                         continue;
581
582                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
583                 return 0;
584         }
585
586         err = -EADDRNOTAVAIL;
587 errout:
588         return err;
589 }
590
/* Lifetime value that stands for "never expires" in this file. */
#define INFINITY_LIFE_TIME	0xFFFFFFFF
592
593 static void check_lifetime(struct work_struct *work)
594 {
595         unsigned long now, next, next_sec, next_sched;
596         struct in_ifaddr *ifa;
597         struct hlist_node *n;
598         int i;
599
600         now = jiffies;
601         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
602
603         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
604                 bool change_needed = false;
605
606                 rcu_read_lock();
607                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
608                         unsigned long age;
609
610                         if (ifa->ifa_flags & IFA_F_PERMANENT)
611                                 continue;
612
613                         /* We try to batch several events at once. */
614                         age = (now - ifa->ifa_tstamp +
615                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
616
617                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
618                             age >= ifa->ifa_valid_lft) {
619                                 change_needed = true;
620                         } else if (ifa->ifa_preferred_lft ==
621                                    INFINITY_LIFE_TIME) {
622                                 continue;
623                         } else if (age >= ifa->ifa_preferred_lft) {
624                                 if (time_before(ifa->ifa_tstamp +
625                                                 ifa->ifa_valid_lft * HZ, next))
626                                         next = ifa->ifa_tstamp +
627                                                ifa->ifa_valid_lft * HZ;
628
629                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
630                                         change_needed = true;
631                         } else if (time_before(ifa->ifa_tstamp +
632                                                ifa->ifa_preferred_lft * HZ,
633                                                next)) {
634                                 next = ifa->ifa_tstamp +
635                                        ifa->ifa_preferred_lft * HZ;
636                         }
637                 }
638                 rcu_read_unlock();
639                 if (!change_needed)
640                         continue;
641                 rtnl_lock();
642                 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
643                         unsigned long age;
644
645                         if (ifa->ifa_flags & IFA_F_PERMANENT)
646                                 continue;
647
648                         /* We try to batch several events at once. */
649                         age = (now - ifa->ifa_tstamp +
650                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
651
652                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
653                             age >= ifa->ifa_valid_lft) {
654                                 struct in_ifaddr **ifap;
655
656                                 for (ifap = &ifa->ifa_dev->ifa_list;
657                                      *ifap != NULL; ifap = &(*ifap)->ifa_next) {
658                                         if (*ifap == ifa) {
659                                                 inet_del_ifa(ifa->ifa_dev,
660                                                              ifap, 1);
661                                                 break;
662                                         }
663                                 }
664                         } else if (ifa->ifa_preferred_lft !=
665                                    INFINITY_LIFE_TIME &&
666                                    age >= ifa->ifa_preferred_lft &&
667                                    !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
668                                 ifa->ifa_flags |= IFA_F_DEPRECATED;
669                                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
670                         }
671                 }
672                 rtnl_unlock();
673         }
674
675         next_sec = round_jiffies_up(next);
676         next_sched = next;
677
678         /* If rounded timeout is accurate enough, accept it. */
679         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
680                 next_sched = next_sec;
681
682         now = jiffies;
683         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
684         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
685                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
686
687         schedule_delayed_work(&check_lifetime_work, next_sched - now);
688 }
689
690 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
691                              __u32 prefered_lft)
692 {
693         unsigned long timeout;
694
695         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
696
697         timeout = addrconf_timeout_fixup(valid_lft, HZ);
698         if (addrconf_finite_timeout(timeout))
699                 ifa->ifa_valid_lft = timeout;
700         else
701                 ifa->ifa_flags |= IFA_F_PERMANENT;
702
703         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
704         if (addrconf_finite_timeout(timeout)) {
705                 if (timeout == 0)
706                         ifa->ifa_flags |= IFA_F_DEPRECATED;
707                 ifa->ifa_preferred_lft = timeout;
708         }
709         ifa->ifa_tstamp = jiffies;
710         if (!ifa->ifa_cstamp)
711                 ifa->ifa_cstamp = ifa->ifa_tstamp;
712 }
713
714 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
715                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
716 {
717         struct nlattr *tb[IFA_MAX+1];
718         struct in_ifaddr *ifa;
719         struct ifaddrmsg *ifm;
720         struct net_device *dev;
721         struct in_device *in_dev;
722         int err;
723
724         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
725         if (err < 0)
726                 goto errout;
727
728         ifm = nlmsg_data(nlh);
729         err = -EINVAL;
730         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
731                 goto errout;
732
733         dev = __dev_get_by_index(net, ifm->ifa_index);
734         err = -ENODEV;
735         if (dev == NULL)
736                 goto errout;
737
738         in_dev = __in_dev_get_rtnl(dev);
739         err = -ENOBUFS;
740         if (in_dev == NULL)
741                 goto errout;
742
743         ifa = inet_alloc_ifa();
744         if (ifa == NULL)
745                 /*
746                  * A potential indev allocation can be left alive, it stays
747                  * assigned to its device and is destroy with it.
748                  */
749                 goto errout;
750
751         ipv4_devconf_setall(in_dev);
752         neigh_parms_data_state_setall(in_dev->arp_parms);
753         in_dev_hold(in_dev);
754
755         if (tb[IFA_ADDRESS] == NULL)
756                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
757
758         INIT_HLIST_NODE(&ifa->hash);
759         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
760         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
761         ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
762                                          ifm->ifa_flags;
763         ifa->ifa_scope = ifm->ifa_scope;
764         ifa->ifa_dev = in_dev;
765
766         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
767         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
768
769         if (tb[IFA_BROADCAST])
770                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
771
772         if (tb[IFA_LABEL])
773                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
774         else
775                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
776
777         if (tb[IFA_CACHEINFO]) {
778                 struct ifa_cacheinfo *ci;
779
780                 ci = nla_data(tb[IFA_CACHEINFO]);
781                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
782                         err = -EINVAL;
783                         goto errout_free;
784                 }
785                 *pvalid_lft = ci->ifa_valid;
786                 *pprefered_lft = ci->ifa_prefered;
787         }
788
789         return ifa;
790
791 errout_free:
792         inet_free_ifa(ifa);
793 errout:
794         return ERR_PTR(err);
795 }
796
797 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
798 {
799         struct in_device *in_dev = ifa->ifa_dev;
800         struct in_ifaddr *ifa1, **ifap;
801
802         if (!ifa->ifa_local)
803                 return NULL;
804
805         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
806              ifap = &ifa1->ifa_next) {
807                 if (ifa1->ifa_mask == ifa->ifa_mask &&
808                     inet_ifa_match(ifa1->ifa_address, ifa) &&
809                     ifa1->ifa_local == ifa->ifa_local)
810                         return ifa1;
811         }
812         return NULL;
813 }
814
815 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
816 {
817         struct net *net = sock_net(skb->sk);
818         struct in_ifaddr *ifa;
819         struct in_ifaddr *ifa_existing;
820         __u32 valid_lft = INFINITY_LIFE_TIME;
821         __u32 prefered_lft = INFINITY_LIFE_TIME;
822
823         ASSERT_RTNL();
824
825         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
826         if (IS_ERR(ifa))
827                 return PTR_ERR(ifa);
828
829         ifa_existing = find_matching_ifa(ifa);
830         if (!ifa_existing) {
831                 /* It would be best to check for !NLM_F_CREATE here but
832                  * userspace alreay relies on not having to provide this.
833                  */
834                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
835                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
836         } else {
837                 inet_free_ifa(ifa);
838
839                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
840                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
841                         return -EEXIST;
842                 ifa = ifa_existing;
843                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
844                 cancel_delayed_work(&check_lifetime_work);
845                 schedule_delayed_work(&check_lifetime_work, 0);
846                 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
847                 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
848         }
849         return 0;
850 }
851
852 /*
853  *      Determine a default network mask, based on the IP address.
854  */
855
856 static int inet_abc_len(__be32 addr)
857 {
858         int rc = -1;    /* Something else, probably a multicast. */
859
860         if (ipv4_is_zeronet(addr))
861                 rc = 0;
862         else {
863                 __u32 haddr = ntohl(addr);
864
865                 if (IN_CLASSA(haddr))
866                         rc = 8;
867                 else if (IN_CLASSB(haddr))
868                         rc = 16;
869                 else if (IN_CLASSC(haddr))
870                         rc = 24;
871         }
872
873         return rc;
874 }
875
876
877 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
878 {
879         struct ifreq ifr;
880         struct sockaddr_in sin_orig;
881         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
882         struct in_device *in_dev;
883         struct in_ifaddr **ifap = NULL;
884         struct in_ifaddr *ifa = NULL;
885         struct net_device *dev;
886         char *colon;
887         int ret = -EFAULT;
888         int tryaddrmatch = 0;
889
890         /*
891          *      Fetch the caller's info block into kernel space
892          */
893
894         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
895                 goto out;
896         ifr.ifr_name[IFNAMSIZ - 1] = 0;
897
898         /* save original address for comparison */
899         memcpy(&sin_orig, sin, sizeof(*sin));
900
901         colon = strchr(ifr.ifr_name, ':');
902         if (colon)
903                 *colon = 0;
904
905         dev_load(net, ifr.ifr_name);
906
907         switch (cmd) {
908         case SIOCGIFADDR:       /* Get interface address */
909         case SIOCGIFBRDADDR:    /* Get the broadcast address */
910         case SIOCGIFDSTADDR:    /* Get the destination address */
911         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
912                 /* Note that these ioctls will not sleep,
913                    so that we do not impose a lock.
914                    One day we will be forced to put shlock here (I mean SMP)
915                  */
916                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
917                 memset(sin, 0, sizeof(*sin));
918                 sin->sin_family = AF_INET;
919                 break;
920
921         case SIOCSIFFLAGS:
922                 ret = -EPERM;
923                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
924                         goto out;
925                 break;
926         case SIOCSIFADDR:       /* Set interface address (and family) */
927         case SIOCSIFBRDADDR:    /* Set the broadcast address */
928         case SIOCSIFDSTADDR:    /* Set the destination address */
929         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
930                 ret = -EPERM;
931                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
932                         goto out;
933                 ret = -EINVAL;
934                 if (sin->sin_family != AF_INET)
935                         goto out;
936                 break;
937         default:
938                 ret = -EINVAL;
939                 goto out;
940         }
941
942         rtnl_lock();
943
944         ret = -ENODEV;
945         dev = __dev_get_by_name(net, ifr.ifr_name);
946         if (!dev)
947                 goto done;
948
949         if (colon)
950                 *colon = ':';
951
952         in_dev = __in_dev_get_rtnl(dev);
953         if (in_dev) {
954                 if (tryaddrmatch) {
955                         /* Matthias Andree */
956                         /* compare label and address (4.4BSD style) */
957                         /* note: we only do this for a limited set of ioctls
958                            and only if the original address family was AF_INET.
959                            This is checked above. */
960                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
961                              ifap = &ifa->ifa_next) {
962                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
963                                     sin_orig.sin_addr.s_addr ==
964                                                         ifa->ifa_local) {
965                                         break; /* found */
966                                 }
967                         }
968                 }
969                 /* we didn't get a match, maybe the application is
970                    4.3BSD-style and passed in junk so we fall back to
971                    comparing just the label */
972                 if (!ifa) {
973                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
974                              ifap = &ifa->ifa_next)
975                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
976                                         break;
977                 }
978         }
979
980         ret = -EADDRNOTAVAIL;
981         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
982                 goto done;
983
984         switch (cmd) {
985         case SIOCGIFADDR:       /* Get interface address */
986                 sin->sin_addr.s_addr = ifa->ifa_local;
987                 goto rarok;
988
989         case SIOCGIFBRDADDR:    /* Get the broadcast address */
990                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
991                 goto rarok;
992
993         case SIOCGIFDSTADDR:    /* Get the destination address */
994                 sin->sin_addr.s_addr = ifa->ifa_address;
995                 goto rarok;
996
997         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
998                 sin->sin_addr.s_addr = ifa->ifa_mask;
999                 goto rarok;
1000
1001         case SIOCSIFFLAGS:
1002                 if (colon) {
1003                         ret = -EADDRNOTAVAIL;
1004                         if (!ifa)
1005                                 break;
1006                         ret = 0;
1007                         if (!(ifr.ifr_flags & IFF_UP))
1008                                 inet_del_ifa(in_dev, ifap, 1);
1009                         break;
1010                 }
1011                 ret = dev_change_flags(dev, ifr.ifr_flags);
1012                 break;
1013
1014         case SIOCSIFADDR:       /* Set interface address (and family) */
1015                 ret = -EINVAL;
1016                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1017                         break;
1018
1019                 if (!ifa) {
1020                         ret = -ENOBUFS;
1021                         ifa = inet_alloc_ifa();
1022                         if (!ifa)
1023                                 break;
1024                         INIT_HLIST_NODE(&ifa->hash);
1025                         if (colon)
1026                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
1027                         else
1028                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1029                 } else {
1030                         ret = 0;
1031                         if (ifa->ifa_local == sin->sin_addr.s_addr)
1032                                 break;
1033                         inet_del_ifa(in_dev, ifap, 0);
1034                         ifa->ifa_broadcast = 0;
1035                         ifa->ifa_scope = 0;
1036                 }
1037
1038                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1039
1040                 if (!(dev->flags & IFF_POINTOPOINT)) {
1041                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1042                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1043                         if ((dev->flags & IFF_BROADCAST) &&
1044                             ifa->ifa_prefixlen < 31)
1045                                 ifa->ifa_broadcast = ifa->ifa_address |
1046                                                      ~ifa->ifa_mask;
1047                 } else {
1048                         ifa->ifa_prefixlen = 32;
1049                         ifa->ifa_mask = inet_make_mask(32);
1050                 }
1051                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1052                 ret = inet_set_ifa(dev, ifa);
1053                 break;
1054
1055         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1056                 ret = 0;
1057                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1058                         inet_del_ifa(in_dev, ifap, 0);
1059                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1060                         inet_insert_ifa(ifa);
1061                 }
1062                 break;
1063
1064         case SIOCSIFDSTADDR:    /* Set the destination address */
1065                 ret = 0;
1066                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1067                         break;
1068                 ret = -EINVAL;
1069                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1070                         break;
1071                 ret = 0;
1072                 inet_del_ifa(in_dev, ifap, 0);
1073                 ifa->ifa_address = sin->sin_addr.s_addr;
1074                 inet_insert_ifa(ifa);
1075                 break;
1076
1077         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1078
1079                 /*
1080                  *      The mask we set must be legal.
1081                  */
1082                 ret = -EINVAL;
1083                 if (bad_mask(sin->sin_addr.s_addr, 0))
1084                         break;
1085                 ret = 0;
1086                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1087                         __be32 old_mask = ifa->ifa_mask;
1088                         inet_del_ifa(in_dev, ifap, 0);
1089                         ifa->ifa_mask = sin->sin_addr.s_addr;
1090                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1091
1092                         /* See if current broadcast address matches
1093                          * with current netmask, then recalculate
1094                          * the broadcast address. Otherwise it's a
1095                          * funny address, so don't touch it since
1096                          * the user seems to know what (s)he's doing...
1097                          */
1098                         if ((dev->flags & IFF_BROADCAST) &&
1099                             (ifa->ifa_prefixlen < 31) &&
1100                             (ifa->ifa_broadcast ==
1101                              (ifa->ifa_local|~old_mask))) {
1102                                 ifa->ifa_broadcast = (ifa->ifa_local |
1103                                                       ~sin->sin_addr.s_addr);
1104                         }
1105                         inet_insert_ifa(ifa);
1106                 }
1107                 break;
1108         }
1109 done:
1110         rtnl_unlock();
1111 out:
1112         return ret;
1113 rarok:
1114         rtnl_unlock();
1115         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1116         goto out;
1117 }
1118
1119 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1120 {
1121         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1122         struct in_ifaddr *ifa;
1123         struct ifreq ifr;
1124         int done = 0;
1125
1126         if (!in_dev)
1127                 goto out;
1128
1129         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1130                 if (!buf) {
1131                         done += sizeof(ifr);
1132                         continue;
1133                 }
1134                 if (len < (int) sizeof(ifr))
1135                         break;
1136                 memset(&ifr, 0, sizeof(struct ifreq));
1137                 strcpy(ifr.ifr_name, ifa->ifa_label);
1138
1139                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1140                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1141                                                                 ifa->ifa_local;
1142
1143                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1144                         done = -EFAULT;
1145                         break;
1146                 }
1147                 buf  += sizeof(struct ifreq);
1148                 len  -= sizeof(struct ifreq);
1149                 done += sizeof(struct ifreq);
1150         }
1151 out:
1152         return done;
1153 }
1154
1155 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1156 {
1157         __be32 addr = 0;
1158         struct in_device *in_dev;
1159         struct net *net = dev_net(dev);
1160
1161         rcu_read_lock();
1162         in_dev = __in_dev_get_rcu(dev);
1163         if (!in_dev)
1164                 goto no_in_dev;
1165
1166         for_primary_ifa(in_dev) {
1167                 if (ifa->ifa_scope > scope)
1168                         continue;
1169                 if (!dst || inet_ifa_match(dst, ifa)) {
1170                         addr = ifa->ifa_local;
1171                         break;
1172                 }
1173                 if (!addr)
1174                         addr = ifa->ifa_local;
1175         } endfor_ifa(in_dev);
1176
1177         if (addr)
1178                 goto out_unlock;
1179 no_in_dev:
1180
1181         /* Not loopback addresses on loopback should be preferred
1182            in this case. It is importnat that lo is the first interface
1183            in dev_base list.
1184          */
1185         for_each_netdev_rcu(net, dev) {
1186                 in_dev = __in_dev_get_rcu(dev);
1187                 if (!in_dev)
1188                         continue;
1189
1190                 for_primary_ifa(in_dev) {
1191                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1192                             ifa->ifa_scope <= scope) {
1193                                 addr = ifa->ifa_local;
1194                                 goto out_unlock;
1195                         }
1196                 } endfor_ifa(in_dev);
1197         }
1198 out_unlock:
1199         rcu_read_unlock();
1200         return addr;
1201 }
1202 EXPORT_SYMBOL(inet_select_addr);
1203
1204 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1205                               __be32 local, int scope)
1206 {
1207         int same = 0;
1208         __be32 addr = 0;
1209
1210         for_ifa(in_dev) {
1211                 if (!addr &&
1212                     (local == ifa->ifa_local || !local) &&
1213                     ifa->ifa_scope <= scope) {
1214                         addr = ifa->ifa_local;
1215                         if (same)
1216                                 break;
1217                 }
1218                 if (!same) {
1219                         same = (!local || inet_ifa_match(local, ifa)) &&
1220                                 (!dst || inet_ifa_match(dst, ifa));
1221                         if (same && addr) {
1222                                 if (local || !dst)
1223                                         break;
1224                                 /* Is the selected addr into dst subnet? */
1225                                 if (inet_ifa_match(addr, ifa))
1226                                         break;
1227                                 /* No, then can we use new local src? */
1228                                 if (ifa->ifa_scope <= scope) {
1229                                         addr = ifa->ifa_local;
1230                                         break;
1231                                 }
1232                                 /* search for large dst subnet for addr */
1233                                 same = 0;
1234                         }
1235                 }
1236         } endfor_ifa(in_dev);
1237
1238         return same ? addr : 0;
1239 }
1240
1241 /*
1242  * Confirm that local IP address exists using wildcards:
1243  * - net: netns to check, cannot be NULL
1244  * - in_dev: only on this interface, NULL=any interface
1245  * - dst: only in the same subnet as dst, 0=any dst
1246  * - local: address, 0=autoselect the local address
1247  * - scope: maximum allowed scope value for the local address
1248  */
1249 __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1250                          __be32 dst, __be32 local, int scope)
1251 {
1252         __be32 addr = 0;
1253         struct net_device *dev;
1254
1255         if (in_dev != NULL)
1256                 return confirm_addr_indev(in_dev, dst, local, scope);
1257
1258         rcu_read_lock();
1259         for_each_netdev_rcu(net, dev) {
1260                 in_dev = __in_dev_get_rcu(dev);
1261                 if (in_dev) {
1262                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1263                         if (addr)
1264                                 break;
1265                 }
1266         }
1267         rcu_read_unlock();
1268
1269         return addr;
1270 }
1271 EXPORT_SYMBOL(inet_confirm_addr);
1272
1273 /*
1274  *      Device notifier
1275  */
1276
1277 int register_inetaddr_notifier(struct notifier_block *nb)
1278 {
1279         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1280 }
1281 EXPORT_SYMBOL(register_inetaddr_notifier);
1282
1283 int unregister_inetaddr_notifier(struct notifier_block *nb)
1284 {
1285         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1286 }
1287 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1288
1289 /* Rename ifa_labels for a device name change. Make some effort to preserve
1290  * existing alias numbering and to create unique labels if possible.
1291 */
1292 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1293 {
1294         struct in_ifaddr *ifa;
1295         int named = 0;
1296
1297         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1298                 char old[IFNAMSIZ], *dot;
1299
1300                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1301                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1302                 if (named++ == 0)
1303                         goto skip;
1304                 dot = strchr(old, ':');
1305                 if (dot == NULL) {
1306                         sprintf(old, ":%d", named);
1307                         dot = old;
1308                 }
1309                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1310                         strcat(ifa->ifa_label, dot);
1311                 else
1312                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1313 skip:
1314                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1315         }
1316 }
1317
/* Tell whether mtu is large enough for IPv4 to run on the device.
 * The threshold is 68 (presumably the RFC 791 minimum datagram size every
 * IPv4 module must be able to forward -- confirm).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	const unsigned int min_mtu = 68;

	return min_mtu <= mtu;
}
1322
1323 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1324                                         struct in_device *in_dev)
1325
1326 {
1327         struct in_ifaddr *ifa;
1328
1329         for (ifa = in_dev->ifa_list; ifa;
1330              ifa = ifa->ifa_next) {
1331                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1332                          ifa->ifa_local, dev,
1333                          ifa->ifa_local, NULL,
1334                          dev->dev_addr, NULL);
1335         }
1336 }
1337
1338 /* Called only under RTNL semaphore */
1339
1340 static int inetdev_event(struct notifier_block *this, unsigned long event,
1341                          void *ptr)
1342 {
1343         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1344         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1345
1346         ASSERT_RTNL();
1347
1348         if (!in_dev) {
1349                 if (event == NETDEV_REGISTER) {
1350                         in_dev = inetdev_init(dev);
1351                         if (!in_dev)
1352                                 return notifier_from_errno(-ENOMEM);
1353                         if (dev->flags & IFF_LOOPBACK) {
1354                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1355                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1356                         }
1357                 } else if (event == NETDEV_CHANGEMTU) {
1358                         /* Re-enabling IP */
1359                         if (inetdev_valid_mtu(dev->mtu))
1360                                 in_dev = inetdev_init(dev);
1361                 }
1362                 goto out;
1363         }
1364
1365         switch (event) {
1366         case NETDEV_REGISTER:
1367                 pr_debug("%s: bug\n", __func__);
1368                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1369                 break;
1370         case NETDEV_UP:
1371                 if (!inetdev_valid_mtu(dev->mtu))
1372                         break;
1373                 if (dev->flags & IFF_LOOPBACK) {
1374                         struct in_ifaddr *ifa = inet_alloc_ifa();
1375
1376                         if (ifa) {
1377                                 INIT_HLIST_NODE(&ifa->hash);
1378                                 ifa->ifa_local =
1379                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1380                                 ifa->ifa_prefixlen = 8;
1381                                 ifa->ifa_mask = inet_make_mask(8);
1382                                 in_dev_hold(in_dev);
1383                                 ifa->ifa_dev = in_dev;
1384                                 ifa->ifa_scope = RT_SCOPE_HOST;
1385                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1386                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1387                                                  INFINITY_LIFE_TIME);
1388                                 ipv4_devconf_setall(in_dev);
1389                                 neigh_parms_data_state_setall(in_dev->arp_parms);
1390                                 inet_insert_ifa(ifa);
1391                         }
1392                 }
1393                 ip_mc_up(in_dev);
1394                 /* fall through */
1395         case NETDEV_CHANGEADDR:
1396                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1397                         break;
1398                 /* fall through */
1399         case NETDEV_NOTIFY_PEERS:
1400                 /* Send gratuitous ARP to notify of link change */
1401                 inetdev_send_gratuitous_arp(dev, in_dev);
1402                 break;
1403         case NETDEV_DOWN:
1404                 ip_mc_down(in_dev);
1405                 break;
1406         case NETDEV_PRE_TYPE_CHANGE:
1407                 ip_mc_unmap(in_dev);
1408                 break;
1409         case NETDEV_POST_TYPE_CHANGE:
1410                 ip_mc_remap(in_dev);
1411                 break;
1412         case NETDEV_CHANGEMTU:
1413                 if (inetdev_valid_mtu(dev->mtu))
1414                         break;
1415                 /* disable IP when MTU is not enough */
1416         case NETDEV_UNREGISTER:
1417                 inetdev_destroy(in_dev);
1418                 break;
1419         case NETDEV_CHANGENAME:
1420                 /* Do not notify about label change, this event is
1421                  * not interesting to applications using netlink.
1422                  */
1423                 inetdev_changename(dev, in_dev);
1424
1425                 devinet_sysctl_unregister(in_dev);
1426                 devinet_sysctl_register(in_dev);
1427                 break;
1428         }
1429 out:
1430         return NOTIFY_DONE;
1431 }
1432
1433 static struct notifier_block ip_netdev_notifier = {
1434         .notifier_call = inetdev_event,
1435 };
1436
1437 static size_t inet_nlmsg_size(void)
1438 {
1439         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1440                + nla_total_size(4) /* IFA_ADDRESS */
1441                + nla_total_size(4) /* IFA_LOCAL */
1442                + nla_total_size(4) /* IFA_BROADCAST */
1443                + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1444                + nla_total_size(4);  /* IFA_FLAGS */
1445 }
1446
1447 static inline u32 cstamp_delta(unsigned long cstamp)
1448 {
1449         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1450 }
1451
1452 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1453                          unsigned long tstamp, u32 preferred, u32 valid)
1454 {
1455         struct ifa_cacheinfo ci;
1456
1457         ci.cstamp = cstamp_delta(cstamp);
1458         ci.tstamp = cstamp_delta(tstamp);
1459         ci.ifa_prefered = preferred;
1460         ci.ifa_valid = valid;
1461
1462         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1463 }
1464
1465 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1466                             u32 portid, u32 seq, int event, unsigned int flags)
1467 {
1468         struct ifaddrmsg *ifm;
1469         struct nlmsghdr  *nlh;
1470         u32 preferred, valid;
1471
1472         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1473         if (nlh == NULL)
1474                 return -EMSGSIZE;
1475
1476         ifm = nlmsg_data(nlh);
1477         ifm->ifa_family = AF_INET;
1478         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1479         ifm->ifa_flags = ifa->ifa_flags;
1480         ifm->ifa_scope = ifa->ifa_scope;
1481         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1482
1483         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1484                 preferred = ifa->ifa_preferred_lft;
1485                 valid = ifa->ifa_valid_lft;
1486                 if (preferred != INFINITY_LIFE_TIME) {
1487                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1488
1489                         if (preferred > tval)
1490                                 preferred -= tval;
1491                         else
1492                                 preferred = 0;
1493                         if (valid != INFINITY_LIFE_TIME) {
1494                                 if (valid > tval)
1495                                         valid -= tval;
1496                                 else
1497                                         valid = 0;
1498                         }
1499                 }
1500         } else {
1501                 preferred = INFINITY_LIFE_TIME;
1502                 valid = INFINITY_LIFE_TIME;
1503         }
1504         if ((ifa->ifa_address &&
1505              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1506             (ifa->ifa_local &&
1507              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1508             (ifa->ifa_broadcast &&
1509              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1510             (ifa->ifa_label[0] &&
1511              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1512             nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1513             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1514                           preferred, valid))
1515                 goto nla_put_failure;
1516
1517         return nlmsg_end(skb, nlh);
1518
1519 nla_put_failure:
1520         nlmsg_cancel(skb, nlh);
1521         return -EMSGSIZE;
1522 }
1523
1524 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1525 {
1526         struct net *net = sock_net(skb->sk);
1527         int h, s_h;
1528         int idx, s_idx;
1529         int ip_idx, s_ip_idx;
1530         struct net_device *dev;
1531         struct in_device *in_dev;
1532         struct in_ifaddr *ifa;
1533         struct hlist_head *head;
1534
1535         s_h = cb->args[0];
1536         s_idx = idx = cb->args[1];
1537         s_ip_idx = ip_idx = cb->args[2];
1538
1539         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1540                 idx = 0;
1541                 head = &net->dev_index_head[h];
1542                 rcu_read_lock();
1543                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1544                           net->dev_base_seq;
1545                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1546                         if (idx < s_idx)
1547                                 goto cont;
1548                         if (h > s_h || idx > s_idx)
1549                                 s_ip_idx = 0;
1550                         in_dev = __in_dev_get_rcu(dev);
1551                         if (!in_dev)
1552                                 goto cont;
1553
1554                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1555                              ifa = ifa->ifa_next, ip_idx++) {
1556                                 if (ip_idx < s_ip_idx)
1557                                         continue;
1558                                 if (inet_fill_ifaddr(skb, ifa,
1559                                              NETLINK_CB(cb->skb).portid,
1560                                              cb->nlh->nlmsg_seq,
1561                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1562                                         rcu_read_unlock();
1563                                         goto done;
1564                                 }
1565                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1566                         }
1567 cont:
1568                         idx++;
1569                 }
1570                 rcu_read_unlock();
1571         }
1572
1573 done:
1574         cb->args[0] = h;
1575         cb->args[1] = idx;
1576         cb->args[2] = ip_idx;
1577
1578         return skb->len;
1579 }
1580
1581 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1582                       u32 portid)
1583 {
1584         struct sk_buff *skb;
1585         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1586         int err = -ENOBUFS;
1587         struct net *net;
1588
1589         net = dev_net(ifa->ifa_dev->dev);
1590         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1591         if (skb == NULL)
1592                 goto errout;
1593
1594         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1595         if (err < 0) {
1596                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1597                 WARN_ON(err == -EMSGSIZE);
1598                 kfree_skb(skb);
1599                 goto errout;
1600         }
1601         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1602         return;
1603 errout:
1604         if (err < 0)
1605                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1606 }
1607
1608 static size_t inet_get_link_af_size(const struct net_device *dev)
1609 {
1610         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1611
1612         if (!in_dev)
1613                 return 0;
1614
1615         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1616 }
1617
1618 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1619 {
1620         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1621         struct nlattr *nla;
1622         int i;
1623
1624         if (!in_dev)
1625                 return -ENODATA;
1626
1627         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1628         if (nla == NULL)
1629                 return -EMSGSIZE;
1630
1631         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1632                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1633
1634         return 0;
1635 }
1636
1637 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1638         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1639 };
1640
1641 static int inet_validate_link_af(const struct net_device *dev,
1642                                  const struct nlattr *nla)
1643 {
1644         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1645         int err, rem;
1646
1647         if (dev && !__in_dev_get_rtnl(dev))
1648                 return -EAFNOSUPPORT;
1649
1650         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1651         if (err < 0)
1652                 return err;
1653
1654         if (tb[IFLA_INET_CONF]) {
1655                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1656                         int cfgid = nla_type(a);
1657
1658                         if (nla_len(a) < 4)
1659                                 return -EINVAL;
1660
1661                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1662                                 return -EINVAL;
1663                 }
1664         }
1665
1666         return 0;
1667 }
1668
1669 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1670 {
1671         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1672         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1673         int rem;
1674
1675         if (!in_dev)
1676                 return -EAFNOSUPPORT;
1677
1678         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1679                 BUG();
1680
1681         if (tb[IFLA_INET_CONF]) {
1682                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1683                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1684         }
1685
1686         return 0;
1687 }
1688
1689 static int inet_netconf_msgsize_devconf(int type)
1690 {
1691         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1692                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1693
1694         /* type -1 is used for ALL */
1695         if (type == -1 || type == NETCONFA_FORWARDING)
1696                 size += nla_total_size(4);
1697         if (type == -1 || type == NETCONFA_RP_FILTER)
1698                 size += nla_total_size(4);
1699         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1700                 size += nla_total_size(4);
1701         if (type == -1 || type == NETCONFA_PROXY_NEIGH)
1702                 size += nla_total_size(4);
1703
1704         return size;
1705 }
1706
1707 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1708                                      struct ipv4_devconf *devconf, u32 portid,
1709                                      u32 seq, int event, unsigned int flags,
1710                                      int type)
1711 {
1712         struct nlmsghdr  *nlh;
1713         struct netconfmsg *ncm;
1714
1715         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1716                         flags);
1717         if (nlh == NULL)
1718                 return -EMSGSIZE;
1719
1720         ncm = nlmsg_data(nlh);
1721         ncm->ncm_family = AF_INET;
1722
1723         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1724                 goto nla_put_failure;
1725
1726         /* type -1 is used for ALL */
1727         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1728             nla_put_s32(skb, NETCONFA_FORWARDING,
1729                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1730                 goto nla_put_failure;
1731         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1732             nla_put_s32(skb, NETCONFA_RP_FILTER,
1733                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1734                 goto nla_put_failure;
1735         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1736             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1737                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1738                 goto nla_put_failure;
1739         if ((type == -1 || type == NETCONFA_PROXY_NEIGH) &&
1740             nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
1741                         IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
1742                 goto nla_put_failure;
1743
1744         return nlmsg_end(skb, nlh);
1745
1746 nla_put_failure:
1747         nlmsg_cancel(skb, nlh);
1748         return -EMSGSIZE;
1749 }
1750
1751 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1752                                  struct ipv4_devconf *devconf)
1753 {
1754         struct sk_buff *skb;
1755         int err = -ENOBUFS;
1756
1757         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1758         if (skb == NULL)
1759                 goto errout;
1760
1761         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1762                                         RTM_NEWNETCONF, 0, type);
1763         if (err < 0) {
1764                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1765                 WARN_ON(err == -EMSGSIZE);
1766                 kfree_skb(skb);
1767                 goto errout;
1768         }
1769         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1770         return;
1771 errout:
1772         if (err < 0)
1773                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1774 }
1775
1776 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1777         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1778         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1779         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1780         [NETCONFA_PROXY_NEIGH]  = { .len = sizeof(int) },
1781 };
1782
1783 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1784                                     struct nlmsghdr *nlh)
1785 {
1786         struct net *net = sock_net(in_skb->sk);
1787         struct nlattr *tb[NETCONFA_MAX+1];
1788         struct netconfmsg *ncm;
1789         struct sk_buff *skb;
1790         struct ipv4_devconf *devconf;
1791         struct in_device *in_dev;
1792         struct net_device *dev;
1793         int ifindex;
1794         int err;
1795
1796         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1797                           devconf_ipv4_policy);
1798         if (err < 0)
1799                 goto errout;
1800
1801         err = EINVAL;
1802         if (!tb[NETCONFA_IFINDEX])
1803                 goto errout;
1804
1805         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1806         switch (ifindex) {
1807         case NETCONFA_IFINDEX_ALL:
1808                 devconf = net->ipv4.devconf_all;
1809                 break;
1810         case NETCONFA_IFINDEX_DEFAULT:
1811                 devconf = net->ipv4.devconf_dflt;
1812                 break;
1813         default:
1814                 dev = __dev_get_by_index(net, ifindex);
1815                 if (dev == NULL)
1816                         goto errout;
1817                 in_dev = __in_dev_get_rtnl(dev);
1818                 if (in_dev == NULL)
1819                         goto errout;
1820                 devconf = &in_dev->cnf;
1821                 break;
1822         }
1823
1824         err = -ENOBUFS;
1825         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1826         if (skb == NULL)
1827                 goto errout;
1828
1829         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1830                                         NETLINK_CB(in_skb).portid,
1831                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1832                                         -1);
1833         if (err < 0) {
1834                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1835                 WARN_ON(err == -EMSGSIZE);
1836                 kfree_skb(skb);
1837                 goto errout;
1838         }
1839         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1840 errout:
1841         return err;
1842 }
1843
1844 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1845                                      struct netlink_callback *cb)
1846 {
1847         struct net *net = sock_net(skb->sk);
1848         int h, s_h;
1849         int idx, s_idx;
1850         struct net_device *dev;
1851         struct in_device *in_dev;
1852         struct hlist_head *head;
1853
1854         s_h = cb->args[0];
1855         s_idx = idx = cb->args[1];
1856
1857         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1858                 idx = 0;
1859                 head = &net->dev_index_head[h];
1860                 rcu_read_lock();
1861                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1862                           net->dev_base_seq;
1863                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1864                         if (idx < s_idx)
1865                                 goto cont;
1866                         in_dev = __in_dev_get_rcu(dev);
1867                         if (!in_dev)
1868                                 goto cont;
1869
1870                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1871                                                       &in_dev->cnf,
1872                                                       NETLINK_CB(cb->skb).portid,
1873                                                       cb->nlh->nlmsg_seq,
1874                                                       RTM_NEWNETCONF,
1875                                                       NLM_F_MULTI,
1876                                                       -1) <= 0) {
1877                                 rcu_read_unlock();
1878                                 goto done;
1879                         }
1880                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1881 cont:
1882                         idx++;
1883                 }
1884                 rcu_read_unlock();
1885         }
1886         if (h == NETDEV_HASHENTRIES) {
1887                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1888                                               net->ipv4.devconf_all,
1889                                               NETLINK_CB(cb->skb).portid,
1890                                               cb->nlh->nlmsg_seq,
1891                                               RTM_NEWNETCONF, NLM_F_MULTI,
1892                                               -1) <= 0)
1893                         goto done;
1894                 else
1895                         h++;
1896         }
1897         if (h == NETDEV_HASHENTRIES + 1) {
1898                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1899                                               net->ipv4.devconf_dflt,
1900                                               NETLINK_CB(cb->skb).portid,
1901                                               cb->nlh->nlmsg_seq,
1902                                               RTM_NEWNETCONF, NLM_F_MULTI,
1903                                               -1) <= 0)
1904                         goto done;
1905                 else
1906                         h++;
1907         }
1908 done:
1909         cb->args[0] = h;
1910         cb->args[1] = idx;
1911
1912         return skb->len;
1913 }
1914
1915 #ifdef CONFIG_SYSCTL
1916
1917 static void devinet_copy_dflt_conf(struct net *net, int i)
1918 {
1919         struct net_device *dev;
1920
1921         rcu_read_lock();
1922         for_each_netdev_rcu(net, dev) {
1923                 struct in_device *in_dev;
1924
1925                 in_dev = __in_dev_get_rcu(dev);
1926                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1927                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1928         }
1929         rcu_read_unlock();
1930 }
1931
1932 /* called with RTNL locked */
1933 static void inet_forward_change(struct net *net)
1934 {
1935         struct net_device *dev;
1936         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1937
1938         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1939         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1940         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1941                                     NETCONFA_IFINDEX_ALL,
1942                                     net->ipv4.devconf_all);
1943         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1944                                     NETCONFA_IFINDEX_DEFAULT,
1945                                     net->ipv4.devconf_dflt);
1946
1947         for_each_netdev(net, dev) {
1948                 struct in_device *in_dev;
1949                 if (on)
1950                         dev_disable_lro(dev);
1951                 rcu_read_lock();
1952                 in_dev = __in_dev_get_rcu(dev);
1953                 if (in_dev) {
1954                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1955                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1956                                                     dev->ifindex, &in_dev->cnf);
1957                 }
1958                 rcu_read_unlock();
1959         }
1960 }
1961
1962 static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
1963 {
1964         if (cnf == net->ipv4.devconf_dflt)
1965                 return NETCONFA_IFINDEX_DEFAULT;
1966         else if (cnf == net->ipv4.devconf_all)
1967                 return NETCONFA_IFINDEX_ALL;
1968         else {
1969                 struct in_device *idev
1970                         = container_of(cnf, struct in_device, cnf);
1971                 return idev->dev->ifindex;
1972         }
1973 }
1974
1975 static int devinet_conf_proc(struct ctl_table *ctl, int write,
1976                              void __user *buffer,
1977                              size_t *lenp, loff_t *ppos)
1978 {
1979         int old_value = *(int *)ctl->data;
1980         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1981         int new_value = *(int *)ctl->data;
1982
1983         if (write) {
1984                 struct ipv4_devconf *cnf = ctl->extra1;
1985                 struct net *net = ctl->extra2;
1986                 int i = (int *)ctl->data - cnf->data;
1987                 int ifindex;
1988
1989                 set_bit(i, cnf->state);
1990
1991                 if (cnf == net->ipv4.devconf_dflt)
1992                         devinet_copy_dflt_conf(net, i);
1993                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1994                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1995                         if ((new_value == 0) && (old_value != 0))
1996                                 rt_cache_flush(net);
1997
1998                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1999                     new_value != old_value) {
2000                         ifindex = devinet_conf_ifindex(net, cnf);
2001                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
2002                                                     ifindex, cnf);
2003                 }
2004                 if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2005                     new_value != old_value) {
2006                         ifindex = devinet_conf_ifindex(net, cnf);
2007                         inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
2008                                                     ifindex, cnf);
2009                 }
2010         }
2011
2012         return ret;
2013 }
2014
2015 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2016                                   void __user *buffer,
2017                                   size_t *lenp, loff_t *ppos)
2018 {
2019         int *valp = ctl->data;
2020         int val = *valp;
2021         loff_t pos = *ppos;
2022         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2023
2024         if (write && *valp != val) {
2025                 struct net *net = ctl->extra2;
2026
2027                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2028                         if (!rtnl_trylock()) {
2029                                 /* Restore the original values before restarting */
2030                                 *valp = val;
2031                                 *ppos = pos;
2032                                 return restart_syscall();
2033                         }
2034                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2035                                 inet_forward_change(net);
2036                         } else {
2037                                 struct ipv4_devconf *cnf = ctl->extra1;
2038                                 struct in_device *idev =
2039                                         container_of(cnf, struct in_device, cnf);
2040                                 if (*valp)
2041                                         dev_disable_lro(idev->dev);
2042                                 inet_netconf_notify_devconf(net,
2043                                                             NETCONFA_FORWARDING,
2044                                                             idev->dev->ifindex,
2045                                                             cnf);
2046                         }
2047                         rtnl_unlock();
2048                         rt_cache_flush(net);
2049                 } else
2050                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
2051                                                     NETCONFA_IFINDEX_DEFAULT,
2052                                                     net->ipv4.devconf_dflt);
2053         }
2054
2055         return ret;
2056 }
2057
2058 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2059                                 void __user *buffer,
2060                                 size_t *lenp, loff_t *ppos)
2061 {
2062         int *valp = ctl->data;
2063         int val = *valp;
2064         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2065         struct net *net = ctl->extra2;
2066
2067         if (write && *valp != val)
2068                 rt_cache_flush(net);
2069
2070         return ret;
2071 }
2072
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *
 * .data and .extra1 point into the global ipv4_devconf template; the slot
 * for attribute IPV4_DEVCONF_<attr> is data[IPV4_DEVCONF_<attr> - 1].
 * @name is the proc file name, @mval its mode and @proc its handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc)			\
	{								\
		.procname	= name,					\
		.data		= &ipv4_devconf.data[			\
					IPV4_DEVCONF_ ## attr - 1],	\
		.maxlen		= sizeof(int),				\
		.mode		= mval,					\
		.proc_handler	= proc,					\
		.extra1		= &ipv4_devconf,			\
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)				\
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc)			\
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)			\
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2095
2096 static struct devinet_sysctl_table {
2097         struct ctl_table_header *sysctl_header;
2098         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2099 } devinet_sysctl = {
2100         .devinet_vars = {
2101                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2102                                              devinet_sysctl_forward),
2103                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2104
2105                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2106                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2107                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2108                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2109                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2110                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2111                                         "accept_source_route"),
2112                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2113                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2114                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2115                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2116                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2117                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2118                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2119                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2120                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2121                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2122                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2123                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2124                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2125                 DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2126                                         "force_igmp_version"),
2127                 DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2128                                         "igmpv2_unsolicited_report_interval"),
2129                 DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2130                                         "igmpv3_unsolicited_report_interval"),
2131
2132                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2133                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2134                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2135                                               "promote_secondaries"),
2136                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2137                                               "route_localnet"),
2138         },
2139 };
2140
2141 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2142                                         struct ipv4_devconf *p)
2143 {
2144         int i;
2145         struct devinet_sysctl_table *t;
2146         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2147
2148         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2149         if (!t)
2150                 goto out;
2151
2152         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2153                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2154                 t->devinet_vars[i].extra1 = p;
2155                 t->devinet_vars[i].extra2 = net;
2156         }
2157
2158         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2159
2160         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2161         if (!t->sysctl_header)
2162                 goto free;
2163
2164         p->sysctl = t;
2165         return 0;
2166
2167 free:
2168         kfree(t);
2169 out:
2170         return -ENOBUFS;
2171 }
2172
2173 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2174 {
2175         struct devinet_sysctl_table *t = cnf->sysctl;
2176
2177         if (t == NULL)
2178                 return;
2179
2180         cnf->sysctl = NULL;
2181         unregister_net_sysctl_table(t->sysctl_header);
2182         kfree(t);
2183 }
2184
2185 static void devinet_sysctl_register(struct in_device *idev)
2186 {
2187         neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2188         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2189                                         &idev->cnf);
2190 }
2191
2192 static void devinet_sysctl_unregister(struct in_device *idev)
2193 {
2194         __devinet_sysctl_unregister(&idev->cnf);
2195         neigh_sysctl_unregister(idev->arp_parms);
2196 }
2197
2198 static struct ctl_table ctl_forward_entry[] = {
2199         {
2200                 .procname       = "ip_forward",
2201                 .data           = &ipv4_devconf.data[
2202                                         IPV4_DEVCONF_FORWARDING - 1],
2203                 .maxlen         = sizeof(int),
2204                 .mode           = 0644,
2205                 .proc_handler   = devinet_sysctl_forward,
2206                 .extra1         = &ipv4_devconf,
2207                 .extra2         = &init_net,
2208         },
2209         { },
2210 };
2211 #endif
2212
2213 static __net_init int devinet_init_net(struct net *net)
2214 {
2215         int err;
2216         struct ipv4_devconf *all, *dflt;
2217 #ifdef CONFIG_SYSCTL
2218         struct ctl_table *tbl = ctl_forward_entry;
2219         struct ctl_table_header *forw_hdr;
2220 #endif
2221
2222         err = -ENOMEM;
2223         all = &ipv4_devconf;
2224         dflt = &ipv4_devconf_dflt;
2225
2226         if (!net_eq(net, &init_net)) {
2227                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
2228                 if (all == NULL)
2229                         goto err_alloc_all;
2230
2231                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2232                 if (dflt == NULL)
2233                         goto err_alloc_dflt;
2234
2235 #ifdef CONFIG_SYSCTL
2236                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
2237                 if (tbl == NULL)
2238                         goto err_alloc_ctl;
2239
2240                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2241                 tbl[0].extra1 = all;
2242                 tbl[0].extra2 = net;
2243 #endif
2244         }
2245
2246 #ifdef CONFIG_SYSCTL
2247         err = __devinet_sysctl_register(net, "all", all);
2248         if (err < 0)
2249                 goto err_reg_all;
2250
2251         err = __devinet_sysctl_register(net, "default", dflt);
2252         if (err < 0)
2253                 goto err_reg_dflt;
2254
2255         err = -ENOMEM;
2256         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2257         if (forw_hdr == NULL)
2258                 goto err_reg_ctl;
2259         net->ipv4.forw_hdr = forw_hdr;
2260 #endif
2261
2262         net->ipv4.devconf_all = all;
2263         net->ipv4.devconf_dflt = dflt;
2264         return 0;
2265
2266 #ifdef CONFIG_SYSCTL
2267 err_reg_ctl:
2268         __devinet_sysctl_unregister(dflt);
2269 err_reg_dflt:
2270         __devinet_sysctl_unregister(all);
2271 err_reg_all:
2272         if (tbl != ctl_forward_entry)
2273                 kfree(tbl);
2274 err_alloc_ctl:
2275 #endif
2276         if (dflt != &ipv4_devconf_dflt)
2277                 kfree(dflt);
2278 err_alloc_dflt:
2279         if (all != &ipv4_devconf)
2280                 kfree(all);
2281 err_alloc_all:
2282         return err;
2283 }
2284
2285 static __net_exit void devinet_exit_net(struct net *net)
2286 {
2287 #ifdef CONFIG_SYSCTL
2288         struct ctl_table *tbl;
2289
2290         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2291         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2292         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2293         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2294         kfree(tbl);
2295 #endif
2296         kfree(net->ipv4.devconf_dflt);
2297         kfree(net->ipv4.devconf_all);
2298 }
2299
2300 static __net_initdata struct pernet_operations devinet_ops = {
2301         .init = devinet_init_net,
2302         .exit = devinet_exit_net,
2303 };
2304
2305 static struct rtnl_af_ops inet_af_ops = {
2306         .family           = AF_INET,
2307         .fill_link_af     = inet_fill_link_af,
2308         .get_link_af_size = inet_get_link_af_size,
2309         .validate_link_af = inet_validate_link_af,
2310         .set_link_af      = inet_set_link_af,
2311 };
2312
2313 void __init devinet_init(void)
2314 {
2315         int i;
2316
2317         for (i = 0; i < IN4_ADDR_HSIZE; i++)
2318                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
2319
2320         register_pernet_subsys(&devinet_ops);
2321
2322         register_gifconf(PF_INET, inet_gifconf);
2323         register_netdevice_notifier(&ip_netdev_notifier);
2324
2325         schedule_delayed_work(&check_lifetime_work, 0);
2326
2327         rtnl_af_register(&inet_af_ops);
2328
2329         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2330         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2331         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2332         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2333                       inet_netconf_dump_devconf, NULL);
2334 }
2335