]> Pileus Git - ~andy/linux/blob - net/ipv4/devinet.c
ipv4: provide addr and netconf dump consistency info
[~andy/linux] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66 #include <net/addrconf.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76         },
77 };
78
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80         .data = {
81                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86         },
87 };
88
/*
 * Read attribute @attr from the default devconf of network namespace @net.
 *
 * @net is parenthesised before being dereferenced so that any
 * pointer-valued expression (not just a plain identifier) expands
 * correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
91
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93         [IFA_LOCAL]             = { .type = NLA_U32 },
94         [IFA_ADDRESS]           = { .type = NLA_U32 },
95         [IFA_BROADCAST]         = { .type = NLA_U32 },
96         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97         [IFA_CACHEINFO]         = { .len = sizeof(struct ifa_cacheinfo) },
98 };
99
100 #define IN4_ADDR_HSIZE_SHIFT    8
101 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
102
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106 static u32 inet_addr_hash(struct net *net, __be32 addr)
107 {
108         u32 val = (__force u32) addr ^ net_hash_mix(net);
109
110         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
111 }
112
113 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
114 {
115         u32 hash = inet_addr_hash(net, ifa->ifa_local);
116
117         spin_lock(&inet_addr_hash_lock);
118         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
119         spin_unlock(&inet_addr_hash_lock);
120 }
121
122 static void inet_hash_remove(struct in_ifaddr *ifa)
123 {
124         spin_lock(&inet_addr_hash_lock);
125         hlist_del_init_rcu(&ifa->hash);
126         spin_unlock(&inet_addr_hash_lock);
127 }
128
129 /**
130  * __ip_dev_find - find the first device with a given source address.
131  * @net: the net namespace
132  * @addr: the source address
133  * @devref: if true, take a reference on the found device
134  *
135  * If a caller uses devref=false, it should be protected by RCU, or RTNL
136  */
137 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
138 {
139         u32 hash = inet_addr_hash(net, addr);
140         struct net_device *result = NULL;
141         struct in_ifaddr *ifa;
142
143         rcu_read_lock();
144         hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
145                 if (ifa->ifa_local == addr) {
146                         struct net_device *dev = ifa->ifa_dev->dev;
147
148                         if (!net_eq(dev_net(dev), net))
149                                 continue;
150                         result = dev;
151                         break;
152                 }
153         }
154         if (!result) {
155                 struct flowi4 fl4 = { .daddr = addr };
156                 struct fib_result res = { 0 };
157                 struct fib_table *local;
158
159                 /* Fallback to FIB local table so that communication
160                  * over loopback subnets work.
161                  */
162                 local = fib_get_table(net, RT_TABLE_LOCAL);
163                 if (local &&
164                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
165                     res.type == RTN_LOCAL)
166                         result = FIB_RES_DEV(res);
167         }
168         if (result && devref)
169                 dev_hold(result);
170         rcu_read_unlock();
171         return result;
172 }
173 EXPORT_SYMBOL(__ip_dev_find);
174
175 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
176
177 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
178 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
179                          int destroy);
180 #ifdef CONFIG_SYSCTL
181 static void devinet_sysctl_register(struct in_device *idev);
182 static void devinet_sysctl_unregister(struct in_device *idev);
183 #else
184 static void devinet_sysctl_register(struct in_device *idev)
185 {
186 }
187 static void devinet_sysctl_unregister(struct in_device *idev)
188 {
189 }
190 #endif
191
192 /* Locks all the inet devices. */
193
194 static struct in_ifaddr *inet_alloc_ifa(void)
195 {
196         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
197 }
198
199 static void inet_rcu_free_ifa(struct rcu_head *head)
200 {
201         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
202         if (ifa->ifa_dev)
203                 in_dev_put(ifa->ifa_dev);
204         kfree(ifa);
205 }
206
207 static void inet_free_ifa(struct in_ifaddr *ifa)
208 {
209         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
210 }
211
212 void in_dev_finish_destroy(struct in_device *idev)
213 {
214         struct net_device *dev = idev->dev;
215
216         WARN_ON(idev->ifa_list);
217         WARN_ON(idev->mc_list);
218 #ifdef NET_REFCNT_DEBUG
219         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
220 #endif
221         dev_put(dev);
222         if (!idev->dead)
223                 pr_err("Freeing alive in_device %p\n", idev);
224         else
225                 kfree(idev);
226 }
227 EXPORT_SYMBOL(in_dev_finish_destroy);
228
229 static struct in_device *inetdev_init(struct net_device *dev)
230 {
231         struct in_device *in_dev;
232
233         ASSERT_RTNL();
234
235         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
236         if (!in_dev)
237                 goto out;
238         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
239                         sizeof(in_dev->cnf));
240         in_dev->cnf.sysctl = NULL;
241         in_dev->dev = dev;
242         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
243         if (!in_dev->arp_parms)
244                 goto out_kfree;
245         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
246                 dev_disable_lro(dev);
247         /* Reference in_dev->dev */
248         dev_hold(dev);
249         /* Account for reference dev->ip_ptr (below) */
250         in_dev_hold(in_dev);
251
252         devinet_sysctl_register(in_dev);
253         ip_mc_init_dev(in_dev);
254         if (dev->flags & IFF_UP)
255                 ip_mc_up(in_dev);
256
257         /* we can receive as soon as ip_ptr is set -- do this last */
258         rcu_assign_pointer(dev->ip_ptr, in_dev);
259 out:
260         return in_dev;
261 out_kfree:
262         kfree(in_dev);
263         in_dev = NULL;
264         goto out;
265 }
266
267 static void in_dev_rcu_put(struct rcu_head *head)
268 {
269         struct in_device *idev = container_of(head, struct in_device, rcu_head);
270         in_dev_put(idev);
271 }
272
273 static void inetdev_destroy(struct in_device *in_dev)
274 {
275         struct in_ifaddr *ifa;
276         struct net_device *dev;
277
278         ASSERT_RTNL();
279
280         dev = in_dev->dev;
281
282         in_dev->dead = 1;
283
284         ip_mc_destroy_dev(in_dev);
285
286         while ((ifa = in_dev->ifa_list) != NULL) {
287                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
288                 inet_free_ifa(ifa);
289         }
290
291         RCU_INIT_POINTER(dev->ip_ptr, NULL);
292
293         devinet_sysctl_unregister(in_dev);
294         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295         arp_ifdown(dev);
296
297         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
298 }
299
300 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
301 {
302         rcu_read_lock();
303         for_primary_ifa(in_dev) {
304                 if (inet_ifa_match(a, ifa)) {
305                         if (!b || inet_ifa_match(b, ifa)) {
306                                 rcu_read_unlock();
307                                 return 1;
308                         }
309                 }
310         } endfor_ifa(in_dev);
311         rcu_read_unlock();
312         return 0;
313 }
314
315 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
316                          int destroy, struct nlmsghdr *nlh, u32 portid)
317 {
318         struct in_ifaddr *promote = NULL;
319         struct in_ifaddr *ifa, *ifa1 = *ifap;
320         struct in_ifaddr *last_prim = in_dev->ifa_list;
321         struct in_ifaddr *prev_prom = NULL;
322         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
323
324         ASSERT_RTNL();
325
326         /* 1. Deleting primary ifaddr forces deletion all secondaries
327          * unless alias promotion is set
328          **/
329
330         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
331                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
332
333                 while ((ifa = *ifap1) != NULL) {
334                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
335                             ifa1->ifa_scope <= ifa->ifa_scope)
336                                 last_prim = ifa;
337
338                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
339                             ifa1->ifa_mask != ifa->ifa_mask ||
340                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
341                                 ifap1 = &ifa->ifa_next;
342                                 prev_prom = ifa;
343                                 continue;
344                         }
345
346                         if (!do_promote) {
347                                 inet_hash_remove(ifa);
348                                 *ifap1 = ifa->ifa_next;
349
350                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
351                                 blocking_notifier_call_chain(&inetaddr_chain,
352                                                 NETDEV_DOWN, ifa);
353                                 inet_free_ifa(ifa);
354                         } else {
355                                 promote = ifa;
356                                 break;
357                         }
358                 }
359         }
360
361         /* On promotion all secondaries from subnet are changing
362          * the primary IP, we must remove all their routes silently
363          * and later to add them back with new prefsrc. Do this
364          * while all addresses are on the device list.
365          */
366         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
367                 if (ifa1->ifa_mask == ifa->ifa_mask &&
368                     inet_ifa_match(ifa1->ifa_address, ifa))
369                         fib_del_ifaddr(ifa, ifa1);
370         }
371
372         /* 2. Unlink it */
373
374         *ifap = ifa1->ifa_next;
375         inet_hash_remove(ifa1);
376
377         /* 3. Announce address deletion */
378
379         /* Send message first, then call notifier.
380            At first sight, FIB update triggered by notifier
381            will refer to already deleted ifaddr, that could confuse
382            netlink listeners. It is not true: look, gated sees
383            that route deleted and if it still thinks that ifaddr
384            is valid, it will try to restore deleted routes... Grr.
385            So that, this order is correct.
386          */
387         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
388         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
389
390         if (promote) {
391                 struct in_ifaddr *next_sec = promote->ifa_next;
392
393                 if (prev_prom) {
394                         prev_prom->ifa_next = promote->ifa_next;
395                         promote->ifa_next = last_prim->ifa_next;
396                         last_prim->ifa_next = promote;
397                 }
398
399                 promote->ifa_flags &= ~IFA_F_SECONDARY;
400                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
401                 blocking_notifier_call_chain(&inetaddr_chain,
402                                 NETDEV_UP, promote);
403                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
404                         if (ifa1->ifa_mask != ifa->ifa_mask ||
405                             !inet_ifa_match(ifa1->ifa_address, ifa))
406                                         continue;
407                         fib_add_ifaddr(ifa);
408                 }
409
410         }
411         if (destroy)
412                 inet_free_ifa(ifa1);
413 }
414
415 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
416                          int destroy)
417 {
418         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
419 }
420
/* Delayed work item whose handler is check_lifetime(), defined below. */
static void check_lifetime(struct work_struct *);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
424
425 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
426                              u32 portid)
427 {
428         struct in_device *in_dev = ifa->ifa_dev;
429         struct in_ifaddr *ifa1, **ifap, **last_primary;
430
431         ASSERT_RTNL();
432
433         if (!ifa->ifa_local) {
434                 inet_free_ifa(ifa);
435                 return 0;
436         }
437
438         ifa->ifa_flags &= ~IFA_F_SECONDARY;
439         last_primary = &in_dev->ifa_list;
440
441         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
442              ifap = &ifa1->ifa_next) {
443                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
444                     ifa->ifa_scope <= ifa1->ifa_scope)
445                         last_primary = &ifa1->ifa_next;
446                 if (ifa1->ifa_mask == ifa->ifa_mask &&
447                     inet_ifa_match(ifa1->ifa_address, ifa)) {
448                         if (ifa1->ifa_local == ifa->ifa_local) {
449                                 inet_free_ifa(ifa);
450                                 return -EEXIST;
451                         }
452                         if (ifa1->ifa_scope != ifa->ifa_scope) {
453                                 inet_free_ifa(ifa);
454                                 return -EINVAL;
455                         }
456                         ifa->ifa_flags |= IFA_F_SECONDARY;
457                 }
458         }
459
460         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
461                 net_srandom(ifa->ifa_local);
462                 ifap = last_primary;
463         }
464
465         ifa->ifa_next = *ifap;
466         *ifap = ifa;
467
468         inet_hash_insert(dev_net(in_dev->dev), ifa);
469
470         cancel_delayed_work(&check_lifetime_work);
471         schedule_delayed_work(&check_lifetime_work, 0);
472
473         /* Send message first, then call notifier.
474            Notifier will trigger FIB update, so that
475            listeners of netlink will know about new ifaddr */
476         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
477         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
478
479         return 0;
480 }
481
482 static int inet_insert_ifa(struct in_ifaddr *ifa)
483 {
484         return __inet_insert_ifa(ifa, NULL, 0);
485 }
486
487 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
488 {
489         struct in_device *in_dev = __in_dev_get_rtnl(dev);
490
491         ASSERT_RTNL();
492
493         if (!in_dev) {
494                 inet_free_ifa(ifa);
495                 return -ENOBUFS;
496         }
497         ipv4_devconf_setall(in_dev);
498         if (ifa->ifa_dev != in_dev) {
499                 WARN_ON(ifa->ifa_dev);
500                 in_dev_hold(in_dev);
501                 ifa->ifa_dev = in_dev;
502         }
503         if (ipv4_is_loopback(ifa->ifa_local))
504                 ifa->ifa_scope = RT_SCOPE_HOST;
505         return inet_insert_ifa(ifa);
506 }
507
508 /* Caller must hold RCU or RTNL :
509  * We dont take a reference on found in_device
510  */
511 struct in_device *inetdev_by_index(struct net *net, int ifindex)
512 {
513         struct net_device *dev;
514         struct in_device *in_dev = NULL;
515
516         rcu_read_lock();
517         dev = dev_get_by_index_rcu(net, ifindex);
518         if (dev)
519                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
520         rcu_read_unlock();
521         return in_dev;
522 }
523 EXPORT_SYMBOL(inetdev_by_index);
524
525 /* Called only from RTNL semaphored context. No locks. */
526
527 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
528                                     __be32 mask)
529 {
530         ASSERT_RTNL();
531
532         for_primary_ifa(in_dev) {
533                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
534                         return ifa;
535         } endfor_ifa(in_dev);
536         return NULL;
537 }
538
539 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
540 {
541         struct net *net = sock_net(skb->sk);
542         struct nlattr *tb[IFA_MAX+1];
543         struct in_device *in_dev;
544         struct ifaddrmsg *ifm;
545         struct in_ifaddr *ifa, **ifap;
546         int err = -EINVAL;
547
548         ASSERT_RTNL();
549
550         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
551         if (err < 0)
552                 goto errout;
553
554         ifm = nlmsg_data(nlh);
555         in_dev = inetdev_by_index(net, ifm->ifa_index);
556         if (in_dev == NULL) {
557                 err = -ENODEV;
558                 goto errout;
559         }
560
561         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
562              ifap = &ifa->ifa_next) {
563                 if (tb[IFA_LOCAL] &&
564                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
565                         continue;
566
567                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
568                         continue;
569
570                 if (tb[IFA_ADDRESS] &&
571                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
572                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
573                         continue;
574
575                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
576                 return 0;
577         }
578
579         err = -EADDRNOTAVAIL;
580 errout:
581         return err;
582 }
583
584 #define INFINITY_LIFE_TIME      0xFFFFFFFF
585
586 static void check_lifetime(struct work_struct *work)
587 {
588         unsigned long now, next, next_sec, next_sched;
589         struct in_ifaddr *ifa;
590         int i;
591
592         now = jiffies;
593         next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
594
595         rcu_read_lock();
596         for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597                 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
598                         unsigned long age;
599
600                         if (ifa->ifa_flags & IFA_F_PERMANENT)
601                                 continue;
602
603                         /* We try to batch several events at once. */
604                         age = (now - ifa->ifa_tstamp +
605                                ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
606
607                         if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
608                             age >= ifa->ifa_valid_lft) {
609                                 struct in_ifaddr **ifap ;
610
611                                 rtnl_lock();
612                                 for (ifap = &ifa->ifa_dev->ifa_list;
613                                      *ifap != NULL; ifap = &ifa->ifa_next) {
614                                         if (*ifap == ifa)
615                                                 inet_del_ifa(ifa->ifa_dev,
616                                                              ifap, 1);
617                                 }
618                                 rtnl_unlock();
619                         } else if (ifa->ifa_preferred_lft ==
620                                    INFINITY_LIFE_TIME) {
621                                 continue;
622                         } else if (age >= ifa->ifa_preferred_lft) {
623                                 if (time_before(ifa->ifa_tstamp +
624                                                 ifa->ifa_valid_lft * HZ, next))
625                                         next = ifa->ifa_tstamp +
626                                                ifa->ifa_valid_lft * HZ;
627
628                                 if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) {
629                                         ifa->ifa_flags |= IFA_F_DEPRECATED;
630                                         rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
631                                 }
632                         } else if (time_before(ifa->ifa_tstamp +
633                                                ifa->ifa_preferred_lft * HZ,
634                                                next)) {
635                                 next = ifa->ifa_tstamp +
636                                        ifa->ifa_preferred_lft * HZ;
637                         }
638                 }
639         }
640         rcu_read_unlock();
641
642         next_sec = round_jiffies_up(next);
643         next_sched = next;
644
645         /* If rounded timeout is accurate enough, accept it. */
646         if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
647                 next_sched = next_sec;
648
649         now = jiffies;
650         /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
651         if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
652                 next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
653
654         schedule_delayed_work(&check_lifetime_work, next_sched - now);
655 }
656
657 static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
658                              __u32 prefered_lft)
659 {
660         unsigned long timeout;
661
662         ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
663
664         timeout = addrconf_timeout_fixup(valid_lft, HZ);
665         if (addrconf_finite_timeout(timeout))
666                 ifa->ifa_valid_lft = timeout;
667         else
668                 ifa->ifa_flags |= IFA_F_PERMANENT;
669
670         timeout = addrconf_timeout_fixup(prefered_lft, HZ);
671         if (addrconf_finite_timeout(timeout)) {
672                 if (timeout == 0)
673                         ifa->ifa_flags |= IFA_F_DEPRECATED;
674                 ifa->ifa_preferred_lft = timeout;
675         }
676         ifa->ifa_tstamp = jiffies;
677         if (!ifa->ifa_cstamp)
678                 ifa->ifa_cstamp = ifa->ifa_tstamp;
679 }
680
681 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
682                                        __u32 *pvalid_lft, __u32 *pprefered_lft)
683 {
684         struct nlattr *tb[IFA_MAX+1];
685         struct in_ifaddr *ifa;
686         struct ifaddrmsg *ifm;
687         struct net_device *dev;
688         struct in_device *in_dev;
689         int err;
690
691         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
692         if (err < 0)
693                 goto errout;
694
695         ifm = nlmsg_data(nlh);
696         err = -EINVAL;
697         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
698                 goto errout;
699
700         dev = __dev_get_by_index(net, ifm->ifa_index);
701         err = -ENODEV;
702         if (dev == NULL)
703                 goto errout;
704
705         in_dev = __in_dev_get_rtnl(dev);
706         err = -ENOBUFS;
707         if (in_dev == NULL)
708                 goto errout;
709
710         ifa = inet_alloc_ifa();
711         if (ifa == NULL)
712                 /*
713                  * A potential indev allocation can be left alive, it stays
714                  * assigned to its device and is destroy with it.
715                  */
716                 goto errout;
717
718         ipv4_devconf_setall(in_dev);
719         in_dev_hold(in_dev);
720
721         if (tb[IFA_ADDRESS] == NULL)
722                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
723
724         INIT_HLIST_NODE(&ifa->hash);
725         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
726         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
727         ifa->ifa_flags = ifm->ifa_flags;
728         ifa->ifa_scope = ifm->ifa_scope;
729         ifa->ifa_dev = in_dev;
730
731         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
732         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
733
734         if (tb[IFA_BROADCAST])
735                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
736
737         if (tb[IFA_LABEL])
738                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
739         else
740                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
741
742         if (tb[IFA_CACHEINFO]) {
743                 struct ifa_cacheinfo *ci;
744
745                 ci = nla_data(tb[IFA_CACHEINFO]);
746                 if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
747                         err = -EINVAL;
748                         goto errout;
749                 }
750                 *pvalid_lft = ci->ifa_valid;
751                 *pprefered_lft = ci->ifa_prefered;
752         }
753
754         return ifa;
755
756 errout:
757         return ERR_PTR(err);
758 }
759
760 static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
761 {
762         struct in_device *in_dev = ifa->ifa_dev;
763         struct in_ifaddr *ifa1, **ifap;
764
765         if (!ifa->ifa_local)
766                 return NULL;
767
768         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
769              ifap = &ifa1->ifa_next) {
770                 if (ifa1->ifa_mask == ifa->ifa_mask &&
771                     inet_ifa_match(ifa1->ifa_address, ifa) &&
772                     ifa1->ifa_local == ifa->ifa_local)
773                         return ifa1;
774         }
775         return NULL;
776 }
777
778 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
779 {
780         struct net *net = sock_net(skb->sk);
781         struct in_ifaddr *ifa;
782         struct in_ifaddr *ifa_existing;
783         __u32 valid_lft = INFINITY_LIFE_TIME;
784         __u32 prefered_lft = INFINITY_LIFE_TIME;
785
786         ASSERT_RTNL();
787
788         ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft);
789         if (IS_ERR(ifa))
790                 return PTR_ERR(ifa);
791
792         ifa_existing = find_matching_ifa(ifa);
793         if (!ifa_existing) {
794                 /* It would be best to check for !NLM_F_CREATE here but
795                  * userspace alreay relies on not having to provide this.
796                  */
797                 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
798                 return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
799         } else {
800                 inet_free_ifa(ifa);
801
802                 if (nlh->nlmsg_flags & NLM_F_EXCL ||
803                     !(nlh->nlmsg_flags & NLM_F_REPLACE))
804                         return -EEXIST;
805
806                 set_ifa_lifetime(ifa_existing, valid_lft, prefered_lft);
807         }
808         return 0;
809 }
810
811 /*
812  *      Determine a default network mask, based on the IP address.
813  */
814
815 static int inet_abc_len(__be32 addr)
816 {
817         int rc = -1;    /* Something else, probably a multicast. */
818
819         if (ipv4_is_zeronet(addr))
820                 rc = 0;
821         else {
822                 __u32 haddr = ntohl(addr);
823
824                 if (IN_CLASSA(haddr))
825                         rc = 8;
826                 else if (IN_CLASSB(haddr))
827                         rc = 16;
828                 else if (IN_CLASSC(haddr))
829                         rc = 24;
830         }
831
832         return rc;
833 }
834
835
/*
 * devinet_ioctl - handle the IPv4 interface-address ioctls.
 * @net: network namespace used for module loading, device lookup and the
 *       capability check
 * @cmd: one of SIOC{G,S}IF{ADDR,BRDADDR,DSTADDR,NETMASK} or SIOCSIFFLAGS
 * @arg: user pointer to a struct ifreq; read for every command and written
 *       back for the SIOCGIF* commands
 *
 * The interface name may carry an alias suffix ("name:label"): the part
 * before the colon selects the device, the full string selects the address
 * by its label.
 *
 * Returns 0 on success or a negative errno: -EFAULT if the ifreq cannot be
 * copied, -EPERM when a set command lacks CAP_NET_ADMIN, -EINVAL for an
 * unknown command, a non-AF_INET family on a set command or an unacceptable
 * address/mask, -ENODEV if the device does not exist, -EADDRNOTAVAIL if no
 * matching address was found, -ENOBUFS if a new address cannot be allocated,
 * or whatever dev_change_flags() / inet_set_ifa() return.
 */
836 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
837 {
838         struct ifreq ifr;
839         struct sockaddr_in sin_orig;
840         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
841         struct in_device *in_dev;
842         struct in_ifaddr **ifap = NULL;
843         struct in_ifaddr *ifa = NULL;
844         struct net_device *dev;
845         char *colon;
846         int ret = -EFAULT;
847         int tryaddrmatch = 0;
848
849         /*
850          *      Fetch the caller's info block into kernel space
851          */
852
853         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
854                 goto out;
            /* The name came from user space: force NUL termination. */
855         ifr.ifr_name[IFNAMSIZ - 1] = 0;
856
857         /* save original address for comparison */
858         memcpy(&sin_orig, sin, sizeof(*sin));
859
            /* Temporarily cut off any ":alias" suffix so that dev_load() and
             * the device lookup below see the bare device name.
             */
860         colon = strchr(ifr.ifr_name, ':');
861         if (colon)
862                 *colon = 0;
863
864         dev_load(net, ifr.ifr_name);
865
            /* Per-command pre-checks, done before taking the RTNL lock. */
866         switch (cmd) {
867         case SIOCGIFADDR:       /* Get interface address */
868         case SIOCGIFBRDADDR:    /* Get the broadcast address */
869         case SIOCGIFDSTADDR:    /* Get the destination address */
870         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
871                 /* Note that these ioctls will not sleep,
872                    so that we do not impose a lock.
873                    One day we will be forced to put shlock here (I mean SMP)
874                  */
                    /* Only match on the passed-in address if the caller
                     * supplied an AF_INET sockaddr; then clear the reply slot.
                     */
875                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
876                 memset(sin, 0, sizeof(*sin));
877                 sin->sin_family = AF_INET;
878                 break;
879
880         case SIOCSIFFLAGS:
881                 ret = -EPERM;
882                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
883                         goto out;
884                 break;
885         case SIOCSIFADDR:       /* Set interface address (and family) */
886         case SIOCSIFBRDADDR:    /* Set the broadcast address */
887         case SIOCSIFDSTADDR:    /* Set the destination address */
888         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
889                 ret = -EPERM;
890                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
891                         goto out;
892                 ret = -EINVAL;
893                 if (sin->sin_family != AF_INET)
894                         goto out;
895                 break;
896         default:
897                 ret = -EINVAL;
898                 goto out;
899         }
900
901         rtnl_lock();
902
903         ret = -ENODEV;
904         dev = __dev_get_by_name(net, ifr.ifr_name);
905         if (!dev)
906                 goto done;
907
            /* Device found: restore the full "name:alias" label for the
             * label comparisons below.
             */
908         if (colon)
909                 *colon = ':';
910
911         in_dev = __in_dev_get_rtnl(dev);
912         if (in_dev) {
913                 if (tryaddrmatch) {
914                         /* Matthias Andree */
915                         /* compare label and address (4.4BSD style) */
916                         /* note: we only do this for a limited set of ioctls
917                            and only if the original address family was AF_INET.
918                            This is checked above. */
919                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
920                              ifap = &ifa->ifa_next) {
921                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
922                                     sin_orig.sin_addr.s_addr ==
923                                                         ifa->ifa_local) {
924                                         break; /* found */
925                                 }
926                         }
927                 }
928                 /* we didn't get a match, maybe the application is
929                    4.3BSD-style and passed in junk so we fall back to
930                    comparing just the label */
931                 if (!ifa) {
932                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
933                              ifap = &ifa->ifa_next)
934                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
935                                         break;
936                 }
937         }
938
            /* Only SIOCSIFADDR (which can create the address) and
             * SIOCSIFFLAGS (which checks for itself below) may proceed
             * without an existing matching address.
             */
939         ret = -EADDRNOTAVAIL;
940         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
941                 goto done;
942
943         switch (cmd) {
944         case SIOCGIFADDR:       /* Get interface address */
945                 sin->sin_addr.s_addr = ifa->ifa_local;
946                 goto rarok;
947
948         case SIOCGIFBRDADDR:    /* Get the broadcast address */
949                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
950                 goto rarok;
951
952         case SIOCGIFDSTADDR:    /* Get the destination address */
953                 sin->sin_addr.s_addr = ifa->ifa_address;
954                 goto rarok;
955
956         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
957                 sin->sin_addr.s_addr = ifa->ifa_mask;
958                 goto rarok;
959
960         case SIOCSIFFLAGS:
                    /* With an alias label the flags apply to that address
                     * only: clearing IFF_UP deletes it via inet_del_ifa().
                     * Without a label the request goes to the device itself.
                     */
961                 if (colon) {
962                         ret = -EADDRNOTAVAIL;
963                         if (!ifa)
964                                 break;
965                         ret = 0;
966                         if (!(ifr.ifr_flags & IFF_UP))
967                                 inet_del_ifa(in_dev, ifap, 1);
968                         break;
969                 }
970                 ret = dev_change_flags(dev, ifr.ifr_flags);
971                 break;
972
973         case SIOCSIFADDR:       /* Set interface address (and family) */
974                 ret = -EINVAL;
975                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
976                         break;
977
978                 if (!ifa) {
                            /* No address with this label yet: allocate one and
                             * label it with the alias or the device name.
                             */
979                         ret = -ENOBUFS;
980                         ifa = inet_alloc_ifa();
981                         if (!ifa)
982                                 break;
983                         INIT_HLIST_NODE(&ifa->hash);
984                         if (colon)
985                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
986                         else
987                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
988                 } else {
                            /* Existing address: nothing to do if unchanged,
                             * otherwise take it off the list; the same ifa
                             * object is refilled and handed to inet_set_ifa().
                             */
989                         ret = 0;
990                         if (ifa->ifa_local == sin->sin_addr.s_addr)
991                                 break;
992                         inet_del_ifa(in_dev, ifap, 0);
993                         ifa->ifa_broadcast = 0;
994                         ifa->ifa_scope = 0;
995                 }
996
997                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
998
                    /* Non point-to-point devices get the classful default
                     * mask (and broadcast, if applicable); point-to-point
                     * devices get a /32.
                     */
999                 if (!(dev->flags & IFF_POINTOPOINT)) {
1000                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1001                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1002                         if ((dev->flags & IFF_BROADCAST) &&
1003                             ifa->ifa_prefixlen < 31)
1004                                 ifa->ifa_broadcast = ifa->ifa_address |
1005                                                      ~ifa->ifa_mask;
1006                 } else {
1007                         ifa->ifa_prefixlen = 32;
1008                         ifa->ifa_mask = inet_make_mask(32);
1009                 }
1010                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1011                 ret = inet_set_ifa(dev, ifa);
1012                 break;
1013
1014         case SIOCSIFBRDADDR:    /* Set the broadcast address */
1015                 ret = 0;
1016                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1017                         inet_del_ifa(in_dev, ifap, 0);
1018                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
1019                         inet_insert_ifa(ifa);
1020                 }
1021                 break;
1022
1023         case SIOCSIFDSTADDR:    /* Set the destination address */
1024                 ret = 0;
1025                 if (ifa->ifa_address == sin->sin_addr.s_addr)
1026                         break;
1027                 ret = -EINVAL;
1028                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1029                         break;
1030                 ret = 0;
1031                 inet_del_ifa(in_dev, ifap, 0);
1032                 ifa->ifa_address = sin->sin_addr.s_addr;
1033                 inet_insert_ifa(ifa);
1034                 break;
1035
1036         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
1037
1038                 /*
1039                  *      The mask we set must be legal.
1040                  */
1041                 ret = -EINVAL;
1042                 if (bad_mask(sin->sin_addr.s_addr, 0))
1043                         break;
1044                 ret = 0;
1045                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1046                         __be32 old_mask = ifa->ifa_mask;
1047                         inet_del_ifa(in_dev, ifap, 0);
1048                         ifa->ifa_mask = sin->sin_addr.s_addr;
1049                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1050
1051                         /* See if current broadcast address matches
1052                          * with current netmask, then recalculate
1053                          * the broadcast address. Otherwise it's a
1054                          * funny address, so don't touch it since
1055                          * the user seems to know what (s)he's doing...
1056                          */
1057                         if ((dev->flags & IFF_BROADCAST) &&
1058                             (ifa->ifa_prefixlen < 31) &&
1059                             (ifa->ifa_broadcast ==
1060                              (ifa->ifa_local|~old_mask))) {
1061                                 ifa->ifa_broadcast = (ifa->ifa_local |
1062                                                       ~sin->sin_addr.s_addr);
1063                         }
1064                         inet_insert_ifa(ifa);
1065                 }
1066                 break;
1067         }
     /* done: leave with RTNL held -> drop it; out: RTNL not held. */
1068 done:
1069         rtnl_unlock();
1070 out:
1071         return ret;
     /* rarok: a SIOCGIF* command filled in *sin; drop RTNL first, then copy
      * the whole ifreq back to the caller.
      */
1072 rarok:
1073         rtnl_unlock();
1074         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
1075         goto out;
1076 }
1077
1078 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
1079 {
1080         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1081         struct in_ifaddr *ifa;
1082         struct ifreq ifr;
1083         int done = 0;
1084
1085         if (!in_dev)
1086                 goto out;
1087
1088         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1089                 if (!buf) {
1090                         done += sizeof(ifr);
1091                         continue;
1092                 }
1093                 if (len < (int) sizeof(ifr))
1094                         break;
1095                 memset(&ifr, 0, sizeof(struct ifreq));
1096                 if (ifa->ifa_label)
1097                         strcpy(ifr.ifr_name, ifa->ifa_label);
1098                 else
1099                         strcpy(ifr.ifr_name, dev->name);
1100
1101                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1102                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1103                                                                 ifa->ifa_local;
1104
1105                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
1106                         done = -EFAULT;
1107                         break;
1108                 }
1109                 buf  += sizeof(struct ifreq);
1110                 len  -= sizeof(struct ifreq);
1111                 done += sizeof(struct ifreq);
1112         }
1113 out:
1114         return done;
1115 }
1116
/*
 * inet_select_addr - choose a local IPv4 address to use on @dev.
 * @dev:   device to look at first
 * @dst:   destination address, or 0 if the caller has no preference
 * @scope: largest ifa_scope value the chosen address may have
 *
 * Runs under rcu_read_lock().  First walks @dev's addresses with
 * for_primary_ifa() (presumably primary addresses only -- the macro is not
 * visible here): the first address whose scope is acceptable is remembered
 * as a fallback, and an address that satisfies inet_ifa_match(@dst, ifa)
 * (or any acceptable address when @dst is 0) wins immediately.  If @dev has
 * no in_device or nothing acceptable, every device in @dev's namespace is
 * scanned for an address that is not RT_SCOPE_LINK and whose scope is
 * acceptable.
 *
 * Returns the selected address, or 0 if none was found.
 */
1117 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1118 {
1119         __be32 addr = 0;
1120         struct in_device *in_dev;
1121         struct net *net = dev_net(dev);
1122
1123         rcu_read_lock();
1124         in_dev = __in_dev_get_rcu(dev);
1125         if (!in_dev)
1126                 goto no_in_dev;
1127
1128         for_primary_ifa(in_dev) {
1129                 if (ifa->ifa_scope > scope)
1130                         continue;
1131                 if (!dst || inet_ifa_match(dst, ifa)) {
1132                         addr = ifa->ifa_local;
1133                         break;
1134                 }
                     /* Remember the first acceptable address as fallback. */
1135                 if (!addr)
1136                         addr = ifa->ifa_local;
1137         } endfor_ifa(in_dev);
1138
1139         if (addr)
1140                 goto out_unlock;
1141 no_in_dev:
1142
1143         /* Not loopback addresses on loopback should be preferred
1144            in this case. It is important that lo is the first interface
1145            in dev_base list.
1146          */
             /* Note: @dev is reused as the loop cursor from here on. */
1147         for_each_netdev_rcu(net, dev) {
1148                 in_dev = __in_dev_get_rcu(dev);
1149                 if (!in_dev)
1150                         continue;
1151
1152                 for_primary_ifa(in_dev) {
1153                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1154                             ifa->ifa_scope <= scope) {
1155                                 addr = ifa->ifa_local;
1156                                 goto out_unlock;
1157                         }
1158                 } endfor_ifa(in_dev);
1159         }
1160 out_unlock:
1161         rcu_read_unlock();
1162         return addr;
1163 }
1164 EXPORT_SYMBOL(inet_select_addr);
1165
/*
 * confirm_addr_indev - look for a usable local address on one in_device.
 * @in_dev: interface whose address list is walked with for_ifa()
 * @dst:    if non-zero, some address on the interface must be in the same
 *          subnet as @dst (per inet_ifa_match())
 * @local:  if non-zero, the wanted local address; 0 lets the function pick
 * @scope:  largest ifa_scope value the returned address may have
 *
 * Two pieces of state are tracked while walking the list:
 *   addr - the candidate local address (first entry with acceptable scope
 *          that equals @local, or any such entry when @local is 0);
 *   same - whether an entry was found whose subnet matches both @local and
 *          @dst (each test is skipped when the value is 0).
 *
 * Returns the candidate address if a subnet match was found, otherwise 0.
 */
1166 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1167                               __be32 local, int scope)
1168 {
1169         int same = 0;
1170         __be32 addr = 0;
1171
1172         for_ifa(in_dev) {
                     /* Pick the candidate address once. */
1173                 if (!addr &&
1174                     (local == ifa->ifa_local || !local) &&
1175                     ifa->ifa_scope <= scope) {
1176                         addr = ifa->ifa_local;
1177                         if (same)
1178                                 break;
1179                 }
                     /* Still looking for an entry on the right subnet. */
1180                 if (!same) {
1181                         same = (!local || inet_ifa_match(local, ifa)) &&
1182                                 (!dst || inet_ifa_match(dst, ifa));
1183                         if (same && addr) {
1184                                 if (local || !dst)
1185                                         break;
1186                                 /* Is the selected addr into dst subnet? */
1187                                 if (inet_ifa_match(addr, ifa))
1188                                         break;
1189                                 /* No, then can we use new local src? */
1190                                 if (ifa->ifa_scope <= scope) {
1191                                         addr = ifa->ifa_local;
1192                                         break;
1193                                 }
1194                                 /* search for large dst subnet for addr */
1195                                 same = 0;
1196                         }
1197                 }
1198         } endfor_ifa(in_dev);
1199
1200         return same ? addr : 0;
1201 }
1202
1203 /*
1204  * Confirm that local IP address exists using wildcards:
1205  * - in_dev: only on this interface, 0=any interface
1206  * - dst: only in the same subnet as dst, 0=any dst
1207  * - local: address, 0=autoselect the local address
1208  * - scope: maximum allowed scope value for the local address
1209  */
1210 __be32 inet_confirm_addr(struct in_device *in_dev,
1211                          __be32 dst, __be32 local, int scope)
1212 {
1213         __be32 addr = 0;
1214         struct net_device *dev;
1215         struct net *net;
1216
1217         if (scope != RT_SCOPE_LINK)
1218                 return confirm_addr_indev(in_dev, dst, local, scope);
1219
1220         net = dev_net(in_dev->dev);
1221         rcu_read_lock();
1222         for_each_netdev_rcu(net, dev) {
1223                 in_dev = __in_dev_get_rcu(dev);
1224                 if (in_dev) {
1225                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1226                         if (addr)
1227                                 break;
1228                 }
1229         }
1230         rcu_read_unlock();
1231
1232         return addr;
1233 }
1234 EXPORT_SYMBOL(inet_confirm_addr);
1235
1236 /*
1237  *      Device notifier
1238  */
1239
1240 int register_inetaddr_notifier(struct notifier_block *nb)
1241 {
1242         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1243 }
1244 EXPORT_SYMBOL(register_inetaddr_notifier);
1245
1246 int unregister_inetaddr_notifier(struct notifier_block *nb)
1247 {
1248         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1249 }
1250 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1251
1252 /* Rename ifa_labels for a device name change. Make some effort to preserve
1253  * existing alias numbering and to create unique labels if possible.
1254 */
1255 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1256 {
1257         struct in_ifaddr *ifa;
1258         int named = 0;
1259
1260         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1261                 char old[IFNAMSIZ], *dot;
1262
1263                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1264                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1265                 if (named++ == 0)
1266                         goto skip;
1267                 dot = strchr(old, ':');
1268                 if (dot == NULL) {
1269                         sprintf(old, ":%d", named);
1270                         dot = old;
1271                 }
1272                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1273                         strcat(ifa->ifa_label, dot);
1274                 else
1275                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1276 skip:
1277                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1278         }
1279 }
1280
/* An MTU is acceptable for IPv4 only if it is at least 68. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        if (mtu < 68)
                return false;

        return true;
}
1285
1286 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1287                                         struct in_device *in_dev)
1288
1289 {
1290         struct in_ifaddr *ifa;
1291
1292         for (ifa = in_dev->ifa_list; ifa;
1293              ifa = ifa->ifa_next) {
1294                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1295                          ifa->ifa_local, dev,
1296                          ifa->ifa_local, NULL,
1297                          dev->dev_addr, NULL);
1298         }
1299 }
1300
1301 /* Called only under RTNL semaphore */
1302
/*
 * inetdev_event - netdevice notifier callback for the IPv4 layer.
 * @this:  the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event is about
 *
 * Devices without an in_device only react to NETDEV_REGISTER (create the
 * in_device) and NETDEV_CHANGEMTU (create it if the MTU is now valid).  For
 * devices that already have one, the switch below dispatches on the event.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails on registration, in
 * which case the error is passed back through notifier_from_errno().
 */
1303 static int inetdev_event(struct notifier_block *this, unsigned long event,
1304                          void *ptr)
1305 {
1306         struct net_device *dev = ptr;
1307         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1308
1309         ASSERT_RTNL();
1310
1311         if (!in_dev) {
1312                 if (event == NETDEV_REGISTER) {
1313                         in_dev = inetdev_init(dev);
1314                         if (!in_dev)
1315                                 return notifier_from_errno(-ENOMEM);
                             /* Loopback devices get NOXFRM and NOPOLICY set. */
1316                         if (dev->flags & IFF_LOOPBACK) {
1317                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1318                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1319                         }
1320                 } else if (event == NETDEV_CHANGEMTU) {
1321                         /* Re-enabling IP */
1322                         if (inetdev_valid_mtu(dev->mtu))
1323                                 in_dev = inetdev_init(dev);
1324                 }
1325                 goto out;
1326         }
1327
1328         switch (event) {
1329         case NETDEV_REGISTER:
                     /* Registration of a device that already has an
                      * in_device is unexpected: log it and clear the pointer.
                      */
1330                 pr_debug("%s: bug\n", __func__);
1331                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1332                 break;
1333         case NETDEV_UP:
1334                 if (!inetdev_valid_mtu(dev->mtu))
1335                         break;
                     /* A loopback device coming up is given INADDR_LOOPBACK/8
                      * with host scope and an infinite lifetime.
                      */
1336                 if (dev->flags & IFF_LOOPBACK) {
1337                         struct in_ifaddr *ifa = inet_alloc_ifa();
1338
1339                         if (ifa) {
1340                                 INIT_HLIST_NODE(&ifa->hash);
1341                                 ifa->ifa_local =
1342                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1343                                 ifa->ifa_prefixlen = 8;
1344                                 ifa->ifa_mask = inet_make_mask(8);
1345                                 in_dev_hold(in_dev);
1346                                 ifa->ifa_dev = in_dev;
1347                                 ifa->ifa_scope = RT_SCOPE_HOST;
1348                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1349                                 set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1350                                                  INFINITY_LIFE_TIME);
1351                                 inet_insert_ifa(ifa);
1352                         }
1353                 }
1354                 ip_mc_up(in_dev);
1355                 /* fall through */
1356         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR only announce themselves when the
                      * ARP_NOTIFY setting is enabled on the interface.
                      */
1357                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1358                         break;
1359                 /* fall through */
1360         case NETDEV_NOTIFY_PEERS:
1361                 /* Send gratuitous ARP to notify of link change */
1362                 inetdev_send_gratuitous_arp(dev, in_dev);
1363                 break;
1364         case NETDEV_DOWN:
1365                 ip_mc_down(in_dev);
1366                 break;
1367         case NETDEV_PRE_TYPE_CHANGE:
1368                 ip_mc_unmap(in_dev);
1369                 break;
1370         case NETDEV_POST_TYPE_CHANGE:
1371                 ip_mc_remap(in_dev);
1372                 break;
1373         case NETDEV_CHANGEMTU:
1374                 if (inetdev_valid_mtu(dev->mtu))
1375                         break;
1376                 /* disable IP when MTU is not enough */
                     /* deliberate fall through into the destroy path */
1377         case NETDEV_UNREGISTER:
1378                 inetdev_destroy(in_dev);
1379                 break;
1380         case NETDEV_CHANGENAME:
1381                 /* Do not notify about label change, this event is
1382                  * not interesting to applications using netlink.
1383                  */
                     /* NOTE(review): inetdev_changename() in this file sends
                      * RTM_NEWADDR for every address, which seems at odds
                      * with the comment above -- confirm which is intended.
                      */
1384                 inetdev_changename(dev, in_dev);
1385
                     /* Re-register the sysctl entries for this in_device. */
1386                 devinet_sysctl_unregister(in_dev);
1387                 devinet_sysctl_register(in_dev);
1388                 break;
1389         }
1390 out:
1391         return NOTIFY_DONE;
1392 }
1393
/* Notifier block whose callback is inetdev_event(). */
1394 static struct notifier_block ip_netdev_notifier = {
1395         .notifier_call = inetdev_event,
1396 };
1397
1398 static size_t inet_nlmsg_size(void)
1399 {
1400         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1401                + nla_total_size(4) /* IFA_ADDRESS */
1402                + nla_total_size(4) /* IFA_LOCAL */
1403                + nla_total_size(4) /* IFA_BROADCAST */
1404                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1405 }
1406
1407 static inline u32 cstamp_delta(unsigned long cstamp)
1408 {
1409         return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1410 }
1411
1412 static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1413                          unsigned long tstamp, u32 preferred, u32 valid)
1414 {
1415         struct ifa_cacheinfo ci;
1416
1417         ci.cstamp = cstamp_delta(cstamp);
1418         ci.tstamp = cstamp_delta(tstamp);
1419         ci.ifa_prefered = preferred;
1420         ci.ifa_valid = valid;
1421
1422         return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1423 }
1424
1425 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1426                             u32 portid, u32 seq, int event, unsigned int flags)
1427 {
1428         struct ifaddrmsg *ifm;
1429         struct nlmsghdr  *nlh;
1430         u32 preferred, valid;
1431
1432         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1433         if (nlh == NULL)
1434                 return -EMSGSIZE;
1435
1436         ifm = nlmsg_data(nlh);
1437         ifm->ifa_family = AF_INET;
1438         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1439         ifm->ifa_flags = ifa->ifa_flags;
1440         ifm->ifa_scope = ifa->ifa_scope;
1441         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1442
1443         if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1444                 preferred = ifa->ifa_preferred_lft;
1445                 valid = ifa->ifa_valid_lft;
1446                 if (preferred != INFINITY_LIFE_TIME) {
1447                         long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1448
1449                         if (preferred > tval)
1450                                 preferred -= tval;
1451                         else
1452                                 preferred = 0;
1453                         if (valid != INFINITY_LIFE_TIME) {
1454                                 if (valid > tval)
1455                                         valid -= tval;
1456                                 else
1457                                         valid = 0;
1458                         }
1459                 }
1460         } else {
1461                 preferred = INFINITY_LIFE_TIME;
1462                 valid = INFINITY_LIFE_TIME;
1463         }
1464         if ((ifa->ifa_address &&
1465              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1466             (ifa->ifa_local &&
1467              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1468             (ifa->ifa_broadcast &&
1469              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1470             (ifa->ifa_label[0] &&
1471              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1472             put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1473                           preferred, valid))
1474                 goto nla_put_failure;
1475
1476         return nlmsg_end(skb, nlh);
1477
1478 nla_put_failure:
1479         nlmsg_cancel(skb, nlh);
1480         return -EMSGSIZE;
1481 }
1482
/*
 * inet_dump_ifaddr - netlink dump callback listing all IPv4 addresses.
 * @skb: buffer being filled with RTM_NEWADDR messages
 * @cb:  dump state; cb->args[] stores where to resume:
 *         args[0] - bucket index into net->dev_index_head
 *         args[1] - position of the device within that bucket
 *         args[2] - position of the address within that device
 *
 * Walks every device hash bucket under rcu_read_lock() and emits one message
 * per address.  When inet_fill_ifaddr() reports that nothing more fits, the
 * current position is saved so that the next call continues from there.
 *
 * Returns skb->len.
 */
1483 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1484 {
1485         struct net *net = sock_net(skb->sk);
1486         int h, s_h;
1487         int idx, s_idx;
1488         int ip_idx, s_ip_idx;
1489         struct net_device *dev;
1490         struct in_device *in_dev;
1491         struct in_ifaddr *ifa;
1492         struct hlist_head *head;
1493
             /* Restore the resume position saved by the previous call. */
1494         s_h = cb->args[0];
1495         s_idx = idx = cb->args[1];
1496         s_ip_idx = ip_idx = cb->args[2];
1497
             /* s_idx only applies to the first bucket visited. */
1498         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1499                 idx = 0;
1500                 head = &net->dev_index_head[h];
1501                 rcu_read_lock();
                     /* Sequence value checked by nl_dump_check_consistent()
                      * below.
                      */
1502                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1503                           net->dev_base_seq;
1504                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1505                         if (idx < s_idx)
1506                                 goto cont;
                             /* Past the resume device: start at its first
                              * address again.
                              */
1507                         if (h > s_h || idx > s_idx)
1508                                 s_ip_idx = 0;
1509                         in_dev = __in_dev_get_rcu(dev);
1510                         if (!in_dev)
1511                                 goto cont;
1512
1513                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1514                              ifa = ifa->ifa_next, ip_idx++) {
1515                                 if (ip_idx < s_ip_idx)
1516                                         continue;
1517                                 if (inet_fill_ifaddr(skb, ifa,
1518                                              NETLINK_CB(cb->skb).portid,
1519                                              cb->nlh->nlmsg_seq,
1520                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1521                                         rcu_read_unlock();
1522                                         goto done;
1523                                 }
1524                                 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1525                         }
1526 cont:
1527                         idx++;
1528                 }
1529                 rcu_read_unlock();
1530         }
1531
1532 done:
             /* Save the position for the next invocation. */
1533         cb->args[0] = h;
1534         cb->args[1] = idx;
1535         cb->args[2] = ip_idx;
1536
1537         return skb->len;
1538 }
1539
1540 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1541                       u32 portid)
1542 {
1543         struct sk_buff *skb;
1544         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1545         int err = -ENOBUFS;
1546         struct net *net;
1547
1548         net = dev_net(ifa->ifa_dev->dev);
1549         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1550         if (skb == NULL)
1551                 goto errout;
1552
1553         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1554         if (err < 0) {
1555                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1556                 WARN_ON(err == -EMSGSIZE);
1557                 kfree_skb(skb);
1558                 goto errout;
1559         }
1560         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1561         return;
1562 errout:
1563         if (err < 0)
1564                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1565 }
1566
1567 static size_t inet_get_link_af_size(const struct net_device *dev)
1568 {
1569         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1570
1571         if (!in_dev)
1572                 return 0;
1573
1574         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1575 }
1576
1577 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1578 {
1579         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1580         struct nlattr *nla;
1581         int i;
1582
1583         if (!in_dev)
1584                 return -ENODATA;
1585
1586         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1587         if (nla == NULL)
1588                 return -EMSGSIZE;
1589
1590         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1591                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1592
1593         return 0;
1594 }
1595
/* Attribute policy passed to nla_parse_nested() by inet_validate_link_af():
 * IFLA_INET_CONF must be a nested attribute.
 */
1596 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1597         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1598 };
1599
1600 static int inet_validate_link_af(const struct net_device *dev,
1601                                  const struct nlattr *nla)
1602 {
1603         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1604         int err, rem;
1605
1606         if (dev && !__in_dev_get_rtnl(dev))
1607                 return -EAFNOSUPPORT;
1608
1609         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1610         if (err < 0)
1611                 return err;
1612
1613         if (tb[IFLA_INET_CONF]) {
1614                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1615                         int cfgid = nla_type(a);
1616
1617                         if (nla_len(a) < 4)
1618                                 return -EINVAL;
1619
1620                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1621                                 return -EINVAL;
1622                 }
1623         }
1624
1625         return 0;
1626 }
1627
1628 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1629 {
1630         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1631         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1632         int rem;
1633
1634         if (!in_dev)
1635                 return -EAFNOSUPPORT;
1636
1637         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1638                 BUG();
1639
1640         if (tb[IFLA_INET_CONF]) {
1641                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1642                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1643         }
1644
1645         return 0;
1646 }
1647
1648 static int inet_netconf_msgsize_devconf(int type)
1649 {
1650         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1651                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1652
1653         /* type -1 is used for ALL */
1654         if (type == -1 || type == NETCONFA_FORWARDING)
1655                 size += nla_total_size(4);
1656         if (type == -1 || type == NETCONFA_RP_FILTER)
1657                 size += nla_total_size(4);
1658         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1659                 size += nla_total_size(4);
1660
1661         return size;
1662 }
1663
1664 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1665                                      struct ipv4_devconf *devconf, u32 portid,
1666                                      u32 seq, int event, unsigned int flags,
1667                                      int type)
1668 {
1669         struct nlmsghdr  *nlh;
1670         struct netconfmsg *ncm;
1671
1672         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1673                         flags);
1674         if (nlh == NULL)
1675                 return -EMSGSIZE;
1676
1677         ncm = nlmsg_data(nlh);
1678         ncm->ncm_family = AF_INET;
1679
1680         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1681                 goto nla_put_failure;
1682
1683         /* type -1 is used for ALL */
1684         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1685             nla_put_s32(skb, NETCONFA_FORWARDING,
1686                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1687                 goto nla_put_failure;
1688         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1689             nla_put_s32(skb, NETCONFA_RP_FILTER,
1690                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1691                 goto nla_put_failure;
1692         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1693             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1694                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1695                 goto nla_put_failure;
1696
1697         return nlmsg_end(skb, nlh);
1698
1699 nla_put_failure:
1700         nlmsg_cancel(skb, nlh);
1701         return -EMSGSIZE;
1702 }
1703
1704 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1705                                  struct ipv4_devconf *devconf)
1706 {
1707         struct sk_buff *skb;
1708         int err = -ENOBUFS;
1709
1710         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1711         if (skb == NULL)
1712                 goto errout;
1713
1714         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1715                                         RTM_NEWNETCONF, 0, type);
1716         if (err < 0) {
1717                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1718                 WARN_ON(err == -EMSGSIZE);
1719                 kfree_skb(skb);
1720                 goto errout;
1721         }
1722         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1723         return;
1724 errout:
1725         if (err < 0)
1726                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1727 }
1728
/*
 * Attribute policy passed to nlmsg_parse() by inet_netconf_get_devconf().
 * Every attribute listed here is declared with an int-sized length;
 * NETCONFA_MC_FORWARDING has no entry in this table.
 */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
        [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
        [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
        [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
};
1734
1735 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1736                                     struct nlmsghdr *nlh)
1737 {
1738         struct net *net = sock_net(in_skb->sk);
1739         struct nlattr *tb[NETCONFA_MAX+1];
1740         struct netconfmsg *ncm;
1741         struct sk_buff *skb;
1742         struct ipv4_devconf *devconf;
1743         struct in_device *in_dev;
1744         struct net_device *dev;
1745         int ifindex;
1746         int err;
1747
1748         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1749                           devconf_ipv4_policy);
1750         if (err < 0)
1751                 goto errout;
1752
1753         err = EINVAL;
1754         if (!tb[NETCONFA_IFINDEX])
1755                 goto errout;
1756
1757         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1758         switch (ifindex) {
1759         case NETCONFA_IFINDEX_ALL:
1760                 devconf = net->ipv4.devconf_all;
1761                 break;
1762         case NETCONFA_IFINDEX_DEFAULT:
1763                 devconf = net->ipv4.devconf_dflt;
1764                 break;
1765         default:
1766                 dev = __dev_get_by_index(net, ifindex);
1767                 if (dev == NULL)
1768                         goto errout;
1769                 in_dev = __in_dev_get_rtnl(dev);
1770                 if (in_dev == NULL)
1771                         goto errout;
1772                 devconf = &in_dev->cnf;
1773                 break;
1774         }
1775
1776         err = -ENOBUFS;
1777         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1778         if (skb == NULL)
1779                 goto errout;
1780
1781         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1782                                         NETLINK_CB(in_skb).portid,
1783                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1784                                         -1);
1785         if (err < 0) {
1786                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1787                 WARN_ON(err == -EMSGSIZE);
1788                 kfree_skb(skb);
1789                 goto errout;
1790         }
1791         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1792 errout:
1793         return err;
1794 }
1795
1796 static int inet_netconf_dump_devconf(struct sk_buff *skb,
1797                                      struct netlink_callback *cb)
1798 {
1799         struct net *net = sock_net(skb->sk);
1800         int h, s_h;
1801         int idx, s_idx;
1802         struct net_device *dev;
1803         struct in_device *in_dev;
1804         struct hlist_head *head;
1805
1806         s_h = cb->args[0];
1807         s_idx = idx = cb->args[1];
1808
1809         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1810                 idx = 0;
1811                 head = &net->dev_index_head[h];
1812                 rcu_read_lock();
1813                 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1814                           net->dev_base_seq;
1815                 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1816                         if (idx < s_idx)
1817                                 goto cont;
1818                         in_dev = __in_dev_get_rcu(dev);
1819                         if (!in_dev)
1820                                 goto cont;
1821
1822                         if (inet_netconf_fill_devconf(skb, dev->ifindex,
1823                                                       &in_dev->cnf,
1824                                                       NETLINK_CB(cb->skb).portid,
1825                                                       cb->nlh->nlmsg_seq,
1826                                                       RTM_NEWNETCONF,
1827                                                       NLM_F_MULTI,
1828                                                       -1) <= 0) {
1829                                 rcu_read_unlock();
1830                                 goto done;
1831                         }
1832                         nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1833 cont:
1834                         idx++;
1835                 }
1836                 rcu_read_unlock();
1837         }
1838         if (h == NETDEV_HASHENTRIES) {
1839                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1840                                               net->ipv4.devconf_all,
1841                                               NETLINK_CB(cb->skb).portid,
1842                                               cb->nlh->nlmsg_seq,
1843                                               RTM_NEWNETCONF, NLM_F_MULTI,
1844                                               -1) <= 0)
1845                         goto done;
1846                 else
1847                         h++;
1848         }
1849         if (h == NETDEV_HASHENTRIES + 1) {
1850                 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1851                                               net->ipv4.devconf_dflt,
1852                                               NETLINK_CB(cb->skb).portid,
1853                                               cb->nlh->nlmsg_seq,
1854                                               RTM_NEWNETCONF, NLM_F_MULTI,
1855                                               -1) <= 0)
1856                         goto done;
1857                 else
1858                         h++;
1859         }
1860 done:
1861         cb->args[0] = h;
1862         cb->args[1] = idx;
1863
1864         return skb->len;
1865 }
1866
1867 #ifdef CONFIG_SYSCTL
1868
1869 static void devinet_copy_dflt_conf(struct net *net, int i)
1870 {
1871         struct net_device *dev;
1872
1873         rcu_read_lock();
1874         for_each_netdev_rcu(net, dev) {
1875                 struct in_device *in_dev;
1876
1877                 in_dev = __in_dev_get_rcu(dev);
1878                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1879                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1880         }
1881         rcu_read_unlock();
1882 }
1883
1884 /* called with RTNL locked */
1885 static void inet_forward_change(struct net *net)
1886 {
1887         struct net_device *dev;
1888         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1889
1890         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1891         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1892         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1893                                     NETCONFA_IFINDEX_ALL,
1894                                     net->ipv4.devconf_all);
1895         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1896                                     NETCONFA_IFINDEX_DEFAULT,
1897                                     net->ipv4.devconf_dflt);
1898
1899         for_each_netdev(net, dev) {
1900                 struct in_device *in_dev;
1901                 if (on)
1902                         dev_disable_lro(dev);
1903                 rcu_read_lock();
1904                 in_dev = __in_dev_get_rcu(dev);
1905                 if (in_dev) {
1906                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1907                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1908                                                     dev->ifindex, &in_dev->cnf);
1909                 }
1910                 rcu_read_unlock();
1911         }
1912 }
1913
1914 static int devinet_conf_proc(ctl_table *ctl, int write,
1915                              void __user *buffer,
1916                              size_t *lenp, loff_t *ppos)
1917 {
1918         int old_value = *(int *)ctl->data;
1919         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1920         int new_value = *(int *)ctl->data;
1921
1922         if (write) {
1923                 struct ipv4_devconf *cnf = ctl->extra1;
1924                 struct net *net = ctl->extra2;
1925                 int i = (int *)ctl->data - cnf->data;
1926
1927                 set_bit(i, cnf->state);
1928
1929                 if (cnf == net->ipv4.devconf_dflt)
1930                         devinet_copy_dflt_conf(net, i);
1931                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1932                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1933                         if ((new_value == 0) && (old_value != 0))
1934                                 rt_cache_flush(net);
1935                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1936                     new_value != old_value) {
1937                         int ifindex;
1938
1939                         if (cnf == net->ipv4.devconf_dflt)
1940                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1941                         else if (cnf == net->ipv4.devconf_all)
1942                                 ifindex = NETCONFA_IFINDEX_ALL;
1943                         else {
1944                                 struct in_device *idev =
1945                                         container_of(cnf, struct in_device,
1946                                                      cnf);
1947                                 ifindex = idev->dev->ifindex;
1948                         }
1949                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1950                                                     ifindex, cnf);
1951                 }
1952         }
1953
1954         return ret;
1955 }
1956
1957 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1958                                   void __user *buffer,
1959                                   size_t *lenp, loff_t *ppos)
1960 {
1961         int *valp = ctl->data;
1962         int val = *valp;
1963         loff_t pos = *ppos;
1964         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1965
1966         if (write && *valp != val) {
1967                 struct net *net = ctl->extra2;
1968
1969                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1970                         if (!rtnl_trylock()) {
1971                                 /* Restore the original values before restarting */
1972                                 *valp = val;
1973                                 *ppos = pos;
1974                                 return restart_syscall();
1975                         }
1976                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1977                                 inet_forward_change(net);
1978                         } else {
1979                                 struct ipv4_devconf *cnf = ctl->extra1;
1980                                 struct in_device *idev =
1981                                         container_of(cnf, struct in_device, cnf);
1982                                 if (*valp)
1983                                         dev_disable_lro(idev->dev);
1984                                 inet_netconf_notify_devconf(net,
1985                                                             NETCONFA_FORWARDING,
1986                                                             idev->dev->ifindex,
1987                                                             cnf);
1988                         }
1989                         rtnl_unlock();
1990                         rt_cache_flush(net);
1991                 } else
1992                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1993                                                     NETCONFA_IFINDEX_DEFAULT,
1994                                                     net->ipv4.devconf_dflt);
1995         }
1996
1997         return ret;
1998 }
1999
2000 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
2001                                 void __user *buffer,
2002                                 size_t *lenp, loff_t *ppos)
2003 {
2004         int *valp = ctl->data;
2005         int val = *valp;
2006         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2007         struct net *net = ctl->extra2;
2008
2009         if (write && *valp != val)
2010                 rt_cache_flush(net);
2011
2012         return ret;
2013 }
2014
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of the static ipv4_devconf
 * block and .extra1 at that block itself; __devinet_sysctl_register()
 * rebases both onto the devconf block actually being registered.
 * @name is the procname, @mval the file mode and @proc the handler.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler @proc. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2037
/*
 * Template for the net/ipv4/conf/<name>/ sysctl directory.
 *
 * __devinet_sysctl_register() duplicates this structure for each devconf
 * block it registers and stores the resulting table header in
 * sysctl_header.  The array has __IPV4_DEVCONF_MAX slots and the register
 * loop skips the last one - presumably the zeroed terminator entry; confirm
 * against the number of entries listed here.
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
        .devinet_vars = {
                /* forwarding has its own handler; mc_forwarding is read-only */
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                /* plain read-write entries handled by devinet_conf_proc() */
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

                /* entries whose change flushes the route cache */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
                                              "force_igmp_version"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
                                              "route_localnet"),
        },
};
2078
2079 static int __devinet_sysctl_register(struct net *net, char *dev_name,
2080                                         struct ipv4_devconf *p)
2081 {
2082         int i;
2083         struct devinet_sysctl_table *t;
2084         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2085
2086         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2087         if (!t)
2088                 goto out;
2089
2090         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2091                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2092                 t->devinet_vars[i].extra1 = p;
2093                 t->devinet_vars[i].extra2 = net;
2094         }
2095
2096         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2097
2098         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2099         if (!t->sysctl_header)
2100                 goto free;
2101
2102         p->sysctl = t;
2103         return 0;
2104
2105 free:
2106         kfree(t);
2107 out:
2108         return -ENOBUFS;
2109 }
2110
2111 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
2112 {
2113         struct devinet_sysctl_table *t = cnf->sysctl;
2114
2115         if (t == NULL)
2116                 return;
2117
2118         cnf->sysctl = NULL;
2119         unregister_net_sysctl_table(t->sysctl_header);
2120         kfree(t);
2121 }
2122
2123 static void devinet_sysctl_register(struct in_device *idev)
2124 {
2125         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
2126         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2127                                         &idev->cnf);
2128 }
2129
2130 static void devinet_sysctl_unregister(struct in_device *idev)
2131 {
2132         __devinet_sysctl_unregister(&idev->cnf);
2133         neigh_sysctl_unregister(idev->arp_parms);
2134 }
2135
/*
 * sysctl table for "ip_forward", registered under net/ipv4 by
 * devinet_init_net().  It is bound to the FORWARDING slot of the static
 * ipv4_devconf block and to init_net; for other namespaces
 * devinet_init_net() duplicates the table and repoints .data, .extra1 and
 * .extra2 at that namespace's own "all" block.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
2149 #endif
2150
/*
 * Per-namespace setup of the IPv4 "all" and "default" devconf blocks.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt blocks (and,
 * with CONFIG_SYSCTL, the static ctl_forward_entry table) directly.  Every
 * other namespace gets its own kmemdup() copies.  With CONFIG_SYSCTL the
 * conf/all and conf/default directories and the net/ipv4 ip_forward table
 * are registered as well.
 *
 * Returns 0 on success or a negative errno; on failure everything set up
 * so far is undone in reverse order.
 */
static __net_init int devinet_init_net(struct net *net)
{
        int err;
        struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
        struct ctl_table *tbl = ctl_forward_entry;
        struct ctl_table_header *forw_hdr;
#endif

        err = -ENOMEM;
        all = &ipv4_devconf;
        dflt = &ipv4_devconf_dflt;

        /* Namespaces other than init_net work on private copies. */
        if (!net_eq(net, &init_net)) {
                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
                if (all == NULL)
                        goto err_alloc_all;

                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
                if (dflt == NULL)
                        goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
                if (tbl == NULL)
                        goto err_alloc_ctl;

                /* Point the copied ip_forward entry at this namespace's block. */
                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
                tbl[0].extra1 = all;
                tbl[0].extra2 = net;
#endif
        }

#ifdef CONFIG_SYSCTL
        err = __devinet_sysctl_register(net, "all", all);
        if (err < 0)
                goto err_reg_all;

        err = __devinet_sysctl_register(net, "default", dflt);
        if (err < 0)
                goto err_reg_dflt;

        err = -ENOMEM;
        forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
        if (forw_hdr == NULL)
                goto err_reg_ctl;
        net->ipv4.forw_hdr = forw_hdr;
#endif

        net->ipv4.devconf_all = all;
        net->ipv4.devconf_dflt = dflt;
        return 0;

        /* Unwind ladder: each label undoes the step before the failed one. */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
        __devinet_sysctl_unregister(dflt);
err_reg_dflt:
        __devinet_sysctl_unregister(all);
err_reg_all:
        /* Only free the table if it is a copy, never the static one. */
        if (tbl != ctl_forward_entry)
                kfree(tbl);
err_alloc_ctl:
#endif
        if (dflt != &ipv4_devconf_dflt)
                kfree(dflt);
err_alloc_dflt:
        if (all != &ipv4_devconf)
                kfree(all);
err_alloc_all:
        return err;
}
2222
2223 static __net_exit void devinet_exit_net(struct net *net)
2224 {
2225 #ifdef CONFIG_SYSCTL
2226         struct ctl_table *tbl;
2227
2228         tbl = net->ipv4.forw_hdr->ctl_table_arg;
2229         unregister_net_sysctl_table(net->ipv4.forw_hdr);
2230         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
2231         __devinet_sysctl_unregister(net->ipv4.devconf_all);
2232         kfree(tbl);
2233 #endif
2234         kfree(net->ipv4.devconf_dflt);
2235         kfree(net->ipv4.devconf_all);
2236 }
2237
/*
 * Per-network-namespace init/exit hooks, registered from devinet_init()
 * with register_pernet_subsys().
 */
static __net_initdata struct pernet_operations devinet_ops = {
        .init = devinet_init_net,
        .exit = devinet_exit_net,
};
2242
/*
 * AF_INET link address-family operations, registered from devinet_init()
 * with rtnl_af_register().
 */
static struct rtnl_af_ops inet_af_ops = {
        .family           = AF_INET,
        .fill_link_af     = inet_fill_link_af,
        .get_link_af_size = inet_get_link_af_size,
        .validate_link_af = inet_validate_link_af,
        .set_link_af      = inet_set_link_af,
};
2250
/*
 * One-time initialisation of the IPv4 device layer: set up the address
 * hash table, register the per-namespace operations, the gifconf handler
 * and the netdevice notifier, queue the address lifetime work, and hook
 * the AF_INET handlers into rtnetlink.  Return values of the registration
 * calls are not checked here.
 */
void __init devinet_init(void)
{
        int i;

        /* Start with every address hash bucket empty. */
        for (i = 0; i < IN4_ADDR_HSIZE; i++)
                INIT_HLIST_HEAD(&inet_addr_lst[i]);

        register_pernet_subsys(&devinet_ops);

        register_gifconf(PF_INET, inet_gifconf);
        register_netdevice_notifier(&ip_netdev_notifier);

        /* Queue check_lifetime_work with a delay of zero. */
        schedule_delayed_work(&check_lifetime_work, 0);

        rtnl_af_register(&inet_af_ops);

        /* rtnetlink handlers for address and netconf messages. */
        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
        rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
                      inet_netconf_dump_devconf, NULL);
}
2273