Pileus Git - ~andy/linux/blob - net/ipv4/devinet.c
[SCSI] Merge tag 'fcoe-02-19-13' into for-linus
[~andy/linux] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
67 #include "fib_lookup.h"
68
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75         },
76 };
77
78 static struct ipv4_devconf ipv4_devconf_dflt = {
79         .data = {
80                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
81                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
84                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
85         },
86 };
87
/*
 * Read attribute @attr from the devconf_dflt block of namespace @net.
 *
 * @net is parenthesised so that an argument which is itself an expression
 * (for example a conditional or pointer arithmetic) is evaluated as a whole
 * before "->" is applied to it.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90
91 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
92         [IFA_LOCAL]             = { .type = NLA_U32 },
93         [IFA_ADDRESS]           = { .type = NLA_U32 },
94         [IFA_BROADCAST]         = { .type = NLA_U32 },
95         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
96 };
97
98 #define IN4_ADDR_HSIZE_SHIFT    8
99 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
100
101 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
102 static DEFINE_SPINLOCK(inet_addr_hash_lock);
103
104 static u32 inet_addr_hash(struct net *net, __be32 addr)
105 {
106         u32 val = (__force u32) addr ^ net_hash_mix(net);
107
108         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
109 }
110
111 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
112 {
113         u32 hash = inet_addr_hash(net, ifa->ifa_local);
114
115         spin_lock(&inet_addr_hash_lock);
116         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
117         spin_unlock(&inet_addr_hash_lock);
118 }
119
120 static void inet_hash_remove(struct in_ifaddr *ifa)
121 {
122         spin_lock(&inet_addr_hash_lock);
123         hlist_del_init_rcu(&ifa->hash);
124         spin_unlock(&inet_addr_hash_lock);
125 }
126
127 /**
128  * __ip_dev_find - find the first device with a given source address.
129  * @net: the net namespace
130  * @addr: the source address
131  * @devref: if true, take a reference on the found device
132  *
133  * If a caller uses devref=false, it should be protected by RCU, or RTNL
134  */
135 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
136 {
137         u32 hash = inet_addr_hash(net, addr);
138         struct net_device *result = NULL;
139         struct in_ifaddr *ifa;
140         struct hlist_node *node;
141
142         rcu_read_lock();
143         hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
144                 if (ifa->ifa_local == addr) {
145                         struct net_device *dev = ifa->ifa_dev->dev;
146
147                         if (!net_eq(dev_net(dev), net))
148                                 continue;
149                         result = dev;
150                         break;
151                 }
152         }
153         if (!result) {
154                 struct flowi4 fl4 = { .daddr = addr };
155                 struct fib_result res = { 0 };
156                 struct fib_table *local;
157
158                 /* Fallback to FIB local table so that communication
159                  * over loopback subnets work.
160                  */
161                 local = fib_get_table(net, RT_TABLE_LOCAL);
162                 if (local &&
163                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
164                     res.type == RTN_LOCAL)
165                         result = FIB_RES_DEV(res);
166         }
167         if (result && devref)
168                 dev_hold(result);
169         rcu_read_unlock();
170         return result;
171 }
172 EXPORT_SYMBOL(__ip_dev_find);
173
174 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
175
176 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
177 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
178                          int destroy);
179 #ifdef CONFIG_SYSCTL
180 static void devinet_sysctl_register(struct in_device *idev);
181 static void devinet_sysctl_unregister(struct in_device *idev);
182 #else
183 static void devinet_sysctl_register(struct in_device *idev)
184 {
185 }
186 static void devinet_sysctl_unregister(struct in_device *idev)
187 {
188 }
189 #endif
190
191 /* Locks all the inet devices. */
192
193 static struct in_ifaddr *inet_alloc_ifa(void)
194 {
195         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
196 }
197
198 static void inet_rcu_free_ifa(struct rcu_head *head)
199 {
200         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
201         if (ifa->ifa_dev)
202                 in_dev_put(ifa->ifa_dev);
203         kfree(ifa);
204 }
205
206 static void inet_free_ifa(struct in_ifaddr *ifa)
207 {
208         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
209 }
210
211 void in_dev_finish_destroy(struct in_device *idev)
212 {
213         struct net_device *dev = idev->dev;
214
215         WARN_ON(idev->ifa_list);
216         WARN_ON(idev->mc_list);
217 #ifdef NET_REFCNT_DEBUG
218         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
219 #endif
220         dev_put(dev);
221         if (!idev->dead)
222                 pr_err("Freeing alive in_device %p\n", idev);
223         else
224                 kfree(idev);
225 }
226 EXPORT_SYMBOL(in_dev_finish_destroy);
227
228 static struct in_device *inetdev_init(struct net_device *dev)
229 {
230         struct in_device *in_dev;
231
232         ASSERT_RTNL();
233
234         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
235         if (!in_dev)
236                 goto out;
237         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
238                         sizeof(in_dev->cnf));
239         in_dev->cnf.sysctl = NULL;
240         in_dev->dev = dev;
241         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
242         if (!in_dev->arp_parms)
243                 goto out_kfree;
244         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
245                 dev_disable_lro(dev);
246         /* Reference in_dev->dev */
247         dev_hold(dev);
248         /* Account for reference dev->ip_ptr (below) */
249         in_dev_hold(in_dev);
250
251         devinet_sysctl_register(in_dev);
252         ip_mc_init_dev(in_dev);
253         if (dev->flags & IFF_UP)
254                 ip_mc_up(in_dev);
255
256         /* we can receive as soon as ip_ptr is set -- do this last */
257         rcu_assign_pointer(dev->ip_ptr, in_dev);
258 out:
259         return in_dev;
260 out_kfree:
261         kfree(in_dev);
262         in_dev = NULL;
263         goto out;
264 }
265
266 static void in_dev_rcu_put(struct rcu_head *head)
267 {
268         struct in_device *idev = container_of(head, struct in_device, rcu_head);
269         in_dev_put(idev);
270 }
271
272 static void inetdev_destroy(struct in_device *in_dev)
273 {
274         struct in_ifaddr *ifa;
275         struct net_device *dev;
276
277         ASSERT_RTNL();
278
279         dev = in_dev->dev;
280
281         in_dev->dead = 1;
282
283         ip_mc_destroy_dev(in_dev);
284
285         while ((ifa = in_dev->ifa_list) != NULL) {
286                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
287                 inet_free_ifa(ifa);
288         }
289
290         RCU_INIT_POINTER(dev->ip_ptr, NULL);
291
292         devinet_sysctl_unregister(in_dev);
293         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
294         arp_ifdown(dev);
295
296         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
297 }
298
299 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
300 {
301         rcu_read_lock();
302         for_primary_ifa(in_dev) {
303                 if (inet_ifa_match(a, ifa)) {
304                         if (!b || inet_ifa_match(b, ifa)) {
305                                 rcu_read_unlock();
306                                 return 1;
307                         }
308                 }
309         } endfor_ifa(in_dev);
310         rcu_read_unlock();
311         return 0;
312 }
313
314 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
315                          int destroy, struct nlmsghdr *nlh, u32 portid)
316 {
317         struct in_ifaddr *promote = NULL;
318         struct in_ifaddr *ifa, *ifa1 = *ifap;
319         struct in_ifaddr *last_prim = in_dev->ifa_list;
320         struct in_ifaddr *prev_prom = NULL;
321         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
322
323         ASSERT_RTNL();
324
325         /* 1. Deleting primary ifaddr forces deletion all secondaries
326          * unless alias promotion is set
327          **/
328
329         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
330                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
331
332                 while ((ifa = *ifap1) != NULL) {
333                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
334                             ifa1->ifa_scope <= ifa->ifa_scope)
335                                 last_prim = ifa;
336
337                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
338                             ifa1->ifa_mask != ifa->ifa_mask ||
339                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
340                                 ifap1 = &ifa->ifa_next;
341                                 prev_prom = ifa;
342                                 continue;
343                         }
344
345                         if (!do_promote) {
346                                 inet_hash_remove(ifa);
347                                 *ifap1 = ifa->ifa_next;
348
349                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
350                                 blocking_notifier_call_chain(&inetaddr_chain,
351                                                 NETDEV_DOWN, ifa);
352                                 inet_free_ifa(ifa);
353                         } else {
354                                 promote = ifa;
355                                 break;
356                         }
357                 }
358         }
359
360         /* On promotion all secondaries from subnet are changing
361          * the primary IP, we must remove all their routes silently
362          * and later to add them back with new prefsrc. Do this
363          * while all addresses are on the device list.
364          */
365         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
366                 if (ifa1->ifa_mask == ifa->ifa_mask &&
367                     inet_ifa_match(ifa1->ifa_address, ifa))
368                         fib_del_ifaddr(ifa, ifa1);
369         }
370
371         /* 2. Unlink it */
372
373         *ifap = ifa1->ifa_next;
374         inet_hash_remove(ifa1);
375
376         /* 3. Announce address deletion */
377
378         /* Send message first, then call notifier.
379            At first sight, FIB update triggered by notifier
380            will refer to already deleted ifaddr, that could confuse
381            netlink listeners. It is not true: look, gated sees
382            that route deleted and if it still thinks that ifaddr
383            is valid, it will try to restore deleted routes... Grr.
384            So that, this order is correct.
385          */
386         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
387         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
388
389         if (promote) {
390                 struct in_ifaddr *next_sec = promote->ifa_next;
391
392                 if (prev_prom) {
393                         prev_prom->ifa_next = promote->ifa_next;
394                         promote->ifa_next = last_prim->ifa_next;
395                         last_prim->ifa_next = promote;
396                 }
397
398                 promote->ifa_flags &= ~IFA_F_SECONDARY;
399                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
400                 blocking_notifier_call_chain(&inetaddr_chain,
401                                 NETDEV_UP, promote);
402                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
403                         if (ifa1->ifa_mask != ifa->ifa_mask ||
404                             !inet_ifa_match(ifa1->ifa_address, ifa))
405                                         continue;
406                         fib_add_ifaddr(ifa);
407                 }
408
409         }
410         if (destroy)
411                 inet_free_ifa(ifa1);
412 }
413
414 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
415                          int destroy)
416 {
417         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
418 }
419
420 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
421                              u32 portid)
422 {
423         struct in_device *in_dev = ifa->ifa_dev;
424         struct in_ifaddr *ifa1, **ifap, **last_primary;
425
426         ASSERT_RTNL();
427
428         if (!ifa->ifa_local) {
429                 inet_free_ifa(ifa);
430                 return 0;
431         }
432
433         ifa->ifa_flags &= ~IFA_F_SECONDARY;
434         last_primary = &in_dev->ifa_list;
435
436         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
437              ifap = &ifa1->ifa_next) {
438                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
439                     ifa->ifa_scope <= ifa1->ifa_scope)
440                         last_primary = &ifa1->ifa_next;
441                 if (ifa1->ifa_mask == ifa->ifa_mask &&
442                     inet_ifa_match(ifa1->ifa_address, ifa)) {
443                         if (ifa1->ifa_local == ifa->ifa_local) {
444                                 inet_free_ifa(ifa);
445                                 return -EEXIST;
446                         }
447                         if (ifa1->ifa_scope != ifa->ifa_scope) {
448                                 inet_free_ifa(ifa);
449                                 return -EINVAL;
450                         }
451                         ifa->ifa_flags |= IFA_F_SECONDARY;
452                 }
453         }
454
455         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
456                 net_srandom(ifa->ifa_local);
457                 ifap = last_primary;
458         }
459
460         ifa->ifa_next = *ifap;
461         *ifap = ifa;
462
463         inet_hash_insert(dev_net(in_dev->dev), ifa);
464
465         /* Send message first, then call notifier.
466            Notifier will trigger FIB update, so that
467            listeners of netlink will know about new ifaddr */
468         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
469         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
470
471         return 0;
472 }
473
474 static int inet_insert_ifa(struct in_ifaddr *ifa)
475 {
476         return __inet_insert_ifa(ifa, NULL, 0);
477 }
478
479 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
480 {
481         struct in_device *in_dev = __in_dev_get_rtnl(dev);
482
483         ASSERT_RTNL();
484
485         if (!in_dev) {
486                 inet_free_ifa(ifa);
487                 return -ENOBUFS;
488         }
489         ipv4_devconf_setall(in_dev);
490         if (ifa->ifa_dev != in_dev) {
491                 WARN_ON(ifa->ifa_dev);
492                 in_dev_hold(in_dev);
493                 ifa->ifa_dev = in_dev;
494         }
495         if (ipv4_is_loopback(ifa->ifa_local))
496                 ifa->ifa_scope = RT_SCOPE_HOST;
497         return inet_insert_ifa(ifa);
498 }
499
500 /* Caller must hold RCU or RTNL :
501  * We dont take a reference on found in_device
502  */
503 struct in_device *inetdev_by_index(struct net *net, int ifindex)
504 {
505         struct net_device *dev;
506         struct in_device *in_dev = NULL;
507
508         rcu_read_lock();
509         dev = dev_get_by_index_rcu(net, ifindex);
510         if (dev)
511                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
512         rcu_read_unlock();
513         return in_dev;
514 }
515 EXPORT_SYMBOL(inetdev_by_index);
516
517 /* Called only from RTNL semaphored context. No locks. */
518
519 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
520                                     __be32 mask)
521 {
522         ASSERT_RTNL();
523
524         for_primary_ifa(in_dev) {
525                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
526                         return ifa;
527         } endfor_ifa(in_dev);
528         return NULL;
529 }
530
531 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
532 {
533         struct net *net = sock_net(skb->sk);
534         struct nlattr *tb[IFA_MAX+1];
535         struct in_device *in_dev;
536         struct ifaddrmsg *ifm;
537         struct in_ifaddr *ifa, **ifap;
538         int err = -EINVAL;
539
540         ASSERT_RTNL();
541
542         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
543         if (err < 0)
544                 goto errout;
545
546         ifm = nlmsg_data(nlh);
547         in_dev = inetdev_by_index(net, ifm->ifa_index);
548         if (in_dev == NULL) {
549                 err = -ENODEV;
550                 goto errout;
551         }
552
553         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
554              ifap = &ifa->ifa_next) {
555                 if (tb[IFA_LOCAL] &&
556                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
557                         continue;
558
559                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
560                         continue;
561
562                 if (tb[IFA_ADDRESS] &&
563                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
564                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
565                         continue;
566
567                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
568                 return 0;
569         }
570
571         err = -EADDRNOTAVAIL;
572 errout:
573         return err;
574 }
575
576 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
577 {
578         struct nlattr *tb[IFA_MAX+1];
579         struct in_ifaddr *ifa;
580         struct ifaddrmsg *ifm;
581         struct net_device *dev;
582         struct in_device *in_dev;
583         int err;
584
585         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
586         if (err < 0)
587                 goto errout;
588
589         ifm = nlmsg_data(nlh);
590         err = -EINVAL;
591         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
592                 goto errout;
593
594         dev = __dev_get_by_index(net, ifm->ifa_index);
595         err = -ENODEV;
596         if (dev == NULL)
597                 goto errout;
598
599         in_dev = __in_dev_get_rtnl(dev);
600         err = -ENOBUFS;
601         if (in_dev == NULL)
602                 goto errout;
603
604         ifa = inet_alloc_ifa();
605         if (ifa == NULL)
606                 /*
607                  * A potential indev allocation can be left alive, it stays
608                  * assigned to its device and is destroy with it.
609                  */
610                 goto errout;
611
612         ipv4_devconf_setall(in_dev);
613         in_dev_hold(in_dev);
614
615         if (tb[IFA_ADDRESS] == NULL)
616                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
617
618         INIT_HLIST_NODE(&ifa->hash);
619         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
620         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
621         ifa->ifa_flags = ifm->ifa_flags;
622         ifa->ifa_scope = ifm->ifa_scope;
623         ifa->ifa_dev = in_dev;
624
625         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
626         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
627
628         if (tb[IFA_BROADCAST])
629                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
630
631         if (tb[IFA_LABEL])
632                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
633         else
634                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
635
636         return ifa;
637
638 errout:
639         return ERR_PTR(err);
640 }
641
642 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
643 {
644         struct net *net = sock_net(skb->sk);
645         struct in_ifaddr *ifa;
646
647         ASSERT_RTNL();
648
649         ifa = rtm_to_ifaddr(net, nlh);
650         if (IS_ERR(ifa))
651                 return PTR_ERR(ifa);
652
653         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
654 }
655
656 /*
657  *      Determine a default network mask, based on the IP address.
658  */
659
660 static int inet_abc_len(__be32 addr)
661 {
662         int rc = -1;    /* Something else, probably a multicast. */
663
664         if (ipv4_is_zeronet(addr))
665                 rc = 0;
666         else {
667                 __u32 haddr = ntohl(addr);
668
669                 if (IN_CLASSA(haddr))
670                         rc = 8;
671                 else if (IN_CLASSB(haddr))
672                         rc = 16;
673                 else if (IN_CLASSC(haddr))
674                         rc = 24;
675         }
676
677         return rc;
678 }
679
680
681 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
682 {
683         struct ifreq ifr;
684         struct sockaddr_in sin_orig;
685         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
686         struct in_device *in_dev;
687         struct in_ifaddr **ifap = NULL;
688         struct in_ifaddr *ifa = NULL;
689         struct net_device *dev;
690         char *colon;
691         int ret = -EFAULT;
692         int tryaddrmatch = 0;
693
694         /*
695          *      Fetch the caller's info block into kernel space
696          */
697
698         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
699                 goto out;
700         ifr.ifr_name[IFNAMSIZ - 1] = 0;
701
702         /* save original address for comparison */
703         memcpy(&sin_orig, sin, sizeof(*sin));
704
705         colon = strchr(ifr.ifr_name, ':');
706         if (colon)
707                 *colon = 0;
708
709         dev_load(net, ifr.ifr_name);
710
711         switch (cmd) {
712         case SIOCGIFADDR:       /* Get interface address */
713         case SIOCGIFBRDADDR:    /* Get the broadcast address */
714         case SIOCGIFDSTADDR:    /* Get the destination address */
715         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
716                 /* Note that these ioctls will not sleep,
717                    so that we do not impose a lock.
718                    One day we will be forced to put shlock here (I mean SMP)
719                  */
720                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
721                 memset(sin, 0, sizeof(*sin));
722                 sin->sin_family = AF_INET;
723                 break;
724
725         case SIOCSIFFLAGS:
726                 ret = -EPERM;
727                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
728                         goto out;
729                 break;
730         case SIOCSIFADDR:       /* Set interface address (and family) */
731         case SIOCSIFBRDADDR:    /* Set the broadcast address */
732         case SIOCSIFDSTADDR:    /* Set the destination address */
733         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
734                 ret = -EPERM;
735                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
736                         goto out;
737                 ret = -EINVAL;
738                 if (sin->sin_family != AF_INET)
739                         goto out;
740                 break;
741         default:
742                 ret = -EINVAL;
743                 goto out;
744         }
745
746         rtnl_lock();
747
748         ret = -ENODEV;
749         dev = __dev_get_by_name(net, ifr.ifr_name);
750         if (!dev)
751                 goto done;
752
753         if (colon)
754                 *colon = ':';
755
756         in_dev = __in_dev_get_rtnl(dev);
757         if (in_dev) {
758                 if (tryaddrmatch) {
759                         /* Matthias Andree */
760                         /* compare label and address (4.4BSD style) */
761                         /* note: we only do this for a limited set of ioctls
762                            and only if the original address family was AF_INET.
763                            This is checked above. */
764                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
765                              ifap = &ifa->ifa_next) {
766                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
767                                     sin_orig.sin_addr.s_addr ==
768                                                         ifa->ifa_local) {
769                                         break; /* found */
770                                 }
771                         }
772                 }
773                 /* we didn't get a match, maybe the application is
774                    4.3BSD-style and passed in junk so we fall back to
775                    comparing just the label */
776                 if (!ifa) {
777                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
778                              ifap = &ifa->ifa_next)
779                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
780                                         break;
781                 }
782         }
783
784         ret = -EADDRNOTAVAIL;
785         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
786                 goto done;
787
788         switch (cmd) {
789         case SIOCGIFADDR:       /* Get interface address */
790                 sin->sin_addr.s_addr = ifa->ifa_local;
791                 goto rarok;
792
793         case SIOCGIFBRDADDR:    /* Get the broadcast address */
794                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
795                 goto rarok;
796
797         case SIOCGIFDSTADDR:    /* Get the destination address */
798                 sin->sin_addr.s_addr = ifa->ifa_address;
799                 goto rarok;
800
801         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
802                 sin->sin_addr.s_addr = ifa->ifa_mask;
803                 goto rarok;
804
805         case SIOCSIFFLAGS:
806                 if (colon) {
807                         ret = -EADDRNOTAVAIL;
808                         if (!ifa)
809                                 break;
810                         ret = 0;
811                         if (!(ifr.ifr_flags & IFF_UP))
812                                 inet_del_ifa(in_dev, ifap, 1);
813                         break;
814                 }
815                 ret = dev_change_flags(dev, ifr.ifr_flags);
816                 break;
817
818         case SIOCSIFADDR:       /* Set interface address (and family) */
819                 ret = -EINVAL;
820                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
821                         break;
822
823                 if (!ifa) {
824                         ret = -ENOBUFS;
825                         ifa = inet_alloc_ifa();
826                         if (!ifa)
827                                 break;
828                         INIT_HLIST_NODE(&ifa->hash);
829                         if (colon)
830                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
831                         else
832                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
833                 } else {
834                         ret = 0;
835                         if (ifa->ifa_local == sin->sin_addr.s_addr)
836                                 break;
837                         inet_del_ifa(in_dev, ifap, 0);
838                         ifa->ifa_broadcast = 0;
839                         ifa->ifa_scope = 0;
840                 }
841
842                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
843
844                 if (!(dev->flags & IFF_POINTOPOINT)) {
845                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
846                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
847                         if ((dev->flags & IFF_BROADCAST) &&
848                             ifa->ifa_prefixlen < 31)
849                                 ifa->ifa_broadcast = ifa->ifa_address |
850                                                      ~ifa->ifa_mask;
851                 } else {
852                         ifa->ifa_prefixlen = 32;
853                         ifa->ifa_mask = inet_make_mask(32);
854                 }
855                 ret = inet_set_ifa(dev, ifa);
856                 break;
857
858         case SIOCSIFBRDADDR:    /* Set the broadcast address */
859                 ret = 0;
860                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
861                         inet_del_ifa(in_dev, ifap, 0);
862                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
863                         inet_insert_ifa(ifa);
864                 }
865                 break;
866
867         case SIOCSIFDSTADDR:    /* Set the destination address */
868                 ret = 0;
869                 if (ifa->ifa_address == sin->sin_addr.s_addr)
870                         break;
871                 ret = -EINVAL;
872                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
873                         break;
874                 ret = 0;
875                 inet_del_ifa(in_dev, ifap, 0);
876                 ifa->ifa_address = sin->sin_addr.s_addr;
877                 inet_insert_ifa(ifa);
878                 break;
879
880         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
881
882                 /*
883                  *      The mask we set must be legal.
884                  */
885                 ret = -EINVAL;
886                 if (bad_mask(sin->sin_addr.s_addr, 0))
887                         break;
888                 ret = 0;
889                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
890                         __be32 old_mask = ifa->ifa_mask;
891                         inet_del_ifa(in_dev, ifap, 0);
892                         ifa->ifa_mask = sin->sin_addr.s_addr;
893                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
894
895                         /* See if current broadcast address matches
896                          * with current netmask, then recalculate
897                          * the broadcast address. Otherwise it's a
898                          * funny address, so don't touch it since
899                          * the user seems to know what (s)he's doing...
900                          */
901                         if ((dev->flags & IFF_BROADCAST) &&
902                             (ifa->ifa_prefixlen < 31) &&
903                             (ifa->ifa_broadcast ==
904                              (ifa->ifa_local|~old_mask))) {
905                                 ifa->ifa_broadcast = (ifa->ifa_local |
906                                                       ~sin->sin_addr.s_addr);
907                         }
908                         inet_insert_ifa(ifa);
909                 }
910                 break;
911         }
912 done:
913         rtnl_unlock();
914 out:
915         return ret;
916 rarok:
917         rtnl_unlock();
918         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
919         goto out;
920 }
921
922 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
923 {
924         struct in_device *in_dev = __in_dev_get_rtnl(dev);
925         struct in_ifaddr *ifa;
926         struct ifreq ifr;
927         int done = 0;
928
929         if (!in_dev)
930                 goto out;
931
932         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
933                 if (!buf) {
934                         done += sizeof(ifr);
935                         continue;
936                 }
937                 if (len < (int) sizeof(ifr))
938                         break;
939                 memset(&ifr, 0, sizeof(struct ifreq));
940                 if (ifa->ifa_label)
941                         strcpy(ifr.ifr_name, ifa->ifa_label);
942                 else
943                         strcpy(ifr.ifr_name, dev->name);
944
945                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
946                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
947                                                                 ifa->ifa_local;
948
949                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
950                         done = -EFAULT;
951                         break;
952                 }
953                 buf  += sizeof(struct ifreq);
954                 len  -= sizeof(struct ifreq);
955                 done += sizeof(struct ifreq);
956         }
957 out:
958         return done;
959 }
960
961 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
962 {
963         __be32 addr = 0;
964         struct in_device *in_dev;
965         struct net *net = dev_net(dev);
966
967         rcu_read_lock();
968         in_dev = __in_dev_get_rcu(dev);
969         if (!in_dev)
970                 goto no_in_dev;
971
972         for_primary_ifa(in_dev) {
973                 if (ifa->ifa_scope > scope)
974                         continue;
975                 if (!dst || inet_ifa_match(dst, ifa)) {
976                         addr = ifa->ifa_local;
977                         break;
978                 }
979                 if (!addr)
980                         addr = ifa->ifa_local;
981         } endfor_ifa(in_dev);
982
983         if (addr)
984                 goto out_unlock;
985 no_in_dev:
986
987         /* Not loopback addresses on loopback should be preferred
988            in this case. It is importnat that lo is the first interface
989            in dev_base list.
990          */
991         for_each_netdev_rcu(net, dev) {
992                 in_dev = __in_dev_get_rcu(dev);
993                 if (!in_dev)
994                         continue;
995
996                 for_primary_ifa(in_dev) {
997                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
998                             ifa->ifa_scope <= scope) {
999                                 addr = ifa->ifa_local;
1000                                 goto out_unlock;
1001                         }
1002                 } endfor_ifa(in_dev);
1003         }
1004 out_unlock:
1005         rcu_read_unlock();
1006         return addr;
1007 }
1008 EXPORT_SYMBOL(inet_select_addr);
1009
/*
 * Look on a single in_device for a local address that satisfies the
 * @dst / @local / @scope constraints (a zero @dst or @local is a wildcard).
 *
 * Two pieces of state are tracked while walking the list with for_ifa():
 *   addr - the candidate result: initially the first entry whose ifa_local
 *          equals @local (any entry when @local is 0) and whose ifa_scope
 *          does not exceed @scope; it may later be replaced by the entry
 *          that matched @dst, see the comments in the loop.
 *   same - set once an entry passes inet_ifa_match() for both @local and
 *          @dst.
 *
 * Returns addr when "same" is set at the end of the walk, 0 otherwise.
 */
1010 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1011                               __be32 local, int scope)
1012 {
1013         int same = 0;
1014         __be32 addr = 0;
1015
1016         for_ifa(in_dev) {
1017                 if (!addr &&
1018                     (local == ifa->ifa_local || !local) &&
1019                     ifa->ifa_scope <= scope) {
1020                         addr = ifa->ifa_local;
                             /* a match was already seen earlier: done */
1021                         if (same)
1022                                 break;
1023                 }
1024                 if (!same) {
1025                         same = (!local || inet_ifa_match(local, ifa)) &&
1026                                 (!dst || inet_ifa_match(dst, ifa));
1027                         if (same && addr) {
                                     /* only the "autoselect for dst" case needs more checks */
1028                                 if (local || !dst)
1029                                         break;
1030                                 /* Is the selected addr into dst subnet? */
1031                                 if (inet_ifa_match(addr, ifa))
1032                                         break;
1033                                 /* No, then can we use new local src? */
1034                                 if (ifa->ifa_scope <= scope) {
1035                                         addr = ifa->ifa_local;
1036                                         break;
1037                                 }
1038                                 /* search for large dst subnet for addr */
1039                                 same = 0;
1040                         }
1041                 }
1042         } endfor_ifa(in_dev);
1043
1044         return same ? addr : 0;
1045 }
1046
1047 /*
1048  * Confirm that local IP address exists using wildcards:
1049  * - in_dev: only on this interface, 0=any interface
1050  * - dst: only in the same subnet as dst, 0=any dst
1051  * - local: address, 0=autoselect the local address
1052  * - scope: maximum allowed scope value for the local address
1053  */
1054 __be32 inet_confirm_addr(struct in_device *in_dev,
1055                          __be32 dst, __be32 local, int scope)
1056 {
1057         __be32 addr = 0;
1058         struct net_device *dev;
1059         struct net *net;
1060
1061         if (scope != RT_SCOPE_LINK)
1062                 return confirm_addr_indev(in_dev, dst, local, scope);
1063
1064         net = dev_net(in_dev->dev);
1065         rcu_read_lock();
1066         for_each_netdev_rcu(net, dev) {
1067                 in_dev = __in_dev_get_rcu(dev);
1068                 if (in_dev) {
1069                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1070                         if (addr)
1071                                 break;
1072                 }
1073         }
1074         rcu_read_unlock();
1075
1076         return addr;
1077 }
1078 EXPORT_SYMBOL(inet_confirm_addr);
1079
1080 /*
1081  *      Device notifier
1082  */
1083
1084 int register_inetaddr_notifier(struct notifier_block *nb)
1085 {
1086         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1087 }
1088 EXPORT_SYMBOL(register_inetaddr_notifier);
1089
1090 int unregister_inetaddr_notifier(struct notifier_block *nb)
1091 {
1092         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1093 }
1094 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1095
1096 /* Rename ifa_labels for a device name change. Make some effort to preserve
1097  * existing alias numbering and to create unique labels if possible.
1098 */
1099 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1100 {
1101         struct in_ifaddr *ifa;
1102         int named = 0;
1103
1104         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1105                 char old[IFNAMSIZ], *dot;
1106
1107                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1108                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1109                 if (named++ == 0)
1110                         goto skip;
1111                 dot = strchr(old, ':');
1112                 if (dot == NULL) {
1113                         sprintf(old, ":%d", named);
1114                         dot = old;
1115                 }
1116                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1117                         strcat(ifa->ifa_label, dot);
1118                 else
1119                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1120 skip:
1121                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1122         }
1123 }
1124
/* An MTU is acceptable for IPv4 only when it is at least 68. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1129
1130 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1131                                         struct in_device *in_dev)
1132
1133 {
1134         struct in_ifaddr *ifa;
1135
1136         for (ifa = in_dev->ifa_list; ifa;
1137              ifa = ifa->ifa_next) {
1138                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1139                          ifa->ifa_local, dev,
1140                          ifa->ifa_local, NULL,
1141                          dev->dev_addr, NULL);
1142         }
1143 }
1144
1145 /* Called only under RTNL semaphore */
/*
 * Notifier callback that keeps the IPv4 state of a device (its in_device)
 * in step with device events.  @ptr is the struct net_device concerned,
 * @event the NETDEV_* code.
 *
 * While the device has no in_device only two events do anything:
 * NETDEV_REGISTER creates one (loopback additionally gets NOXFRM and
 * NOPOLICY set) and NETDEV_CHANGEMTU creates one if the MTU is valid.
 *
 * Returns NOTIFY_DONE, except for notifier_from_errno(-ENOMEM) when
 * inetdev_init() fails during NETDEV_REGISTER.
 */
1146
1147 static int inetdev_event(struct notifier_block *this, unsigned long event,
1148                          void *ptr)
1149 {
1150         struct net_device *dev = ptr;
1151         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1152
1153         ASSERT_RTNL();
1154
1155         if (!in_dev) {
1156                 if (event == NETDEV_REGISTER) {
1157                         in_dev = inetdev_init(dev);
1158                         if (!in_dev)
1159                                 return notifier_from_errno(-ENOMEM);
1160                         if (dev->flags & IFF_LOOPBACK) {
1161                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1162                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1163                         }
1164                 } else if (event == NETDEV_CHANGEMTU) {
1165                         /* Re-enabling IP */
1166                         if (inetdev_valid_mtu(dev->mtu))
1167                                 in_dev = inetdev_init(dev);
1168                 }
1169                 goto out;
1170         }
1171
1172         switch (event) {
1173         case NETDEV_REGISTER:
                     /* an in_device already exists at registration time: unexpected */
1174                 pr_debug("%s: bug\n", __func__);
1175                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1176                 break;
1177         case NETDEV_UP:
1178                 if (!inetdev_valid_mtu(dev->mtu))
1179                         break;
                     /* a loopback device coming up gets INADDR_LOOPBACK/8, host scope */
1180                 if (dev->flags & IFF_LOOPBACK) {
1181                         struct in_ifaddr *ifa = inet_alloc_ifa();
1182
1183                         if (ifa) {
1184                                 INIT_HLIST_NODE(&ifa->hash);
1185                                 ifa->ifa_local =
1186                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1187                                 ifa->ifa_prefixlen = 8;
1188                                 ifa->ifa_mask = inet_make_mask(8);
1189                                 in_dev_hold(in_dev);
1190                                 ifa->ifa_dev = in_dev;
1191                                 ifa->ifa_scope = RT_SCOPE_HOST;
1192                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1193                                 inet_insert_ifa(ifa);
1194                         }
1195                 }
1196                 ip_mc_up(in_dev);
1197                 /* fall through */
1198         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR announce themselves only if arp_notify is set */
1199                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1200                         break;
1201                 /* fall through */
1202         case NETDEV_NOTIFY_PEERS:
1203                 /* Send gratuitous ARP to notify of link change */
1204                 inetdev_send_gratuitous_arp(dev, in_dev);
1205                 break;
1206         case NETDEV_DOWN:
1207                 ip_mc_down(in_dev);
1208                 break;
1209         case NETDEV_PRE_TYPE_CHANGE:
1210                 ip_mc_unmap(in_dev);
1211                 break;
1212         case NETDEV_POST_TYPE_CHANGE:
1213                 ip_mc_remap(in_dev);
1214                 break;
1215         case NETDEV_CHANGEMTU:
1216                 if (inetdev_valid_mtu(dev->mtu))
1217                         break;
1218                 /* disable IP when MTU is not enough; fall through */
1219         case NETDEV_UNREGISTER:
1220                 inetdev_destroy(in_dev);
1221                 break;
1222         case NETDEV_CHANGENAME:
1223                 /* Do not notify about label change, this event is
1224                  * not interesting to applications using netlink.
1225                  */
                     /* NOTE(review): inetdev_changename() does call
                      * rtmsg_ifa(RTM_NEWADDR, ...) for every address, which
                      * seems to contradict the comment above - confirm.
                      */
1226                 inetdev_changename(dev, in_dev);
1227
                     /* re-register the sysctl entries (presumably so they follow the new name) */
1228                 devinet_sysctl_unregister(in_dev);
1229                 devinet_sysctl_register(in_dev);
1230                 break;
1231         }
1232 out:
1233         return NOTIFY_DONE;
1234 }
1235
/*
 * Notifier block whose callback is inetdev_event().
 * NOTE(review): presumably registered on the netdevice notifier chain by
 * init code outside this view - confirm.
 */
1236 static struct notifier_block ip_netdev_notifier = {
1237         .notifier_call = inetdev_event,
1238 };
1239
1240 static size_t inet_nlmsg_size(void)
1241 {
1242         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1243                + nla_total_size(4) /* IFA_ADDRESS */
1244                + nla_total_size(4) /* IFA_LOCAL */
1245                + nla_total_size(4) /* IFA_BROADCAST */
1246                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1247 }
1248
1249 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1250                             u32 portid, u32 seq, int event, unsigned int flags)
1251 {
1252         struct ifaddrmsg *ifm;
1253         struct nlmsghdr  *nlh;
1254
1255         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1256         if (nlh == NULL)
1257                 return -EMSGSIZE;
1258
1259         ifm = nlmsg_data(nlh);
1260         ifm->ifa_family = AF_INET;
1261         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1262         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1263         ifm->ifa_scope = ifa->ifa_scope;
1264         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1265
1266         if ((ifa->ifa_address &&
1267              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1268             (ifa->ifa_local &&
1269              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1270             (ifa->ifa_broadcast &&
1271              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1272             (ifa->ifa_label[0] &&
1273              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1274                 goto nla_put_failure;
1275
1276         return nlmsg_end(skb, nlh);
1277
1278 nla_put_failure:
1279         nlmsg_cancel(skb, nlh);
1280         return -EMSGSIZE;
1281 }
1282
/*
 * Netlink dump callback: emit one RTM_NEWADDR message (flagged NLM_F_MULTI)
 * per IPv4 address of every device found in the dev_index_head hash table of
 * the requesting socket's namespace.
 *
 * The position is kept in cb->args[] so that a later call can resume:
 *   args[0] - hash bucket
 *   args[1] - index of the device within that bucket
 *   args[2] - index of the address on that device
 * The walk stops as soon as inet_fill_ifaddr() returns <= 0 (presumably
 * because @skb is full) and the current position is stored.
 *
 * Returns skb->len.
 */
1283 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1284 {
1285         struct net *net = sock_net(skb->sk);
1286         int h, s_h;
1287         int idx, s_idx;
1288         int ip_idx, s_ip_idx;
1289         struct net_device *dev;
1290         struct in_device *in_dev;
1291         struct in_ifaddr *ifa;
1292         struct hlist_head *head;
1293         struct hlist_node *node;
1294
             /* pick up where the previous call stopped */
1295         s_h = cb->args[0];
1296         s_idx = idx = cb->args[1];
1297         s_ip_idx = ip_idx = cb->args[2];
1298
             /* the saved device index only applies to the first bucket visited */
1299         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1300                 idx = 0;
1301                 head = &net->dev_index_head[h];
1302                 rcu_read_lock();
1303                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1304                         if (idx < s_idx)
1305                                 goto cont;
                             /* past the resume point: start at this device's first address */
1306                         if (h > s_h || idx > s_idx)
1307                                 s_ip_idx = 0;
1308                         in_dev = __in_dev_get_rcu(dev);
1309                         if (!in_dev)
1310                                 goto cont;
1311
1312                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1313                              ifa = ifa->ifa_next, ip_idx++) {
1314                                 if (ip_idx < s_ip_idx)
1315                                         continue;
1316                                 if (inet_fill_ifaddr(skb, ifa,
1317                                              NETLINK_CB(cb->skb).portid,
1318                                              cb->nlh->nlmsg_seq,
1319                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1320                                         rcu_read_unlock();
1321                                         goto done;
1322                                 }
1323                         }
1324 cont:
1325                         idx++;
1326                 }
1327                 rcu_read_unlock();
1328         }
1329
1330 done:
             /* remember the position for the next invocation */
1331         cb->args[0] = h;
1332         cb->args[1] = idx;
1333         cb->args[2] = ip_idx;
1334
1335         return skb->len;
1336 }
1337
1338 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1339                       u32 portid)
1340 {
1341         struct sk_buff *skb;
1342         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1343         int err = -ENOBUFS;
1344         struct net *net;
1345
1346         net = dev_net(ifa->ifa_dev->dev);
1347         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1348         if (skb == NULL)
1349                 goto errout;
1350
1351         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1352         if (err < 0) {
1353                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1354                 WARN_ON(err == -EMSGSIZE);
1355                 kfree_skb(skb);
1356                 goto errout;
1357         }
1358         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1359         return;
1360 errout:
1361         if (err < 0)
1362                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1363 }
1364
1365 static size_t inet_get_link_af_size(const struct net_device *dev)
1366 {
1367         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1368
1369         if (!in_dev)
1370                 return 0;
1371
1372         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1373 }
1374
1375 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1376 {
1377         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1378         struct nlattr *nla;
1379         int i;
1380
1381         if (!in_dev)
1382                 return -ENODATA;
1383
1384         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1385         if (nla == NULL)
1386                 return -EMSGSIZE;
1387
1388         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1389                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1390
1391         return 0;
1392 }
1393
/*
 * Policy inet_validate_link_af() passes to nla_parse_nested():
 * IFLA_INET_CONF has to be a nested attribute.
 */
1394 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1395         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1396 };
1397
1398 static int inet_validate_link_af(const struct net_device *dev,
1399                                  const struct nlattr *nla)
1400 {
1401         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1402         int err, rem;
1403
1404         if (dev && !__in_dev_get_rtnl(dev))
1405                 return -EAFNOSUPPORT;
1406
1407         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1408         if (err < 0)
1409                 return err;
1410
1411         if (tb[IFLA_INET_CONF]) {
1412                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1413                         int cfgid = nla_type(a);
1414
1415                         if (nla_len(a) < 4)
1416                                 return -EINVAL;
1417
1418                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1419                                 return -EINVAL;
1420                 }
1421         }
1422
1423         return 0;
1424 }
1425
1426 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1427 {
1428         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1429         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1430         int rem;
1431
1432         if (!in_dev)
1433                 return -EAFNOSUPPORT;
1434
1435         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1436                 BUG();
1437
1438         if (tb[IFLA_INET_CONF]) {
1439                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1440                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1441         }
1442
1443         return 0;
1444 }
1445
1446 static int inet_netconf_msgsize_devconf(int type)
1447 {
1448         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1449                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1450
1451         /* type -1 is used for ALL */
1452         if (type == -1 || type == NETCONFA_FORWARDING)
1453                 size += nla_total_size(4);
1454         if (type == -1 || type == NETCONFA_RP_FILTER)
1455                 size += nla_total_size(4);
1456         if (type == -1 || type == NETCONFA_MC_FORWARDING)
1457                 size += nla_total_size(4);
1458
1459         return size;
1460 }
1461
1462 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1463                                      struct ipv4_devconf *devconf, u32 portid,
1464                                      u32 seq, int event, unsigned int flags,
1465                                      int type)
1466 {
1467         struct nlmsghdr  *nlh;
1468         struct netconfmsg *ncm;
1469
1470         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1471                         flags);
1472         if (nlh == NULL)
1473                 return -EMSGSIZE;
1474
1475         ncm = nlmsg_data(nlh);
1476         ncm->ncm_family = AF_INET;
1477
1478         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1479                 goto nla_put_failure;
1480
1481         /* type -1 is used for ALL */
1482         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1483             nla_put_s32(skb, NETCONFA_FORWARDING,
1484                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1485                 goto nla_put_failure;
1486         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1487             nla_put_s32(skb, NETCONFA_RP_FILTER,
1488                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1489                 goto nla_put_failure;
1490         if ((type == -1 || type == NETCONFA_MC_FORWARDING) &&
1491             nla_put_s32(skb, NETCONFA_MC_FORWARDING,
1492                         IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
1493                 goto nla_put_failure;
1494
1495         return nlmsg_end(skb, nlh);
1496
1497 nla_put_failure:
1498         nlmsg_cancel(skb, nlh);
1499         return -EMSGSIZE;
1500 }
1501
1502 void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1503                                  struct ipv4_devconf *devconf)
1504 {
1505         struct sk_buff *skb;
1506         int err = -ENOBUFS;
1507
1508         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1509         if (skb == NULL)
1510                 goto errout;
1511
1512         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1513                                         RTM_NEWNETCONF, 0, type);
1514         if (err < 0) {
1515                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1516                 WARN_ON(err == -EMSGSIZE);
1517                 kfree_skb(skb);
1518                 goto errout;
1519         }
1520         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1521         return;
1522 errout:
1523         if (err < 0)
1524                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1525 }
1526
/*
 * Attribute policy inet_netconf_get_devconf() passes to nlmsg_parse().
 * NOTE(review): there is no entry for NETCONFA_MC_FORWARDING although
 * inet_netconf_fill_devconf() emits that attribute - confirm whether the
 * omission is intentional.
 */
1527 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1528         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1529         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1530         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1531 };
1532
1533 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1534                                     struct nlmsghdr *nlh,
1535                                     void *arg)
1536 {
1537         struct net *net = sock_net(in_skb->sk);
1538         struct nlattr *tb[NETCONFA_MAX+1];
1539         struct netconfmsg *ncm;
1540         struct sk_buff *skb;
1541         struct ipv4_devconf *devconf;
1542         struct in_device *in_dev;
1543         struct net_device *dev;
1544         int ifindex;
1545         int err;
1546
1547         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1548                           devconf_ipv4_policy);
1549         if (err < 0)
1550                 goto errout;
1551
1552         err = EINVAL;
1553         if (!tb[NETCONFA_IFINDEX])
1554                 goto errout;
1555
1556         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1557         switch (ifindex) {
1558         case NETCONFA_IFINDEX_ALL:
1559                 devconf = net->ipv4.devconf_all;
1560                 break;
1561         case NETCONFA_IFINDEX_DEFAULT:
1562                 devconf = net->ipv4.devconf_dflt;
1563                 break;
1564         default:
1565                 dev = __dev_get_by_index(net, ifindex);
1566                 if (dev == NULL)
1567                         goto errout;
1568                 in_dev = __in_dev_get_rtnl(dev);
1569                 if (in_dev == NULL)
1570                         goto errout;
1571                 devconf = &in_dev->cnf;
1572                 break;
1573         }
1574
1575         err = -ENOBUFS;
1576         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1577         if (skb == NULL)
1578                 goto errout;
1579
1580         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1581                                         NETLINK_CB(in_skb).portid,
1582                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1583                                         -1);
1584         if (err < 0) {
1585                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1586                 WARN_ON(err == -EMSGSIZE);
1587                 kfree_skb(skb);
1588                 goto errout;
1589         }
1590         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1591 errout:
1592         return err;
1593 }
1594
1595 #ifdef CONFIG_SYSCTL
1596
1597 static void devinet_copy_dflt_conf(struct net *net, int i)
1598 {
1599         struct net_device *dev;
1600
1601         rcu_read_lock();
1602         for_each_netdev_rcu(net, dev) {
1603                 struct in_device *in_dev;
1604
1605                 in_dev = __in_dev_get_rcu(dev);
1606                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1607                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1608         }
1609         rcu_read_unlock();
1610 }
1611
1612 /* called with RTNL locked */
1613 static void inet_forward_change(struct net *net)
1614 {
1615         struct net_device *dev;
1616         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1617
1618         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1619         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1620         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1621                                     NETCONFA_IFINDEX_ALL,
1622                                     net->ipv4.devconf_all);
1623         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1624                                     NETCONFA_IFINDEX_DEFAULT,
1625                                     net->ipv4.devconf_dflt);
1626
1627         for_each_netdev(net, dev) {
1628                 struct in_device *in_dev;
1629                 if (on)
1630                         dev_disable_lro(dev);
1631                 rcu_read_lock();
1632                 in_dev = __in_dev_get_rcu(dev);
1633                 if (in_dev) {
1634                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1635                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1636                                                     dev->ifindex, &in_dev->cnf);
1637                 }
1638                 rcu_read_unlock();
1639         }
1640 }
1641
1642 static int devinet_conf_proc(ctl_table *ctl, int write,
1643                              void __user *buffer,
1644                              size_t *lenp, loff_t *ppos)
1645 {
1646         int old_value = *(int *)ctl->data;
1647         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1648         int new_value = *(int *)ctl->data;
1649
1650         if (write) {
1651                 struct ipv4_devconf *cnf = ctl->extra1;
1652                 struct net *net = ctl->extra2;
1653                 int i = (int *)ctl->data - cnf->data;
1654
1655                 set_bit(i, cnf->state);
1656
1657                 if (cnf == net->ipv4.devconf_dflt)
1658                         devinet_copy_dflt_conf(net, i);
1659                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1660                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1661                         if ((new_value == 0) && (old_value != 0))
1662                                 rt_cache_flush(net);
1663                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1664                     new_value != old_value) {
1665                         int ifindex;
1666
1667                         if (cnf == net->ipv4.devconf_dflt)
1668                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1669                         else if (cnf == net->ipv4.devconf_all)
1670                                 ifindex = NETCONFA_IFINDEX_ALL;
1671                         else {
1672                                 struct in_device *idev =
1673                                         container_of(cnf, struct in_device,
1674                                                      cnf);
1675                                 ifindex = idev->dev->ifindex;
1676                         }
1677                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1678                                                     ifindex, cnf);
1679                 }
1680         }
1681
1682         return ret;
1683 }
1684
1685 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1686                                   void __user *buffer,
1687                                   size_t *lenp, loff_t *ppos)
1688 {
1689         int *valp = ctl->data;
1690         int val = *valp;
1691         loff_t pos = *ppos;
1692         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1693
1694         if (write && *valp != val) {
1695                 struct net *net = ctl->extra2;
1696
1697                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1698                         if (!rtnl_trylock()) {
1699                                 /* Restore the original values before restarting */
1700                                 *valp = val;
1701                                 *ppos = pos;
1702                                 return restart_syscall();
1703                         }
1704                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1705                                 inet_forward_change(net);
1706                         } else {
1707                                 struct ipv4_devconf *cnf = ctl->extra1;
1708                                 struct in_device *idev =
1709                                         container_of(cnf, struct in_device, cnf);
1710                                 if (*valp)
1711                                         dev_disable_lro(idev->dev);
1712                                 inet_netconf_notify_devconf(net,
1713                                                             NETCONFA_FORWARDING,
1714                                                             idev->dev->ifindex,
1715                                                             cnf);
1716                         }
1717                         rtnl_unlock();
1718                         rt_cache_flush(net);
1719                 } else
1720                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1721                                                     NETCONFA_IFINDEX_DEFAULT,
1722                                                     net->ipv4.devconf_dflt);
1723         }
1724
1725         return ret;
1726 }
1727
1728 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1729                                 void __user *buffer,
1730                                 size_t *lenp, loff_t *ppos)
1731 {
1732         int *valp = ctl->data;
1733         int val = *valp;
1734         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1735         struct net *net = ctl->extra2;
1736
1737         if (write && *valp != val)
1738                 rt_cache_flush(net);
1739
1740         return ret;
1741 }
1742
/*
 * Initializer for one ctl_table slot describing devconf attribute 'attr'.
 * .data points at slot IPV4_DEVCONF_<attr> - 1 of the global
 * ipv4_devconf.data array and .extra1 at ipv4_devconf itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, label, perms, handler) \
	{ \
		.procname	= label, \
		.mode		= perms, \
		.maxlen		= sizeof(int), \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.extra1		= &ipv4_devconf, \
		.proc_handler	= handler, \
	}

/* Mode 0644 entry handled by devinet_conf_proc. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, label) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, label) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, label, handler) \
	DEVINET_SYSCTL_ENTRY(attr, label, 0644, handler)

/* Mode 0644 entry handled by ipv4_doint_and_flush. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, label) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, label, ipv4_doint_and_flush)
1765
1766 static struct devinet_sysctl_table {
1767         struct ctl_table_header *sysctl_header;
1768         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1769 } devinet_sysctl = {
1770         .devinet_vars = {
1771                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1772                                              devinet_sysctl_forward),
1773                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1774
1775                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1776                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1777                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1778                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1779                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1780                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1781                                         "accept_source_route"),
1782                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1783                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1784                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1785                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1786                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1787                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1788                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1789                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1790                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1791                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1792                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1793                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1794                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1795
1796                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1797                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1798                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1799                                               "force_igmp_version"),
1800                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1801                                               "promote_secondaries"),
1802                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
1803                                               "route_localnet"),
1804         },
1805 };
1806
1807 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1808                                         struct ipv4_devconf *p)
1809 {
1810         int i;
1811         struct devinet_sysctl_table *t;
1812         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1813
1814         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1815         if (!t)
1816                 goto out;
1817
1818         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1819                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1820                 t->devinet_vars[i].extra1 = p;
1821                 t->devinet_vars[i].extra2 = net;
1822         }
1823
1824         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1825
1826         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1827         if (!t->sysctl_header)
1828                 goto free;
1829
1830         p->sysctl = t;
1831         return 0;
1832
1833 free:
1834         kfree(t);
1835 out:
1836         return -ENOBUFS;
1837 }
1838
1839 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1840 {
1841         struct devinet_sysctl_table *t = cnf->sysctl;
1842
1843         if (t == NULL)
1844                 return;
1845
1846         cnf->sysctl = NULL;
1847         unregister_net_sysctl_table(t->sysctl_header);
1848         kfree(t);
1849 }
1850
1851 static void devinet_sysctl_register(struct in_device *idev)
1852 {
1853         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1854         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1855                                         &idev->cnf);
1856 }
1857
1858 static void devinet_sysctl_unregister(struct in_device *idev)
1859 {
1860         __devinet_sysctl_unregister(&idev->cnf);
1861         neigh_sysctl_unregister(idev->arp_parms);
1862 }
1863
1864 static struct ctl_table ctl_forward_entry[] = {
1865         {
1866                 .procname       = "ip_forward",
1867                 .data           = &ipv4_devconf.data[
1868                                         IPV4_DEVCONF_FORWARDING - 1],
1869                 .maxlen         = sizeof(int),
1870                 .mode           = 0644,
1871                 .proc_handler   = devinet_sysctl_forward,
1872                 .extra1         = &ipv4_devconf,
1873                 .extra2         = &init_net,
1874         },
1875         { },
1876 };
1877 #endif
1878
1879 static __net_init int devinet_init_net(struct net *net)
1880 {
1881         int err;
1882         struct ipv4_devconf *all, *dflt;
1883 #ifdef CONFIG_SYSCTL
1884         struct ctl_table *tbl = ctl_forward_entry;
1885         struct ctl_table_header *forw_hdr;
1886 #endif
1887
1888         err = -ENOMEM;
1889         all = &ipv4_devconf;
1890         dflt = &ipv4_devconf_dflt;
1891
1892         if (!net_eq(net, &init_net)) {
1893                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1894                 if (all == NULL)
1895                         goto err_alloc_all;
1896
1897                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1898                 if (dflt == NULL)
1899                         goto err_alloc_dflt;
1900
1901 #ifdef CONFIG_SYSCTL
1902                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1903                 if (tbl == NULL)
1904                         goto err_alloc_ctl;
1905
1906                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1907                 tbl[0].extra1 = all;
1908                 tbl[0].extra2 = net;
1909 #endif
1910         }
1911
1912 #ifdef CONFIG_SYSCTL
1913         err = __devinet_sysctl_register(net, "all", all);
1914         if (err < 0)
1915                 goto err_reg_all;
1916
1917         err = __devinet_sysctl_register(net, "default", dflt);
1918         if (err < 0)
1919                 goto err_reg_dflt;
1920
1921         err = -ENOMEM;
1922         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1923         if (forw_hdr == NULL)
1924                 goto err_reg_ctl;
1925         net->ipv4.forw_hdr = forw_hdr;
1926 #endif
1927
1928         net->ipv4.devconf_all = all;
1929         net->ipv4.devconf_dflt = dflt;
1930         return 0;
1931
1932 #ifdef CONFIG_SYSCTL
1933 err_reg_ctl:
1934         __devinet_sysctl_unregister(dflt);
1935 err_reg_dflt:
1936         __devinet_sysctl_unregister(all);
1937 err_reg_all:
1938         if (tbl != ctl_forward_entry)
1939                 kfree(tbl);
1940 err_alloc_ctl:
1941 #endif
1942         if (dflt != &ipv4_devconf_dflt)
1943                 kfree(dflt);
1944 err_alloc_dflt:
1945         if (all != &ipv4_devconf)
1946                 kfree(all);
1947 err_alloc_all:
1948         return err;
1949 }
1950
1951 static __net_exit void devinet_exit_net(struct net *net)
1952 {
1953 #ifdef CONFIG_SYSCTL
1954         struct ctl_table *tbl;
1955
1956         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1957         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1958         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1959         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1960         kfree(tbl);
1961 #endif
1962         kfree(net->ipv4.devconf_dflt);
1963         kfree(net->ipv4.devconf_all);
1964 }
1965
1966 static __net_initdata struct pernet_operations devinet_ops = {
1967         .init = devinet_init_net,
1968         .exit = devinet_exit_net,
1969 };
1970
1971 static struct rtnl_af_ops inet_af_ops = {
1972         .family           = AF_INET,
1973         .fill_link_af     = inet_fill_link_af,
1974         .get_link_af_size = inet_get_link_af_size,
1975         .validate_link_af = inet_validate_link_af,
1976         .set_link_af      = inet_set_link_af,
1977 };
1978
1979 void __init devinet_init(void)
1980 {
1981         int i;
1982
1983         for (i = 0; i < IN4_ADDR_HSIZE; i++)
1984                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1985
1986         register_pernet_subsys(&devinet_ops);
1987
1988         register_gifconf(PF_INET, inet_gifconf);
1989         register_netdevice_notifier(&ip_netdev_notifier);
1990
1991         rtnl_af_register(&inet_af_ops);
1992
1993         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1994         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1995         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1996         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
1997                       NULL, NULL);
1998 }
1999