]> Pileus Git - ~andy/linux/blob - net/ipv4/devinet.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[~andy/linux] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
67 #include "fib_lookup.h"
68
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75         },
76 };
77
78 static struct ipv4_devconf ipv4_devconf_dflt = {
79         .data = {
80                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
81                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
84                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
85         },
86 };
87
/*
 * Read configuration item @attr from the default device configuration
 * referenced by @net (net->ipv4.devconf_dflt).
 *
 * @net is parenthesized inside the expansion so that an arbitrary
 * pointer-valued expression (not just a plain identifier) can be passed.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90
91 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
92         [IFA_LOCAL]             = { .type = NLA_U32 },
93         [IFA_ADDRESS]           = { .type = NLA_U32 },
94         [IFA_BROADCAST]         = { .type = NLA_U32 },
95         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
96 };
97
98 #define IN4_ADDR_HSIZE_SHIFT    8
99 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
100
101 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
102 static DEFINE_SPINLOCK(inet_addr_hash_lock);
103
104 static u32 inet_addr_hash(struct net *net, __be32 addr)
105 {
106         u32 val = (__force u32) addr ^ net_hash_mix(net);
107
108         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
109 }
110
111 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
112 {
113         u32 hash = inet_addr_hash(net, ifa->ifa_local);
114
115         spin_lock(&inet_addr_hash_lock);
116         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
117         spin_unlock(&inet_addr_hash_lock);
118 }
119
120 static void inet_hash_remove(struct in_ifaddr *ifa)
121 {
122         spin_lock(&inet_addr_hash_lock);
123         hlist_del_init_rcu(&ifa->hash);
124         spin_unlock(&inet_addr_hash_lock);
125 }
126
127 /**
128  * __ip_dev_find - find the first device with a given source address.
129  * @net: the net namespace
130  * @addr: the source address
131  * @devref: if true, take a reference on the found device
132  *
133  * If a caller uses devref=false, it should be protected by RCU, or RTNL
134  */
135 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
136 {
137         u32 hash = inet_addr_hash(net, addr);
138         struct net_device *result = NULL;
139         struct in_ifaddr *ifa;
140         struct hlist_node *node;
141
142         rcu_read_lock();
143         hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
144                 if (ifa->ifa_local == addr) {
145                         struct net_device *dev = ifa->ifa_dev->dev;
146
147                         if (!net_eq(dev_net(dev), net))
148                                 continue;
149                         result = dev;
150                         break;
151                 }
152         }
153         if (!result) {
154                 struct flowi4 fl4 = { .daddr = addr };
155                 struct fib_result res = { 0 };
156                 struct fib_table *local;
157
158                 /* Fallback to FIB local table so that communication
159                  * over loopback subnets work.
160                  */
161                 local = fib_get_table(net, RT_TABLE_LOCAL);
162                 if (local &&
163                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
164                     res.type == RTN_LOCAL)
165                         result = FIB_RES_DEV(res);
166         }
167         if (result && devref)
168                 dev_hold(result);
169         rcu_read_unlock();
170         return result;
171 }
172 EXPORT_SYMBOL(__ip_dev_find);
173
174 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
175
176 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
177 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
178                          int destroy);
179 #ifdef CONFIG_SYSCTL
180 static void devinet_sysctl_register(struct in_device *idev);
181 static void devinet_sysctl_unregister(struct in_device *idev);
182 #else
183 static void devinet_sysctl_register(struct in_device *idev)
184 {
185 }
186 static void devinet_sysctl_unregister(struct in_device *idev)
187 {
188 }
189 #endif
190
191 /* Locks all the inet devices. */
192
193 static struct in_ifaddr *inet_alloc_ifa(void)
194 {
195         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
196 }
197
198 static void inet_rcu_free_ifa(struct rcu_head *head)
199 {
200         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
201         if (ifa->ifa_dev)
202                 in_dev_put(ifa->ifa_dev);
203         kfree(ifa);
204 }
205
206 static void inet_free_ifa(struct in_ifaddr *ifa)
207 {
208         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
209 }
210
211 void in_dev_finish_destroy(struct in_device *idev)
212 {
213         struct net_device *dev = idev->dev;
214
215         WARN_ON(idev->ifa_list);
216         WARN_ON(idev->mc_list);
217 #ifdef NET_REFCNT_DEBUG
218         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
219 #endif
220         dev_put(dev);
221         if (!idev->dead)
222                 pr_err("Freeing alive in_device %p\n", idev);
223         else
224                 kfree(idev);
225 }
226 EXPORT_SYMBOL(in_dev_finish_destroy);
227
228 static struct in_device *inetdev_init(struct net_device *dev)
229 {
230         struct in_device *in_dev;
231
232         ASSERT_RTNL();
233
234         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
235         if (!in_dev)
236                 goto out;
237         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
238                         sizeof(in_dev->cnf));
239         in_dev->cnf.sysctl = NULL;
240         in_dev->dev = dev;
241         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
242         if (!in_dev->arp_parms)
243                 goto out_kfree;
244         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
245                 dev_disable_lro(dev);
246         /* Reference in_dev->dev */
247         dev_hold(dev);
248         /* Account for reference dev->ip_ptr (below) */
249         in_dev_hold(in_dev);
250
251         devinet_sysctl_register(in_dev);
252         ip_mc_init_dev(in_dev);
253         if (dev->flags & IFF_UP)
254                 ip_mc_up(in_dev);
255
256         /* we can receive as soon as ip_ptr is set -- do this last */
257         rcu_assign_pointer(dev->ip_ptr, in_dev);
258 out:
259         return in_dev;
260 out_kfree:
261         kfree(in_dev);
262         in_dev = NULL;
263         goto out;
264 }
265
266 static void in_dev_rcu_put(struct rcu_head *head)
267 {
268         struct in_device *idev = container_of(head, struct in_device, rcu_head);
269         in_dev_put(idev);
270 }
271
272 static void inetdev_destroy(struct in_device *in_dev)
273 {
274         struct in_ifaddr *ifa;
275         struct net_device *dev;
276
277         ASSERT_RTNL();
278
279         dev = in_dev->dev;
280
281         in_dev->dead = 1;
282
283         ip_mc_destroy_dev(in_dev);
284
285         while ((ifa = in_dev->ifa_list) != NULL) {
286                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
287                 inet_free_ifa(ifa);
288         }
289
290         RCU_INIT_POINTER(dev->ip_ptr, NULL);
291
292         devinet_sysctl_unregister(in_dev);
293         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
294         arp_ifdown(dev);
295
296         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
297 }
298
299 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
300 {
301         rcu_read_lock();
302         for_primary_ifa(in_dev) {
303                 if (inet_ifa_match(a, ifa)) {
304                         if (!b || inet_ifa_match(b, ifa)) {
305                                 rcu_read_unlock();
306                                 return 1;
307                         }
308                 }
309         } endfor_ifa(in_dev);
310         rcu_read_unlock();
311         return 0;
312 }
313
314 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
315                          int destroy, struct nlmsghdr *nlh, u32 portid)
316 {
317         struct in_ifaddr *promote = NULL;
318         struct in_ifaddr *ifa, *ifa1 = *ifap;
319         struct in_ifaddr *last_prim = in_dev->ifa_list;
320         struct in_ifaddr *prev_prom = NULL;
321         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
322
323         ASSERT_RTNL();
324
325         /* 1. Deleting primary ifaddr forces deletion all secondaries
326          * unless alias promotion is set
327          **/
328
329         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
330                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
331
332                 while ((ifa = *ifap1) != NULL) {
333                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
334                             ifa1->ifa_scope <= ifa->ifa_scope)
335                                 last_prim = ifa;
336
337                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
338                             ifa1->ifa_mask != ifa->ifa_mask ||
339                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
340                                 ifap1 = &ifa->ifa_next;
341                                 prev_prom = ifa;
342                                 continue;
343                         }
344
345                         if (!do_promote) {
346                                 inet_hash_remove(ifa);
347                                 *ifap1 = ifa->ifa_next;
348
349                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
350                                 blocking_notifier_call_chain(&inetaddr_chain,
351                                                 NETDEV_DOWN, ifa);
352                                 inet_free_ifa(ifa);
353                         } else {
354                                 promote = ifa;
355                                 break;
356                         }
357                 }
358         }
359
360         /* On promotion all secondaries from subnet are changing
361          * the primary IP, we must remove all their routes silently
362          * and later to add them back with new prefsrc. Do this
363          * while all addresses are on the device list.
364          */
365         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
366                 if (ifa1->ifa_mask == ifa->ifa_mask &&
367                     inet_ifa_match(ifa1->ifa_address, ifa))
368                         fib_del_ifaddr(ifa, ifa1);
369         }
370
371         /* 2. Unlink it */
372
373         *ifap = ifa1->ifa_next;
374         inet_hash_remove(ifa1);
375
376         /* 3. Announce address deletion */
377
378         /* Send message first, then call notifier.
379            At first sight, FIB update triggered by notifier
380            will refer to already deleted ifaddr, that could confuse
381            netlink listeners. It is not true: look, gated sees
382            that route deleted and if it still thinks that ifaddr
383            is valid, it will try to restore deleted routes... Grr.
384            So that, this order is correct.
385          */
386         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
387         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
388
389         if (promote) {
390                 struct in_ifaddr *next_sec = promote->ifa_next;
391
392                 if (prev_prom) {
393                         prev_prom->ifa_next = promote->ifa_next;
394                         promote->ifa_next = last_prim->ifa_next;
395                         last_prim->ifa_next = promote;
396                 }
397
398                 promote->ifa_flags &= ~IFA_F_SECONDARY;
399                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
400                 blocking_notifier_call_chain(&inetaddr_chain,
401                                 NETDEV_UP, promote);
402                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
403                         if (ifa1->ifa_mask != ifa->ifa_mask ||
404                             !inet_ifa_match(ifa1->ifa_address, ifa))
405                                         continue;
406                         fib_add_ifaddr(ifa);
407                 }
408
409         }
410         if (destroy)
411                 inet_free_ifa(ifa1);
412 }
413
414 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
415                          int destroy)
416 {
417         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
418 }
419
420 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
421                              u32 portid)
422 {
423         struct in_device *in_dev = ifa->ifa_dev;
424         struct in_ifaddr *ifa1, **ifap, **last_primary;
425
426         ASSERT_RTNL();
427
428         if (!ifa->ifa_local) {
429                 inet_free_ifa(ifa);
430                 return 0;
431         }
432
433         ifa->ifa_flags &= ~IFA_F_SECONDARY;
434         last_primary = &in_dev->ifa_list;
435
436         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
437              ifap = &ifa1->ifa_next) {
438                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
439                     ifa->ifa_scope <= ifa1->ifa_scope)
440                         last_primary = &ifa1->ifa_next;
441                 if (ifa1->ifa_mask == ifa->ifa_mask &&
442                     inet_ifa_match(ifa1->ifa_address, ifa)) {
443                         if (ifa1->ifa_local == ifa->ifa_local) {
444                                 inet_free_ifa(ifa);
445                                 return -EEXIST;
446                         }
447                         if (ifa1->ifa_scope != ifa->ifa_scope) {
448                                 inet_free_ifa(ifa);
449                                 return -EINVAL;
450                         }
451                         ifa->ifa_flags |= IFA_F_SECONDARY;
452                 }
453         }
454
455         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
456                 net_srandom(ifa->ifa_local);
457                 ifap = last_primary;
458         }
459
460         ifa->ifa_next = *ifap;
461         *ifap = ifa;
462
463         inet_hash_insert(dev_net(in_dev->dev), ifa);
464
465         /* Send message first, then call notifier.
466            Notifier will trigger FIB update, so that
467            listeners of netlink will know about new ifaddr */
468         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
469         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
470
471         return 0;
472 }
473
474 static int inet_insert_ifa(struct in_ifaddr *ifa)
475 {
476         return __inet_insert_ifa(ifa, NULL, 0);
477 }
478
479 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
480 {
481         struct in_device *in_dev = __in_dev_get_rtnl(dev);
482
483         ASSERT_RTNL();
484
485         if (!in_dev) {
486                 inet_free_ifa(ifa);
487                 return -ENOBUFS;
488         }
489         ipv4_devconf_setall(in_dev);
490         if (ifa->ifa_dev != in_dev) {
491                 WARN_ON(ifa->ifa_dev);
492                 in_dev_hold(in_dev);
493                 ifa->ifa_dev = in_dev;
494         }
495         if (ipv4_is_loopback(ifa->ifa_local))
496                 ifa->ifa_scope = RT_SCOPE_HOST;
497         return inet_insert_ifa(ifa);
498 }
499
500 /* Caller must hold RCU or RTNL :
501  * We dont take a reference on found in_device
502  */
503 struct in_device *inetdev_by_index(struct net *net, int ifindex)
504 {
505         struct net_device *dev;
506         struct in_device *in_dev = NULL;
507
508         rcu_read_lock();
509         dev = dev_get_by_index_rcu(net, ifindex);
510         if (dev)
511                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
512         rcu_read_unlock();
513         return in_dev;
514 }
515 EXPORT_SYMBOL(inetdev_by_index);
516
517 /* Called only from RTNL semaphored context. No locks. */
518
519 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
520                                     __be32 mask)
521 {
522         ASSERT_RTNL();
523
524         for_primary_ifa(in_dev) {
525                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
526                         return ifa;
527         } endfor_ifa(in_dev);
528         return NULL;
529 }
530
531 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
532 {
533         struct net *net = sock_net(skb->sk);
534         struct nlattr *tb[IFA_MAX+1];
535         struct in_device *in_dev;
536         struct ifaddrmsg *ifm;
537         struct in_ifaddr *ifa, **ifap;
538         int err = -EINVAL;
539
540         ASSERT_RTNL();
541
542         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
543         if (err < 0)
544                 goto errout;
545
546         ifm = nlmsg_data(nlh);
547         in_dev = inetdev_by_index(net, ifm->ifa_index);
548         if (in_dev == NULL) {
549                 err = -ENODEV;
550                 goto errout;
551         }
552
553         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
554              ifap = &ifa->ifa_next) {
555                 if (tb[IFA_LOCAL] &&
556                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
557                         continue;
558
559                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
560                         continue;
561
562                 if (tb[IFA_ADDRESS] &&
563                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
564                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
565                         continue;
566
567                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
568                 return 0;
569         }
570
571         err = -EADDRNOTAVAIL;
572 errout:
573         return err;
574 }
575
576 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
577 {
578         struct nlattr *tb[IFA_MAX+1];
579         struct in_ifaddr *ifa;
580         struct ifaddrmsg *ifm;
581         struct net_device *dev;
582         struct in_device *in_dev;
583         int err;
584
585         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
586         if (err < 0)
587                 goto errout;
588
589         ifm = nlmsg_data(nlh);
590         err = -EINVAL;
591         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
592                 goto errout;
593
594         dev = __dev_get_by_index(net, ifm->ifa_index);
595         err = -ENODEV;
596         if (dev == NULL)
597                 goto errout;
598
599         in_dev = __in_dev_get_rtnl(dev);
600         err = -ENOBUFS;
601         if (in_dev == NULL)
602                 goto errout;
603
604         ifa = inet_alloc_ifa();
605         if (ifa == NULL)
606                 /*
607                  * A potential indev allocation can be left alive, it stays
608                  * assigned to its device and is destroy with it.
609                  */
610                 goto errout;
611
612         ipv4_devconf_setall(in_dev);
613         in_dev_hold(in_dev);
614
615         if (tb[IFA_ADDRESS] == NULL)
616                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
617
618         INIT_HLIST_NODE(&ifa->hash);
619         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
620         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
621         ifa->ifa_flags = ifm->ifa_flags;
622         ifa->ifa_scope = ifm->ifa_scope;
623         ifa->ifa_dev = in_dev;
624
625         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
626         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
627
628         if (tb[IFA_BROADCAST])
629                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
630
631         if (tb[IFA_LABEL])
632                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
633         else
634                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
635
636         return ifa;
637
638 errout:
639         return ERR_PTR(err);
640 }
641
642 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
643 {
644         struct net *net = sock_net(skb->sk);
645         struct in_ifaddr *ifa;
646
647         ASSERT_RTNL();
648
649         ifa = rtm_to_ifaddr(net, nlh);
650         if (IS_ERR(ifa))
651                 return PTR_ERR(ifa);
652
653         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
654 }
655
656 /*
657  *      Determine a default network mask, based on the IP address.
658  */
659
660 static int inet_abc_len(__be32 addr)
661 {
662         int rc = -1;    /* Something else, probably a multicast. */
663
664         if (ipv4_is_zeronet(addr))
665                 rc = 0;
666         else {
667                 __u32 haddr = ntohl(addr);
668
669                 if (IN_CLASSA(haddr))
670                         rc = 8;
671                 else if (IN_CLASSB(haddr))
672                         rc = 16;
673                 else if (IN_CLASSC(haddr))
674                         rc = 24;
675         }
676
677         return rc;
678 }
679
680
681 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
682 {
683         struct ifreq ifr;
684         struct sockaddr_in sin_orig;
685         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
686         struct in_device *in_dev;
687         struct in_ifaddr **ifap = NULL;
688         struct in_ifaddr *ifa = NULL;
689         struct net_device *dev;
690         char *colon;
691         int ret = -EFAULT;
692         int tryaddrmatch = 0;
693
694         /*
695          *      Fetch the caller's info block into kernel space
696          */
697
698         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
699                 goto out;
700         ifr.ifr_name[IFNAMSIZ - 1] = 0;
701
702         /* save original address for comparison */
703         memcpy(&sin_orig, sin, sizeof(*sin));
704
705         colon = strchr(ifr.ifr_name, ':');
706         if (colon)
707                 *colon = 0;
708
709         dev_load(net, ifr.ifr_name);
710
711         switch (cmd) {
712         case SIOCGIFADDR:       /* Get interface address */
713         case SIOCGIFBRDADDR:    /* Get the broadcast address */
714         case SIOCGIFDSTADDR:    /* Get the destination address */
715         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
716                 /* Note that these ioctls will not sleep,
717                    so that we do not impose a lock.
718                    One day we will be forced to put shlock here (I mean SMP)
719                  */
720                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
721                 memset(sin, 0, sizeof(*sin));
722                 sin->sin_family = AF_INET;
723                 break;
724
725         case SIOCSIFFLAGS:
726                 ret = -EPERM;
727                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
728                         goto out;
729                 break;
730         case SIOCSIFADDR:       /* Set interface address (and family) */
731         case SIOCSIFBRDADDR:    /* Set the broadcast address */
732         case SIOCSIFDSTADDR:    /* Set the destination address */
733         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
734                 ret = -EPERM;
735                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
736                         goto out;
737                 ret = -EINVAL;
738                 if (sin->sin_family != AF_INET)
739                         goto out;
740                 break;
741         default:
742                 ret = -EINVAL;
743                 goto out;
744         }
745
746         rtnl_lock();
747
748         ret = -ENODEV;
749         dev = __dev_get_by_name(net, ifr.ifr_name);
750         if (!dev)
751                 goto done;
752
753         if (colon)
754                 *colon = ':';
755
756         in_dev = __in_dev_get_rtnl(dev);
757         if (in_dev) {
758                 if (tryaddrmatch) {
759                         /* Matthias Andree */
760                         /* compare label and address (4.4BSD style) */
761                         /* note: we only do this for a limited set of ioctls
762                            and only if the original address family was AF_INET.
763                            This is checked above. */
764                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
765                              ifap = &ifa->ifa_next) {
766                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
767                                     sin_orig.sin_addr.s_addr ==
768                                                         ifa->ifa_local) {
769                                         break; /* found */
770                                 }
771                         }
772                 }
773                 /* we didn't get a match, maybe the application is
774                    4.3BSD-style and passed in junk so we fall back to
775                    comparing just the label */
776                 if (!ifa) {
777                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
778                              ifap = &ifa->ifa_next)
779                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
780                                         break;
781                 }
782         }
783
784         ret = -EADDRNOTAVAIL;
785         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
786                 goto done;
787
788         switch (cmd) {
789         case SIOCGIFADDR:       /* Get interface address */
790                 sin->sin_addr.s_addr = ifa->ifa_local;
791                 goto rarok;
792
793         case SIOCGIFBRDADDR:    /* Get the broadcast address */
794                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
795                 goto rarok;
796
797         case SIOCGIFDSTADDR:    /* Get the destination address */
798                 sin->sin_addr.s_addr = ifa->ifa_address;
799                 goto rarok;
800
801         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
802                 sin->sin_addr.s_addr = ifa->ifa_mask;
803                 goto rarok;
804
805         case SIOCSIFFLAGS:
806                 if (colon) {
807                         ret = -EADDRNOTAVAIL;
808                         if (!ifa)
809                                 break;
810                         ret = 0;
811                         if (!(ifr.ifr_flags & IFF_UP))
812                                 inet_del_ifa(in_dev, ifap, 1);
813                         break;
814                 }
815                 ret = dev_change_flags(dev, ifr.ifr_flags);
816                 break;
817
818         case SIOCSIFADDR:       /* Set interface address (and family) */
819                 ret = -EINVAL;
820                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
821                         break;
822
823                 if (!ifa) {
824                         ret = -ENOBUFS;
825                         ifa = inet_alloc_ifa();
826                         INIT_HLIST_NODE(&ifa->hash);
827                         if (!ifa)
828                                 break;
829                         if (colon)
830                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
831                         else
832                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
833                 } else {
834                         ret = 0;
835                         if (ifa->ifa_local == sin->sin_addr.s_addr)
836                                 break;
837                         inet_del_ifa(in_dev, ifap, 0);
838                         ifa->ifa_broadcast = 0;
839                         ifa->ifa_scope = 0;
840                 }
841
842                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
843
844                 if (!(dev->flags & IFF_POINTOPOINT)) {
845                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
846                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
847                         if ((dev->flags & IFF_BROADCAST) &&
848                             ifa->ifa_prefixlen < 31)
849                                 ifa->ifa_broadcast = ifa->ifa_address |
850                                                      ~ifa->ifa_mask;
851                 } else {
852                         ifa->ifa_prefixlen = 32;
853                         ifa->ifa_mask = inet_make_mask(32);
854                 }
855                 ret = inet_set_ifa(dev, ifa);
856                 break;
857
858         case SIOCSIFBRDADDR:    /* Set the broadcast address */
859                 ret = 0;
860                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
861                         inet_del_ifa(in_dev, ifap, 0);
862                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
863                         inet_insert_ifa(ifa);
864                 }
865                 break;
866
867         case SIOCSIFDSTADDR:    /* Set the destination address */
868                 ret = 0;
869                 if (ifa->ifa_address == sin->sin_addr.s_addr)
870                         break;
871                 ret = -EINVAL;
872                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
873                         break;
874                 ret = 0;
875                 inet_del_ifa(in_dev, ifap, 0);
876                 ifa->ifa_address = sin->sin_addr.s_addr;
877                 inet_insert_ifa(ifa);
878                 break;
879
880         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
881
882                 /*
883                  *      The mask we set must be legal.
884                  */
885                 ret = -EINVAL;
886                 if (bad_mask(sin->sin_addr.s_addr, 0))
887                         break;
888                 ret = 0;
889                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
890                         __be32 old_mask = ifa->ifa_mask;
891                         inet_del_ifa(in_dev, ifap, 0);
892                         ifa->ifa_mask = sin->sin_addr.s_addr;
893                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
894
895                         /* See if current broadcast address matches
896                          * with current netmask, then recalculate
897                          * the broadcast address. Otherwise it's a
898                          * funny address, so don't touch it since
899                          * the user seems to know what (s)he's doing...
900                          */
901                         if ((dev->flags & IFF_BROADCAST) &&
902                             (ifa->ifa_prefixlen < 31) &&
903                             (ifa->ifa_broadcast ==
904                              (ifa->ifa_local|~old_mask))) {
905                                 ifa->ifa_broadcast = (ifa->ifa_local |
906                                                       ~sin->sin_addr.s_addr);
907                         }
908                         inet_insert_ifa(ifa);
909                 }
910                 break;
911         }
912 done:
913         rtnl_unlock();
914 out:
915         return ret;
916 rarok:
917         rtnl_unlock();
918         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
919         goto out;
920 }
921
922 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
923 {
924         struct in_device *in_dev = __in_dev_get_rtnl(dev);
925         struct in_ifaddr *ifa;
926         struct ifreq ifr;
927         int done = 0;
928
929         if (!in_dev)
930                 goto out;
931
932         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
933                 if (!buf) {
934                         done += sizeof(ifr);
935                         continue;
936                 }
937                 if (len < (int) sizeof(ifr))
938                         break;
939                 memset(&ifr, 0, sizeof(struct ifreq));
940                 if (ifa->ifa_label)
941                         strcpy(ifr.ifr_name, ifa->ifa_label);
942                 else
943                         strcpy(ifr.ifr_name, dev->name);
944
945                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
946                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
947                                                                 ifa->ifa_local;
948
949                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
950                         done = -EFAULT;
951                         break;
952                 }
953                 buf  += sizeof(struct ifreq);
954                 len  -= sizeof(struct ifreq);
955                 done += sizeof(struct ifreq);
956         }
957 out:
958         return done;
959 }
960
961 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
962 {
963         __be32 addr = 0;
964         struct in_device *in_dev;
965         struct net *net = dev_net(dev);
966
967         rcu_read_lock();
968         in_dev = __in_dev_get_rcu(dev);
969         if (!in_dev)
970                 goto no_in_dev;
971
972         for_primary_ifa(in_dev) {
973                 if (ifa->ifa_scope > scope)
974                         continue;
975                 if (!dst || inet_ifa_match(dst, ifa)) {
976                         addr = ifa->ifa_local;
977                         break;
978                 }
979                 if (!addr)
980                         addr = ifa->ifa_local;
981         } endfor_ifa(in_dev);
982
983         if (addr)
984                 goto out_unlock;
985 no_in_dev:
986
987         /* Not loopback addresses on loopback should be preferred
988            in this case. It is importnat that lo is the first interface
989            in dev_base list.
990          */
991         for_each_netdev_rcu(net, dev) {
992                 in_dev = __in_dev_get_rcu(dev);
993                 if (!in_dev)
994                         continue;
995
996                 for_primary_ifa(in_dev) {
997                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
998                             ifa->ifa_scope <= scope) {
999                                 addr = ifa->ifa_local;
1000                                 goto out_unlock;
1001                         }
1002                 } endfor_ifa(in_dev);
1003         }
1004 out_unlock:
1005         rcu_read_unlock();
1006         return addr;
1007 }
1008 EXPORT_SYMBOL(inet_select_addr);
1009
/*
 * Look for a usable local address on a single in_device.
 *
 * @in_dev: device whose address list is walked
 * @dst:    destination to match against an address' subnet, 0 = wildcard
 * @local:  required local address, 0 = pick one automatically
 * @scope:  largest ifa_scope value accepted for the chosen address
 *
 * Two pieces of state are tracked while walking the list:
 *   addr - the candidate local address (first entry that equals @local, or
 *          any entry when @local is 0, with ifa_scope <= @scope);
 *   same - set once an entry has been seen for which both @local and @dst
 *          satisfy inet_ifa_match() (a zero value always satisfies it).
 *
 * Returns the candidate address only if such a matching entry was found,
 * otherwise 0.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
                              __be32 local, int scope)
{
        int same = 0;
        __be32 addr = 0;

        for_ifa(in_dev) {
                /* First acceptable candidate for the local address. */
                if (!addr &&
                    (local == ifa->ifa_local || !local) &&
                    ifa->ifa_scope <= scope) {
                        addr = ifa->ifa_local;
                        /* A matching entry was already seen: done. */
                        if (same)
                                break;
                }
                if (!same) {
                        same = (!local || inet_ifa_match(local, ifa)) &&
                                (!dst || inet_ifa_match(dst, ifa));
                        if (same && addr) {
                                /*
                                 * With an explicit local address, or with no
                                 * destination constraint, the candidate is
                                 * final.
                                 */
                                if (local || !dst)
                                        break;
                                /* Is the selected addr into dst subnet? */
                                if (inet_ifa_match(addr, ifa))
                                        break;
                                /* No, then can we use new local src? */
                                if (ifa->ifa_scope <= scope) {
                                        addr = ifa->ifa_local;
                                        break;
                                }
                                /* search for large dst subnet for addr */
                                same = 0;
                        }
                }
        } endfor_ifa(in_dev);

        return same ? addr : 0;
}
1046
1047 /*
1048  * Confirm that local IP address exists using wildcards:
1049  * - in_dev: only on this interface, 0=any interface
1050  * - dst: only in the same subnet as dst, 0=any dst
1051  * - local: address, 0=autoselect the local address
1052  * - scope: maximum allowed scope value for the local address
1053  */
1054 __be32 inet_confirm_addr(struct in_device *in_dev,
1055                          __be32 dst, __be32 local, int scope)
1056 {
1057         __be32 addr = 0;
1058         struct net_device *dev;
1059         struct net *net;
1060
1061         if (scope != RT_SCOPE_LINK)
1062                 return confirm_addr_indev(in_dev, dst, local, scope);
1063
1064         net = dev_net(in_dev->dev);
1065         rcu_read_lock();
1066         for_each_netdev_rcu(net, dev) {
1067                 in_dev = __in_dev_get_rcu(dev);
1068                 if (in_dev) {
1069                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1070                         if (addr)
1071                                 break;
1072                 }
1073         }
1074         rcu_read_unlock();
1075
1076         return addr;
1077 }
1078 EXPORT_SYMBOL(inet_confirm_addr);
1079
1080 /*
1081  *      Device notifier
1082  */
1083
1084 int register_inetaddr_notifier(struct notifier_block *nb)
1085 {
1086         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1087 }
1088 EXPORT_SYMBOL(register_inetaddr_notifier);
1089
1090 int unregister_inetaddr_notifier(struct notifier_block *nb)
1091 {
1092         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1093 }
1094 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1095
1096 /* Rename ifa_labels for a device name change. Make some effort to preserve
1097  * existing alias numbering and to create unique labels if possible.
1098 */
1099 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1100 {
1101         struct in_ifaddr *ifa;
1102         int named = 0;
1103
1104         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1105                 char old[IFNAMSIZ], *dot;
1106
1107                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1108                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1109                 if (named++ == 0)
1110                         goto skip;
1111                 dot = strchr(old, ':');
1112                 if (dot == NULL) {
1113                         sprintf(old, ":%d", named);
1114                         dot = old;
1115                 }
1116                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1117                         strcat(ifa->ifa_label, dot);
1118                 else
1119                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1120 skip:
1121                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1122         }
1123 }
1124
/*
 * Tell whether @mtu is large enough for IPv4 to run on a device.
 * 68 octets is the smallest datagram size every IPv4 module must be able
 * to forward without fragmentation (RFC 791).
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        const unsigned int min_ipv4_mtu = 68;

        return !(mtu < min_ipv4_mtu);
}
1129
1130 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1131                                         struct in_device *in_dev)
1132
1133 {
1134         struct in_ifaddr *ifa;
1135
1136         for (ifa = in_dev->ifa_list; ifa;
1137              ifa = ifa->ifa_next) {
1138                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1139                          ifa->ifa_local, dev,
1140                          ifa->ifa_local, NULL,
1141                          dev->dev_addr, NULL);
1142         }
1143 }
1144
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback for the IPv4 layer.
 *
 * @this:  the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Devices without an in_device get one created on NETDEV_REGISTER, or on
 * NETDEV_CHANGEMTU once the MTU is valid again; every other event is ignored
 * for them.  For devices that already have an in_device the event is
 * dispatched through the switch below.
 *
 * Returns NOTIFY_DONE, or an errno-encoded notifier value when the
 * in_device cannot be allocated at registration time.
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
                         void *ptr)
{
        struct net_device *dev = ptr;
        struct in_device *in_dev = __in_dev_get_rtnl(dev);

        ASSERT_RTNL();

        if (!in_dev) {
                if (event == NETDEV_REGISTER) {
                        in_dev = inetdev_init(dev);
                        if (!in_dev)
                                return notifier_from_errno(-ENOMEM);
                        /* Loopback starts with NOXFRM and NOPOLICY set. */
                        if (dev->flags & IFF_LOOPBACK) {
                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
                        }
                } else if (event == NETDEV_CHANGEMTU) {
                        /* Re-enabling IP */
                        if (inetdev_valid_mtu(dev->mtu))
                                in_dev = inetdev_init(dev);
                }
                goto out;
        }

        switch (event) {
        case NETDEV_REGISTER:
                /* Registering a device that already has an in_device. */
                pr_debug("%s: bug\n", __func__);
                RCU_INIT_POINTER(dev->ip_ptr, NULL);
                break;
        case NETDEV_UP:
                if (!inetdev_valid_mtu(dev->mtu))
                        break;
                /* Loopback gets 127.0.0.1/8 with host scope when it comes up. */
                if (dev->flags & IFF_LOOPBACK) {
                        struct in_ifaddr *ifa = inet_alloc_ifa();

                        if (ifa) {
                                INIT_HLIST_NODE(&ifa->hash);
                                ifa->ifa_local =
                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
                                ifa->ifa_prefixlen = 8;
                                ifa->ifa_mask = inet_make_mask(8);
                                in_dev_hold(in_dev);
                                ifa->ifa_dev = in_dev;
                                ifa->ifa_scope = RT_SCOPE_HOST;
                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
                                inet_insert_ifa(ifa);
                        }
                }
                ip_mc_up(in_dev);
                /* fall through */
        case NETDEV_CHANGEADDR:
                /* Only announce if the arp_notify setting is enabled. */
                if (!IN_DEV_ARP_NOTIFY(in_dev))
                        break;
                /* fall through */
        case NETDEV_NOTIFY_PEERS:
                /* Send gratuitous ARP to notify of link change */
                inetdev_send_gratuitous_arp(dev, in_dev);
                break;
        case NETDEV_DOWN:
                ip_mc_down(in_dev);
                break;
        case NETDEV_PRE_TYPE_CHANGE:
                ip_mc_unmap(in_dev);
                break;
        case NETDEV_POST_TYPE_CHANGE:
                ip_mc_remap(in_dev);
                break;
        case NETDEV_CHANGEMTU:
                if (inetdev_valid_mtu(dev->mtu))
                        break;
                /* disable IP when MTU is not enough */
                /* fall through */
        case NETDEV_UNREGISTER:
                inetdev_destroy(in_dev);
                break;
        case NETDEV_CHANGENAME:
                /* Do not notify about label change, this event is
                 * not interesting to applications using netlink.
                 */
                /* NOTE(review): inetdev_changename() in this file does send
                 * RTM_NEWADDR for each address; confirm whether the comment
                 * above is still accurate.
                 */
                inetdev_changename(dev, in_dev);

                /* Re-register the sysctl entries (presumably so they pick up
                 * the new device name; confirm against the sysctl helpers).
                 */
                devinet_sysctl_unregister(in_dev);
                devinet_sysctl_register(in_dev);
                break;
        }
out:
        return NOTIFY_DONE;
}
1235
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1239
1240 static size_t inet_nlmsg_size(void)
1241 {
1242         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1243                + nla_total_size(4) /* IFA_ADDRESS */
1244                + nla_total_size(4) /* IFA_LOCAL */
1245                + nla_total_size(4) /* IFA_BROADCAST */
1246                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1247 }
1248
1249 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1250                             u32 portid, u32 seq, int event, unsigned int flags)
1251 {
1252         struct ifaddrmsg *ifm;
1253         struct nlmsghdr  *nlh;
1254
1255         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1256         if (nlh == NULL)
1257                 return -EMSGSIZE;
1258
1259         ifm = nlmsg_data(nlh);
1260         ifm->ifa_family = AF_INET;
1261         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1262         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1263         ifm->ifa_scope = ifa->ifa_scope;
1264         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1265
1266         if ((ifa->ifa_address &&
1267              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1268             (ifa->ifa_local &&
1269              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1270             (ifa->ifa_broadcast &&
1271              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1272             (ifa->ifa_label[0] &&
1273              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1274                 goto nla_put_failure;
1275
1276         return nlmsg_end(skb, nlh);
1277
1278 nla_put_failure:
1279         nlmsg_cancel(skb, nlh);
1280         return -EMSGSIZE;
1281 }
1282
/*
 * Netlink dump callback: write an RTM_NEWADDR message for every IPv4
 * address of every device in the socket's network namespace.
 *
 * The dump is resumable.  The position reached is kept in cb->args[]:
 *   args[0] - device index hash bucket,
 *   args[1] - device position within that bucket,
 *   args[2] - address position within that device.
 * On entry these give the point to resume from; on exit they record where
 * the walk stopped.
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        int h, s_h;
        int idx, s_idx;
        int ip_idx, s_ip_idx;
        struct net_device *dev;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
        struct hlist_head *head;
        struct hlist_node *node;

        s_h = cb->args[0];
        s_idx = idx = cb->args[1];
        s_ip_idx = ip_idx = cb->args[2];

        /* s_idx only applies to the first bucket visited; later ones start at 0. */
        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
                idx = 0;
                head = &net->dev_index_head[h];
                rcu_read_lock();
                hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
                        /* Skip devices already dumped in a previous call. */
                        if (idx < s_idx)
                                goto cont;
                        /* Past the resume device: start from its first address. */
                        if (h > s_h || idx > s_idx)
                                s_ip_idx = 0;
                        in_dev = __in_dev_get_rcu(dev);
                        if (!in_dev)
                                goto cont;

                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
                             ifa = ifa->ifa_next, ip_idx++) {
                                if (ip_idx < s_ip_idx)
                                        continue;
                                /* Stop here when the message could not be added. */
                                if (inet_fill_ifaddr(skb, ifa,
                                             NETLINK_CB(cb->skb).portid,
                                             cb->nlh->nlmsg_seq,
                                             RTM_NEWADDR, NLM_F_MULTI) <= 0) {
                                        rcu_read_unlock();
                                        goto done;
                                }
                        }
cont:
                        idx++;
                }
                rcu_read_unlock();
        }

done:
        /* Record the cursor for the next invocation. */
        cb->args[0] = h;
        cb->args[1] = idx;
        cb->args[2] = ip_idx;

        return skb->len;
}
1337
1338 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1339                       u32 portid)
1340 {
1341         struct sk_buff *skb;
1342         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1343         int err = -ENOBUFS;
1344         struct net *net;
1345
1346         net = dev_net(ifa->ifa_dev->dev);
1347         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1348         if (skb == NULL)
1349                 goto errout;
1350
1351         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1352         if (err < 0) {
1353                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1354                 WARN_ON(err == -EMSGSIZE);
1355                 kfree_skb(skb);
1356                 goto errout;
1357         }
1358         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1359         return;
1360 errout:
1361         if (err < 0)
1362                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1363 }
1364
1365 static size_t inet_get_link_af_size(const struct net_device *dev)
1366 {
1367         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1368
1369         if (!in_dev)
1370                 return 0;
1371
1372         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1373 }
1374
1375 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1376 {
1377         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1378         struct nlattr *nla;
1379         int i;
1380
1381         if (!in_dev)
1382                 return -ENODATA;
1383
1384         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1385         if (nla == NULL)
1386                 return -EMSGSIZE;
1387
1388         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1389                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1390
1391         return 0;
1392 }
1393
/* Attribute policy for the AF_INET link attributes: IFLA_INET_CONF is nested. */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1397
1398 static int inet_validate_link_af(const struct net_device *dev,
1399                                  const struct nlattr *nla)
1400 {
1401         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1402         int err, rem;
1403
1404         if (dev && !__in_dev_get_rtnl(dev))
1405                 return -EAFNOSUPPORT;
1406
1407         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1408         if (err < 0)
1409                 return err;
1410
1411         if (tb[IFLA_INET_CONF]) {
1412                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1413                         int cfgid = nla_type(a);
1414
1415                         if (nla_len(a) < 4)
1416                                 return -EINVAL;
1417
1418                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1419                                 return -EINVAL;
1420                 }
1421         }
1422
1423         return 0;
1424 }
1425
1426 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1427 {
1428         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1429         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1430         int rem;
1431
1432         if (!in_dev)
1433                 return -EAFNOSUPPORT;
1434
1435         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1436                 BUG();
1437
1438         if (tb[IFLA_INET_CONF]) {
1439                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1440                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1441         }
1442
1443         return 0;
1444 }
1445
1446 static int inet_netconf_msgsize_devconf(int type)
1447 {
1448         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1449                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1450
1451         /* type -1 is used for ALL */
1452         if (type == -1 || type == NETCONFA_FORWARDING)
1453                 size += nla_total_size(4);
1454         if (type == -1 || type == NETCONFA_RP_FILTER)
1455                 size += nla_total_size(4);
1456
1457         return size;
1458 }
1459
1460 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1461                                      struct ipv4_devconf *devconf, u32 portid,
1462                                      u32 seq, int event, unsigned int flags,
1463                                      int type)
1464 {
1465         struct nlmsghdr  *nlh;
1466         struct netconfmsg *ncm;
1467
1468         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1469                         flags);
1470         if (nlh == NULL)
1471                 return -EMSGSIZE;
1472
1473         ncm = nlmsg_data(nlh);
1474         ncm->ncm_family = AF_INET;
1475
1476         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1477                 goto nla_put_failure;
1478
1479         /* type -1 is used for ALL */
1480         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1481             nla_put_s32(skb, NETCONFA_FORWARDING,
1482                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1483                 goto nla_put_failure;
1484         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1485             nla_put_s32(skb, NETCONFA_RP_FILTER,
1486                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1487                 goto nla_put_failure;
1488
1489         return nlmsg_end(skb, nlh);
1490
1491 nla_put_failure:
1492         nlmsg_cancel(skb, nlh);
1493         return -EMSGSIZE;
1494 }
1495
1496 static void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1497                                         struct ipv4_devconf *devconf)
1498 {
1499         struct sk_buff *skb;
1500         int err = -ENOBUFS;
1501
1502         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1503         if (skb == NULL)
1504                 goto errout;
1505
1506         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1507                                         RTM_NEWNETCONF, 0, type);
1508         if (err < 0) {
1509                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1510                 WARN_ON(err == -EMSGSIZE);
1511                 kfree_skb(skb);
1512                 goto errout;
1513         }
1514         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1515         return;
1516 errout:
1517         if (err < 0)
1518                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1519 }
1520
/* Attribute policy for netconf requests: each attribute has .len = sizeof(int). */
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
        [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
        [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
        [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
};
1526
1527 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1528                                     struct nlmsghdr *nlh,
1529                                     void *arg)
1530 {
1531         struct net *net = sock_net(in_skb->sk);
1532         struct nlattr *tb[NETCONFA_MAX+1];
1533         struct netconfmsg *ncm;
1534         struct sk_buff *skb;
1535         struct ipv4_devconf *devconf;
1536         struct in_device *in_dev;
1537         struct net_device *dev;
1538         int ifindex;
1539         int err;
1540
1541         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1542                           devconf_ipv4_policy);
1543         if (err < 0)
1544                 goto errout;
1545
1546         err = EINVAL;
1547         if (!tb[NETCONFA_IFINDEX])
1548                 goto errout;
1549
1550         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1551         switch (ifindex) {
1552         case NETCONFA_IFINDEX_ALL:
1553                 devconf = net->ipv4.devconf_all;
1554                 break;
1555         case NETCONFA_IFINDEX_DEFAULT:
1556                 devconf = net->ipv4.devconf_dflt;
1557                 break;
1558         default:
1559                 dev = __dev_get_by_index(net, ifindex);
1560                 if (dev == NULL)
1561                         goto errout;
1562                 in_dev = __in_dev_get_rtnl(dev);
1563                 if (in_dev == NULL)
1564                         goto errout;
1565                 devconf = &in_dev->cnf;
1566                 break;
1567         }
1568
1569         err = -ENOBUFS;
1570         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1571         if (skb == NULL)
1572                 goto errout;
1573
1574         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1575                                         NETLINK_CB(in_skb).portid,
1576                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1577                                         -1);
1578         if (err < 0) {
1579                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1580                 WARN_ON(err == -EMSGSIZE);
1581                 kfree_skb(skb);
1582                 goto errout;
1583         }
1584         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1585 errout:
1586         return err;
1587 }
1588
1589 #ifdef CONFIG_SYSCTL
1590
1591 static void devinet_copy_dflt_conf(struct net *net, int i)
1592 {
1593         struct net_device *dev;
1594
1595         rcu_read_lock();
1596         for_each_netdev_rcu(net, dev) {
1597                 struct in_device *in_dev;
1598
1599                 in_dev = __in_dev_get_rcu(dev);
1600                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1601                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1602         }
1603         rcu_read_unlock();
1604 }
1605
1606 /* called with RTNL locked */
1607 static void inet_forward_change(struct net *net)
1608 {
1609         struct net_device *dev;
1610         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1611
1612         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1613         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1614         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1615                                     NETCONFA_IFINDEX_ALL,
1616                                     net->ipv4.devconf_all);
1617         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1618                                     NETCONFA_IFINDEX_DEFAULT,
1619                                     net->ipv4.devconf_dflt);
1620
1621         for_each_netdev(net, dev) {
1622                 struct in_device *in_dev;
1623                 if (on)
1624                         dev_disable_lro(dev);
1625                 rcu_read_lock();
1626                 in_dev = __in_dev_get_rcu(dev);
1627                 if (in_dev) {
1628                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1629                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1630                                                     dev->ifindex, &in_dev->cnf);
1631                 }
1632                 rcu_read_unlock();
1633         }
1634 }
1635
1636 static int devinet_conf_proc(ctl_table *ctl, int write,
1637                              void __user *buffer,
1638                              size_t *lenp, loff_t *ppos)
1639 {
1640         int old_value = *(int *)ctl->data;
1641         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1642         int new_value = *(int *)ctl->data;
1643
1644         if (write) {
1645                 struct ipv4_devconf *cnf = ctl->extra1;
1646                 struct net *net = ctl->extra2;
1647                 int i = (int *)ctl->data - cnf->data;
1648
1649                 set_bit(i, cnf->state);
1650
1651                 if (cnf == net->ipv4.devconf_dflt)
1652                         devinet_copy_dflt_conf(net, i);
1653                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1654                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1655                         if ((new_value == 0) && (old_value != 0))
1656                                 rt_cache_flush(net);
1657                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1658                     new_value != old_value) {
1659                         int ifindex;
1660
1661                         if (cnf == net->ipv4.devconf_dflt)
1662                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1663                         else if (cnf == net->ipv4.devconf_all)
1664                                 ifindex = NETCONFA_IFINDEX_ALL;
1665                         else {
1666                                 struct in_device *idev =
1667                                         container_of(cnf, struct in_device,
1668                                                      cnf);
1669                                 ifindex = idev->dev->ifindex;
1670                         }
1671                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1672                                                     ifindex, cnf);
1673                 }
1674         }
1675
1676         return ret;
1677 }
1678
1679 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1680                                   void __user *buffer,
1681                                   size_t *lenp, loff_t *ppos)
1682 {
1683         int *valp = ctl->data;
1684         int val = *valp;
1685         loff_t pos = *ppos;
1686         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1687
1688         if (write && *valp != val) {
1689                 struct net *net = ctl->extra2;
1690
1691                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1692                         if (!rtnl_trylock()) {
1693                                 /* Restore the original values before restarting */
1694                                 *valp = val;
1695                                 *ppos = pos;
1696                                 return restart_syscall();
1697                         }
1698                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1699                                 inet_forward_change(net);
1700                         } else {
1701                                 struct ipv4_devconf *cnf = ctl->extra1;
1702                                 struct in_device *idev =
1703                                         container_of(cnf, struct in_device, cnf);
1704                                 if (*valp)
1705                                         dev_disable_lro(idev->dev);
1706                                 inet_netconf_notify_devconf(net,
1707                                                             NETCONFA_FORWARDING,
1708                                                             idev->dev->ifindex,
1709                                                             cnf);
1710                         }
1711                         rtnl_unlock();
1712                         rt_cache_flush(net);
1713                 } else
1714                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1715                                                     NETCONFA_IFINDEX_DEFAULT,
1716                                                     net->ipv4.devconf_dflt);
1717         }
1718
1719         return ret;
1720 }
1721
1722 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1723                                 void __user *buffer,
1724                                 size_t *lenp, loff_t *ppos)
1725 {
1726         int *valp = ctl->data;
1727         int val = *valp;
1728         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1729         struct net *net = ctl->extra2;
1730
1731         if (write && *valp != val)
1732                 rt_cache_flush(net);
1733
1734         return ret;
1735 }
1736
/*
 * Build one ctl_table initializer for the devconf attribute
 * IPV4_DEVCONF_<attr>.  The entry points at the matching slot of the
 * global ipv4_devconf (slot index is the attribute value minus one).
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= &ipv4_devconf.data[IPV4_DEVCONF_ ## attr - 1], \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, ipv4_doint_and_flush)
1759
1760 static struct devinet_sysctl_table {
1761         struct ctl_table_header *sysctl_header;
1762         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1763 } devinet_sysctl = {
1764         .devinet_vars = {
1765                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1766                                              devinet_sysctl_forward),
1767                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1768
1769                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1770                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1771                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1772                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1773                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1774                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1775                                         "accept_source_route"),
1776                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1777                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1778                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1779                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1780                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1781                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1782                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1783                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1784                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1785                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1786                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1787                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1788                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1789
1790                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1791                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1792                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1793                                               "force_igmp_version"),
1794                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1795                                               "promote_secondaries"),
1796                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
1797                                               "route_localnet"),
1798         },
1799 };
1800
1801 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1802                                         struct ipv4_devconf *p)
1803 {
1804         int i;
1805         struct devinet_sysctl_table *t;
1806         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1807
1808         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1809         if (!t)
1810                 goto out;
1811
1812         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1813                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1814                 t->devinet_vars[i].extra1 = p;
1815                 t->devinet_vars[i].extra2 = net;
1816         }
1817
1818         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1819
1820         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1821         if (!t->sysctl_header)
1822                 goto free;
1823
1824         p->sysctl = t;
1825         return 0;
1826
1827 free:
1828         kfree(t);
1829 out:
1830         return -ENOBUFS;
1831 }
1832
1833 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1834 {
1835         struct devinet_sysctl_table *t = cnf->sysctl;
1836
1837         if (t == NULL)
1838                 return;
1839
1840         cnf->sysctl = NULL;
1841         unregister_net_sysctl_table(t->sysctl_header);
1842         kfree(t);
1843 }
1844
1845 static void devinet_sysctl_register(struct in_device *idev)
1846 {
1847         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1848         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1849                                         &idev->cnf);
1850 }
1851
1852 static void devinet_sysctl_unregister(struct in_device *idev)
1853 {
1854         __devinet_sysctl_unregister(&idev->cnf);
1855         neigh_sysctl_unregister(idev->arp_parms);
1856 }
1857
1858 static struct ctl_table ctl_forward_entry[] = {
1859         {
1860                 .procname       = "ip_forward",
1861                 .data           = &ipv4_devconf.data[
1862                                         IPV4_DEVCONF_FORWARDING - 1],
1863                 .maxlen         = sizeof(int),
1864                 .mode           = 0644,
1865                 .proc_handler   = devinet_sysctl_forward,
1866                 .extra1         = &ipv4_devconf,
1867                 .extra2         = &init_net,
1868         },
1869         { },
1870 };
1871 #endif
1872
1873 static __net_init int devinet_init_net(struct net *net)
1874 {
1875         int err;
1876         struct ipv4_devconf *all, *dflt;
1877 #ifdef CONFIG_SYSCTL
1878         struct ctl_table *tbl = ctl_forward_entry;
1879         struct ctl_table_header *forw_hdr;
1880 #endif
1881
1882         err = -ENOMEM;
1883         all = &ipv4_devconf;
1884         dflt = &ipv4_devconf_dflt;
1885
1886         if (!net_eq(net, &init_net)) {
1887                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1888                 if (all == NULL)
1889                         goto err_alloc_all;
1890
1891                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1892                 if (dflt == NULL)
1893                         goto err_alloc_dflt;
1894
1895 #ifdef CONFIG_SYSCTL
1896                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1897                 if (tbl == NULL)
1898                         goto err_alloc_ctl;
1899
1900                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1901                 tbl[0].extra1 = all;
1902                 tbl[0].extra2 = net;
1903 #endif
1904         }
1905
1906 #ifdef CONFIG_SYSCTL
1907         err = __devinet_sysctl_register(net, "all", all);
1908         if (err < 0)
1909                 goto err_reg_all;
1910
1911         err = __devinet_sysctl_register(net, "default", dflt);
1912         if (err < 0)
1913                 goto err_reg_dflt;
1914
1915         err = -ENOMEM;
1916         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1917         if (forw_hdr == NULL)
1918                 goto err_reg_ctl;
1919         net->ipv4.forw_hdr = forw_hdr;
1920 #endif
1921
1922         net->ipv4.devconf_all = all;
1923         net->ipv4.devconf_dflt = dflt;
1924         return 0;
1925
1926 #ifdef CONFIG_SYSCTL
1927 err_reg_ctl:
1928         __devinet_sysctl_unregister(dflt);
1929 err_reg_dflt:
1930         __devinet_sysctl_unregister(all);
1931 err_reg_all:
1932         if (tbl != ctl_forward_entry)
1933                 kfree(tbl);
1934 err_alloc_ctl:
1935 #endif
1936         if (dflt != &ipv4_devconf_dflt)
1937                 kfree(dflt);
1938 err_alloc_dflt:
1939         if (all != &ipv4_devconf)
1940                 kfree(all);
1941 err_alloc_all:
1942         return err;
1943 }
1944
1945 static __net_exit void devinet_exit_net(struct net *net)
1946 {
1947 #ifdef CONFIG_SYSCTL
1948         struct ctl_table *tbl;
1949
1950         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1951         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1952         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1953         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1954         kfree(tbl);
1955 #endif
1956         kfree(net->ipv4.devconf_dflt);
1957         kfree(net->ipv4.devconf_all);
1958 }
1959
1960 static __net_initdata struct pernet_operations devinet_ops = {
1961         .init = devinet_init_net,
1962         .exit = devinet_exit_net,
1963 };
1964
1965 static struct rtnl_af_ops inet_af_ops = {
1966         .family           = AF_INET,
1967         .fill_link_af     = inet_fill_link_af,
1968         .get_link_af_size = inet_get_link_af_size,
1969         .validate_link_af = inet_validate_link_af,
1970         .set_link_af      = inet_set_link_af,
1971 };
1972
1973 void __init devinet_init(void)
1974 {
1975         int i;
1976
1977         for (i = 0; i < IN4_ADDR_HSIZE; i++)
1978                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1979
1980         register_pernet_subsys(&devinet_ops);
1981
1982         register_gifconf(PF_INET, inet_gifconf);
1983         register_netdevice_notifier(&ip_netdev_notifier);
1984
1985         rtnl_af_register(&inet_af_ops);
1986
1987         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1988         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1989         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1990         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
1991                       NULL, NULL);
1992 }
1993