]> Pileus Git - ~andy/linux/blob - net/ipv4/devinet.c
net: Push capable(CAP_NET_ADMIN) into the rtnl methods
[~andy/linux] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58 #include <linux/netconf.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
67 #include "fib_lookup.h"
68
/*
 * Statically initialised IPv4 device configuration.  The .data array is
 * indexed by (IPV4_DEVCONF_* - 1); the four entries named below start at 1
 * and every entry not named starts at zero.
 * NOTE(review): presumably the global ("all") configuration - its users
 * are not visible in this part of the file; confirm.
 */
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75         },
76 };
77
/*
 * Statically initialised "default" IPv4 device configuration.  Same
 * layout as ipv4_devconf (index is IPV4_DEVCONF_* - 1); it additionally
 * enables ACCEPT_SOURCE_ROUTE.  Entries not named start at zero.
 * NOTE(review): presumably the template behind net->ipv4.devconf_dflt,
 * which inetdev_init() copies into each new in_device - the hookup is not
 * visible in this part of the file; confirm.
 */
78 static struct ipv4_devconf ipv4_devconf_dflt = {
79         .data = {
80                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
81                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
84                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
85         },
86 };
87
/*
 * Access configuration attribute @attr in the default IPv4 device
 * configuration (net->ipv4.devconf_dflt) of namespace @net.
 *
 * @net is parenthesised in the expansion so that any pointer-valued
 * expression (e.g. "p + 1" or "cond ? a : b") can be passed, not just a
 * plain identifier.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90
/*
 * Netlink attribute validation policy handed to nlmsg_parse() by
 * inet_rtm_deladdr() and rtm_to_ifaddr(): the three address attributes
 * are NLA_U32 and the label is an NLA_STRING with .len = IFNAMSIZ - 1.
 */
91 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
92         [IFA_LOCAL]             = { .type = NLA_U32 },
93         [IFA_ADDRESS]           = { .type = NLA_U32 },
94         [IFA_BROADCAST]         = { .type = NLA_U32 },
95         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
96 };
97
/*
 * Hash table of configured IPv4 addresses: 1 << 8 chains of in_ifaddr
 * entries linked through their ->hash member and indexed by
 * inet_addr_hash().  inet_addr_hash_lock serialises inet_hash_insert()
 * and inet_hash_remove(); __ip_dev_find() walks a chain under
 * rcu_read_lock() only.
 */
98 #define IN4_ADDR_HSIZE_SHIFT    8
99 #define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
100
101 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
102 static DEFINE_SPINLOCK(inet_addr_hash_lock);
103
104 static u32 inet_addr_hash(struct net *net, __be32 addr)
105 {
106         u32 val = (__force u32) addr ^ net_hash_mix(net);
107
108         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
109 }
110
111 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
112 {
113         u32 hash = inet_addr_hash(net, ifa->ifa_local);
114
115         spin_lock(&inet_addr_hash_lock);
116         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
117         spin_unlock(&inet_addr_hash_lock);
118 }
119
/*
 * Unlink @ifa from the address hash table.  The removal is done with
 * hlist_del_init_rcu() while holding inet_addr_hash_lock.
 */
120 static void inet_hash_remove(struct in_ifaddr *ifa)
121 {
122         spin_lock(&inet_addr_hash_lock);
123         hlist_del_init_rcu(&ifa->hash);
124         spin_unlock(&inet_addr_hash_lock);
125 }
126
127 /**
128  * __ip_dev_find - find the first device with a given source address.
129  * @net: the net namespace
130  * @addr: the source address
131  * @devref: if true, take a reference on the found device
132  *
133  * If a caller uses devref=false, it should be protected by RCU, or RTNL
 *
 * The address hash table is searched first.  If nothing matches there,
 * the namespace's RT_TABLE_LOCAL FIB table is looked up and the device of
 * an RTN_LOCAL result is used instead.
 *
 * Return: the matching net_device, or NULL when none was found.
134  */
135 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
136 {
137         u32 hash = inet_addr_hash(net, addr);
138         struct net_device *result = NULL;
139         struct in_ifaddr *ifa;
140         struct hlist_node *node;
141
142         rcu_read_lock();
            /* A hit only counts if its device belongs to @net; entries
             * whose device is in another namespace are skipped.
             */
143         hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
144                 if (ifa->ifa_local == addr) {
145                         struct net_device *dev = ifa->ifa_dev->dev;
146
147                         if (!net_eq(dev_net(dev), net))
148                                 continue;
149                         result = dev;
150                         break;
151                 }
152         }
153         if (!result) {
154                 struct flowi4 fl4 = { .daddr = addr };
155                 struct fib_result res = { 0 };
156                 struct fib_table *local;
157
158                 /* Fallback to FIB local table so that communication
159                  * over loopback subnets work.
160                  */
161                 local = fib_get_table(net, RT_TABLE_LOCAL);
162                 if (local &&
163                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
164                     res.type == RTN_LOCAL)
165                         result = FIB_RES_DEV(res);
166         }
            /* The optional reference is taken before leaving the RCU
             * read-side section.
             */
167         if (result && devref)
168                 dev_hold(result);
169         rcu_read_unlock();
170         return result;
171 }
172 EXPORT_SYMBOL(__ip_dev_find);
173
/* Forward declaration: rtmsg_ifa() is used by the address insert/delete
 * paths below to send RTM_NEWADDR / RTM_DELADDR messages.
 */
174 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
175
/* Notifier chain on which NETDEV_UP / NETDEV_DOWN address events are
 * raised by __inet_insert_ifa() and __inet_del_ifa().
 */
176 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
177 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
178                          int destroy);
/* With CONFIG_SYSCTL the per-device sysctl hooks are only declared here;
 * without it they are empty stubs so callers need no #ifdef.
 */
179 #ifdef CONFIG_SYSCTL
180 static void devinet_sysctl_register(struct in_device *idev);
181 static void devinet_sysctl_unregister(struct in_device *idev);
182 #else
183 static void devinet_sysctl_register(struct in_device *idev)
184 {
185 }
186 static void devinet_sysctl_unregister(struct in_device *idev)
187 {
188 }
189 #endif
190
191 /* Locks all the inet devices. */
192
/*
 * Allocate a zero-filled in_ifaddr with GFP_KERNEL.
 * Returns the new entry, or NULL if the allocation failed.
 */
193 static struct in_ifaddr *inet_alloc_ifa(void)
194 {
195         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
196 }
197
198 static void inet_rcu_free_ifa(struct rcu_head *head)
199 {
200         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
201         if (ifa->ifa_dev)
202                 in_dev_put(ifa->ifa_dev);
203         kfree(ifa);
204 }
205
/*
 * Free @ifa via call_rcu(): the actual release (inet_rcu_free_ifa) is
 * deferred to an RCU callback rather than done immediately.
 */
206 static void inet_free_ifa(struct in_ifaddr *ifa)
207 {
208         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
209 }
210
/*
 * in_dev_finish_destroy - release an in_device and its hold on the device.
 * @idev: the in_device being torn down
 *
 * Warns if the address list or the multicast list is still populated,
 * drops the reference held on idev->dev and frees @idev.  An @idev whose
 * ->dead flag is not set is reported with pr_err() and is intentionally
 * left unfreed.
 * NOTE(review): presumably reached once the last in_device reference is
 * dropped - the caller is not visible here; confirm.
 */
211 void in_dev_finish_destroy(struct in_device *idev)
212 {
213         struct net_device *dev = idev->dev;
214
215         WARN_ON(idev->ifa_list);
216         WARN_ON(idev->mc_list);
217 #ifdef NET_REFCNT_DEBUG
218         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
219 #endif
220         dev_put(dev);
221         if (!idev->dead)
222                 pr_err("Freeing alive in_device %p\n", idev);
223         else
224                 kfree(idev);
225 }
226 EXPORT_SYMBOL(in_dev_finish_destroy);
227
/*
 * inetdev_init - create the IPv4 state (in_device) for a net_device.
 * @dev: device to attach the new in_device to
 *
 * Must be called with RTNL held.  The new in_device starts from a copy of
 * the namespace's devconf_dflt, gets its own ARP neighbour parameters,
 * and is published through dev->ip_ptr as the very last step.
 *
 * Returns the new in_device, or NULL if either allocation failed.
 */
228 static struct in_device *inetdev_init(struct net_device *dev)
229 {
230         struct in_device *in_dev;
231
232         ASSERT_RTNL();
233
234         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
235         if (!in_dev)
236                 goto out;
            /* Start from the namespace defaults; the copied sysctl
             * pointer is cleared for the new in_device.
             */
237         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
238                         sizeof(in_dev->cnf));
239         in_dev->cnf.sysctl = NULL;
240         in_dev->dev = dev;
241         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
242         if (!in_dev->arp_parms)
243                 goto out_kfree;
244         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
245                 dev_disable_lro(dev);
246         /* Reference in_dev->dev */
247         dev_hold(dev);
248         /* Account for reference dev->ip_ptr (below) */
249         in_dev_hold(in_dev);
250
251         devinet_sysctl_register(in_dev);
252         ip_mc_init_dev(in_dev);
253         if (dev->flags & IFF_UP)
254                 ip_mc_up(in_dev);
255
256         /* we can receive as soon as ip_ptr is set -- do this last */
257         rcu_assign_pointer(dev->ip_ptr, in_dev);
258 out:
259         return in_dev;
260 out_kfree:
261         kfree(in_dev);
262         in_dev = NULL;
263         goto out;
264 }
265
266 static void in_dev_rcu_put(struct rcu_head *head)
267 {
268         struct in_device *idev = container_of(head, struct in_device, rcu_head);
269         in_dev_put(idev);
270 }
271
/*
 * inetdev_destroy - tear down the IPv4 state of a device.
 * @in_dev: the in_device to dismantle
 *
 * Must be called with RTNL held.  Marks @in_dev dead, destroys its
 * multicast state, removes and frees every address, clears dev->ip_ptr,
 * unregisters sysctl, releases the ARP parameters, and finally drops a
 * reference on @in_dev from an RCU callback (in_dev_rcu_put).
 */
272 static void inetdev_destroy(struct in_device *in_dev)
273 {
274         struct in_ifaddr *ifa;
275         struct net_device *dev;
276
277         ASSERT_RTNL();
278
279         dev = in_dev->dev;
280
281         in_dev->dead = 1;
282
283         ip_mc_destroy_dev(in_dev);
284
            /* Always delete the list head; destroy=0 because the entry
             * is freed explicitly right after the unlink.
             */
285         while ((ifa = in_dev->ifa_list) != NULL) {
286                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
287                 inet_free_ifa(ifa);
288         }
289
290         RCU_INIT_POINTER(dev->ip_ptr, NULL);
291
292         devinet_sysctl_unregister(in_dev);
293         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
294         arp_ifdown(dev);
295
296         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
297 }
298
299 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
300 {
301         rcu_read_lock();
302         for_primary_ifa(in_dev) {
303                 if (inet_ifa_match(a, ifa)) {
304                         if (!b || inet_ifa_match(b, ifa)) {
305                                 rcu_read_unlock();
306                                 return 1;
307                         }
308                 }
309         } endfor_ifa(in_dev);
310         rcu_read_unlock();
311         return 0;
312 }
313
/*
 * __inet_del_ifa - unlink an address from an in_device and announce it.
 * @in_dev: device the address belongs to
 * @ifap: address of the list link that points at the entry to remove
 * @destroy: non-zero to also free the removed entry via inet_free_ifa();
 *           zero leaves the entry with the caller
 * @nlh: netlink request header handed on to rtmsg_ifa() (may be NULL)
 * @portid: netlink port id handed on to rtmsg_ifa()
 *
 * Must be called with RTNL held.  When the removed entry is a primary
 * address, the secondaries that follow it with the same mask and a
 * matching prefix are either deleted as well or, if
 * IN_DEV_PROMOTE_SECONDARIES() is set for the device, the first of them
 * is promoted to primary and the routes of the remaining ones are
 * re-added.
 */
314 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
315                          int destroy, struct nlmsghdr *nlh, u32 portid)
316 {
317         struct in_ifaddr *promote = NULL;
318         struct in_ifaddr *ifa, *ifa1 = *ifap;
319         struct in_ifaddr *last_prim = in_dev->ifa_list;
320         struct in_ifaddr *prev_prom = NULL;
321         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
322
323         ASSERT_RTNL();
324
325         /* 1. Deleting primary ifaddr forces deletion all secondaries
326          * unless alias promotion is set
327          **/
328
329         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
330                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
331
332                 while ((ifa = *ifap1) != NULL) {
                            /* Remember the last primary whose scope is
                             * not below ifa1's; a promoted entry is
                             * re-linked right after it.
                             */
333                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
334                             ifa1->ifa_scope <= ifa->ifa_scope)
335                                 last_prim = ifa;
336
                            /* Skip entries that are not secondaries of
                             * ifa1's subnet.
                             */
337                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
338                             ifa1->ifa_mask != ifa->ifa_mask ||
339                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
340                                 ifap1 = &ifa->ifa_next;
341                                 prev_prom = ifa;
342                                 continue;
343                         }
344
345                         if (!do_promote) {
346                                 inet_hash_remove(ifa);
347                                 *ifap1 = ifa->ifa_next;
348
349                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
350                                 blocking_notifier_call_chain(&inetaddr_chain,
351                                                 NETDEV_DOWN, ifa);
352                                 inet_free_ifa(ifa);
353                         } else {
354                                 promote = ifa;
355                                 break;
356                         }
357                 }
358         }
359
360         /* On promotion all secondaries from subnet are changing
361          * the primary IP, we must remove all their routes silently
362          * and later to add them back with new prefsrc. Do this
363          * while all addresses are on the device list.
364          */
365         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
366                 if (ifa1->ifa_mask == ifa->ifa_mask &&
367                     inet_ifa_match(ifa1->ifa_address, ifa))
368                         fib_del_ifaddr(ifa, ifa1);
369         }
370
371         /* 2. Unlink it */
372
373         *ifap = ifa1->ifa_next;
374         inet_hash_remove(ifa1);
375
376         /* 3. Announce address deletion */
377
378         /* Send message first, then call notifier.
379            At first sight, FIB update triggered by notifier
380            will refer to already deleted ifaddr, that could confuse
381            netlink listeners. It is not true: look, gated sees
382            that route deleted and if it still thinks that ifaddr
383            is valid, it will try to restore deleted routes... Grr.
384            So that, this order is correct.
385          */
386         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
387         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
388
389         if (promote) {
390                 struct in_ifaddr *next_sec = promote->ifa_next;
391
                    /* prev_prom is only set when the walk above skipped
                     * at least one entry before reaching the promoted
                     * one; in that case move it to just after last_prim.
                     */
392                 if (prev_prom) {
393                         prev_prom->ifa_next = promote->ifa_next;
394                         promote->ifa_next = last_prim->ifa_next;
395                         last_prim->ifa_next = promote;
396                 }
397
398                 promote->ifa_flags &= ~IFA_F_SECONDARY;
399                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
400                 blocking_notifier_call_chain(&inetaddr_chain,
401                                 NETDEV_UP, promote);
                    /* Re-add routes for the remaining entries of the
                     * same subnet that followed the promoted one.
                     */
402                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
403                         if (ifa1->ifa_mask != ifa->ifa_mask ||
404                             !inet_ifa_match(ifa1->ifa_address, ifa))
405                                         continue;
406                         fib_add_ifaddr(ifa);
407                 }
408
409         }
410         if (destroy)
411                 inet_free_ifa(ifa1);
412 }
413
/*
 * Convenience form of __inet_del_ifa() for callers that have no netlink
 * request: passes a NULL nlmsghdr and port id 0.
 */
414 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
415                          int destroy)
416 {
417         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
418 }
419
/*
 * __inet_insert_ifa - link an address into its in_device and announce it.
 * @ifa: fully initialised address; ifa->ifa_dev names the target device
 * @nlh: netlink request header handed on to rtmsg_ifa() (may be NULL)
 * @portid: netlink port id handed on to rtmsg_ifa()
 *
 * Must be called with RTNL held.  @ifa is freed here on every path that
 * does not insert it.
 *
 * Returns 0 on success - and also when ifa->ifa_local is zero, in which
 * case nothing is inserted; -EEXIST if the same local address already
 * exists in the same subnet; -EINVAL if an existing address of the same
 * subnet has a different scope.
 */
420 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
421                              u32 portid)
422 {
423         struct in_device *in_dev = ifa->ifa_dev;
424         struct in_ifaddr *ifa1, **ifap, **last_primary;
425
426         ASSERT_RTNL();
427
428         if (!ifa->ifa_local) {
429                 inet_free_ifa(ifa);
430                 return 0;
431         }
432
            /* Assume primary; the scan below sets IFA_F_SECONDARY again
             * if another address of the same subnet is found.
             */
433         ifa->ifa_flags &= ~IFA_F_SECONDARY;
434         last_primary = &in_dev->ifa_list;
435
436         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
437              ifap = &ifa1->ifa_next) {
438                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
439                     ifa->ifa_scope <= ifa1->ifa_scope)
440                         last_primary = &ifa1->ifa_next;
441                 if (ifa1->ifa_mask == ifa->ifa_mask &&
442                     inet_ifa_match(ifa1->ifa_address, ifa)) {
443                         if (ifa1->ifa_local == ifa->ifa_local) {
444                                 inet_free_ifa(ifa);
445                                 return -EEXIST;
446                         }
447                         if (ifa1->ifa_scope != ifa->ifa_scope) {
448                                 inet_free_ifa(ifa);
449                                 return -EINVAL;
450                         }
451                         ifa->ifa_flags |= IFA_F_SECONDARY;
452                 }
453         }
454
            /* After the scan ifap points at the list's terminating link,
             * so a secondary is appended at the tail; a primary goes in
             * at last_primary instead.
             * NOTE(review): net_srandom() presumably perturbs the
             * networking PRNG with the new address - confirm.
             */
455         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
456                 net_srandom(ifa->ifa_local);
457                 ifap = last_primary;
458         }
459
460         ifa->ifa_next = *ifap;
461         *ifap = ifa;
462
463         inet_hash_insert(dev_net(in_dev->dev), ifa);
464
465         /* Send message first, then call notifier.
466            Notifier will trigger FIB update, so that
467            listeners of netlink will know about new ifaddr */
468         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
469         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
470
471         return 0;
472 }
473
/*
 * Convenience form of __inet_insert_ifa() for callers that have no
 * netlink request: passes a NULL nlmsghdr and port id 0.
 */
474 static int inet_insert_ifa(struct in_ifaddr *ifa)
475 {
476         return __inet_insert_ifa(ifa, NULL, 0);
477 }
478
/*
 * inet_set_ifa - bind an address to a device's in_device and insert it.
 * @dev: target device
 * @ifa: address to install
 *
 * Must be called with RTNL held.  If @dev has no in_device, @ifa is freed
 * and -ENOBUFS is returned.  If @ifa is not yet bound to this in_device,
 * a reference on the in_device is taken and stored in ifa->ifa_dev.  A
 * loopback local address gets scope RT_SCOPE_HOST.
 *
 * Returns the result of inet_insert_ifa() otherwise.
 */
479 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
480 {
481         struct in_device *in_dev = __in_dev_get_rtnl(dev);
482
483         ASSERT_RTNL();
484
485         if (!in_dev) {
486                 inet_free_ifa(ifa);
487                 return -ENOBUFS;
488         }
489         ipv4_devconf_setall(in_dev);
490         if (ifa->ifa_dev != in_dev) {
491                 WARN_ON(ifa->ifa_dev);
492                 in_dev_hold(in_dev);
493                 ifa->ifa_dev = in_dev;
494         }
495         if (ipv4_is_loopback(ifa->ifa_local))
496                 ifa->ifa_scope = RT_SCOPE_HOST;
497         return inet_insert_ifa(ifa);
498 }
499
500 /* Caller must hold RCU or RTNL :
501  * We dont take a reference on found in_device
502  */
503 struct in_device *inetdev_by_index(struct net *net, int ifindex)
504 {
505         struct net_device *dev;
506         struct in_device *in_dev = NULL;
507
508         rcu_read_lock();
509         dev = dev_get_by_index_rcu(net, ifindex);
510         if (dev)
511                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
512         rcu_read_unlock();
513         return in_dev;
514 }
515 EXPORT_SYMBOL(inetdev_by_index);
516
517 /* Called only from RTNL semaphored context. No locks. */
518
519 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
520                                     __be32 mask)
521 {
522         ASSERT_RTNL();
523
524         for_primary_ifa(in_dev) {
525                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
526                         return ifa;
527         } endfor_ifa(in_dev);
528         return NULL;
529 }
530
/*
 * inet_rtm_deladdr - delete the address described by a netlink message.
 * @skb: request buffer; supplies the namespace and the sender's port id
 * @nlh: netlink message carrying a struct ifaddrmsg plus IFA_* attributes
 * @arg: not used by this function
 *
 * Must be called with RTNL held and requires CAP_NET_ADMIN.  The first
 * address on the device that agrees with every attribute supplied
 * (IFA_LOCAL, IFA_LABEL, IFA_ADDRESS together with the prefix length) is
 * removed and freed.
 * NOTE(review): presumably registered as the RTM_DELADDR handler - the
 * registration is not visible here; confirm.
 *
 * Returns 0 on success, -EPERM without the capability, the nlmsg_parse()
 * error for a malformed message, -ENODEV if the interface has no
 * in_device, or -EADDRNOTAVAIL if no address matched.
 */
531 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
532 {
533         struct net *net = sock_net(skb->sk);
534         struct nlattr *tb[IFA_MAX+1];
535         struct in_device *in_dev;
536         struct ifaddrmsg *ifm;
537         struct in_ifaddr *ifa, **ifap;
538         int err = -EINVAL;
539
540         ASSERT_RTNL();
541
542         if (!capable(CAP_NET_ADMIN))
543                 return -EPERM;
544
545         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
546         if (err < 0)
547                 goto errout;
548
549         ifm = nlmsg_data(nlh);
550         in_dev = inetdev_by_index(net, ifm->ifa_index);
551         if (in_dev == NULL) {
552                 err = -ENODEV;
553                 goto errout;
554         }
555
            /* Attributes that are absent from the request act as
             * wildcards.
             */
556         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
557              ifap = &ifa->ifa_next) {
558                 if (tb[IFA_LOCAL] &&
559                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
560                         continue;
561
562                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
563                         continue;
564
565                 if (tb[IFA_ADDRESS] &&
566                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
567                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
568                         continue;
569
570                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
571                 return 0;
572         }
573
574         err = -EADDRNOTAVAIL;
575 errout:
576         return err;
577 }
578
/*
 * rtm_to_ifaddr - build an in_ifaddr from a netlink address message.
 * @net: namespace in which the interface index is resolved
 * @nlh: netlink message carrying a struct ifaddrmsg plus IFA_* attributes
 *
 * IFA_LOCAL is mandatory; IFA_ADDRESS defaults to IFA_LOCAL and the label
 * defaults to the device name.  On success the new entry holds a
 * reference on the device's in_device through ifa->ifa_dev.
 *
 * Returns the new entry, or an ERR_PTR(): the nlmsg_parse() error,
 * -EINVAL (prefix length above 32 or no IFA_LOCAL), -ENODEV (unknown
 * interface index), or -ENOBUFS (no in_device, or allocation failure).
 */
579 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
580 {
581         struct nlattr *tb[IFA_MAX+1];
582         struct in_ifaddr *ifa;
583         struct ifaddrmsg *ifm;
584         struct net_device *dev;
585         struct in_device *in_dev;
586         int err;
587
588         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
589         if (err < 0)
590                 goto errout;
591
592         ifm = nlmsg_data(nlh);
593         err = -EINVAL;
594         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
595                 goto errout;
596
597         dev = __dev_get_by_index(net, ifm->ifa_index);
598         err = -ENODEV;
599         if (dev == NULL)
600                 goto errout;
601
602         in_dev = __in_dev_get_rtnl(dev);
603         err = -ENOBUFS;
604         if (in_dev == NULL)
605                 goto errout;
606
607         ifa = inet_alloc_ifa();
608         if (ifa == NULL)
609                 /*
610                  * A potential indev allocation can be left alive, it stays
611                  * assigned to its device and is destroyed with it.
612                  */
613                 goto errout;
614
615         ipv4_devconf_setall(in_dev);
616         in_dev_hold(in_dev);
617
618         if (tb[IFA_ADDRESS] == NULL)
619                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
620
621         INIT_HLIST_NODE(&ifa->hash);
622         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
623         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
624         ifa->ifa_flags = ifm->ifa_flags;
625         ifa->ifa_scope = ifm->ifa_scope;
626         ifa->ifa_dev = in_dev;
627
628         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
629         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
630
631         if (tb[IFA_BROADCAST])
632                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
633
634         if (tb[IFA_LABEL])
635                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
636         else
637                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
638
639         return ifa;
640
641 errout:
642         return ERR_PTR(err);
643 }
644
645 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
646 {
647         struct net *net = sock_net(skb->sk);
648         struct in_ifaddr *ifa;
649
650         ASSERT_RTNL();
651
652         if (!capable(CAP_NET_ADMIN))
653                 return -EPERM;
654
655         ifa = rtm_to_ifaddr(net, nlh);
656         if (IS_ERR(ifa))
657                 return PTR_ERR(ifa);
658
659         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
660 }
661
662 /*
663  *      Determine a default network mask, based on the IP address.
664  */
665
666 static int inet_abc_len(__be32 addr)
667 {
668         int rc = -1;    /* Something else, probably a multicast. */
669
670         if (ipv4_is_zeronet(addr))
671                 rc = 0;
672         else {
673                 __u32 haddr = ntohl(addr);
674
675                 if (IN_CLASSA(haddr))
676                         rc = 8;
677                 else if (IN_CLASSB(haddr))
678                         rc = 16;
679                 else if (IN_CLASSC(haddr))
680                         rc = 24;
681         }
682
683         return rc;
684 }
685
686
687 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
688 {
689         struct ifreq ifr;
690         struct sockaddr_in sin_orig;
691         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
692         struct in_device *in_dev;
693         struct in_ifaddr **ifap = NULL;
694         struct in_ifaddr *ifa = NULL;
695         struct net_device *dev;
696         char *colon;
697         int ret = -EFAULT;
698         int tryaddrmatch = 0;
699
700         /*
701          *      Fetch the caller's info block into kernel space
702          */
703
704         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
705                 goto out;
706         ifr.ifr_name[IFNAMSIZ - 1] = 0;
707
708         /* save original address for comparison */
709         memcpy(&sin_orig, sin, sizeof(*sin));
710
711         colon = strchr(ifr.ifr_name, ':');
712         if (colon)
713                 *colon = 0;
714
715         dev_load(net, ifr.ifr_name);
716
717         switch (cmd) {
718         case SIOCGIFADDR:       /* Get interface address */
719         case SIOCGIFBRDADDR:    /* Get the broadcast address */
720         case SIOCGIFDSTADDR:    /* Get the destination address */
721         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
722                 /* Note that these ioctls will not sleep,
723                    so that we do not impose a lock.
724                    One day we will be forced to put shlock here (I mean SMP)
725                  */
726                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
727                 memset(sin, 0, sizeof(*sin));
728                 sin->sin_family = AF_INET;
729                 break;
730
731         case SIOCSIFFLAGS:
732                 ret = -EPERM;
733                 if (!capable(CAP_NET_ADMIN))
734                         goto out;
735                 break;
736         case SIOCSIFADDR:       /* Set interface address (and family) */
737         case SIOCSIFBRDADDR:    /* Set the broadcast address */
738         case SIOCSIFDSTADDR:    /* Set the destination address */
739         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
740                 ret = -EPERM;
741                 if (!capable(CAP_NET_ADMIN))
742                         goto out;
743                 ret = -EINVAL;
744                 if (sin->sin_family != AF_INET)
745                         goto out;
746                 break;
747         default:
748                 ret = -EINVAL;
749                 goto out;
750         }
751
752         rtnl_lock();
753
754         ret = -ENODEV;
755         dev = __dev_get_by_name(net, ifr.ifr_name);
756         if (!dev)
757                 goto done;
758
759         if (colon)
760                 *colon = ':';
761
762         in_dev = __in_dev_get_rtnl(dev);
763         if (in_dev) {
764                 if (tryaddrmatch) {
765                         /* Matthias Andree */
766                         /* compare label and address (4.4BSD style) */
767                         /* note: we only do this for a limited set of ioctls
768                            and only if the original address family was AF_INET.
769                            This is checked above. */
770                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
771                              ifap = &ifa->ifa_next) {
772                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
773                                     sin_orig.sin_addr.s_addr ==
774                                                         ifa->ifa_local) {
775                                         break; /* found */
776                                 }
777                         }
778                 }
779                 /* we didn't get a match, maybe the application is
780                    4.3BSD-style and passed in junk so we fall back to
781                    comparing just the label */
782                 if (!ifa) {
783                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
784                              ifap = &ifa->ifa_next)
785                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
786                                         break;
787                 }
788         }
789
790         ret = -EADDRNOTAVAIL;
791         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
792                 goto done;
793
794         switch (cmd) {
795         case SIOCGIFADDR:       /* Get interface address */
796                 sin->sin_addr.s_addr = ifa->ifa_local;
797                 goto rarok;
798
799         case SIOCGIFBRDADDR:    /* Get the broadcast address */
800                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
801                 goto rarok;
802
803         case SIOCGIFDSTADDR:    /* Get the destination address */
804                 sin->sin_addr.s_addr = ifa->ifa_address;
805                 goto rarok;
806
807         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
808                 sin->sin_addr.s_addr = ifa->ifa_mask;
809                 goto rarok;
810
811         case SIOCSIFFLAGS:
812                 if (colon) {
813                         ret = -EADDRNOTAVAIL;
814                         if (!ifa)
815                                 break;
816                         ret = 0;
817                         if (!(ifr.ifr_flags & IFF_UP))
818                                 inet_del_ifa(in_dev, ifap, 1);
819                         break;
820                 }
821                 ret = dev_change_flags(dev, ifr.ifr_flags);
822                 break;
823
824         case SIOCSIFADDR:       /* Set interface address (and family) */
825                 ret = -EINVAL;
826                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
827                         break;
828
829                 if (!ifa) {
830                         ret = -ENOBUFS;
831                         ifa = inet_alloc_ifa();
832                         INIT_HLIST_NODE(&ifa->hash);
833                         if (!ifa)
834                                 break;
835                         if (colon)
836                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
837                         else
838                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
839                 } else {
840                         ret = 0;
841                         if (ifa->ifa_local == sin->sin_addr.s_addr)
842                                 break;
843                         inet_del_ifa(in_dev, ifap, 0);
844                         ifa->ifa_broadcast = 0;
845                         ifa->ifa_scope = 0;
846                 }
847
848                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
849
850                 if (!(dev->flags & IFF_POINTOPOINT)) {
851                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
852                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
853                         if ((dev->flags & IFF_BROADCAST) &&
854                             ifa->ifa_prefixlen < 31)
855                                 ifa->ifa_broadcast = ifa->ifa_address |
856                                                      ~ifa->ifa_mask;
857                 } else {
858                         ifa->ifa_prefixlen = 32;
859                         ifa->ifa_mask = inet_make_mask(32);
860                 }
861                 ret = inet_set_ifa(dev, ifa);
862                 break;
863
864         case SIOCSIFBRDADDR:    /* Set the broadcast address */
865                 ret = 0;
866                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
867                         inet_del_ifa(in_dev, ifap, 0);
868                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
869                         inet_insert_ifa(ifa);
870                 }
871                 break;
872
873         case SIOCSIFDSTADDR:    /* Set the destination address */
874                 ret = 0;
875                 if (ifa->ifa_address == sin->sin_addr.s_addr)
876                         break;
877                 ret = -EINVAL;
878                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
879                         break;
880                 ret = 0;
881                 inet_del_ifa(in_dev, ifap, 0);
882                 ifa->ifa_address = sin->sin_addr.s_addr;
883                 inet_insert_ifa(ifa);
884                 break;
885
886         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
887
888                 /*
889                  *      The mask we set must be legal.
890                  */
891                 ret = -EINVAL;
892                 if (bad_mask(sin->sin_addr.s_addr, 0))
893                         break;
894                 ret = 0;
895                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
896                         __be32 old_mask = ifa->ifa_mask;
897                         inet_del_ifa(in_dev, ifap, 0);
898                         ifa->ifa_mask = sin->sin_addr.s_addr;
899                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
900
901                         /* See if current broadcast address matches
902                          * with current netmask, then recalculate
903                          * the broadcast address. Otherwise it's a
904                          * funny address, so don't touch it since
905                          * the user seems to know what (s)he's doing...
906                          */
907                         if ((dev->flags & IFF_BROADCAST) &&
908                             (ifa->ifa_prefixlen < 31) &&
909                             (ifa->ifa_broadcast ==
910                              (ifa->ifa_local|~old_mask))) {
911                                 ifa->ifa_broadcast = (ifa->ifa_local |
912                                                       ~sin->sin_addr.s_addr);
913                         }
914                         inet_insert_ifa(ifa);
915                 }
916                 break;
917         }
918 done:
919         rtnl_unlock();
920 out:
921         return ret;
922 rarok:
923         rtnl_unlock();
924         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
925         goto out;
926 }
927
928 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
929 {
930         struct in_device *in_dev = __in_dev_get_rtnl(dev);
931         struct in_ifaddr *ifa;
932         struct ifreq ifr;
933         int done = 0;
934
935         if (!in_dev)
936                 goto out;
937
938         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
939                 if (!buf) {
940                         done += sizeof(ifr);
941                         continue;
942                 }
943                 if (len < (int) sizeof(ifr))
944                         break;
945                 memset(&ifr, 0, sizeof(struct ifreq));
946                 if (ifa->ifa_label)
947                         strcpy(ifr.ifr_name, ifa->ifa_label);
948                 else
949                         strcpy(ifr.ifr_name, dev->name);
950
951                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
952                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
953                                                                 ifa->ifa_local;
954
955                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
956                         done = -EFAULT;
957                         break;
958                 }
959                 buf  += sizeof(struct ifreq);
960                 len  -= sizeof(struct ifreq);
961                 done += sizeof(struct ifreq);
962         }
963 out:
964         return done;
965 }
966
967 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
968 {
969         __be32 addr = 0;
970         struct in_device *in_dev;
971         struct net *net = dev_net(dev);
972
973         rcu_read_lock();
974         in_dev = __in_dev_get_rcu(dev);
975         if (!in_dev)
976                 goto no_in_dev;
977
978         for_primary_ifa(in_dev) {
979                 if (ifa->ifa_scope > scope)
980                         continue;
981                 if (!dst || inet_ifa_match(dst, ifa)) {
982                         addr = ifa->ifa_local;
983                         break;
984                 }
985                 if (!addr)
986                         addr = ifa->ifa_local;
987         } endfor_ifa(in_dev);
988
989         if (addr)
990                 goto out_unlock;
991 no_in_dev:
992
993         /* Not loopback addresses on loopback should be preferred
994            in this case. It is importnat that lo is the first interface
995            in dev_base list.
996          */
997         for_each_netdev_rcu(net, dev) {
998                 in_dev = __in_dev_get_rcu(dev);
999                 if (!in_dev)
1000                         continue;
1001
1002                 for_primary_ifa(in_dev) {
1003                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1004                             ifa->ifa_scope <= scope) {
1005                                 addr = ifa->ifa_local;
1006                                 goto out_unlock;
1007                         }
1008                 } endfor_ifa(in_dev);
1009         }
1010 out_unlock:
1011         rcu_read_unlock();
1012         return addr;
1013 }
1014 EXPORT_SYMBOL(inet_select_addr);
1015
1016 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1017                               __be32 local, int scope)
1018 {
1019         int same = 0;
1020         __be32 addr = 0;
1021
1022         for_ifa(in_dev) {
1023                 if (!addr &&
1024                     (local == ifa->ifa_local || !local) &&
1025                     ifa->ifa_scope <= scope) {
1026                         addr = ifa->ifa_local;
1027                         if (same)
1028                                 break;
1029                 }
1030                 if (!same) {
1031                         same = (!local || inet_ifa_match(local, ifa)) &&
1032                                 (!dst || inet_ifa_match(dst, ifa));
1033                         if (same && addr) {
1034                                 if (local || !dst)
1035                                         break;
1036                                 /* Is the selected addr into dst subnet? */
1037                                 if (inet_ifa_match(addr, ifa))
1038                                         break;
1039                                 /* No, then can we use new local src? */
1040                                 if (ifa->ifa_scope <= scope) {
1041                                         addr = ifa->ifa_local;
1042                                         break;
1043                                 }
1044                                 /* search for large dst subnet for addr */
1045                                 same = 0;
1046                         }
1047                 }
1048         } endfor_ifa(in_dev);
1049
1050         return same ? addr : 0;
1051 }
1052
1053 /*
1054  * Confirm that local IP address exists using wildcards:
1055  * - in_dev: only on this interface, 0=any interface
1056  * - dst: only in the same subnet as dst, 0=any dst
1057  * - local: address, 0=autoselect the local address
1058  * - scope: maximum allowed scope value for the local address
1059  */
1060 __be32 inet_confirm_addr(struct in_device *in_dev,
1061                          __be32 dst, __be32 local, int scope)
1062 {
1063         __be32 addr = 0;
1064         struct net_device *dev;
1065         struct net *net;
1066
1067         if (scope != RT_SCOPE_LINK)
1068                 return confirm_addr_indev(in_dev, dst, local, scope);
1069
1070         net = dev_net(in_dev->dev);
1071         rcu_read_lock();
1072         for_each_netdev_rcu(net, dev) {
1073                 in_dev = __in_dev_get_rcu(dev);
1074                 if (in_dev) {
1075                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1076                         if (addr)
1077                                 break;
1078                 }
1079         }
1080         rcu_read_unlock();
1081
1082         return addr;
1083 }
1084 EXPORT_SYMBOL(inet_confirm_addr);
1085
1086 /*
1087  *      Device notifier
1088  */
1089
1090 int register_inetaddr_notifier(struct notifier_block *nb)
1091 {
1092         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1093 }
1094 EXPORT_SYMBOL(register_inetaddr_notifier);
1095
1096 int unregister_inetaddr_notifier(struct notifier_block *nb)
1097 {
1098         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1099 }
1100 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1101
1102 /* Rename ifa_labels for a device name change. Make some effort to preserve
1103  * existing alias numbering and to create unique labels if possible.
1104 */
1105 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1106 {
1107         struct in_ifaddr *ifa;
1108         int named = 0;
1109
1110         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1111                 char old[IFNAMSIZ], *dot;
1112
1113                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1114                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1115                 if (named++ == 0)
1116                         goto skip;
1117                 dot = strchr(old, ':');
1118                 if (dot == NULL) {
1119                         sprintf(old, ":%d", named);
1120                         dot = old;
1121                 }
1122                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1123                         strcat(ifa->ifa_label, dot);
1124                 else
1125                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1126 skip:
1127                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1128         }
1129 }
1130
/*
 * Tell whether @mtu is large enough for IPv4 to operate on a device.
 *
 * RFC 791 requires every internet module to be able to forward a
 * 68-octet datagram without fragmentation, so anything smaller is
 * rejected.  Returns true when @mtu is at least that minimum.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        /* Named here instead of a bare literal in the comparison. */
        enum { INETDEV_MIN_MTU = 68 };

        return mtu >= INETDEV_MIN_MTU;
}
1135
1136 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1137                                         struct in_device *in_dev)
1138
1139 {
1140         struct in_ifaddr *ifa;
1141
1142         for (ifa = in_dev->ifa_list; ifa;
1143              ifa = ifa->ifa_next) {
1144                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1145                          ifa->ifa_local, dev,
1146                          ifa->ifa_local, NULL,
1147                          dev->dev_addr, NULL);
1148         }
1149 }
1150
1151 /* Called only under RTNL semaphore */
1152
1153 static int inetdev_event(struct notifier_block *this, unsigned long event,
1154                          void *ptr)
1155 {
1156         struct net_device *dev = ptr;
1157         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1158
1159         ASSERT_RTNL();
1160
1161         if (!in_dev) {
1162                 if (event == NETDEV_REGISTER) {
1163                         in_dev = inetdev_init(dev);
1164                         if (!in_dev)
1165                                 return notifier_from_errno(-ENOMEM);
1166                         if (dev->flags & IFF_LOOPBACK) {
1167                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1168                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1169                         }
1170                 } else if (event == NETDEV_CHANGEMTU) {
1171                         /* Re-enabling IP */
1172                         if (inetdev_valid_mtu(dev->mtu))
1173                                 in_dev = inetdev_init(dev);
1174                 }
1175                 goto out;
1176         }
1177
1178         switch (event) {
1179         case NETDEV_REGISTER:
1180                 pr_debug("%s: bug\n", __func__);
1181                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1182                 break;
1183         case NETDEV_UP:
1184                 if (!inetdev_valid_mtu(dev->mtu))
1185                         break;
1186                 if (dev->flags & IFF_LOOPBACK) {
1187                         struct in_ifaddr *ifa = inet_alloc_ifa();
1188
1189                         if (ifa) {
1190                                 INIT_HLIST_NODE(&ifa->hash);
1191                                 ifa->ifa_local =
1192                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1193                                 ifa->ifa_prefixlen = 8;
1194                                 ifa->ifa_mask = inet_make_mask(8);
1195                                 in_dev_hold(in_dev);
1196                                 ifa->ifa_dev = in_dev;
1197                                 ifa->ifa_scope = RT_SCOPE_HOST;
1198                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1199                                 inet_insert_ifa(ifa);
1200                         }
1201                 }
1202                 ip_mc_up(in_dev);
1203                 /* fall through */
1204         case NETDEV_CHANGEADDR:
1205                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1206                         break;
1207                 /* fall through */
1208         case NETDEV_NOTIFY_PEERS:
1209                 /* Send gratuitous ARP to notify of link change */
1210                 inetdev_send_gratuitous_arp(dev, in_dev);
1211                 break;
1212         case NETDEV_DOWN:
1213                 ip_mc_down(in_dev);
1214                 break;
1215         case NETDEV_PRE_TYPE_CHANGE:
1216                 ip_mc_unmap(in_dev);
1217                 break;
1218         case NETDEV_POST_TYPE_CHANGE:
1219                 ip_mc_remap(in_dev);
1220                 break;
1221         case NETDEV_CHANGEMTU:
1222                 if (inetdev_valid_mtu(dev->mtu))
1223                         break;
1224                 /* disable IP when MTU is not enough */
1225         case NETDEV_UNREGISTER:
1226                 inetdev_destroy(in_dev);
1227                 break;
1228         case NETDEV_CHANGENAME:
1229                 /* Do not notify about label change, this event is
1230                  * not interesting to applications using netlink.
1231                  */
1232                 inetdev_changename(dev, in_dev);
1233
1234                 devinet_sysctl_unregister(in_dev);
1235                 devinet_sysctl_register(in_dev);
1236                 break;
1237         }
1238 out:
1239         return NOTIFY_DONE;
1240 }
1241
1242 static struct notifier_block ip_netdev_notifier = {
1243         .notifier_call = inetdev_event,
1244 };
1245
1246 static size_t inet_nlmsg_size(void)
1247 {
1248         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1249                + nla_total_size(4) /* IFA_ADDRESS */
1250                + nla_total_size(4) /* IFA_LOCAL */
1251                + nla_total_size(4) /* IFA_BROADCAST */
1252                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1253 }
1254
1255 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1256                             u32 portid, u32 seq, int event, unsigned int flags)
1257 {
1258         struct ifaddrmsg *ifm;
1259         struct nlmsghdr  *nlh;
1260
1261         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1262         if (nlh == NULL)
1263                 return -EMSGSIZE;
1264
1265         ifm = nlmsg_data(nlh);
1266         ifm->ifa_family = AF_INET;
1267         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1268         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1269         ifm->ifa_scope = ifa->ifa_scope;
1270         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1271
1272         if ((ifa->ifa_address &&
1273              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1274             (ifa->ifa_local &&
1275              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1276             (ifa->ifa_broadcast &&
1277              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1278             (ifa->ifa_label[0] &&
1279              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1280                 goto nla_put_failure;
1281
1282         return nlmsg_end(skb, nlh);
1283
1284 nla_put_failure:
1285         nlmsg_cancel(skb, nlh);
1286         return -EMSGSIZE;
1287 }
1288
1289 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1290 {
1291         struct net *net = sock_net(skb->sk);
1292         int h, s_h;
1293         int idx, s_idx;
1294         int ip_idx, s_ip_idx;
1295         struct net_device *dev;
1296         struct in_device *in_dev;
1297         struct in_ifaddr *ifa;
1298         struct hlist_head *head;
1299         struct hlist_node *node;
1300
1301         s_h = cb->args[0];
1302         s_idx = idx = cb->args[1];
1303         s_ip_idx = ip_idx = cb->args[2];
1304
1305         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1306                 idx = 0;
1307                 head = &net->dev_index_head[h];
1308                 rcu_read_lock();
1309                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1310                         if (idx < s_idx)
1311                                 goto cont;
1312                         if (h > s_h || idx > s_idx)
1313                                 s_ip_idx = 0;
1314                         in_dev = __in_dev_get_rcu(dev);
1315                         if (!in_dev)
1316                                 goto cont;
1317
1318                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1319                              ifa = ifa->ifa_next, ip_idx++) {
1320                                 if (ip_idx < s_ip_idx)
1321                                         continue;
1322                                 if (inet_fill_ifaddr(skb, ifa,
1323                                              NETLINK_CB(cb->skb).portid,
1324                                              cb->nlh->nlmsg_seq,
1325                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1326                                         rcu_read_unlock();
1327                                         goto done;
1328                                 }
1329                         }
1330 cont:
1331                         idx++;
1332                 }
1333                 rcu_read_unlock();
1334         }
1335
1336 done:
1337         cb->args[0] = h;
1338         cb->args[1] = idx;
1339         cb->args[2] = ip_idx;
1340
1341         return skb->len;
1342 }
1343
1344 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1345                       u32 portid)
1346 {
1347         struct sk_buff *skb;
1348         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1349         int err = -ENOBUFS;
1350         struct net *net;
1351
1352         net = dev_net(ifa->ifa_dev->dev);
1353         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1354         if (skb == NULL)
1355                 goto errout;
1356
1357         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1358         if (err < 0) {
1359                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1360                 WARN_ON(err == -EMSGSIZE);
1361                 kfree_skb(skb);
1362                 goto errout;
1363         }
1364         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1365         return;
1366 errout:
1367         if (err < 0)
1368                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1369 }
1370
1371 static size_t inet_get_link_af_size(const struct net_device *dev)
1372 {
1373         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1374
1375         if (!in_dev)
1376                 return 0;
1377
1378         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1379 }
1380
1381 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1382 {
1383         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1384         struct nlattr *nla;
1385         int i;
1386
1387         if (!in_dev)
1388                 return -ENODATA;
1389
1390         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1391         if (nla == NULL)
1392                 return -EMSGSIZE;
1393
1394         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1395                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1396
1397         return 0;
1398 }
1399
1400 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1401         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1402 };
1403
1404 static int inet_validate_link_af(const struct net_device *dev,
1405                                  const struct nlattr *nla)
1406 {
1407         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1408         int err, rem;
1409
1410         if (dev && !__in_dev_get_rtnl(dev))
1411                 return -EAFNOSUPPORT;
1412
1413         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1414         if (err < 0)
1415                 return err;
1416
1417         if (tb[IFLA_INET_CONF]) {
1418                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1419                         int cfgid = nla_type(a);
1420
1421                         if (nla_len(a) < 4)
1422                                 return -EINVAL;
1423
1424                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1425                                 return -EINVAL;
1426                 }
1427         }
1428
1429         return 0;
1430 }
1431
1432 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1433 {
1434         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1435         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1436         int rem;
1437
1438         if (!in_dev)
1439                 return -EAFNOSUPPORT;
1440
1441         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1442                 BUG();
1443
1444         if (tb[IFLA_INET_CONF]) {
1445                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1446                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1447         }
1448
1449         return 0;
1450 }
1451
1452 static int inet_netconf_msgsize_devconf(int type)
1453 {
1454         int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1455                    + nla_total_size(4); /* NETCONFA_IFINDEX */
1456
1457         /* type -1 is used for ALL */
1458         if (type == -1 || type == NETCONFA_FORWARDING)
1459                 size += nla_total_size(4);
1460         if (type == -1 || type == NETCONFA_RP_FILTER)
1461                 size += nla_total_size(4);
1462
1463         return size;
1464 }
1465
1466 static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
1467                                      struct ipv4_devconf *devconf, u32 portid,
1468                                      u32 seq, int event, unsigned int flags,
1469                                      int type)
1470 {
1471         struct nlmsghdr  *nlh;
1472         struct netconfmsg *ncm;
1473
1474         nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
1475                         flags);
1476         if (nlh == NULL)
1477                 return -EMSGSIZE;
1478
1479         ncm = nlmsg_data(nlh);
1480         ncm->ncm_family = AF_INET;
1481
1482         if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
1483                 goto nla_put_failure;
1484
1485         /* type -1 is used for ALL */
1486         if ((type == -1 || type == NETCONFA_FORWARDING) &&
1487             nla_put_s32(skb, NETCONFA_FORWARDING,
1488                         IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
1489                 goto nla_put_failure;
1490         if ((type == -1 || type == NETCONFA_RP_FILTER) &&
1491             nla_put_s32(skb, NETCONFA_RP_FILTER,
1492                         IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
1493                 goto nla_put_failure;
1494
1495         return nlmsg_end(skb, nlh);
1496
1497 nla_put_failure:
1498         nlmsg_cancel(skb, nlh);
1499         return -EMSGSIZE;
1500 }
1501
1502 static void inet_netconf_notify_devconf(struct net *net, int type, int ifindex,
1503                                         struct ipv4_devconf *devconf)
1504 {
1505         struct sk_buff *skb;
1506         int err = -ENOBUFS;
1507
1508         skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC);
1509         if (skb == NULL)
1510                 goto errout;
1511
1512         err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
1513                                         RTM_NEWNETCONF, 0, type);
1514         if (err < 0) {
1515                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1516                 WARN_ON(err == -EMSGSIZE);
1517                 kfree_skb(skb);
1518                 goto errout;
1519         }
1520         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC);
1521         return;
1522 errout:
1523         if (err < 0)
1524                 rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
1525 }
1526
1527 static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1528         [NETCONFA_IFINDEX]      = { .len = sizeof(int) },
1529         [NETCONFA_FORWARDING]   = { .len = sizeof(int) },
1530         [NETCONFA_RP_FILTER]    = { .len = sizeof(int) },
1531 };
1532
1533 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1534                                     struct nlmsghdr *nlh,
1535                                     void *arg)
1536 {
1537         struct net *net = sock_net(in_skb->sk);
1538         struct nlattr *tb[NETCONFA_MAX+1];
1539         struct netconfmsg *ncm;
1540         struct sk_buff *skb;
1541         struct ipv4_devconf *devconf;
1542         struct in_device *in_dev;
1543         struct net_device *dev;
1544         int ifindex;
1545         int err;
1546
1547         err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
1548                           devconf_ipv4_policy);
1549         if (err < 0)
1550                 goto errout;
1551
1552         err = EINVAL;
1553         if (!tb[NETCONFA_IFINDEX])
1554                 goto errout;
1555
1556         ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
1557         switch (ifindex) {
1558         case NETCONFA_IFINDEX_ALL:
1559                 devconf = net->ipv4.devconf_all;
1560                 break;
1561         case NETCONFA_IFINDEX_DEFAULT:
1562                 devconf = net->ipv4.devconf_dflt;
1563                 break;
1564         default:
1565                 dev = __dev_get_by_index(net, ifindex);
1566                 if (dev == NULL)
1567                         goto errout;
1568                 in_dev = __in_dev_get_rtnl(dev);
1569                 if (in_dev == NULL)
1570                         goto errout;
1571                 devconf = &in_dev->cnf;
1572                 break;
1573         }
1574
1575         err = -ENOBUFS;
1576         skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC);
1577         if (skb == NULL)
1578                 goto errout;
1579
1580         err = inet_netconf_fill_devconf(skb, ifindex, devconf,
1581                                         NETLINK_CB(in_skb).portid,
1582                                         nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
1583                                         -1);
1584         if (err < 0) {
1585                 /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
1586                 WARN_ON(err == -EMSGSIZE);
1587                 kfree_skb(skb);
1588                 goto errout;
1589         }
1590         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1591 errout:
1592         return err;
1593 }
1594
1595 #ifdef CONFIG_SYSCTL
1596
1597 static void devinet_copy_dflt_conf(struct net *net, int i)
1598 {
1599         struct net_device *dev;
1600
1601         rcu_read_lock();
1602         for_each_netdev_rcu(net, dev) {
1603                 struct in_device *in_dev;
1604
1605                 in_dev = __in_dev_get_rcu(dev);
1606                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1607                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1608         }
1609         rcu_read_unlock();
1610 }
1611
1612 /* called with RTNL locked */
1613 static void inet_forward_change(struct net *net)
1614 {
1615         struct net_device *dev;
1616         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1617
1618         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1619         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1620         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1621                                     NETCONFA_IFINDEX_ALL,
1622                                     net->ipv4.devconf_all);
1623         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1624                                     NETCONFA_IFINDEX_DEFAULT,
1625                                     net->ipv4.devconf_dflt);
1626
1627         for_each_netdev(net, dev) {
1628                 struct in_device *in_dev;
1629                 if (on)
1630                         dev_disable_lro(dev);
1631                 rcu_read_lock();
1632                 in_dev = __in_dev_get_rcu(dev);
1633                 if (in_dev) {
1634                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1635                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1636                                                     dev->ifindex, &in_dev->cnf);
1637                 }
1638                 rcu_read_unlock();
1639         }
1640 }
1641
1642 static int devinet_conf_proc(ctl_table *ctl, int write,
1643                              void __user *buffer,
1644                              size_t *lenp, loff_t *ppos)
1645 {
1646         int old_value = *(int *)ctl->data;
1647         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1648         int new_value = *(int *)ctl->data;
1649
1650         if (write) {
1651                 struct ipv4_devconf *cnf = ctl->extra1;
1652                 struct net *net = ctl->extra2;
1653                 int i = (int *)ctl->data - cnf->data;
1654
1655                 set_bit(i, cnf->state);
1656
1657                 if (cnf == net->ipv4.devconf_dflt)
1658                         devinet_copy_dflt_conf(net, i);
1659                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1660                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1661                         if ((new_value == 0) && (old_value != 0))
1662                                 rt_cache_flush(net);
1663                 if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
1664                     new_value != old_value) {
1665                         int ifindex;
1666
1667                         if (cnf == net->ipv4.devconf_dflt)
1668                                 ifindex = NETCONFA_IFINDEX_DEFAULT;
1669                         else if (cnf == net->ipv4.devconf_all)
1670                                 ifindex = NETCONFA_IFINDEX_ALL;
1671                         else {
1672                                 struct in_device *idev =
1673                                         container_of(cnf, struct in_device,
1674                                                      cnf);
1675                                 ifindex = idev->dev->ifindex;
1676                         }
1677                         inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER,
1678                                                     ifindex, cnf);
1679                 }
1680         }
1681
1682         return ret;
1683 }
1684
1685 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1686                                   void __user *buffer,
1687                                   size_t *lenp, loff_t *ppos)
1688 {
1689         int *valp = ctl->data;
1690         int val = *valp;
1691         loff_t pos = *ppos;
1692         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1693
1694         if (write && *valp != val) {
1695                 struct net *net = ctl->extra2;
1696
1697                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1698                         if (!rtnl_trylock()) {
1699                                 /* Restore the original values before restarting */
1700                                 *valp = val;
1701                                 *ppos = pos;
1702                                 return restart_syscall();
1703                         }
1704                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1705                                 inet_forward_change(net);
1706                         } else {
1707                                 struct ipv4_devconf *cnf = ctl->extra1;
1708                                 struct in_device *idev =
1709                                         container_of(cnf, struct in_device, cnf);
1710                                 if (*valp)
1711                                         dev_disable_lro(idev->dev);
1712                                 inet_netconf_notify_devconf(net,
1713                                                             NETCONFA_FORWARDING,
1714                                                             idev->dev->ifindex,
1715                                                             cnf);
1716                         }
1717                         rtnl_unlock();
1718                         rt_cache_flush(net);
1719                 } else
1720                         inet_netconf_notify_devconf(net, NETCONFA_FORWARDING,
1721                                                     NETCONFA_IFINDEX_DEFAULT,
1722                                                     net->ipv4.devconf_dflt);
1723         }
1724
1725         return ret;
1726 }
1727
1728 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1729                                 void __user *buffer,
1730                                 size_t *lenp, loff_t *ppos)
1731 {
1732         int *valp = ctl->data;
1733         int val = *valp;
1734         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1735         struct net *net = ctl->extra2;
1736
1737         if (write && *valp != val)
1738                 rt_cache_flush(net);
1739
1740         return ret;
1741 }
1742
/*
 * Builders for the entries of the devinet sysctl template table.
 *
 * DEVINET_SYSCTL_ENTRY is the generic form: @attr selects the slot
 * IPV4_DEVCONF_<attr> - 1 in ipv4_devconf.data, @name is the proc file name,
 * @mval the file mode and @proc the handler.  extra1 points at ipv4_devconf.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc)                    \
        {                                                               \
                .procname = name,                                       \
                .data = ipv4_devconf.data + IPV4_DEVCONF_ ## attr - 1,  \
                .maxlen = sizeof(int),                                  \
                .mode = mval,                                           \
                .proc_handler = proc,                                   \
                .extra1 = &ipv4_devconf,                                \
        }

/* Mode 0644 entry handled by devinet_conf_proc. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name)                             \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name)                             \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc)                  \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name)                       \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1765
1766 static struct devinet_sysctl_table {
1767         struct ctl_table_header *sysctl_header;
1768         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1769 } devinet_sysctl = {
1770         .devinet_vars = {
1771                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1772                                              devinet_sysctl_forward),
1773                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1774
1775                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1776                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1777                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1778                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1779                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1780                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1781                                         "accept_source_route"),
1782                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1783                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1784                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1785                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1786                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1787                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1788                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1789                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1790                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1791                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1792                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1793                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1794                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1795
1796                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1797                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1798                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1799                                               "force_igmp_version"),
1800                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1801                                               "promote_secondaries"),
1802                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
1803                                               "route_localnet"),
1804         },
1805 };
1806
1807 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1808                                         struct ipv4_devconf *p)
1809 {
1810         int i;
1811         struct devinet_sysctl_table *t;
1812         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1813
1814         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1815         if (!t)
1816                 goto out;
1817
1818         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1819                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1820                 t->devinet_vars[i].extra1 = p;
1821                 t->devinet_vars[i].extra2 = net;
1822         }
1823
1824         /* Don't export sysctls to unprivileged users */
1825         if (net->user_ns != &init_user_ns)
1826                 t->devinet_vars[0].procname = NULL;
1827
1828         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1829
1830         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1831         if (!t->sysctl_header)
1832                 goto free;
1833
1834         p->sysctl = t;
1835         return 0;
1836
1837 free:
1838         kfree(t);
1839 out:
1840         return -ENOBUFS;
1841 }
1842
1843 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1844 {
1845         struct devinet_sysctl_table *t = cnf->sysctl;
1846
1847         if (t == NULL)
1848                 return;
1849
1850         cnf->sysctl = NULL;
1851         unregister_net_sysctl_table(t->sysctl_header);
1852         kfree(t);
1853 }
1854
1855 static void devinet_sysctl_register(struct in_device *idev)
1856 {
1857         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1858         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1859                                         &idev->cnf);
1860 }
1861
1862 static void devinet_sysctl_unregister(struct in_device *idev)
1863 {
1864         __devinet_sysctl_unregister(&idev->cnf);
1865         neigh_sysctl_unregister(idev->arp_parms);
1866 }
1867
1868 static struct ctl_table ctl_forward_entry[] = {
1869         {
1870                 .procname       = "ip_forward",
1871                 .data           = &ipv4_devconf.data[
1872                                         IPV4_DEVCONF_FORWARDING - 1],
1873                 .maxlen         = sizeof(int),
1874                 .mode           = 0644,
1875                 .proc_handler   = devinet_sysctl_forward,
1876                 .extra1         = &ipv4_devconf,
1877                 .extra2         = &init_net,
1878         },
1879         { },
1880 };
1881 #endif
1882
1883 static __net_init int devinet_init_net(struct net *net)
1884 {
1885         int err;
1886         struct ipv4_devconf *all, *dflt;
1887 #ifdef CONFIG_SYSCTL
1888         struct ctl_table *tbl = ctl_forward_entry;
1889         struct ctl_table_header *forw_hdr;
1890 #endif
1891
1892         err = -ENOMEM;
1893         all = &ipv4_devconf;
1894         dflt = &ipv4_devconf_dflt;
1895
1896         if (!net_eq(net, &init_net)) {
1897                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1898                 if (all == NULL)
1899                         goto err_alloc_all;
1900
1901                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1902                 if (dflt == NULL)
1903                         goto err_alloc_dflt;
1904
1905 #ifdef CONFIG_SYSCTL
1906                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1907                 if (tbl == NULL)
1908                         goto err_alloc_ctl;
1909
1910                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1911                 tbl[0].extra1 = all;
1912                 tbl[0].extra2 = net;
1913
1914                 /* Don't export sysctls to unprivileged users */
1915                 if (net->user_ns != &init_user_ns)
1916                         tbl[0].procname = NULL;
1917 #endif
1918         }
1919
1920 #ifdef CONFIG_SYSCTL
1921         err = __devinet_sysctl_register(net, "all", all);
1922         if (err < 0)
1923                 goto err_reg_all;
1924
1925         err = __devinet_sysctl_register(net, "default", dflt);
1926         if (err < 0)
1927                 goto err_reg_dflt;
1928
1929         err = -ENOMEM;
1930         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1931         if (forw_hdr == NULL)
1932                 goto err_reg_ctl;
1933         net->ipv4.forw_hdr = forw_hdr;
1934 #endif
1935
1936         net->ipv4.devconf_all = all;
1937         net->ipv4.devconf_dflt = dflt;
1938         return 0;
1939
1940 #ifdef CONFIG_SYSCTL
1941 err_reg_ctl:
1942         __devinet_sysctl_unregister(dflt);
1943 err_reg_dflt:
1944         __devinet_sysctl_unregister(all);
1945 err_reg_all:
1946         if (tbl != ctl_forward_entry)
1947                 kfree(tbl);
1948 err_alloc_ctl:
1949 #endif
1950         if (dflt != &ipv4_devconf_dflt)
1951                 kfree(dflt);
1952 err_alloc_dflt:
1953         if (all != &ipv4_devconf)
1954                 kfree(all);
1955 err_alloc_all:
1956         return err;
1957 }
1958
1959 static __net_exit void devinet_exit_net(struct net *net)
1960 {
1961 #ifdef CONFIG_SYSCTL
1962         struct ctl_table *tbl;
1963
1964         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1965         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1966         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1967         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1968         kfree(tbl);
1969 #endif
1970         kfree(net->ipv4.devconf_dflt);
1971         kfree(net->ipv4.devconf_all);
1972 }
1973
1974 static __net_initdata struct pernet_operations devinet_ops = {
1975         .init = devinet_init_net,
1976         .exit = devinet_exit_net,
1977 };
1978
1979 static struct rtnl_af_ops inet_af_ops = {
1980         .family           = AF_INET,
1981         .fill_link_af     = inet_fill_link_af,
1982         .get_link_af_size = inet_get_link_af_size,
1983         .validate_link_af = inet_validate_link_af,
1984         .set_link_af      = inet_set_link_af,
1985 };
1986
1987 void __init devinet_init(void)
1988 {
1989         int i;
1990
1991         for (i = 0; i < IN4_ADDR_HSIZE; i++)
1992                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1993
1994         register_pernet_subsys(&devinet_ops);
1995
1996         register_gifconf(PF_INET, inet_gifconf);
1997         register_netdevice_notifier(&ip_netdev_notifier);
1998
1999         rtnl_af_register(&inet_af_ops);
2000
2001         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
2002         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2003         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2004         rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2005                       NULL, NULL);
2006 }
2007