Pileus Git - ~andy/linux/blob - net/ipv4/devinet.c
Merge tag 'xtensa-next-20121008' of git://github.com/czankel/xtensa-linux
[~andy/linux] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <linux/bitops.h>
31 #include <linux/capability.h>
32 #include <linux/module.h>
33 #include <linux/types.h>
34 #include <linux/kernel.h>
35 #include <linux/string.h>
36 #include <linux/mm.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <linux/in.h>
40 #include <linux/errno.h>
41 #include <linux/interrupt.h>
42 #include <linux/if_addr.h>
43 #include <linux/if_ether.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/etherdevice.h>
47 #include <linux/skbuff.h>
48 #include <linux/init.h>
49 #include <linux/notifier.h>
50 #include <linux/inetdevice.h>
51 #include <linux/igmp.h>
52 #include <linux/slab.h>
53 #include <linux/hash.h>
54 #ifdef CONFIG_SYSCTL
55 #include <linux/sysctl.h>
56 #endif
57 #include <linux/kmod.h>
58
59 #include <net/arp.h>
60 #include <net/ip.h>
61 #include <net/route.h>
62 #include <net/ip_fib.h>
63 #include <net/rtnetlink.h>
64 #include <net/net_namespace.h>
65
66 #include "fib_lookup.h"
67
/*
 * Built-in devconf values: the four listed entries are preset to 1, all
 * other entries of .data are implicitly zero-initialized.
 * NOTE(review): presumably backs the "all" configuration of the initial
 * namespace; its users are outside this part of the file - confirm.
 */
static struct ipv4_devconf ipv4_devconf = {
        .data = {
                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
        },
};
76
/*
 * Built-in "default" devconf values. Same presets as ipv4_devconf plus
 * ACCEPT_SOURCE_ROUTE; every entry not listed is zero.
 * NOTE(review): presumably the template reached through
 * net->ipv4.devconf_dflt for the initial namespace - confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
        .data = {
                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
        },
};
86
/*
 * Access attribute @attr in the default devconf of namespace @net.
 * @net is parenthesized so that any pointer-valued expression may be
 * passed without the '*' and '->' binding to only part of it.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
        IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
89
/*
 * Netlink attribute policy handed to nlmsg_parse() by inet_rtm_deladdr()
 * and rtm_to_ifaddr(): the three address attributes are 32-bit values and
 * the label is a string of at most IFNAMSIZ - 1 bytes.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
        [IFA_LOCAL]             = { .type = NLA_U32 },
        [IFA_ADDRESS]           = { .type = NLA_U32 },
        [IFA_BROADCAST]         = { .type = NLA_U32 },
        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
96
/* The address hash table has 2^IN4_ADDR_HSIZE_SHIFT buckets. */
#define IN4_ADDR_HSIZE_SHIFT    8
#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)

/*
 * Hash table of in_ifaddr entries, indexed by inet_addr_hash() of the
 * local address. Insertions and removals take inet_addr_hash_lock;
 * __ip_dev_find() walks a bucket under rcu_read_lock() only.
 */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);
102
103 static u32 inet_addr_hash(struct net *net, __be32 addr)
104 {
105         u32 val = (__force u32) addr ^ net_hash_mix(net);
106
107         return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
108 }
109
110 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
111 {
112         u32 hash = inet_addr_hash(net, ifa->ifa_local);
113
114         spin_lock(&inet_addr_hash_lock);
115         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
116         spin_unlock(&inet_addr_hash_lock);
117 }
118
119 static void inet_hash_remove(struct in_ifaddr *ifa)
120 {
121         spin_lock(&inet_addr_hash_lock);
122         hlist_del_init_rcu(&ifa->hash);
123         spin_unlock(&inet_addr_hash_lock);
124 }
125
126 /**
127  * __ip_dev_find - find the first device with a given source address.
128  * @net: the net namespace
129  * @addr: the source address
130  * @devref: if true, take a reference on the found device
131  *
132  * If a caller uses devref=false, it should be protected by RCU, or RTNL
133  */
134 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
135 {
136         u32 hash = inet_addr_hash(net, addr);
137         struct net_device *result = NULL;
138         struct in_ifaddr *ifa;
139         struct hlist_node *node;
140
141         rcu_read_lock();
142         hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
143                 if (ifa->ifa_local == addr) {
144                         struct net_device *dev = ifa->ifa_dev->dev;
145
146                         if (!net_eq(dev_net(dev), net))
147                                 continue;
148                         result = dev;
149                         break;
150                 }
151         }
152         if (!result) {
153                 struct flowi4 fl4 = { .daddr = addr };
154                 struct fib_result res = { 0 };
155                 struct fib_table *local;
156
157                 /* Fallback to FIB local table so that communication
158                  * over loopback subnets work.
159                  */
160                 local = fib_get_table(net, RT_TABLE_LOCAL);
161                 if (local &&
162                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
163                     res.type == RTN_LOCAL)
164                         result = FIB_RES_DEV(res);
165         }
166         if (result && devref)
167                 dev_hold(result);
168         rcu_read_unlock();
169         return result;
170 }
171 EXPORT_SYMBOL(__ip_dev_find);
172
173 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
174
175 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
176 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
177                          int destroy);
178 #ifdef CONFIG_SYSCTL
179 static void devinet_sysctl_register(struct in_device *idev);
180 static void devinet_sysctl_unregister(struct in_device *idev);
181 #else
182 static void devinet_sysctl_register(struct in_device *idev)
183 {
184 }
185 static void devinet_sysctl_unregister(struct in_device *idev)
186 {
187 }
188 #endif
189
190 /* Locks all the inet devices. */
191
192 static struct in_ifaddr *inet_alloc_ifa(void)
193 {
194         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
195 }
196
197 static void inet_rcu_free_ifa(struct rcu_head *head)
198 {
199         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
200         if (ifa->ifa_dev)
201                 in_dev_put(ifa->ifa_dev);
202         kfree(ifa);
203 }
204
205 static void inet_free_ifa(struct in_ifaddr *ifa)
206 {
207         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
208 }
209
210 void in_dev_finish_destroy(struct in_device *idev)
211 {
212         struct net_device *dev = idev->dev;
213
214         WARN_ON(idev->ifa_list);
215         WARN_ON(idev->mc_list);
216 #ifdef NET_REFCNT_DEBUG
217         pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
218 #endif
219         dev_put(dev);
220         if (!idev->dead)
221                 pr_err("Freeing alive in_device %p\n", idev);
222         else
223                 kfree(idev);
224 }
225 EXPORT_SYMBOL(in_dev_finish_destroy);
226
227 static struct in_device *inetdev_init(struct net_device *dev)
228 {
229         struct in_device *in_dev;
230
231         ASSERT_RTNL();
232
233         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
234         if (!in_dev)
235                 goto out;
236         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
237                         sizeof(in_dev->cnf));
238         in_dev->cnf.sysctl = NULL;
239         in_dev->dev = dev;
240         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
241         if (!in_dev->arp_parms)
242                 goto out_kfree;
243         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
244                 dev_disable_lro(dev);
245         /* Reference in_dev->dev */
246         dev_hold(dev);
247         /* Account for reference dev->ip_ptr (below) */
248         in_dev_hold(in_dev);
249
250         devinet_sysctl_register(in_dev);
251         ip_mc_init_dev(in_dev);
252         if (dev->flags & IFF_UP)
253                 ip_mc_up(in_dev);
254
255         /* we can receive as soon as ip_ptr is set -- do this last */
256         rcu_assign_pointer(dev->ip_ptr, in_dev);
257 out:
258         return in_dev;
259 out_kfree:
260         kfree(in_dev);
261         in_dev = NULL;
262         goto out;
263 }
264
265 static void in_dev_rcu_put(struct rcu_head *head)
266 {
267         struct in_device *idev = container_of(head, struct in_device, rcu_head);
268         in_dev_put(idev);
269 }
270
271 static void inetdev_destroy(struct in_device *in_dev)
272 {
273         struct in_ifaddr *ifa;
274         struct net_device *dev;
275
276         ASSERT_RTNL();
277
278         dev = in_dev->dev;
279
280         in_dev->dead = 1;
281
282         ip_mc_destroy_dev(in_dev);
283
284         while ((ifa = in_dev->ifa_list) != NULL) {
285                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
286                 inet_free_ifa(ifa);
287         }
288
289         RCU_INIT_POINTER(dev->ip_ptr, NULL);
290
291         devinet_sysctl_unregister(in_dev);
292         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
293         arp_ifdown(dev);
294
295         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
296 }
297
298 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
299 {
300         rcu_read_lock();
301         for_primary_ifa(in_dev) {
302                 if (inet_ifa_match(a, ifa)) {
303                         if (!b || inet_ifa_match(b, ifa)) {
304                                 rcu_read_unlock();
305                                 return 1;
306                         }
307                 }
308         } endfor_ifa(in_dev);
309         rcu_read_unlock();
310         return 0;
311 }
312
/*
 * Remove the address *@ifap from @in_dev. Must be called with RTNL held.
 *
 * @in_dev:  device state owning the address list
 * @ifap:    link (list head or previous ifa_next) pointing at the address
 * @destroy: when non-zero the removed address is released with
 *           inet_free_ifa(); otherwise the caller keeps ownership
 * @nlh:     netlink request header forwarded to rtmsg_ifa() (may be NULL)
 * @portid:  netlink port id forwarded to rtmsg_ifa()
 *
 * If the address is a primary one, the secondaries of the same subnet are
 * either deleted as well or, when IN_DEV_PROMOTE_SECONDARIES() is set,
 * the first of them is promoted to primary.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
                         int destroy, struct nlmsghdr *nlh, u32 portid)
{
        /* First matching secondary, chosen for promotion (if any). */
        struct in_ifaddr *promote = NULL;
        struct in_ifaddr *ifa, *ifa1 = *ifap;
        /* Entry after which a promoted address is re-inserted. */
        struct in_ifaddr *last_prim = in_dev->ifa_list;
        /* Last entry skipped before 'promote'; NULL if none was skipped. */
        struct in_ifaddr *prev_prom = NULL;
        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

        ASSERT_RTNL();

        /* 1. Deleting a primary ifaddr forces deletion of all its
         * secondaries unless alias promotion is set.
         **/

        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
                struct in_ifaddr **ifap1 = &ifa1->ifa_next;

                while ((ifa = *ifap1) != NULL) {
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
                            ifa1->ifa_scope <= ifa->ifa_scope)
                                last_prim = ifa;

                        /* Skip primaries and secondaries of other subnets. */
                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
                            ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
                                ifap1 = &ifa->ifa_next;
                                prev_prom = ifa;
                                continue;
                        }

                        if (!do_promote) {
                                /* Unlink, announce and free this secondary. */
                                inet_hash_remove(ifa);
                                *ifap1 = ifa->ifa_next;

                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
                                blocking_notifier_call_chain(&inetaddr_chain,
                                                NETDEV_DOWN, ifa);
                                inet_free_ifa(ifa);
                        } else {
                                promote = ifa;
                                break;
                        }
                }
        }

        /* On promotion all secondaries from the subnet are changing
         * their primary IP; we must remove all their routes silently
         * and later add them back with the new prefsrc. Do this
         * while all addresses are still on the device list.
         */
        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
                if (ifa1->ifa_mask == ifa->ifa_mask &&
                    inet_ifa_match(ifa1->ifa_address, ifa))
                        fib_del_ifaddr(ifa, ifa1);
        }

        /* 2. Unlink it */

        *ifap = ifa1->ifa_next;
        inet_hash_remove(ifa1);

        /* 3. Announce address deletion */

        /* Send message first, then call notifier.
           At first sight, FIB update triggered by notifier
           will refer to already deleted ifaddr, that could confuse
           netlink listeners. It is not true: look, gated sees
           that route deleted and if it still thinks that ifaddr
           is valid, it will try to restore deleted routes... Grr.
           So that, this order is correct.
         */
        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

        if (promote) {
                /* Saved before 'promote' is relinked below. */
                struct in_ifaddr *next_sec = promote->ifa_next;

                /* Move 'promote' to just after last_prim; with no skipped
                 * entry it is left where it already sits in the list.
                 */
                if (prev_prom) {
                        prev_prom->ifa_next = promote->ifa_next;
                        promote->ifa_next = last_prim->ifa_next;
                        last_prim->ifa_next = promote;
                }

                promote->ifa_flags &= ~IFA_F_SECONDARY;
                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
                blocking_notifier_call_chain(&inetaddr_chain,
                                NETDEV_UP, promote);
                /* Re-add routes for the remaining secondaries of the subnet. */
                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
                        if (ifa1->ifa_mask != ifa->ifa_mask ||
                            !inet_ifa_match(ifa1->ifa_address, ifa))
                                        continue;
                        fib_add_ifaddr(ifa);
                }

        }
        if (destroy)
                inet_free_ifa(ifa1);
}
412
413 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
414                          int destroy)
415 {
416         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
417 }
418
419 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
420                              u32 portid)
421 {
422         struct in_device *in_dev = ifa->ifa_dev;
423         struct in_ifaddr *ifa1, **ifap, **last_primary;
424
425         ASSERT_RTNL();
426
427         if (!ifa->ifa_local) {
428                 inet_free_ifa(ifa);
429                 return 0;
430         }
431
432         ifa->ifa_flags &= ~IFA_F_SECONDARY;
433         last_primary = &in_dev->ifa_list;
434
435         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
436              ifap = &ifa1->ifa_next) {
437                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
438                     ifa->ifa_scope <= ifa1->ifa_scope)
439                         last_primary = &ifa1->ifa_next;
440                 if (ifa1->ifa_mask == ifa->ifa_mask &&
441                     inet_ifa_match(ifa1->ifa_address, ifa)) {
442                         if (ifa1->ifa_local == ifa->ifa_local) {
443                                 inet_free_ifa(ifa);
444                                 return -EEXIST;
445                         }
446                         if (ifa1->ifa_scope != ifa->ifa_scope) {
447                                 inet_free_ifa(ifa);
448                                 return -EINVAL;
449                         }
450                         ifa->ifa_flags |= IFA_F_SECONDARY;
451                 }
452         }
453
454         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
455                 net_srandom(ifa->ifa_local);
456                 ifap = last_primary;
457         }
458
459         ifa->ifa_next = *ifap;
460         *ifap = ifa;
461
462         inet_hash_insert(dev_net(in_dev->dev), ifa);
463
464         /* Send message first, then call notifier.
465            Notifier will trigger FIB update, so that
466            listeners of netlink will know about new ifaddr */
467         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
468         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
469
470         return 0;
471 }
472
473 static int inet_insert_ifa(struct in_ifaddr *ifa)
474 {
475         return __inet_insert_ifa(ifa, NULL, 0);
476 }
477
478 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
479 {
480         struct in_device *in_dev = __in_dev_get_rtnl(dev);
481
482         ASSERT_RTNL();
483
484         if (!in_dev) {
485                 inet_free_ifa(ifa);
486                 return -ENOBUFS;
487         }
488         ipv4_devconf_setall(in_dev);
489         if (ifa->ifa_dev != in_dev) {
490                 WARN_ON(ifa->ifa_dev);
491                 in_dev_hold(in_dev);
492                 ifa->ifa_dev = in_dev;
493         }
494         if (ipv4_is_loopback(ifa->ifa_local))
495                 ifa->ifa_scope = RT_SCOPE_HOST;
496         return inet_insert_ifa(ifa);
497 }
498
499 /* Caller must hold RCU or RTNL :
500  * We dont take a reference on found in_device
501  */
502 struct in_device *inetdev_by_index(struct net *net, int ifindex)
503 {
504         struct net_device *dev;
505         struct in_device *in_dev = NULL;
506
507         rcu_read_lock();
508         dev = dev_get_by_index_rcu(net, ifindex);
509         if (dev)
510                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
511         rcu_read_unlock();
512         return in_dev;
513 }
514 EXPORT_SYMBOL(inetdev_by_index);
515
516 /* Called only from RTNL semaphored context. No locks. */
517
518 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
519                                     __be32 mask)
520 {
521         ASSERT_RTNL();
522
523         for_primary_ifa(in_dev) {
524                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
525                         return ifa;
526         } endfor_ifa(in_dev);
527         return NULL;
528 }
529
530 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
531 {
532         struct net *net = sock_net(skb->sk);
533         struct nlattr *tb[IFA_MAX+1];
534         struct in_device *in_dev;
535         struct ifaddrmsg *ifm;
536         struct in_ifaddr *ifa, **ifap;
537         int err = -EINVAL;
538
539         ASSERT_RTNL();
540
541         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
542         if (err < 0)
543                 goto errout;
544
545         ifm = nlmsg_data(nlh);
546         in_dev = inetdev_by_index(net, ifm->ifa_index);
547         if (in_dev == NULL) {
548                 err = -ENODEV;
549                 goto errout;
550         }
551
552         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
553              ifap = &ifa->ifa_next) {
554                 if (tb[IFA_LOCAL] &&
555                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
556                         continue;
557
558                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
559                         continue;
560
561                 if (tb[IFA_ADDRESS] &&
562                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
563                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
564                         continue;
565
566                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
567                 return 0;
568         }
569
570         err = -EADDRNOTAVAIL;
571 errout:
572         return err;
573 }
574
575 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
576 {
577         struct nlattr *tb[IFA_MAX+1];
578         struct in_ifaddr *ifa;
579         struct ifaddrmsg *ifm;
580         struct net_device *dev;
581         struct in_device *in_dev;
582         int err;
583
584         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
585         if (err < 0)
586                 goto errout;
587
588         ifm = nlmsg_data(nlh);
589         err = -EINVAL;
590         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
591                 goto errout;
592
593         dev = __dev_get_by_index(net, ifm->ifa_index);
594         err = -ENODEV;
595         if (dev == NULL)
596                 goto errout;
597
598         in_dev = __in_dev_get_rtnl(dev);
599         err = -ENOBUFS;
600         if (in_dev == NULL)
601                 goto errout;
602
603         ifa = inet_alloc_ifa();
604         if (ifa == NULL)
605                 /*
606                  * A potential indev allocation can be left alive, it stays
607                  * assigned to its device and is destroy with it.
608                  */
609                 goto errout;
610
611         ipv4_devconf_setall(in_dev);
612         in_dev_hold(in_dev);
613
614         if (tb[IFA_ADDRESS] == NULL)
615                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
616
617         INIT_HLIST_NODE(&ifa->hash);
618         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
619         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
620         ifa->ifa_flags = ifm->ifa_flags;
621         ifa->ifa_scope = ifm->ifa_scope;
622         ifa->ifa_dev = in_dev;
623
624         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
625         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
626
627         if (tb[IFA_BROADCAST])
628                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
629
630         if (tb[IFA_LABEL])
631                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
632         else
633                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
634
635         return ifa;
636
637 errout:
638         return ERR_PTR(err);
639 }
640
641 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
642 {
643         struct net *net = sock_net(skb->sk);
644         struct in_ifaddr *ifa;
645
646         ASSERT_RTNL();
647
648         ifa = rtm_to_ifaddr(net, nlh);
649         if (IS_ERR(ifa))
650                 return PTR_ERR(ifa);
651
652         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
653 }
654
655 /*
656  *      Determine a default network mask, based on the IP address.
657  */
658
659 static int inet_abc_len(__be32 addr)
660 {
661         int rc = -1;    /* Something else, probably a multicast. */
662
663         if (ipv4_is_zeronet(addr))
664                 rc = 0;
665         else {
666                 __u32 haddr = ntohl(addr);
667
668                 if (IN_CLASSA(haddr))
669                         rc = 8;
670                 else if (IN_CLASSB(haddr))
671                         rc = 16;
672                 else if (IN_CLASSC(haddr))
673                         rc = 24;
674         }
675
676         return rc;
677 }
678
679
680 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
681 {
682         struct ifreq ifr;
683         struct sockaddr_in sin_orig;
684         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
685         struct in_device *in_dev;
686         struct in_ifaddr **ifap = NULL;
687         struct in_ifaddr *ifa = NULL;
688         struct net_device *dev;
689         char *colon;
690         int ret = -EFAULT;
691         int tryaddrmatch = 0;
692
693         /*
694          *      Fetch the caller's info block into kernel space
695          */
696
697         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
698                 goto out;
699         ifr.ifr_name[IFNAMSIZ - 1] = 0;
700
701         /* save original address for comparison */
702         memcpy(&sin_orig, sin, sizeof(*sin));
703
704         colon = strchr(ifr.ifr_name, ':');
705         if (colon)
706                 *colon = 0;
707
708         dev_load(net, ifr.ifr_name);
709
710         switch (cmd) {
711         case SIOCGIFADDR:       /* Get interface address */
712         case SIOCGIFBRDADDR:    /* Get the broadcast address */
713         case SIOCGIFDSTADDR:    /* Get the destination address */
714         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
715                 /* Note that these ioctls will not sleep,
716                    so that we do not impose a lock.
717                    One day we will be forced to put shlock here (I mean SMP)
718                  */
719                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
720                 memset(sin, 0, sizeof(*sin));
721                 sin->sin_family = AF_INET;
722                 break;
723
724         case SIOCSIFFLAGS:
725                 ret = -EPERM;
726                 if (!capable(CAP_NET_ADMIN))
727                         goto out;
728                 break;
729         case SIOCSIFADDR:       /* Set interface address (and family) */
730         case SIOCSIFBRDADDR:    /* Set the broadcast address */
731         case SIOCSIFDSTADDR:    /* Set the destination address */
732         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
733                 ret = -EPERM;
734                 if (!capable(CAP_NET_ADMIN))
735                         goto out;
736                 ret = -EINVAL;
737                 if (sin->sin_family != AF_INET)
738                         goto out;
739                 break;
740         default:
741                 ret = -EINVAL;
742                 goto out;
743         }
744
745         rtnl_lock();
746
747         ret = -ENODEV;
748         dev = __dev_get_by_name(net, ifr.ifr_name);
749         if (!dev)
750                 goto done;
751
752         if (colon)
753                 *colon = ':';
754
755         in_dev = __in_dev_get_rtnl(dev);
756         if (in_dev) {
757                 if (tryaddrmatch) {
758                         /* Matthias Andree */
759                         /* compare label and address (4.4BSD style) */
760                         /* note: we only do this for a limited set of ioctls
761                            and only if the original address family was AF_INET.
762                            This is checked above. */
763                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
764                              ifap = &ifa->ifa_next) {
765                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
766                                     sin_orig.sin_addr.s_addr ==
767                                                         ifa->ifa_local) {
768                                         break; /* found */
769                                 }
770                         }
771                 }
772                 /* we didn't get a match, maybe the application is
773                    4.3BSD-style and passed in junk so we fall back to
774                    comparing just the label */
775                 if (!ifa) {
776                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
777                              ifap = &ifa->ifa_next)
778                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
779                                         break;
780                 }
781         }
782
783         ret = -EADDRNOTAVAIL;
784         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
785                 goto done;
786
787         switch (cmd) {
788         case SIOCGIFADDR:       /* Get interface address */
789                 sin->sin_addr.s_addr = ifa->ifa_local;
790                 goto rarok;
791
792         case SIOCGIFBRDADDR:    /* Get the broadcast address */
793                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
794                 goto rarok;
795
796         case SIOCGIFDSTADDR:    /* Get the destination address */
797                 sin->sin_addr.s_addr = ifa->ifa_address;
798                 goto rarok;
799
800         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
801                 sin->sin_addr.s_addr = ifa->ifa_mask;
802                 goto rarok;
803
804         case SIOCSIFFLAGS:
805                 if (colon) {
806                         ret = -EADDRNOTAVAIL;
807                         if (!ifa)
808                                 break;
809                         ret = 0;
810                         if (!(ifr.ifr_flags & IFF_UP))
811                                 inet_del_ifa(in_dev, ifap, 1);
812                         break;
813                 }
814                 ret = dev_change_flags(dev, ifr.ifr_flags);
815                 break;
816
817         case SIOCSIFADDR:       /* Set interface address (and family) */
818                 ret = -EINVAL;
819                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
820                         break;
821
822                 if (!ifa) {
823                         ret = -ENOBUFS;
824                         ifa = inet_alloc_ifa();
825                         INIT_HLIST_NODE(&ifa->hash);
826                         if (!ifa)
827                                 break;
828                         if (colon)
829                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
830                         else
831                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
832                 } else {
833                         ret = 0;
834                         if (ifa->ifa_local == sin->sin_addr.s_addr)
835                                 break;
836                         inet_del_ifa(in_dev, ifap, 0);
837                         ifa->ifa_broadcast = 0;
838                         ifa->ifa_scope = 0;
839                 }
840
841                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
842
843                 if (!(dev->flags & IFF_POINTOPOINT)) {
844                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
845                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
846                         if ((dev->flags & IFF_BROADCAST) &&
847                             ifa->ifa_prefixlen < 31)
848                                 ifa->ifa_broadcast = ifa->ifa_address |
849                                                      ~ifa->ifa_mask;
850                 } else {
851                         ifa->ifa_prefixlen = 32;
852                         ifa->ifa_mask = inet_make_mask(32);
853                 }
854                 ret = inet_set_ifa(dev, ifa);
855                 break;
856
857         case SIOCSIFBRDADDR:    /* Set the broadcast address */
858                 ret = 0;
859                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
860                         inet_del_ifa(in_dev, ifap, 0);
861                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
862                         inet_insert_ifa(ifa);
863                 }
864                 break;
865
866         case SIOCSIFDSTADDR:    /* Set the destination address */
867                 ret = 0;
868                 if (ifa->ifa_address == sin->sin_addr.s_addr)
869                         break;
870                 ret = -EINVAL;
871                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
872                         break;
873                 ret = 0;
874                 inet_del_ifa(in_dev, ifap, 0);
875                 ifa->ifa_address = sin->sin_addr.s_addr;
876                 inet_insert_ifa(ifa);
877                 break;
878
879         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
880
881                 /*
882                  *      The mask we set must be legal.
883                  */
884                 ret = -EINVAL;
885                 if (bad_mask(sin->sin_addr.s_addr, 0))
886                         break;
887                 ret = 0;
888                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
889                         __be32 old_mask = ifa->ifa_mask;
890                         inet_del_ifa(in_dev, ifap, 0);
891                         ifa->ifa_mask = sin->sin_addr.s_addr;
892                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
893
894                         /* See if current broadcast address matches
895                          * with current netmask, then recalculate
896                          * the broadcast address. Otherwise it's a
897                          * funny address, so don't touch it since
898                          * the user seems to know what (s)he's doing...
899                          */
900                         if ((dev->flags & IFF_BROADCAST) &&
901                             (ifa->ifa_prefixlen < 31) &&
902                             (ifa->ifa_broadcast ==
903                              (ifa->ifa_local|~old_mask))) {
904                                 ifa->ifa_broadcast = (ifa->ifa_local |
905                                                       ~sin->sin_addr.s_addr);
906                         }
907                         inet_insert_ifa(ifa);
908                 }
909                 break;
910         }
911 done:
912         rtnl_unlock();
913 out:
914         return ret;
915 rarok:
916         rtnl_unlock();
917         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
918         goto out;
919 }
920
921 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
922 {
923         struct in_device *in_dev = __in_dev_get_rtnl(dev);
924         struct in_ifaddr *ifa;
925         struct ifreq ifr;
926         int done = 0;
927
928         if (!in_dev)
929                 goto out;
930
931         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
932                 if (!buf) {
933                         done += sizeof(ifr);
934                         continue;
935                 }
936                 if (len < (int) sizeof(ifr))
937                         break;
938                 memset(&ifr, 0, sizeof(struct ifreq));
939                 if (ifa->ifa_label)
940                         strcpy(ifr.ifr_name, ifa->ifa_label);
941                 else
942                         strcpy(ifr.ifr_name, dev->name);
943
944                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
945                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
946                                                                 ifa->ifa_local;
947
948                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
949                         done = -EFAULT;
950                         break;
951                 }
952                 buf  += sizeof(struct ifreq);
953                 len  -= sizeof(struct ifreq);
954                 done += sizeof(struct ifreq);
955         }
956 out:
957         return done;
958 }
959
960 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
961 {
962         __be32 addr = 0;
963         struct in_device *in_dev;
964         struct net *net = dev_net(dev);
965
966         rcu_read_lock();
967         in_dev = __in_dev_get_rcu(dev);
968         if (!in_dev)
969                 goto no_in_dev;
970
971         for_primary_ifa(in_dev) {
972                 if (ifa->ifa_scope > scope)
973                         continue;
974                 if (!dst || inet_ifa_match(dst, ifa)) {
975                         addr = ifa->ifa_local;
976                         break;
977                 }
978                 if (!addr)
979                         addr = ifa->ifa_local;
980         } endfor_ifa(in_dev);
981
982         if (addr)
983                 goto out_unlock;
984 no_in_dev:
985
986         /* Not loopback addresses on loopback should be preferred
987            in this case. It is importnat that lo is the first interface
988            in dev_base list.
989          */
990         for_each_netdev_rcu(net, dev) {
991                 in_dev = __in_dev_get_rcu(dev);
992                 if (!in_dev)
993                         continue;
994
995                 for_primary_ifa(in_dev) {
996                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
997                             ifa->ifa_scope <= scope) {
998                                 addr = ifa->ifa_local;
999                                 goto out_unlock;
1000                         }
1001                 } endfor_ifa(in_dev);
1002         }
1003 out_unlock:
1004         rcu_read_unlock();
1005         return addr;
1006 }
1007 EXPORT_SYMBOL(inet_select_addr);
1008
1009 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1010                               __be32 local, int scope)
1011 {
1012         int same = 0;
1013         __be32 addr = 0;
1014
1015         for_ifa(in_dev) {
1016                 if (!addr &&
1017                     (local == ifa->ifa_local || !local) &&
1018                     ifa->ifa_scope <= scope) {
1019                         addr = ifa->ifa_local;
1020                         if (same)
1021                                 break;
1022                 }
1023                 if (!same) {
1024                         same = (!local || inet_ifa_match(local, ifa)) &&
1025                                 (!dst || inet_ifa_match(dst, ifa));
1026                         if (same && addr) {
1027                                 if (local || !dst)
1028                                         break;
1029                                 /* Is the selected addr into dst subnet? */
1030                                 if (inet_ifa_match(addr, ifa))
1031                                         break;
1032                                 /* No, then can we use new local src? */
1033                                 if (ifa->ifa_scope <= scope) {
1034                                         addr = ifa->ifa_local;
1035                                         break;
1036                                 }
1037                                 /* search for large dst subnet for addr */
1038                                 same = 0;
1039                         }
1040                 }
1041         } endfor_ifa(in_dev);
1042
1043         return same ? addr : 0;
1044 }
1045
1046 /*
1047  * Confirm that local IP address exists using wildcards:
1048  * - in_dev: only on this interface, 0=any interface
1049  * - dst: only in the same subnet as dst, 0=any dst
1050  * - local: address, 0=autoselect the local address
1051  * - scope: maximum allowed scope value for the local address
1052  */
1053 __be32 inet_confirm_addr(struct in_device *in_dev,
1054                          __be32 dst, __be32 local, int scope)
1055 {
1056         __be32 addr = 0;
1057         struct net_device *dev;
1058         struct net *net;
1059
1060         if (scope != RT_SCOPE_LINK)
1061                 return confirm_addr_indev(in_dev, dst, local, scope);
1062
1063         net = dev_net(in_dev->dev);
1064         rcu_read_lock();
1065         for_each_netdev_rcu(net, dev) {
1066                 in_dev = __in_dev_get_rcu(dev);
1067                 if (in_dev) {
1068                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1069                         if (addr)
1070                                 break;
1071                 }
1072         }
1073         rcu_read_unlock();
1074
1075         return addr;
1076 }
1077 EXPORT_SYMBOL(inet_confirm_addr);
1078
1079 /*
1080  *      Device notifier
1081  */
1082
1083 int register_inetaddr_notifier(struct notifier_block *nb)
1084 {
1085         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1086 }
1087 EXPORT_SYMBOL(register_inetaddr_notifier);
1088
1089 int unregister_inetaddr_notifier(struct notifier_block *nb)
1090 {
1091         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1092 }
1093 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1094
1095 /* Rename ifa_labels for a device name change. Make some effort to preserve
1096  * existing alias numbering and to create unique labels if possible.
1097 */
1098 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1099 {
1100         struct in_ifaddr *ifa;
1101         int named = 0;
1102
1103         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1104                 char old[IFNAMSIZ], *dot;
1105
1106                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1107                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1108                 if (named++ == 0)
1109                         goto skip;
1110                 dot = strchr(old, ':');
1111                 if (dot == NULL) {
1112                         sprintf(old, ":%d", named);
1113                         dot = old;
1114                 }
1115                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1116                         strcat(ifa->ifa_label, dot);
1117                 else
1118                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1119 skip:
1120                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1121         }
1122 }
1123
/*
 * Smallest device MTU for which IPv4 stays enabled.  68 octets is the
 * datagram size RFC 791 requires every internet module to be able to
 * forward without further fragmentation.
 */
#define INETDEV_MIN_MTU 68

/*
 * Tell whether @mtu is large enough to run IPv4 on a device.
 * Returns true when @mtu is at least INETDEV_MIN_MTU.
 */
static bool inetdev_valid_mtu(unsigned int mtu)
{
	return mtu >= INETDEV_MIN_MTU;
}
1128
1129 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1130                                         struct in_device *in_dev)
1131
1132 {
1133         struct in_ifaddr *ifa;
1134
1135         for (ifa = in_dev->ifa_list; ifa;
1136              ifa = ifa->ifa_next) {
1137                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1138                          ifa->ifa_local, dev,
1139                          ifa->ifa_local, NULL,
1140                          dev->dev_addr, NULL);
1141         }
1142 }
1143
1144 /* Called only under RTNL semaphore */
1145
1146 static int inetdev_event(struct notifier_block *this, unsigned long event,
1147                          void *ptr)
1148 {
1149         struct net_device *dev = ptr;
1150         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1151
1152         ASSERT_RTNL();
1153
1154         if (!in_dev) {
1155                 if (event == NETDEV_REGISTER) {
1156                         in_dev = inetdev_init(dev);
1157                         if (!in_dev)
1158                                 return notifier_from_errno(-ENOMEM);
1159                         if (dev->flags & IFF_LOOPBACK) {
1160                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1161                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1162                         }
1163                 } else if (event == NETDEV_CHANGEMTU) {
1164                         /* Re-enabling IP */
1165                         if (inetdev_valid_mtu(dev->mtu))
1166                                 in_dev = inetdev_init(dev);
1167                 }
1168                 goto out;
1169         }
1170
1171         switch (event) {
1172         case NETDEV_REGISTER:
1173                 pr_debug("%s: bug\n", __func__);
1174                 RCU_INIT_POINTER(dev->ip_ptr, NULL);
1175                 break;
1176         case NETDEV_UP:
1177                 if (!inetdev_valid_mtu(dev->mtu))
1178                         break;
1179                 if (dev->flags & IFF_LOOPBACK) {
1180                         struct in_ifaddr *ifa = inet_alloc_ifa();
1181
1182                         if (ifa) {
1183                                 INIT_HLIST_NODE(&ifa->hash);
1184                                 ifa->ifa_local =
1185                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1186                                 ifa->ifa_prefixlen = 8;
1187                                 ifa->ifa_mask = inet_make_mask(8);
1188                                 in_dev_hold(in_dev);
1189                                 ifa->ifa_dev = in_dev;
1190                                 ifa->ifa_scope = RT_SCOPE_HOST;
1191                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1192                                 inet_insert_ifa(ifa);
1193                         }
1194                 }
1195                 ip_mc_up(in_dev);
1196                 /* fall through */
1197         case NETDEV_CHANGEADDR:
1198                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1199                         break;
1200                 /* fall through */
1201         case NETDEV_NOTIFY_PEERS:
1202                 /* Send gratuitous ARP to notify of link change */
1203                 inetdev_send_gratuitous_arp(dev, in_dev);
1204                 break;
1205         case NETDEV_DOWN:
1206                 ip_mc_down(in_dev);
1207                 break;
1208         case NETDEV_PRE_TYPE_CHANGE:
1209                 ip_mc_unmap(in_dev);
1210                 break;
1211         case NETDEV_POST_TYPE_CHANGE:
1212                 ip_mc_remap(in_dev);
1213                 break;
1214         case NETDEV_CHANGEMTU:
1215                 if (inetdev_valid_mtu(dev->mtu))
1216                         break;
1217                 /* disable IP when MTU is not enough */
1218         case NETDEV_UNREGISTER:
1219                 inetdev_destroy(in_dev);
1220                 break;
1221         case NETDEV_CHANGENAME:
1222                 /* Do not notify about label change, this event is
1223                  * not interesting to applications using netlink.
1224                  */
1225                 inetdev_changename(dev, in_dev);
1226
1227                 devinet_sysctl_unregister(in_dev);
1228                 devinet_sysctl_register(in_dev);
1229                 break;
1230         }
1231 out:
1232         return NOTIFY_DONE;
1233 }
1234
/* Notifier block whose callback is inetdev_event(). */
1235 static struct notifier_block ip_netdev_notifier = {
1236         .notifier_call = inetdev_event,
1237 };
1238
1239 static size_t inet_nlmsg_size(void)
1240 {
1241         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1242                + nla_total_size(4) /* IFA_ADDRESS */
1243                + nla_total_size(4) /* IFA_LOCAL */
1244                + nla_total_size(4) /* IFA_BROADCAST */
1245                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1246 }
1247
1248 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1249                             u32 portid, u32 seq, int event, unsigned int flags)
1250 {
1251         struct ifaddrmsg *ifm;
1252         struct nlmsghdr  *nlh;
1253
1254         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1255         if (nlh == NULL)
1256                 return -EMSGSIZE;
1257
1258         ifm = nlmsg_data(nlh);
1259         ifm->ifa_family = AF_INET;
1260         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1261         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1262         ifm->ifa_scope = ifa->ifa_scope;
1263         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1264
1265         if ((ifa->ifa_address &&
1266              nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1267             (ifa->ifa_local &&
1268              nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1269             (ifa->ifa_broadcast &&
1270              nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1271             (ifa->ifa_label[0] &&
1272              nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1273                 goto nla_put_failure;
1274
1275         return nlmsg_end(skb, nlh);
1276
1277 nla_put_failure:
1278         nlmsg_cancel(skb, nlh);
1279         return -EMSGSIZE;
1280 }
1281
1282 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1283 {
1284         struct net *net = sock_net(skb->sk);
1285         int h, s_h;
1286         int idx, s_idx;
1287         int ip_idx, s_ip_idx;
1288         struct net_device *dev;
1289         struct in_device *in_dev;
1290         struct in_ifaddr *ifa;
1291         struct hlist_head *head;
1292         struct hlist_node *node;
1293
1294         s_h = cb->args[0];
1295         s_idx = idx = cb->args[1];
1296         s_ip_idx = ip_idx = cb->args[2];
1297
1298         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1299                 idx = 0;
1300                 head = &net->dev_index_head[h];
1301                 rcu_read_lock();
1302                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1303                         if (idx < s_idx)
1304                                 goto cont;
1305                         if (h > s_h || idx > s_idx)
1306                                 s_ip_idx = 0;
1307                         in_dev = __in_dev_get_rcu(dev);
1308                         if (!in_dev)
1309                                 goto cont;
1310
1311                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1312                              ifa = ifa->ifa_next, ip_idx++) {
1313                                 if (ip_idx < s_ip_idx)
1314                                         continue;
1315                                 if (inet_fill_ifaddr(skb, ifa,
1316                                              NETLINK_CB(cb->skb).portid,
1317                                              cb->nlh->nlmsg_seq,
1318                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1319                                         rcu_read_unlock();
1320                                         goto done;
1321                                 }
1322                         }
1323 cont:
1324                         idx++;
1325                 }
1326                 rcu_read_unlock();
1327         }
1328
1329 done:
1330         cb->args[0] = h;
1331         cb->args[1] = idx;
1332         cb->args[2] = ip_idx;
1333
1334         return skb->len;
1335 }
1336
1337 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1338                       u32 portid)
1339 {
1340         struct sk_buff *skb;
1341         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1342         int err = -ENOBUFS;
1343         struct net *net;
1344
1345         net = dev_net(ifa->ifa_dev->dev);
1346         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1347         if (skb == NULL)
1348                 goto errout;
1349
1350         err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1351         if (err < 0) {
1352                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1353                 WARN_ON(err == -EMSGSIZE);
1354                 kfree_skb(skb);
1355                 goto errout;
1356         }
1357         rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1358         return;
1359 errout:
1360         if (err < 0)
1361                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1362 }
1363
1364 static size_t inet_get_link_af_size(const struct net_device *dev)
1365 {
1366         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1367
1368         if (!in_dev)
1369                 return 0;
1370
1371         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1372 }
1373
1374 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1375 {
1376         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1377         struct nlattr *nla;
1378         int i;
1379
1380         if (!in_dev)
1381                 return -ENODATA;
1382
1383         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1384         if (nla == NULL)
1385                 return -EMSGSIZE;
1386
1387         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1388                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1389
1390         return 0;
1391 }
1392
/*
 * Attribute policy handed to nla_parse_nested() by inet_validate_link_af():
 * IFLA_INET_CONF must itself be a nested attribute.
 */
1393 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1394         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1395 };
1396
1397 static int inet_validate_link_af(const struct net_device *dev,
1398                                  const struct nlattr *nla)
1399 {
1400         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1401         int err, rem;
1402
1403         if (dev && !__in_dev_get_rtnl(dev))
1404                 return -EAFNOSUPPORT;
1405
1406         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1407         if (err < 0)
1408                 return err;
1409
1410         if (tb[IFLA_INET_CONF]) {
1411                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1412                         int cfgid = nla_type(a);
1413
1414                         if (nla_len(a) < 4)
1415                                 return -EINVAL;
1416
1417                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1418                                 return -EINVAL;
1419                 }
1420         }
1421
1422         return 0;
1423 }
1424
1425 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1426 {
1427         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1428         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1429         int rem;
1430
1431         if (!in_dev)
1432                 return -EAFNOSUPPORT;
1433
1434         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1435                 BUG();
1436
1437         if (tb[IFLA_INET_CONF]) {
1438                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1439                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1440         }
1441
1442         return 0;
1443 }
1444
1445 #ifdef CONFIG_SYSCTL
1446
1447 static void devinet_copy_dflt_conf(struct net *net, int i)
1448 {
1449         struct net_device *dev;
1450
1451         rcu_read_lock();
1452         for_each_netdev_rcu(net, dev) {
1453                 struct in_device *in_dev;
1454
1455                 in_dev = __in_dev_get_rcu(dev);
1456                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1457                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1458         }
1459         rcu_read_unlock();
1460 }
1461
1462 /* called with RTNL locked */
1463 static void inet_forward_change(struct net *net)
1464 {
1465         struct net_device *dev;
1466         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1467
1468         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1469         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1470
1471         for_each_netdev(net, dev) {
1472                 struct in_device *in_dev;
1473                 if (on)
1474                         dev_disable_lro(dev);
1475                 rcu_read_lock();
1476                 in_dev = __in_dev_get_rcu(dev);
1477                 if (in_dev)
1478                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1479                 rcu_read_unlock();
1480         }
1481 }
1482
1483 static int devinet_conf_proc(ctl_table *ctl, int write,
1484                              void __user *buffer,
1485                              size_t *lenp, loff_t *ppos)
1486 {
1487         int old_value = *(int *)ctl->data;
1488         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1489         int new_value = *(int *)ctl->data;
1490
1491         if (write) {
1492                 struct ipv4_devconf *cnf = ctl->extra1;
1493                 struct net *net = ctl->extra2;
1494                 int i = (int *)ctl->data - cnf->data;
1495
1496                 set_bit(i, cnf->state);
1497
1498                 if (cnf == net->ipv4.devconf_dflt)
1499                         devinet_copy_dflt_conf(net, i);
1500                 if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1501                     i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1502                         if ((new_value == 0) && (old_value != 0))
1503                                 rt_cache_flush(net);
1504         }
1505
1506         return ret;
1507 }
1508
1509 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1510                                   void __user *buffer,
1511                                   size_t *lenp, loff_t *ppos)
1512 {
1513         int *valp = ctl->data;
1514         int val = *valp;
1515         loff_t pos = *ppos;
1516         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1517
1518         if (write && *valp != val) {
1519                 struct net *net = ctl->extra2;
1520
1521                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1522                         if (!rtnl_trylock()) {
1523                                 /* Restore the original values before restarting */
1524                                 *valp = val;
1525                                 *ppos = pos;
1526                                 return restart_syscall();
1527                         }
1528                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1529                                 inet_forward_change(net);
1530                         } else if (*valp) {
1531                                 struct ipv4_devconf *cnf = ctl->extra1;
1532                                 struct in_device *idev =
1533                                         container_of(cnf, struct in_device, cnf);
1534                                 dev_disable_lro(idev->dev);
1535                         }
1536                         rtnl_unlock();
1537                         rt_cache_flush(net);
1538                 }
1539         }
1540
1541         return ret;
1542 }
1543
1544 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1545                                 void __user *buffer,
1546                                 size_t *lenp, loff_t *ppos)
1547 {
1548         int *valp = ctl->data;
1549         int val = *valp;
1550         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1551         struct net *net = ctl->extra2;
1552
1553         if (write && *valp != val)
1554                 rt_cache_flush(net);
1555
1556         return ret;
1557 }
1558
/*
 * Build one ctl_table initializer for the devconf value IPV4_DEVCONF_<attr>.
 * .data points at slot (IPV4_DEVCONF_<attr> - 1) of the global ipv4_devconf
 * and .extra1 at ipv4_devconf itself; name is the procname, mval the file
 * mode and proc the handler.
 */
1559 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1560         { \
1561                 .procname       = name, \
1562                 .data           = ipv4_devconf.data + \
1563                                   IPV4_DEVCONF_ ## attr - 1, \
1564                 .maxlen         = sizeof(int), \
1565                 .mode           = mval, \
1566                 .proc_handler   = proc, \
1567                 .extra1         = &ipv4_devconf, \
1568         }
1569
/* Mode 0644 entry handled by devinet_conf_proc(). */
1570 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1571         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1572
/* Mode 0444 entry handled by devinet_conf_proc(). */
1573 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1574         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1575
/* Mode 0644 entry with a caller-supplied handler. */
1576 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1577         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1578
/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
1579 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1580         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1581
/*
 * Template sysctl table for one ipv4_devconf instance.  The entries below
 * reference the global ipv4_devconf; __devinet_sysctl_register() duplicates
 * this template and rebases .data/.extra1/.extra2 of each entry onto the
 * devconf it is registering.  sysctl_header holds the handle returned by
 * register_net_sysctl() for that copy.
 */
1582 static struct devinet_sysctl_table {
1583         struct ctl_table_header *sysctl_header;
1584         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1585 } devinet_sysctl = {
1586         .devinet_vars = {
1587                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1588                                              devinet_sysctl_forward),
1589                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1590
1591                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1592                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1593                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1594                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1595                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1596                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1597                                         "accept_source_route"),
1598                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1599                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1600                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1601                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1602                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1603                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1604                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1605                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1606                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1607                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1608                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1609                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1610                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1611
                /* Entries below flush the route cache when their value changes. */
1612                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1613                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1614                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1615                                               "force_igmp_version"),
1616                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1617                                               "promote_secondaries"),
1618                 DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
1619                                               "route_localnet"),
1620         },
1621 };
1622
1623 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1624                                         struct ipv4_devconf *p)
1625 {
1626         int i;
1627         struct devinet_sysctl_table *t;
1628         char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1629
1630         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1631         if (!t)
1632                 goto out;
1633
1634         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1635                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1636                 t->devinet_vars[i].extra1 = p;
1637                 t->devinet_vars[i].extra2 = net;
1638         }
1639
1640         snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1641
1642         t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1643         if (!t->sysctl_header)
1644                 goto free;
1645
1646         p->sysctl = t;
1647         return 0;
1648
1649 free:
1650         kfree(t);
1651 out:
1652         return -ENOBUFS;
1653 }
1654
1655 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1656 {
1657         struct devinet_sysctl_table *t = cnf->sysctl;
1658
1659         if (t == NULL)
1660                 return;
1661
1662         cnf->sysctl = NULL;
1663         unregister_net_sysctl_table(t->sysctl_header);
1664         kfree(t);
1665 }
1666
1667 static void devinet_sysctl_register(struct in_device *idev)
1668 {
1669         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1670         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1671                                         &idev->cnf);
1672 }
1673
1674 static void devinet_sysctl_unregister(struct in_device *idev)
1675 {
1676         __devinet_sysctl_unregister(&idev->cnf);
1677         neigh_sysctl_unregister(idev->arp_parms);
1678 }
1679
/*
 * Single-entry sysctl table for "ip_forward".  It exposes the FORWARDING
 * slot of the global ipv4_devconf through devinet_sysctl_forward(), with
 * extra1/extra2 set for the initial namespace; the empty entry terminates
 * the table.
 */
1680 static struct ctl_table ctl_forward_entry[] = {
1681         {
1682                 .procname       = "ip_forward",
1683                 .data           = &ipv4_devconf.data[
1684                                         IPV4_DEVCONF_FORWARDING - 1],
1685                 .maxlen         = sizeof(int),
1686                 .mode           = 0644,
1687                 .proc_handler   = devinet_sysctl_forward,
1688                 .extra1         = &ipv4_devconf,
1689                 .extra2         = &init_net,
1690         },
1691         { },
1692 };
1693 #endif
1694
1695 static __net_init int devinet_init_net(struct net *net)
1696 {
1697         int err;
1698         struct ipv4_devconf *all, *dflt;
1699 #ifdef CONFIG_SYSCTL
1700         struct ctl_table *tbl = ctl_forward_entry;
1701         struct ctl_table_header *forw_hdr;
1702 #endif
1703
1704         err = -ENOMEM;
1705         all = &ipv4_devconf;
1706         dflt = &ipv4_devconf_dflt;
1707
1708         if (!net_eq(net, &init_net)) {
1709                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1710                 if (all == NULL)
1711                         goto err_alloc_all;
1712
1713                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1714                 if (dflt == NULL)
1715                         goto err_alloc_dflt;
1716
1717 #ifdef CONFIG_SYSCTL
1718                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1719                 if (tbl == NULL)
1720                         goto err_alloc_ctl;
1721
1722                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1723                 tbl[0].extra1 = all;
1724                 tbl[0].extra2 = net;
1725 #endif
1726         }
1727
1728 #ifdef CONFIG_SYSCTL
1729         err = __devinet_sysctl_register(net, "all", all);
1730         if (err < 0)
1731                 goto err_reg_all;
1732
1733         err = __devinet_sysctl_register(net, "default", dflt);
1734         if (err < 0)
1735                 goto err_reg_dflt;
1736
1737         err = -ENOMEM;
1738         forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1739         if (forw_hdr == NULL)
1740                 goto err_reg_ctl;
1741         net->ipv4.forw_hdr = forw_hdr;
1742 #endif
1743
1744         net->ipv4.devconf_all = all;
1745         net->ipv4.devconf_dflt = dflt;
1746         return 0;
1747
1748 #ifdef CONFIG_SYSCTL
1749 err_reg_ctl:
1750         __devinet_sysctl_unregister(dflt);
1751 err_reg_dflt:
1752         __devinet_sysctl_unregister(all);
1753 err_reg_all:
1754         if (tbl != ctl_forward_entry)
1755                 kfree(tbl);
1756 err_alloc_ctl:
1757 #endif
1758         if (dflt != &ipv4_devconf_dflt)
1759                 kfree(dflt);
1760 err_alloc_dflt:
1761         if (all != &ipv4_devconf)
1762                 kfree(all);
1763 err_alloc_all:
1764         return err;
1765 }
1766
1767 static __net_exit void devinet_exit_net(struct net *net)
1768 {
1769 #ifdef CONFIG_SYSCTL
1770         struct ctl_table *tbl;
1771
1772         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1773         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1774         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1775         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1776         kfree(tbl);
1777 #endif
1778         kfree(net->ipv4.devconf_dflt);
1779         kfree(net->ipv4.devconf_all);
1780 }
1781
1782 static __net_initdata struct pernet_operations devinet_ops = {
1783         .init = devinet_init_net,
1784         .exit = devinet_exit_net,
1785 };
1786
1787 static struct rtnl_af_ops inet_af_ops = {
1788         .family           = AF_INET,
1789         .fill_link_af     = inet_fill_link_af,
1790         .get_link_af_size = inet_get_link_af_size,
1791         .validate_link_af = inet_validate_link_af,
1792         .set_link_af      = inet_set_link_af,
1793 };
1794
1795 void __init devinet_init(void)
1796 {
1797         int i;
1798
1799         for (i = 0; i < IN4_ADDR_HSIZE; i++)
1800                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1801
1802         register_pernet_subsys(&devinet_ops);
1803
1804         register_gifconf(PF_INET, inet_gifconf);
1805         register_netdevice_notifier(&ip_netdev_notifier);
1806
1807         rtnl_af_register(&inet_af_ops);
1808
1809         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1810         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1811         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1812 }
1813