]> Pileus Git - ~andy/linux/blob - net/ipv4/devinet.c
[IPV4]: Move the devinet pointers on the struct net
[~andy/linux] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *      Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  *      Derived from the IP parts of dev.c 1.0.19
12  *              Authors:        Ross Biro
13  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *      Additional Authors:
17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *      Changes:
21  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
22  *                                      lists.
23  *              Cyrus Durgin:           updated for kmod
24  *              Matthias Andree:        in devinet_ioctl, compare label and
25  *                                      address (4.4BSD alias style support),
26  *                                      fall back to comparing just the label
27  *                                      if no match found.
28  */
29
30
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
/*
 * Global IPv4 device configuration block.  Slots in .data are indexed by
 * the NET_IPV4_CONF_* id minus one.  Redirect acceptance, redirect
 * sending, secure redirects and shared media are set to 1; every slot
 * not listed here is left zero by the initializer.
 */
67 struct ipv4_devconf ipv4_devconf = {
68         .data = {
69                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
73         },
74 };
75
/*
 * Template configuration for newly created in_devices: inetdev_init()
 * copies this structure into in_dev->cnf.  Compared with ipv4_devconf it
 * additionally sets the ACCEPT_SOURCE_ROUTE slot to 1.
 */
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77         .data = {
78                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83         },
84 };
85
/* Shorthand: apply IPV4_DEVCONF() to the ipv4_devconf_dflt template. */
86 #define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
87
/*
 * Netlink attribute policy handed to nlmsg_parse() by inet_rtm_deladdr()
 * and rtm_to_ifaddr().  The four address attributes are declared as
 * NLA_U32; IFA_LABEL is a string limited to IFNAMSIZ - 1 bytes.
 */
88 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
89         [IFA_LOCAL]             = { .type = NLA_U32 },
90         [IFA_ADDRESS]           = { .type = NLA_U32 },
91         [IFA_BROADCAST]         = { .type = NLA_U32 },
92         [IFA_ANYCAST]           = { .type = NLA_U32 },
93         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
94 };
95
/*
 * Forward declaration; callers in this file invoke it with RTM_NEWADDR or
 * RTM_DELADDR as the event.  The definition is not part of this excerpt.
 */
96 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
97
/*
 * Notifier chain that this file fires with NETDEV_UP / NETDEV_DOWN and the
 * affected in_ifaddr whenever an address is inserted or removed.
 */
98 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Thin wrapper around __inet_del_ifa(); declared early for inetdev_destroy(). */
99 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
100                          int destroy);
/*
 * Per-device sysctl registration hooks.  Without CONFIG_SYSCTL they
 * collapse to empty inline stubs so callers need no #ifdef.
 */
101 #ifdef CONFIG_SYSCTL
102 static void devinet_sysctl_register(struct in_device *idev);
103 static void devinet_sysctl_unregister(struct in_device *idev);
104 #else
105 static inline void devinet_sysctl_register(struct in_device *idev)
106 {
107 }
108 static inline void devinet_sysctl_unregister(struct in_device *idev)
109 {
110 }
111 #endif
112
113 /* Locks all the inet devices. */
114
115 static struct in_ifaddr *inet_alloc_ifa(void)
116 {
117         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
118
119         if (ifa) {
120                 INIT_RCU_HEAD(&ifa->rcu_head);
121         }
122
123         return ifa;
124 }
125
126 static void inet_rcu_free_ifa(struct rcu_head *head)
127 {
128         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
129         if (ifa->ifa_dev)
130                 in_dev_put(ifa->ifa_dev);
131         kfree(ifa);
132 }
133
134 static inline void inet_free_ifa(struct in_ifaddr *ifa)
135 {
136         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
137 }
138
139 void in_dev_finish_destroy(struct in_device *idev)
140 {
141         struct net_device *dev = idev->dev;
142
143         BUG_TRAP(!idev->ifa_list);
144         BUG_TRAP(!idev->mc_list);
145 #ifdef NET_REFCNT_DEBUG
146         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
147                idev, dev ? dev->name : "NIL");
148 #endif
149         dev_put(dev);
150         if (!idev->dead)
151                 printk("Freeing alive in_device %p\n", idev);
152         else {
153                 kfree(idev);
154         }
155 }
156
157 static struct in_device *inetdev_init(struct net_device *dev)
158 {
159         struct in_device *in_dev;
160
161         ASSERT_RTNL();
162
163         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
164         if (!in_dev)
165                 goto out;
166         INIT_RCU_HEAD(&in_dev->rcu_head);
167         memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
168         in_dev->cnf.sysctl = NULL;
169         in_dev->dev = dev;
170         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
171                 goto out_kfree;
172         /* Reference in_dev->dev */
173         dev_hold(dev);
174         /* Account for reference dev->ip_ptr (below) */
175         in_dev_hold(in_dev);
176
177         devinet_sysctl_register(in_dev);
178         ip_mc_init_dev(in_dev);
179         if (dev->flags & IFF_UP)
180                 ip_mc_up(in_dev);
181
182         /* we can receive as soon as ip_ptr is set -- do this last */
183         rcu_assign_pointer(dev->ip_ptr, in_dev);
184 out:
185         return in_dev;
186 out_kfree:
187         kfree(in_dev);
188         in_dev = NULL;
189         goto out;
190 }
191
192 static void in_dev_rcu_put(struct rcu_head *head)
193 {
194         struct in_device *idev = container_of(head, struct in_device, rcu_head);
195         in_dev_put(idev);
196 }
197
198 static void inetdev_destroy(struct in_device *in_dev)
199 {
200         struct in_ifaddr *ifa;
201         struct net_device *dev;
202
203         ASSERT_RTNL();
204
205         dev = in_dev->dev;
206
207         in_dev->dead = 1;
208
209         ip_mc_destroy_dev(in_dev);
210
211         while ((ifa = in_dev->ifa_list) != NULL) {
212                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
213                 inet_free_ifa(ifa);
214         }
215
216         dev->ip_ptr = NULL;
217
218         devinet_sysctl_unregister(in_dev);
219         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
220         arp_ifdown(dev);
221
222         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
223 }
224
225 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
226 {
227         rcu_read_lock();
228         for_primary_ifa(in_dev) {
229                 if (inet_ifa_match(a, ifa)) {
230                         if (!b || inet_ifa_match(b, ifa)) {
231                                 rcu_read_unlock();
232                                 return 1;
233                         }
234                 }
235         } endfor_ifa(in_dev);
236         rcu_read_unlock();
237         return 0;
238 }
239
/*
 * Unlink the address that *ifap points at from in_dev's address list and
 * announce the removal.  RTNL must be held.
 *
 * @in_dev:  device state owning the list
 * @ifap:    link pointer currently pointing at the address to remove
 * @destroy: when non-zero the removed address is released with
 *           inet_free_ifa(); otherwise the caller keeps it
 * @nlh/@pid: forwarded unchanged to rtmsg_ifa()
 *
 * If the removed address is a primary one, the secondaries that share its
 * mask and match its ifa_address are handled in one of two ways: with
 * IN_DEV_PROMOTE_SECONDARIES unset they are all unlinked, announced and
 * freed; with it set the first such secondary is promoted to primary.
 */
240 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
241                          int destroy, struct nlmsghdr *nlh, u32 pid)
242 {
243         struct in_ifaddr *promote = NULL;
244         struct in_ifaddr *ifa, *ifa1 = *ifap;
245         struct in_ifaddr *last_prim = in_dev->ifa_list;
246         struct in_ifaddr *prev_prom = NULL;
247         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
248
249         ASSERT_RTNL();
250
251         /* 1. Deleting primary ifaddr forces deletion all secondaries
252          * unless alias promotion is set
253          **/
254
255         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
256                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
257
258                 while ((ifa = *ifap1) != NULL) {
                        /* Track the last primary whose scope is not below ifa1's. */
259                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
260                             ifa1->ifa_scope <= ifa->ifa_scope)
261                                 last_prim = ifa;
262
                        /* Skip entries that are not secondaries of ifa1's subnet. */
263                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
264                             ifa1->ifa_mask != ifa->ifa_mask ||
265                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
266                                 ifap1 = &ifa->ifa_next;
267                                 prev_prom = ifa;
268                                 continue;
269                         }
270
271                         if (!do_promote) {
                                /* No promotion: drop this secondary right away. */
272                                 *ifap1 = ifa->ifa_next;
273
274                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
275                                 blocking_notifier_call_chain(&inetaddr_chain,
276                                                 NETDEV_DOWN, ifa);
277                                 inet_free_ifa(ifa);
278                         } else {
                                /* First matching secondary becomes the new primary. */
279                                 promote = ifa;
280                                 break;
281                         }
282                 }
283         }
284
285         /* 2. Unlink it */
286
287         *ifap = ifa1->ifa_next;
288
289         /* 3. Announce address deletion */
290
291         /* Send message first, then call notifier.
292            At first sight, FIB update triggered by notifier
293            will refer to already deleted ifaddr, that could confuse
294            netlink listeners. It is not true: look, gated sees
295            that route deleted and if it still thinks that ifaddr
296            is valid, it will try to restore deleted routes... Grr.
297            So that, this order is correct.
298          */
299         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
300         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
301
302         if (promote) {
303
                /*
                 * prev_prom is only set when some entry was skipped before
                 * the promoted one; in that case relink promote directly
                 * behind last_prim.
                 */
304                 if (prev_prom) {
305                         prev_prom->ifa_next = promote->ifa_next;
306                         promote->ifa_next = last_prim->ifa_next;
307                         last_prim->ifa_next = promote;
308                 }
309
310                 promote->ifa_flags &= ~IFA_F_SECONDARY;
311                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
312                 blocking_notifier_call_chain(&inetaddr_chain,
313                                 NETDEV_UP, promote);
                /* Re-run fib_add_ifaddr() for the entries after promote that belong to ifa1's subnet. */
314                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
315                         if (ifa1->ifa_mask != ifa->ifa_mask ||
316                             !inet_ifa_match(ifa1->ifa_address, ifa))
317                                         continue;
318                         fib_add_ifaddr(ifa);
319                 }
320
321         }
322         if (destroy)
323                 inet_free_ifa(ifa1);
324 }
325
326 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
327                          int destroy)
328 {
329         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
330 }
331
/*
 * Link @ifa into the address list of ifa->ifa_dev and announce it.
 * RTNL must be held.  @nlh and @pid are forwarded to rtmsg_ifa().
 *
 * Return values:
 *   0        address linked and announced, or ifa_local was zero (the
 *            address is then freed without being inserted)
 *   -EEXIST  an entry with the same mask, matching subnet and the same
 *            ifa_local already exists
 *   -EINVAL  a matching-subnet entry exists with a different scope
 *
 * On every path that does not link the address, @ifa has been passed to
 * inet_free_ifa(), so the caller must not use it afterwards.
 */
332 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
333                              u32 pid)
334 {
335         struct in_device *in_dev = ifa->ifa_dev;
336         struct in_ifaddr *ifa1, **ifap, **last_primary;
337
338         ASSERT_RTNL();
339
340         if (!ifa->ifa_local) {
341                 inet_free_ifa(ifa);
342                 return 0;
343         }
344
        /* Start out as primary; the scan below may mark it secondary. */
345         ifa->ifa_flags &= ~IFA_F_SECONDARY;
346         last_primary = &in_dev->ifa_list;
347
348         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
349              ifap = &ifa1->ifa_next) {
                /* Remember the slot after the last primary of suitable scope. */
350                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
351                     ifa->ifa_scope <= ifa1->ifa_scope)
352                         last_primary = &ifa1->ifa_next;
353                 if (ifa1->ifa_mask == ifa->ifa_mask &&
354                     inet_ifa_match(ifa1->ifa_address, ifa)) {
355                         if (ifa1->ifa_local == ifa->ifa_local) {
356                                 inet_free_ifa(ifa);
357                                 return -EEXIST;
358                         }
359                         if (ifa1->ifa_scope != ifa->ifa_scope) {
360                                 inet_free_ifa(ifa);
361                                 return -EINVAL;
362                         }
363                         ifa->ifa_flags |= IFA_F_SECONDARY;
364                 }
365         }
366
        /*
         * After the loop ifap points at the list tail, which is where a
         * secondary goes.  A primary is placed at last_primary instead.
         */
367         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
368                 net_srandom(ifa->ifa_local);
369                 ifap = last_primary;
370         }
371
372         ifa->ifa_next = *ifap;
373         *ifap = ifa;
374
375         /* Send message first, then call notifier.
376            Notifier will trigger FIB update, so that
377            listeners of netlink will know about new ifaddr */
378         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
379         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
380
381         return 0;
382 }
383
384 static int inet_insert_ifa(struct in_ifaddr *ifa)
385 {
386         return __inet_insert_ifa(ifa, NULL, 0);
387 }
388
389 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
390 {
391         struct in_device *in_dev = __in_dev_get_rtnl(dev);
392
393         ASSERT_RTNL();
394
395         if (!in_dev) {
396                 inet_free_ifa(ifa);
397                 return -ENOBUFS;
398         }
399         ipv4_devconf_setall(in_dev);
400         if (ifa->ifa_dev != in_dev) {
401                 BUG_TRAP(!ifa->ifa_dev);
402                 in_dev_hold(in_dev);
403                 ifa->ifa_dev = in_dev;
404         }
405         if (LOOPBACK(ifa->ifa_local))
406                 ifa->ifa_scope = RT_SCOPE_HOST;
407         return inet_insert_ifa(ifa);
408 }
409
410 struct in_device *inetdev_by_index(int ifindex)
411 {
412         struct net_device *dev;
413         struct in_device *in_dev = NULL;
414         read_lock(&dev_base_lock);
415         dev = __dev_get_by_index(&init_net, ifindex);
416         if (dev)
417                 in_dev = in_dev_get(dev);
418         read_unlock(&dev_base_lock);
419         return in_dev;
420 }
421
422 /* Called only from RTNL semaphored context. No locks. */
423
424 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
425                                     __be32 mask)
426 {
427         ASSERT_RTNL();
428
429         for_primary_ifa(in_dev) {
430                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
431                         return ifa;
432         } endfor_ifa(in_dev);
433         return NULL;
434 }
435
436 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
437 {
438         struct net *net = skb->sk->sk_net;
439         struct nlattr *tb[IFA_MAX+1];
440         struct in_device *in_dev;
441         struct ifaddrmsg *ifm;
442         struct in_ifaddr *ifa, **ifap;
443         int err = -EINVAL;
444
445         ASSERT_RTNL();
446
447         if (net != &init_net)
448                 return -EINVAL;
449
450         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
451         if (err < 0)
452                 goto errout;
453
454         ifm = nlmsg_data(nlh);
455         in_dev = inetdev_by_index(ifm->ifa_index);
456         if (in_dev == NULL) {
457                 err = -ENODEV;
458                 goto errout;
459         }
460
461         __in_dev_put(in_dev);
462
463         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
464              ifap = &ifa->ifa_next) {
465                 if (tb[IFA_LOCAL] &&
466                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
467                         continue;
468
469                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
470                         continue;
471
472                 if (tb[IFA_ADDRESS] &&
473                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
474                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
475                         continue;
476
477                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
478                 return 0;
479         }
480
481         err = -EADDRNOTAVAIL;
482 errout:
483         return err;
484 }
485
486 static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
487 {
488         struct nlattr *tb[IFA_MAX+1];
489         struct in_ifaddr *ifa;
490         struct ifaddrmsg *ifm;
491         struct net_device *dev;
492         struct in_device *in_dev;
493         int err = -EINVAL;
494
495         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
496         if (err < 0)
497                 goto errout;
498
499         ifm = nlmsg_data(nlh);
500         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) {
501                 err = -EINVAL;
502                 goto errout;
503         }
504
505         dev = __dev_get_by_index(&init_net, ifm->ifa_index);
506         if (dev == NULL) {
507                 err = -ENODEV;
508                 goto errout;
509         }
510
511         in_dev = __in_dev_get_rtnl(dev);
512         if (in_dev == NULL) {
513                 err = -ENOBUFS;
514                 goto errout;
515         }
516
517         ifa = inet_alloc_ifa();
518         if (ifa == NULL) {
519                 /*
520                  * A potential indev allocation can be left alive, it stays
521                  * assigned to its device and is destroy with it.
522                  */
523                 err = -ENOBUFS;
524                 goto errout;
525         }
526
527         ipv4_devconf_setall(in_dev);
528         in_dev_hold(in_dev);
529
530         if (tb[IFA_ADDRESS] == NULL)
531                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
532
533         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
534         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
535         ifa->ifa_flags = ifm->ifa_flags;
536         ifa->ifa_scope = ifm->ifa_scope;
537         ifa->ifa_dev = in_dev;
538
539         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
540         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
541
542         if (tb[IFA_BROADCAST])
543                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
544
545         if (tb[IFA_ANYCAST])
546                 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
547
548         if (tb[IFA_LABEL])
549                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
550         else
551                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
552
553         return ifa;
554
555 errout:
556         return ERR_PTR(err);
557 }
558
559 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
560 {
561         struct net *net = skb->sk->sk_net;
562         struct in_ifaddr *ifa;
563
564         ASSERT_RTNL();
565
566         if (net != &init_net)
567                 return -EINVAL;
568
569         ifa = rtm_to_ifaddr(nlh);
570         if (IS_ERR(ifa))
571                 return PTR_ERR(ifa);
572
573         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
574 }
575
576 /*
577  *      Determine a default network mask, based on the IP address.
578  */
579
580 static __inline__ int inet_abc_len(__be32 addr)
581 {
582         int rc = -1;    /* Something else, probably a multicast. */
583
584         if (ZERONET(addr))
585                 rc = 0;
586         else {
587                 __u32 haddr = ntohl(addr);
588
589                 if (IN_CLASSA(haddr))
590                         rc = 8;
591                 else if (IN_CLASSB(haddr))
592                         rc = 16;
593                 else if (IN_CLASSC(haddr))
594                         rc = 24;
595         }
596
597         return rc;
598 }
599
600
/*
 * devinet_ioctl - handle the IPv4 interface address ioctls.
 * @cmd: one of SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR, SIOCGIFNETMASK,
 *       SIOCSIFADDR, SIOCSIFBRDADDR, SIOCSIFDSTADDR, SIOCSIFNETMASK or
 *       SIOCSIFFLAGS
 * @arg: user pointer to a struct ifreq; read on entry and written back
 *       for the SIOCGIF* requests
 *
 * The interface name may carry an alias label ("dev:label").  The part
 * before the colon selects the device; the full name is compared with
 * each address' ifa_label to select the address.
 *
 * Returns 0 on success or a negative errno:
 *   -EFAULT         copying the ifreq from or to user space failed
 *   -EACCES         a set request without CAP_NET_ADMIN
 *   -EINVAL         unknown @cmd, non-AF_INET family on a set request,
 *                   or an address/mask rejected by inet_abc_len()/bad_mask()
 *   -ENODEV         no device with that name in init_net
 *   -EADDRNOTAVAIL  no matching address for a request that needs one
 *   -ENOBUFS        allocating a new address failed
 * SIOCSIFADDR returns the result of inet_set_ifa(), and SIOCSIFFLAGS
 * without an alias returns the result of dev_change_flags().
 *
 * Runs under rtnl_lock() from the device lookup until just before the
 * result is returned or copied out.
 */
601 int devinet_ioctl(unsigned int cmd, void __user *arg)
602 {
603         struct ifreq ifr;
604         struct sockaddr_in sin_orig;
605         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
606         struct in_device *in_dev;
607         struct in_ifaddr **ifap = NULL;
608         struct in_ifaddr *ifa = NULL;
609         struct net_device *dev;
610         char *colon;
611         int ret = -EFAULT;
612         int tryaddrmatch = 0;
613
614         /*
615          *      Fetch the caller's info block into kernel space
616          */
617
618         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
619                 goto out;
620         ifr.ifr_name[IFNAMSIZ - 1] = 0;
621
622         /* save original address for comparison */
623         memcpy(&sin_orig, sin, sizeof(*sin));
624
        /* Cut off the alias label so the device lookups see the bare name. */
625         colon = strchr(ifr.ifr_name, ':');
626         if (colon)
627                 *colon = 0;
628
629 #ifdef CONFIG_KMOD
630         dev_load(&init_net, ifr.ifr_name);
631 #endif
632
        /* First pass: permission and argument checks, no lock held yet. */
633         switch (cmd) {
634         case SIOCGIFADDR:       /* Get interface address */
635         case SIOCGIFBRDADDR:    /* Get the broadcast address */
636         case SIOCGIFDSTADDR:    /* Get the destination address */
637         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
638                 /* Note that these ioctls will not sleep,
639                    so that we do not impose a lock.
640                    One day we will be forced to put shlock here (I mean SMP)
641                  */
642                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
643                 memset(sin, 0, sizeof(*sin));
644                 sin->sin_family = AF_INET;
645                 break;
646
647         case SIOCSIFFLAGS:
648                 ret = -EACCES;
649                 if (!capable(CAP_NET_ADMIN))
650                         goto out;
651                 break;
652         case SIOCSIFADDR:       /* Set interface address (and family) */
653         case SIOCSIFBRDADDR:    /* Set the broadcast address */
654         case SIOCSIFDSTADDR:    /* Set the destination address */
655         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
656                 ret = -EACCES;
657                 if (!capable(CAP_NET_ADMIN))
658                         goto out;
659                 ret = -EINVAL;
660                 if (sin->sin_family != AF_INET)
661                         goto out;
662                 break;
663         default:
664                 ret = -EINVAL;
665                 goto out;
666         }
667
668         rtnl_lock();
669
670         ret = -ENODEV;
671         if ((dev = __dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
672                 goto done;
673
        /* Restore the full "dev:label" name for the label comparisons below. */
674         if (colon)
675                 *colon = ':';
676
677         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
678                 if (tryaddrmatch) {
679                         /* Matthias Andree */
680                         /* compare label and address (4.4BSD style) */
681                         /* note: we only do this for a limited set of ioctls
682                            and only if the original address family was AF_INET.
683                            This is checked above. */
684                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
685                              ifap = &ifa->ifa_next) {
686                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
687                                     sin_orig.sin_addr.s_addr ==
688                                                         ifa->ifa_address) {
689                                         break; /* found */
690                                 }
691                         }
692                 }
693                 /* we didn't get a match, maybe the application is
694                    4.3BSD-style and passed in junk so we fall back to
695                    comparing just the label */
696                 if (!ifa) {
697                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
698                              ifap = &ifa->ifa_next)
699                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
700                                         break;
701                 }
702         }
703
        /* Only SIOCSIFADDR and SIOCSIFFLAGS can proceed without an address. */
704         ret = -EADDRNOTAVAIL;
705         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
706                 goto done;
707
708         switch (cmd) {
709         case SIOCGIFADDR:       /* Get interface address */
710                 sin->sin_addr.s_addr = ifa->ifa_local;
711                 goto rarok;
712
713         case SIOCGIFBRDADDR:    /* Get the broadcast address */
714                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
715                 goto rarok;
716
717         case SIOCGIFDSTADDR:    /* Get the destination address */
718                 sin->sin_addr.s_addr = ifa->ifa_address;
719                 goto rarok;
720
721         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
722                 sin->sin_addr.s_addr = ifa->ifa_mask;
723                 goto rarok;
724
725         case SIOCSIFFLAGS:
                /* With an alias label, clearing IFF_UP deletes that one address. */
726                 if (colon) {
727                         ret = -EADDRNOTAVAIL;
728                         if (!ifa)
729                                 break;
730                         ret = 0;
731                         if (!(ifr.ifr_flags & IFF_UP))
732                                 inet_del_ifa(in_dev, ifap, 1);
733                         break;
734                 }
735                 ret = dev_change_flags(dev, ifr.ifr_flags);
736                 break;
737
738         case SIOCSIFADDR:       /* Set interface address (and family) */
739                 ret = -EINVAL;
740                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
741                         break;
742
743                 if (!ifa) {
744                         ret = -ENOBUFS;
745                         if ((ifa = inet_alloc_ifa()) == NULL)
746                                 break;
747                         if (colon)
748                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
749                         else
750                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
751                 } else {
752                         ret = 0;
753                         if (ifa->ifa_local == sin->sin_addr.s_addr)
754                                 break;
                        /* Unlink without freeing; the entry is reused below. */
755                         inet_del_ifa(in_dev, ifap, 0);
756                         ifa->ifa_broadcast = 0;
757                         ifa->ifa_anycast = 0;
758                 }
759
760                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
761
762                 if (!(dev->flags & IFF_POINTOPOINT)) {
763                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
764                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
765                         if ((dev->flags & IFF_BROADCAST) &&
766                             ifa->ifa_prefixlen < 31)
767                                 ifa->ifa_broadcast = ifa->ifa_address |
768                                                      ~ifa->ifa_mask;
769                 } else {
770                         ifa->ifa_prefixlen = 32;
771                         ifa->ifa_mask = inet_make_mask(32);
772                 }
773                 ret = inet_set_ifa(dev, ifa);
774                 break;
775
776         case SIOCSIFBRDADDR:    /* Set the broadcast address */
777                 ret = 0;
778                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
779                         inet_del_ifa(in_dev, ifap, 0);
780                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
                        /* NOTE(review): the result of inet_insert_ifa() is discarded; ret stays 0. */
781                         inet_insert_ifa(ifa);
782                 }
783                 break;
784
785         case SIOCSIFDSTADDR:    /* Set the destination address */
786                 ret = 0;
787                 if (ifa->ifa_address == sin->sin_addr.s_addr)
788                         break;
789                 ret = -EINVAL;
790                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
791                         break;
792                 ret = 0;
793                 inet_del_ifa(in_dev, ifap, 0);
794                 ifa->ifa_address = sin->sin_addr.s_addr;
                /* NOTE(review): the result of inet_insert_ifa() is discarded; ret stays 0. */
795                 inet_insert_ifa(ifa);
796                 break;
797
798         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
799
800                 /*
801                  *      The mask we set must be legal.
802                  */
803                 ret = -EINVAL;
804                 if (bad_mask(sin->sin_addr.s_addr, 0))
805                         break;
806                 ret = 0;
807                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
808                         __be32 old_mask = ifa->ifa_mask;
809                         inet_del_ifa(in_dev, ifap, 0);
810                         ifa->ifa_mask = sin->sin_addr.s_addr;
811                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
812
813                         /* See if current broadcast address matches
814                          * with current netmask, then recalculate
815                          * the broadcast address. Otherwise it's a
816                          * funny address, so don't touch it since
817                          * the user seems to know what (s)he's doing...
818                          */
819                         if ((dev->flags & IFF_BROADCAST) &&
820                             (ifa->ifa_prefixlen < 31) &&
821                             (ifa->ifa_broadcast ==
822                              (ifa->ifa_local|~old_mask))) {
823                                 ifa->ifa_broadcast = (ifa->ifa_local |
824                                                       ~sin->sin_addr.s_addr);
825                         }
                        /* NOTE(review): the result of inet_insert_ifa() is discarded; ret stays 0. */
826                         inet_insert_ifa(ifa);
827                 }
828                 break;
829         }
830 done:
831         rtnl_unlock();
832 out:
833         return ret;
        /* Get requests end here: drop RTNL first, then copy the ifreq back. */
834 rarok:
835         rtnl_unlock();
836         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
837         goto out;
838 }
839
840 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
841 {
842         struct in_device *in_dev = __in_dev_get_rtnl(dev);
843         struct in_ifaddr *ifa;
844         struct ifreq ifr;
845         int done = 0;
846
847         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
848                 goto out;
849
850         for (; ifa; ifa = ifa->ifa_next) {
851                 if (!buf) {
852                         done += sizeof(ifr);
853                         continue;
854                 }
855                 if (len < (int) sizeof(ifr))
856                         break;
857                 memset(&ifr, 0, sizeof(struct ifreq));
858                 if (ifa->ifa_label)
859                         strcpy(ifr.ifr_name, ifa->ifa_label);
860                 else
861                         strcpy(ifr.ifr_name, dev->name);
862
863                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
864                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
865                                                                 ifa->ifa_local;
866
867                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
868                         done = -EFAULT;
869                         break;
870                 }
871                 buf  += sizeof(struct ifreq);
872                 len  -= sizeof(struct ifreq);
873                 done += sizeof(struct ifreq);
874         }
875 out:
876         return done;
877 }
878
879 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
880 {
881         __be32 addr = 0;
882         struct in_device *in_dev;
883
884         rcu_read_lock();
885         in_dev = __in_dev_get_rcu(dev);
886         if (!in_dev)
887                 goto no_in_dev;
888
889         for_primary_ifa(in_dev) {
890                 if (ifa->ifa_scope > scope)
891                         continue;
892                 if (!dst || inet_ifa_match(dst, ifa)) {
893                         addr = ifa->ifa_local;
894                         break;
895                 }
896                 if (!addr)
897                         addr = ifa->ifa_local;
898         } endfor_ifa(in_dev);
899 no_in_dev:
900         rcu_read_unlock();
901
902         if (addr)
903                 goto out;
904
905         /* Not loopback addresses on loopback should be preferred
906            in this case. It is importnat that lo is the first interface
907            in dev_base list.
908          */
909         read_lock(&dev_base_lock);
910         rcu_read_lock();
911         for_each_netdev(&init_net, dev) {
912                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
913                         continue;
914
915                 for_primary_ifa(in_dev) {
916                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
917                             ifa->ifa_scope <= scope) {
918                                 addr = ifa->ifa_local;
919                                 goto out_unlock_both;
920                         }
921                 } endfor_ifa(in_dev);
922         }
923 out_unlock_both:
924         read_unlock(&dev_base_lock);
925         rcu_read_unlock();
926 out:
927         return addr;
928 }
929
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks the addresses of in_dev (via for_ifa) tracking two things:
 *   addr - the first ifa_local whose scope is <= scope and which equals
 *          local (any address qualifies when local is 0);
 *   same - set once an address is seen for which local (if non-zero) and
 *          dst (if non-zero) both satisfy inet_ifa_match().
 *
 * Returns addr when same ended up set, otherwise 0.
 * NOTE(review): inet_ifa_match() is presumably a "same subnet" test; it is
 * defined outside this chunk.
 */
930 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
931                               __be32 local, int scope)
932 {
933         int same = 0;
934         __be32 addr = 0;
935
936         for_ifa(in_dev) {
937                 if (!addr &&
938                     (local == ifa->ifa_local || !local) &&
939                     ifa->ifa_scope <= scope) {
940                         addr = ifa->ifa_local;
941                         if (same) /* subnet already confirmed earlier: done */
942                                 break;
943                 }
944                 if (!same) {
945                         same = (!local || inet_ifa_match(local, ifa)) &&
946                                 (!dst || inet_ifa_match(dst, ifa));
947                         if (same && addr) {
948                                 if (local || !dst)
949                                         break;
950                                 /* Is the selected addr into dst subnet? */
951                                 if (inet_ifa_match(addr, ifa))
952                                         break;
953                                 /* No, then can we use new local src? */
954                                 if (ifa->ifa_scope <= scope) {
955                                         addr = ifa->ifa_local;
956                                         break;
957                                 }
958                                 /* search for large dst subnet for addr */
959                                 same = 0;
960                         }
961                 }
962         } endfor_ifa(in_dev);
963
964         return same? addr : 0; /* an address alone is not enough; the match must be confirmed */
965 }
966
967 /*
968  * Confirm that local IP address exists using wildcards:
969  * - dev: only on this interface, 0=any interface
970  * - dst: only in the same subnet as dst, 0=any dst
971  * - local: address, 0=autoselect the local address
972  * - scope: maximum allowed scope value for the local address
973  */
974 __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
975 {
976         __be32 addr = 0;
977         struct in_device *in_dev;
978
979         if (dev) {
980                 rcu_read_lock();
981                 if ((in_dev = __in_dev_get_rcu(dev)))
982                         addr = confirm_addr_indev(in_dev, dst, local, scope);
983                 rcu_read_unlock();
984
985                 return addr;
986         }
987
988         read_lock(&dev_base_lock);
989         rcu_read_lock();
990         for_each_netdev(&init_net, dev) {
991                 if ((in_dev = __in_dev_get_rcu(dev))) {
992                         addr = confirm_addr_indev(in_dev, dst, local, scope);
993                         if (addr)
994                                 break;
995                 }
996         }
997         rcu_read_unlock();
998         read_unlock(&dev_base_lock);
999
1000         return addr;
1001 }
1002
1003 /*
1004  *      Device notifier
1005  */
1006
/*
 * Add nb to the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_register() returns.
 */
1007 int register_inetaddr_notifier(struct notifier_block *nb)
1008 {
1009         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1010 }
1011
/*
 * Remove nb from the inetaddr_chain blocking notifier chain.
 * Returns whatever blocking_notifier_chain_unregister() returns.
 */
1012 int unregister_inetaddr_notifier(struct notifier_block *nb)
1013 {
1014         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1015 }
1016
1017 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1018  * alias numbering and to create unique labels if possible.
1019 */
1020 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1021 {
1022         struct in_ifaddr *ifa;
1023         int named = 0;
1024
1025         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1026                 char old[IFNAMSIZ], *dot;
1027
1028                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1029                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1030                 if (named++ == 0)
1031                         continue;
1032                 dot = strchr(old, ':');
1033                 if (dot == NULL) {
1034                         sprintf(old, ":%d", named);
1035                         dot = old;
1036                 }
1037                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1038                         strcat(ifa->ifa_label, dot);
1039                 } else {
1040                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1041                 }
1042         }
1043 }
1044
1045 /* Called only under RTNL semaphore */
1046
/*
 * Netdevice notifier callback: keeps the IPv4 per-device state (in_device)
 * in step with device lifecycle events.
 *
 * this:  the notifier block (unused here)
 * event: NETDEV_* event code
 * ptr:   the struct net_device the event refers to
 *
 * Devices outside init_net are ignored.  Returns NOTIFY_DONE, except when
 * inetdev_init() fails on NETDEV_REGISTER, in which case the result of
 * notifier_from_errno(-ENOMEM) is returned.
 */
1047 static int inetdev_event(struct notifier_block *this, unsigned long event,
1048                          void *ptr)
1049 {
1050         struct net_device *dev = ptr;
1051         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1052
1053         if (dev->nd_net != &init_net)
1054                 return NOTIFY_DONE;
1055
1056         ASSERT_RTNL();
1057
1058         if (!in_dev) { /* no IPv4 state yet: only REGISTER is acted upon */
1059                 if (event == NETDEV_REGISTER) {
1060                         in_dev = inetdev_init(dev);
1061                         if (!in_dev)
1062                                 return notifier_from_errno(-ENOMEM);
1063                         if (dev->flags & IFF_LOOPBACK) {
1064                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1065                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1066                         }
1067                 }
1068                 goto out;
1069         }
1070
1071         switch (event) {
1072         case NETDEV_REGISTER: /* REGISTER with an in_dev already attached is unexpected */
1073                 printk(KERN_DEBUG "inetdev_event: bug\n");
1074                 dev->ip_ptr = NULL;
1075                 break;
1076         case NETDEV_UP:
1077                 if (dev->mtu < 68)
1078                         break;
1079                 if (dev->flags & IFF_LOOPBACK) { /* give loopback INADDR_LOOPBACK/8, host scope */
1080                         struct in_ifaddr *ifa;
1081                         if ((ifa = inet_alloc_ifa()) != NULL) {
1082                                 ifa->ifa_local =
1083                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1084                                 ifa->ifa_prefixlen = 8;
1085                                 ifa->ifa_mask = inet_make_mask(8);
1086                                 in_dev_hold(in_dev);
1087                                 ifa->ifa_dev = in_dev;
1088                                 ifa->ifa_scope = RT_SCOPE_HOST;
1089                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1090                                 inet_insert_ifa(ifa);
1091                         }
1092                 }
1093                 ip_mc_up(in_dev);
1094                 break;
1095         case NETDEV_DOWN:
1096                 ip_mc_down(in_dev);
1097                 break;
1098         case NETDEV_CHANGEMTU:
1099                 if (dev->mtu >= 68)
1100                         break;
1101                 /* MTU fell below 68: disable IP by falling through to the destroy path */
1102         case NETDEV_UNREGISTER:
1103                 inetdev_destroy(in_dev);
1104                 break;
1105         case NETDEV_CHANGENAME:
1106                 /* Do not notify about label change, this event is
1107                  * not interesting to applications using netlink.
1108                  */
1109                 inetdev_changename(dev, in_dev);
1110
1111                 devinet_sysctl_unregister(in_dev); /* re-register so the sysctl entry is created from the new dev->name */
1112                 devinet_sysctl_register(in_dev);
1113                 break;
1114         }
1115 out:
1116         return NOTIFY_DONE;
1117 }
1118
/* Notifier block that routes netdevice events to inetdev_event(). */
1119 static struct notifier_block ip_netdev_notifier = {
1120         .notifier_call =inetdev_event,
1121 };
1122
1123 static inline size_t inet_nlmsg_size(void)
1124 {
1125         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1126                + nla_total_size(4) /* IFA_ADDRESS */
1127                + nla_total_size(4) /* IFA_LOCAL */
1128                + nla_total_size(4) /* IFA_BROADCAST */
1129                + nla_total_size(4) /* IFA_ANYCAST */
1130                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1131 }
1132
/*
 * Serialize one IPv4 address into skb as a netlink message.
 *
 * skb:   buffer the message is appended to
 * ifa:   address to describe
 * pid, seq, event, flags: passed straight to nlmsg_put() for the header
 *
 * Returns the result of nlmsg_end() on success, or -EMSGSIZE when the
 * header or an attribute does not fit (the partial message is cancelled).
 * NOTE(review): the NLA_PUT_* macros are defined elsewhere; they presumably
 * jump to the nla_put_failure label when skb runs out of room.
 */
1133 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1134                             u32 pid, u32 seq, int event, unsigned int flags)
1135 {
1136         struct ifaddrmsg *ifm;
1137         struct nlmsghdr  *nlh;
1138
1139         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1140         if (nlh == NULL)
1141                 return -EMSGSIZE;
1142
1143         ifm = nlmsg_data(nlh);
1144         ifm->ifa_family = AF_INET;
1145         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1146         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; /* always reported as permanent */
1147         ifm->ifa_scope = ifa->ifa_scope;
1148         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1149
1150         if (ifa->ifa_address) /* each attribute is emitted only when set (non-zero) */
1151                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1152
1153         if (ifa->ifa_local)
1154                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1155
1156         if (ifa->ifa_broadcast)
1157                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1158
1159         if (ifa->ifa_anycast)
1160                 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1161
1162         if (ifa->ifa_label[0])
1163                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1164
1165         return nlmsg_end(skb, nlh);
1166
1167 nla_put_failure:
1168         nlmsg_cancel(skb, nlh);
1169         return -EMSGSIZE;
1170 }
1171
/*
 * RTM_GETADDR dump callback: append one RTM_NEWADDR message per IPv4
 * address of every device in init_net to skb.
 *
 * Resume state is kept in cb->args: args[0] is the device index to
 * continue from and args[1] the address index within that device.  Both
 * are written back before returning.  Requests from a namespace other
 * than init_net dump nothing and return 0; otherwise skb->len is returned.
 */
1172 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1173 {
1174         struct net *net = skb->sk->sk_net;
1175         int idx, ip_idx;
1176         struct net_device *dev;
1177         struct in_device *in_dev;
1178         struct in_ifaddr *ifa;
1179         int s_ip_idx, s_idx = cb->args[0];
1180
1181         if (net != &init_net)
1182                 return 0;
1183
1184         s_ip_idx = ip_idx = cb->args[1];
1185         idx = 0;
1186         for_each_netdev(&init_net, dev) {
1187                 if (idx < s_idx) /* device already dumped by an earlier call */
1188                         goto cont;
1189                 if (idx > s_idx) /* past the resume device: start from its first address */
1190                         s_ip_idx = 0;
1191                 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1192                         goto cont;
1193
1194                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1195                      ifa = ifa->ifa_next, ip_idx++) {
1196                         if (ip_idx < s_ip_idx)
1197                                 continue;
1198                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1199                                              cb->nlh->nlmsg_seq,
1200                                              RTM_NEWADDR, NLM_F_MULTI) <= 0)
1201                                 goto done; /* stop here; idx/ip_idx record where to resume */
1202                 }
1203 cont:
1204                 idx++;
1205         }
1206
1207 done:
1208         cb->args[0] = idx;
1209         cb->args[1] = ip_idx;
1210
1211         return skb->len;
1212 }
1213
1214 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1215                       u32 pid)
1216 {
1217         struct sk_buff *skb;
1218         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1219         int err = -ENOBUFS;
1220
1221         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1222         if (skb == NULL)
1223                 goto errout;
1224
1225         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1226         if (err < 0) {
1227                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1228                 WARN_ON(err == -EMSGSIZE);
1229                 kfree_skb(skb);
1230                 goto errout;
1231         }
1232         err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1233 errout:
1234         if (err < 0)
1235                 rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_IFADDR, err);
1236 }
1237
1238 #ifdef CONFIG_SYSCTL
1239
1240 static void devinet_copy_dflt_conf(struct net *net, int i)
1241 {
1242         struct net_device *dev;
1243
1244         read_lock(&dev_base_lock);
1245         for_each_netdev(net, dev) {
1246                 struct in_device *in_dev;
1247                 rcu_read_lock();
1248                 in_dev = __in_dev_get_rcu(dev);
1249                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1250                         in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
1251                 rcu_read_unlock();
1252         }
1253         read_unlock(&dev_base_lock);
1254 }
1255
1256 static void inet_forward_change(struct net *net)
1257 {
1258         struct net_device *dev;
1259         int on = IPV4_DEVCONF_ALL(FORWARDING);
1260
1261         IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
1262         IPV4_DEVCONF_DFLT(FORWARDING) = on;
1263
1264         read_lock(&dev_base_lock);
1265         for_each_netdev(net, dev) {
1266                 struct in_device *in_dev;
1267                 rcu_read_lock();
1268                 in_dev = __in_dev_get_rcu(dev);
1269                 if (in_dev)
1270                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1271                 rcu_read_unlock();
1272         }
1273         read_unlock(&dev_base_lock);
1274
1275         rt_cache_flush(0);
1276 }
1277
1278 static int devinet_conf_proc(ctl_table *ctl, int write,
1279                              struct file* filp, void __user *buffer,
1280                              size_t *lenp, loff_t *ppos)
1281 {
1282         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1283
1284         if (write) {
1285                 struct ipv4_devconf *cnf = ctl->extra1;
1286                 struct net *net = ctl->extra2;
1287                 int i = (int *)ctl->data - cnf->data;
1288
1289                 set_bit(i, cnf->state);
1290
1291                 if (cnf == &ipv4_devconf_dflt)
1292                         devinet_copy_dflt_conf(net, i);
1293         }
1294
1295         return ret;
1296 }
1297
1298 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1299                                void __user *oldval, size_t __user *oldlenp,
1300                                void __user *newval, size_t newlen)
1301 {
1302         struct ipv4_devconf *cnf;
1303         struct net *net;
1304         int *valp = table->data;
1305         int new;
1306         int i;
1307
1308         if (!newval || !newlen)
1309                 return 0;
1310
1311         if (newlen != sizeof(int))
1312                 return -EINVAL;
1313
1314         if (get_user(new, (int __user *)newval))
1315                 return -EFAULT;
1316
1317         if (new == *valp)
1318                 return 0;
1319
1320         if (oldval && oldlenp) {
1321                 size_t len;
1322
1323                 if (get_user(len, oldlenp))
1324                         return -EFAULT;
1325
1326                 if (len) {
1327                         if (len > table->maxlen)
1328                                 len = table->maxlen;
1329                         if (copy_to_user(oldval, valp, len))
1330                                 return -EFAULT;
1331                         if (put_user(len, oldlenp))
1332                                 return -EFAULT;
1333                 }
1334         }
1335
1336         *valp = new;
1337
1338         cnf = table->extra1;
1339         net = table->extra2;
1340         i = (int *)table->data - cnf->data;
1341
1342         set_bit(i, cnf->state);
1343
1344         if (cnf == &ipv4_devconf_dflt)
1345                 devinet_copy_dflt_conf(net, i);
1346
1347         return 1;
1348 }
1349
1350 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1351                                   struct file* filp, void __user *buffer,
1352                                   size_t *lenp, loff_t *ppos)
1353 {
1354         int *valp = ctl->data;
1355         int val = *valp;
1356         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1357
1358         if (write && *valp != val) {
1359                 struct net *net = ctl->extra2;
1360
1361                 if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
1362                         inet_forward_change(net);
1363                 else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
1364                         rt_cache_flush(0);
1365         }
1366
1367         return ret;
1368 }
1369
1370 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1371                          struct file* filp, void __user *buffer,
1372                          size_t *lenp, loff_t *ppos)
1373 {
1374         int *valp = ctl->data;
1375         int val = *valp;
1376         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1377
1378         if (write && *valp != val)
1379                 rt_cache_flush(0);
1380
1381         return ret;
1382 }
1383
1384 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1385                                   void __user *oldval, size_t __user *oldlenp,
1386                                   void __user *newval, size_t newlen)
1387 {
1388         int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1389                                       newval, newlen);
1390
1391         if (ret == 1)
1392                 rt_cache_flush(0);
1393
1394         return ret;
1395 }
1396
1397
/*
 * Build one ctl_table initializer for devconf attribute `attr`.
 *   name   - procname of the entry
 *   mval   - file mode
 *   proc   - proc_handler
 *   sysctl - strategy routine
 * .data points into ipv4_devconf.data at index NET_IPV4_CONF_<attr> - 1.
 */
1398 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
1399         { \
1400                 .ctl_name       = NET_IPV4_CONF_ ## attr, \
1401                 .procname       = name, \
1402                 .data           = ipv4_devconf.data + \
1403                                   NET_IPV4_CONF_ ## attr - 1, \
1404                 .maxlen         = sizeof(int), \
1405                 .mode           = mval, \
1406                 .proc_handler   = proc, \
1407                 .strategy       = sysctl, \
1408                 .extra1         = &ipv4_devconf, \
1409         }
1410
/* Mode 0644 entry using the generic devconf handlers. */
1411 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1412         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
1413                              devinet_conf_sysctl)
1414
/* Mode 0444 entry using the generic devconf handlers. */
1415 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1416         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
1417                              devinet_conf_sysctl)
1418
/* Mode 0644 entry with caller-supplied handlers. */
1419 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
1420         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)
1421
/* Mode 0644 entry whose handlers flush the routing cache on change. */
1422 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1423         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
1424                                      ipv4_doint_and_flush_strategy)
1425
/*
 * Template sysctl table for one ipv4_devconf instance.
 * __devinet_sysctl_register() duplicates it with kmemdup() and rebases the
 * .data/.extra1 pointers (initialized here against ipv4_devconf) onto the
 * devconf being registered.
 *
 * sysctl_header - header returned when the copy is registered
 * devinet_vars  - one entry per devconf attribute; remaining slots are
 *                 zero-initialized
 * dev_name      - private copy of the directory name used for the table
 */
1426 static struct devinet_sysctl_table {
1427         struct ctl_table_header *sysctl_header;
1428         struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1429         char *dev_name;
1430 } devinet_sysctl = {
1431         .devinet_vars = {
1432                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1433                                              devinet_sysctl_forward,
1434                                              devinet_conf_sysctl),
1435                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1436
1437                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1438                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1439                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1440                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1441                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1442                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1443                                         "accept_source_route"),
1444                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1445                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1446                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1447                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1448                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1449                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1450                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1451                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1452                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1453
1454                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1455                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1456                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1457                                               "force_igmp_version"),
1458                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1459                                               "promote_secondaries"),
1460         },
1461 };
1462
1463 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1464                 int ctl_name, struct ipv4_devconf *p)
1465 {
1466         int i;
1467         struct devinet_sysctl_table *t;
1468
1469 #define DEVINET_CTL_PATH_DEV    3
1470
1471         struct ctl_path devinet_ctl_path[] = {
1472                 { .procname = "net", .ctl_name = CTL_NET, },
1473                 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1474                 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1475                 { /* to be set */ },
1476                 { },
1477         };
1478
1479         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1480         if (!t)
1481                 goto out;
1482
1483         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1484                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1485                 t->devinet_vars[i].extra1 = p;
1486                 t->devinet_vars[i].extra2 = net;
1487         }
1488
1489         /*
1490          * Make a copy of dev_name, because '.procname' is regarded as const
1491          * by sysctl and we wouldn't want anyone to change it under our feet
1492          * (see SIOCSIFNAME).
1493          */
1494         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1495         if (!t->dev_name)
1496                 goto free;
1497
1498         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1499         devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1500
1501         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1502                         t->devinet_vars);
1503         if (!t->sysctl_header)
1504                 goto free_procname;
1505
1506         p->sysctl = t;
1507         return 0;
1508
1509 free_procname:
1510         kfree(t->dev_name);
1511 free:
1512         kfree(t);
1513 out:
1514         return -ENOBUFS;
1515 }
1516
1517 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1518 {
1519         struct devinet_sysctl_table *t = cnf->sysctl;
1520
1521         if (t == NULL)
1522                 return;
1523
1524         cnf->sysctl = NULL;
1525         unregister_sysctl_table(t->sysctl_header);
1526         kfree(t->dev_name);
1527         kfree(t);
1528 }
1529
/*
 * Register the sysctl entries of an in_device: first the neighbour (ARP)
 * parameters, then the per-device conf table named after the device.
 * The return value of __devinet_sysctl_register() is not checked.
 */
1530 static void devinet_sysctl_register(struct in_device *idev)
1531 {
1532         neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1533                         NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1534         __devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
1535                         idev->dev->ifindex, &idev->cnf);
1536 }
1537
/*
 * Remove the sysctl entries of an in_device: the per-device conf table
 * first, then the neighbour parameters.
 */
1538 static void devinet_sysctl_unregister(struct in_device *idev)
1539 {
1540         __devinet_sysctl_unregister(&idev->cnf);
1541         neigh_sysctl_unregister(idev->arp_parms);
1542 }
1543 #endif
1544
1545 static struct ctl_table ctl_forward_entry[] = {
1546         {
1547                 .ctl_name       = NET_IPV4_FORWARD,
1548                 .procname       = "ip_forward",
1549                 .data           = &ipv4_devconf.data[
1550                                         NET_IPV4_CONF_FORWARDING - 1],
1551                 .maxlen         = sizeof(int),
1552                 .mode           = 0644,
1553                 .proc_handler   = devinet_sysctl_forward,
1554                 .strategy       = devinet_conf_sysctl,
1555                 .extra1         = &ipv4_devconf,
1556                 .extra2         = &init_net,
1557         },
1558         { },
1559 };
1560
1561 static __net_initdata struct ctl_path net_ipv4_path[] = {
1562         { .procname = "net", .ctl_name = CTL_NET, },
1563         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1564         { },
1565 };
1566
1567 static __net_init int devinet_init_net(struct net *net)
1568 {
1569         int err;
1570         struct ctl_table *tbl;
1571         struct ipv4_devconf *all, *dflt;
1572         struct ctl_table_header *forw_hdr;
1573
1574         err = -ENOMEM;
1575         all = &ipv4_devconf;
1576         dflt = &ipv4_devconf_dflt;
1577         tbl = ctl_forward_entry;
1578
1579         if (net != &init_net) {
1580                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1581                 if (all == NULL)
1582                         goto err_alloc_all;
1583
1584                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1585                 if (dflt == NULL)
1586                         goto err_alloc_dflt;
1587
1588                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1589                 if (tbl == NULL)
1590                         goto err_alloc_ctl;
1591
1592                 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1593                 tbl[0].extra1 = all;
1594                 tbl[0].extra2 = net;
1595         }
1596
1597 #ifdef CONFIG_SYSCTL
1598         err = __devinet_sysctl_register(net, "all",
1599                         NET_PROTO_CONF_ALL, all);
1600         if (err < 0)
1601                 goto err_reg_all;
1602
1603         err = __devinet_sysctl_register(net, "default",
1604                         NET_PROTO_CONF_DEFAULT, dflt);
1605         if (err < 0)
1606                 goto err_reg_dflt;
1607
1608         err = -ENOMEM;
1609         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1610         if (forw_hdr == NULL)
1611                 goto err_reg_ctl;
1612 #endif
1613
1614         net->ipv4.forw_hdr = forw_hdr;
1615         net->ipv4.devconf_all = all;
1616         net->ipv4.devconf_dflt = dflt;
1617         return 0;
1618
1619 #ifdef CONFIG_SYSCTL
1620 err_reg_ctl:
1621         __devinet_sysctl_unregister(dflt);
1622 err_reg_dflt:
1623         __devinet_sysctl_unregister(all);
1624 err_reg_all:
1625         if (tbl != ctl_forward_entry)
1626                 kfree(tbl);
1627 #endif
1628 err_alloc_ctl:
1629         if (dflt != &ipv4_devconf_dflt)
1630                 kfree(dflt);
1631 err_alloc_dflt:
1632         if (all != &ipv4_devconf)
1633                 kfree(all);
1634 err_alloc_all:
1635         return err;
1636 }
1637
1638 static __net_exit void devinet_exit_net(struct net *net)
1639 {
1640         struct ctl_table *tbl;
1641
1642         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1643 #ifdef CONFIG_SYSCTL
1644         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1645         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1646         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1647 #endif
1648         kfree(tbl);
1649         kfree(net->ipv4.devconf_dflt);
1650         kfree(net->ipv4.devconf_all);
1651 }
1652
/* Per-namespace hooks: devinet_init_net() on creation, devinet_exit_net() on teardown. */
1653 static __net_initdata struct pernet_operations devinet_ops = {
1654         .init = devinet_init_net,
1655         .exit = devinet_exit_net,
1656 };
1657
/*
 * Boot-time initialization of IPv4 device support: registers the
 * per-namespace operations, the PF_INET gifconf handler, the netdevice
 * notifier and the rtnetlink handlers for RTM_NEWADDR, RTM_DELADDR and
 * RTM_GETADDR (dump only).  None of the registration results are checked.
 */
1658 void __init devinet_init(void)
1659 {
1660         register_pernet_subsys(&devinet_ops);
1661
1662         register_gifconf(PF_INET, inet_gifconf);
1663         register_netdevice_notifier(&ip_netdev_notifier);
1664
1665         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1666         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1667         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1668 }
1669
1670 EXPORT_SYMBOL(in_dev_finish_destroy);
1671 EXPORT_SYMBOL(inet_select_addr);
1672 EXPORT_SYMBOL(inetdev_by_index);
1673 EXPORT_SYMBOL(register_inetaddr_notifier);
1674 EXPORT_SYMBOL(unregister_inetaddr_notifier);