]> Pileus Git - ~andy/linux/blob - net/ipv6/route.c
ipv6/multipath: remove flag NLM_F_EXCL after the first nexthop
[~andy/linux] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex,
91                                            unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93                                            const struct in6_addr *prefix, int prefixlen,
94                                            const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99         struct rt6_info *rt = (struct rt6_info *) dst;
100         struct inet_peer *peer;
101         u32 *p = NULL;
102
103         if (!(rt->dst.flags & DST_HOST))
104                 return NULL;
105
106         peer = rt6_get_peer_create(rt);
107         if (peer) {
108                 u32 *old_p = __DST_METRICS_PTR(old);
109                 unsigned long prev, new;
110
111                 p = peer->metrics;
112                 if (inet_metrics_new(peer))
113                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115                 new = (unsigned long) p;
116                 prev = cmpxchg(&dst->_metrics, old, new);
117
118                 if (prev != old) {
119                         p = __DST_METRICS_PTR(prev);
120                         if (prev & DST_METRICS_READ_ONLY)
121                                 p = NULL;
122                 }
123         }
124         return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128                                              struct sk_buff *skb,
129                                              const void *daddr)
130 {
131         struct in6_addr *p = &rt->rt6i_gateway;
132
133         if (!ipv6_addr_any(p))
134                 return (const void *) p;
135         else if (skb)
136                 return &ipv6_hdr(skb)->daddr;
137         return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141                                           struct sk_buff *skb,
142                                           const void *daddr)
143 {
144         struct rt6_info *rt = (struct rt6_info *) dst;
145         struct neighbour *n;
146
147         daddr = choose_neigh_daddr(rt, skb, daddr);
148         n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
149         if (n)
150                 return n;
151         return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
154 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
155 {
156         struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
157         if (!n) {
158                 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
159                 if (IS_ERR(n))
160                         return PTR_ERR(n);
161         }
162         rt->n = n;
163
164         return 0;
165 }
166
167 static struct dst_ops ip6_dst_ops_template = {
168         .family                 =       AF_INET6,
169         .protocol               =       cpu_to_be16(ETH_P_IPV6),
170         .gc                     =       ip6_dst_gc,
171         .gc_thresh              =       1024,
172         .check                  =       ip6_dst_check,
173         .default_advmss         =       ip6_default_advmss,
174         .mtu                    =       ip6_mtu,
175         .cow_metrics            =       ipv6_cow_metrics,
176         .destroy                =       ip6_dst_destroy,
177         .ifdown                 =       ip6_dst_ifdown,
178         .negative_advice        =       ip6_negative_advice,
179         .link_failure           =       ip6_link_failure,
180         .update_pmtu            =       ip6_rt_update_pmtu,
181         .redirect               =       rt6_do_redirect,
182         .local_out              =       __ip6_local_out,
183         .neigh_lookup           =       ip6_neigh_lookup,
184 };
185
186 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
187 {
188         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
189
190         return mtu ? : dst->dev->mtu;
191 }
192
193 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
194                                          struct sk_buff *skb, u32 mtu)
195 {
196 }
197
/* redirect hook for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
202
203 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
204                                          unsigned long old)
205 {
206         return NULL;
207 }
208
209 static struct dst_ops ip6_dst_blackhole_ops = {
210         .family                 =       AF_INET6,
211         .protocol               =       cpu_to_be16(ETH_P_IPV6),
212         .destroy                =       ip6_dst_destroy,
213         .check                  =       ip6_dst_check,
214         .mtu                    =       ip6_blackhole_mtu,
215         .default_advmss         =       ip6_default_advmss,
216         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
217         .redirect               =       ip6_rt_blackhole_redirect,
218         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
219         .neigh_lookup           =       ip6_neigh_lookup,
220 };
221
222 static const u32 ip6_template_metrics[RTAX_MAX] = {
223         [RTAX_HOPLIMIT - 1] = 255,
224 };
225
226 static const struct rt6_info ip6_null_entry_template = {
227         .dst = {
228                 .__refcnt       = ATOMIC_INIT(1),
229                 .__use          = 1,
230                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
231                 .error          = -ENETUNREACH,
232                 .input          = ip6_pkt_discard,
233                 .output         = ip6_pkt_discard_out,
234         },
235         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
236         .rt6i_protocol  = RTPROT_KERNEL,
237         .rt6i_metric    = ~(u32) 0,
238         .rt6i_ref       = ATOMIC_INIT(1),
239 };
240
241 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
242
243 static int ip6_pkt_prohibit(struct sk_buff *skb);
244 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
245
246 static const struct rt6_info ip6_prohibit_entry_template = {
247         .dst = {
248                 .__refcnt       = ATOMIC_INIT(1),
249                 .__use          = 1,
250                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
251                 .error          = -EACCES,
252                 .input          = ip6_pkt_prohibit,
253                 .output         = ip6_pkt_prohibit_out,
254         },
255         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
256         .rt6i_protocol  = RTPROT_KERNEL,
257         .rt6i_metric    = ~(u32) 0,
258         .rt6i_ref       = ATOMIC_INIT(1),
259 };
260
261 static const struct rt6_info ip6_blk_hole_entry_template = {
262         .dst = {
263                 .__refcnt       = ATOMIC_INIT(1),
264                 .__use          = 1,
265                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
266                 .error          = -EINVAL,
267                 .input          = dst_discard,
268                 .output         = dst_discard,
269         },
270         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
271         .rt6i_protocol  = RTPROT_KERNEL,
272         .rt6i_metric    = ~(u32) 0,
273         .rt6i_ref       = ATOMIC_INIT(1),
274 };
275
276 #endif
277
278 /* allocate dst with ip6_dst_ops */
279 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
280                                              struct net_device *dev,
281                                              int flags,
282                                              struct fib6_table *table)
283 {
284         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
285                                         0, DST_OBSOLETE_FORCE_CHK, flags);
286
287         if (rt) {
288                 struct dst_entry *dst = &rt->dst;
289
290                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
291                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
292                 rt->rt6i_genid = rt_genid(net);
293                 INIT_LIST_HEAD(&rt->rt6i_siblings);
294                 rt->rt6i_nsiblings = 0;
295         }
296         return rt;
297 }
298
299 static void ip6_dst_destroy(struct dst_entry *dst)
300 {
301         struct rt6_info *rt = (struct rt6_info *)dst;
302         struct inet6_dev *idev = rt->rt6i_idev;
303
304         if (rt->n)
305                 neigh_release(rt->n);
306
307         if (!(rt->dst.flags & DST_HOST))
308                 dst_destroy_metrics_generic(dst);
309
310         if (idev) {
311                 rt->rt6i_idev = NULL;
312                 in6_dev_put(idev);
313         }
314
315         if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
316                 dst_release(dst->from);
317
318         if (rt6_has_peer(rt)) {
319                 struct inet_peer *peer = rt6_peer_ptr(rt);
320                 inet_putpeer(peer);
321         }
322 }
323
324 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
325
326 static u32 rt6_peer_genid(void)
327 {
328         return atomic_read(&__rt6_peer_genid);
329 }
330
331 void rt6_bind_peer(struct rt6_info *rt, int create)
332 {
333         struct inet_peer_base *base;
334         struct inet_peer *peer;
335
336         base = inetpeer_base_ptr(rt->_rt6i_peer);
337         if (!base)
338                 return;
339
340         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
341         if (peer) {
342                 if (!rt6_set_peer(rt, peer))
343                         inet_putpeer(peer);
344                 else
345                         rt->rt6i_peer_genid = rt6_peer_genid();
346         }
347 }
348
349 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
350                            int how)
351 {
352         struct rt6_info *rt = (struct rt6_info *)dst;
353         struct inet6_dev *idev = rt->rt6i_idev;
354         struct net_device *loopback_dev =
355                 dev_net(dev)->loopback_dev;
356
357         if (dev != loopback_dev) {
358                 if (idev && idev->dev == dev) {
359                         struct inet6_dev *loopback_idev =
360                                 in6_dev_get(loopback_dev);
361                         if (loopback_idev) {
362                                 rt->rt6i_idev = loopback_idev;
363                                 in6_dev_put(idev);
364                         }
365                 }
366                 if (rt->n && rt->n->dev == dev) {
367                         rt->n->dev = loopback_dev;
368                         dev_hold(loopback_dev);
369                         dev_put(dev);
370                 }
371         }
372 }
373
374 static bool rt6_check_expired(const struct rt6_info *rt)
375 {
376         if (rt->rt6i_flags & RTF_EXPIRES) {
377                 if (time_after(jiffies, rt->dst.expires))
378                         return true;
379         } else if (rt->dst.from) {
380                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
381         }
382         return false;
383 }
384
385 static bool rt6_need_strict(const struct in6_addr *daddr)
386 {
387         return ipv6_addr_type(daddr) &
388                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
389 }
390
391 /* Multipath route selection:
392  *   Hash based function using packet header and flowlabel.
393  * Adapted from fib_info_hashfn()
394  */
395 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
396                                const struct flowi6 *fl6)
397 {
398         unsigned int val = fl6->flowi6_proto;
399
400         val ^= (__force u32)fl6->daddr.s6_addr32[0];
401         val ^= (__force u32)fl6->daddr.s6_addr32[1];
402         val ^= (__force u32)fl6->daddr.s6_addr32[2];
403         val ^= (__force u32)fl6->daddr.s6_addr32[3];
404
405         val ^= (__force u32)fl6->saddr.s6_addr32[0];
406         val ^= (__force u32)fl6->saddr.s6_addr32[1];
407         val ^= (__force u32)fl6->saddr.s6_addr32[2];
408         val ^= (__force u32)fl6->saddr.s6_addr32[3];
409
410         /* Work only if this not encapsulated */
411         switch (fl6->flowi6_proto) {
412         case IPPROTO_UDP:
413         case IPPROTO_TCP:
414         case IPPROTO_SCTP:
415                 val ^= (__force u16)fl6->fl6_sport;
416                 val ^= (__force u16)fl6->fl6_dport;
417                 break;
418
419         case IPPROTO_ICMPV6:
420                 val ^= (__force u16)fl6->fl6_icmp_type;
421                 val ^= (__force u16)fl6->fl6_icmp_code;
422                 break;
423         }
424         /* RFC6438 recommands to use flowlabel */
425         val ^= (__force u32)fl6->flowlabel;
426
427         /* Perhaps, we need to tune, this function? */
428         val = val ^ (val >> 7) ^ (val >> 12);
429         return val % candidate_count;
430 }
431
432 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
433                                              struct flowi6 *fl6)
434 {
435         struct rt6_info *sibling, *next_sibling;
436         int route_choosen;
437
438         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
439         /* Don't change the route, if route_choosen == 0
440          * (siblings does not include ourself)
441          */
442         if (route_choosen)
443                 list_for_each_entry_safe(sibling, next_sibling,
444                                 &match->rt6i_siblings, rt6i_siblings) {
445                         route_choosen--;
446                         if (route_choosen == 0) {
447                                 match = sibling;
448                                 break;
449                         }
450                 }
451         return match;
452 }
453
454 /*
455  *      Route lookup. Any table->tb6_lock is implied.
456  */
457
458 static inline struct rt6_info *rt6_device_match(struct net *net,
459                                                     struct rt6_info *rt,
460                                                     const struct in6_addr *saddr,
461                                                     int oif,
462                                                     int flags)
463 {
464         struct rt6_info *local = NULL;
465         struct rt6_info *sprt;
466
467         if (!oif && ipv6_addr_any(saddr))
468                 goto out;
469
470         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
471                 struct net_device *dev = sprt->dst.dev;
472
473                 if (oif) {
474                         if (dev->ifindex == oif)
475                                 return sprt;
476                         if (dev->flags & IFF_LOOPBACK) {
477                                 if (!sprt->rt6i_idev ||
478                                     sprt->rt6i_idev->dev->ifindex != oif) {
479                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
480                                                 continue;
481                                         if (local && (!oif ||
482                                                       local->rt6i_idev->dev->ifindex == oif))
483                                                 continue;
484                                 }
485                                 local = sprt;
486                         }
487                 } else {
488                         if (ipv6_chk_addr(net, saddr, dev,
489                                           flags & RT6_LOOKUP_F_IFACE))
490                                 return sprt;
491                 }
492         }
493
494         if (oif) {
495                 if (local)
496                         return local;
497
498                 if (flags & RT6_LOOKUP_F_IFACE)
499                         return net->ipv6.ip6_null_entry;
500         }
501 out:
502         return rt;
503 }
504
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation to the
 * router behind @rt when its neighbour entry is not in a NUD_VALID state.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; here a probe is sent only if more than
 * rt->rt6i_idev->cnf.rtr_probe_interval jiffies passed since neigh->updated.
 *
 * @rt may be NULL, and a route without a neighbour is ignored.
 *
 * neigh->updated is modified here, so neigh->lock is taken for writing:
 * holding it only for reading would let two CPUs pass the rate-limit check
 * at the same time and both emit a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	neigh = rt ? rt->n : NULL;
	/* Cheap unlocked pre-check; repeated below under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated +
				 rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Solicit the router via its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
541
542 /*
543  * Default Router Selection (RFC 2461 6.3.6)
544  */
545 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
546 {
547         struct net_device *dev = rt->dst.dev;
548         if (!oif || dev->ifindex == oif)
549                 return 2;
550         if ((dev->flags & IFF_LOOPBACK) &&
551             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
552                 return 1;
553         return 0;
554 }
555
556 static inline int rt6_check_neigh(struct rt6_info *rt)
557 {
558         struct neighbour *neigh;
559         int m;
560
561         neigh = rt->n;
562         if (rt->rt6i_flags & RTF_NONEXTHOP ||
563             !(rt->rt6i_flags & RTF_GATEWAY))
564                 m = 1;
565         else if (neigh) {
566                 read_lock_bh(&neigh->lock);
567                 if (neigh->nud_state & NUD_VALID)
568                         m = 2;
569 #ifdef CONFIG_IPV6_ROUTER_PREF
570                 else if (neigh->nud_state & NUD_FAILED)
571                         m = 0;
572 #endif
573                 else
574                         m = 1;
575                 read_unlock_bh(&neigh->lock);
576         } else
577                 m = 0;
578         return m;
579 }
580
581 static int rt6_score_route(struct rt6_info *rt, int oif,
582                            int strict)
583 {
584         int m, n;
585
586         m = rt6_check_dev(rt, oif);
587         if (!m && (strict & RT6_LOOKUP_F_IFACE))
588                 return -1;
589 #ifdef CONFIG_IPV6_ROUTER_PREF
590         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
591 #endif
592         n = rt6_check_neigh(rt);
593         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
594                 return -1;
595         return m;
596 }
597
598 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
599                                    int *mpri, struct rt6_info *match)
600 {
601         int m;
602
603         if (rt6_check_expired(rt))
604                 goto out;
605
606         m = rt6_score_route(rt, oif, strict);
607         if (m < 0)
608                 goto out;
609
610         if (m > *mpri) {
611                 if (strict & RT6_LOOKUP_F_REACHABLE)
612                         rt6_probe(match);
613                 *mpri = m;
614                 match = rt;
615         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
616                 rt6_probe(rt);
617         }
618
619 out:
620         return match;
621 }
622
623 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
624                                      struct rt6_info *rr_head,
625                                      u32 metric, int oif, int strict)
626 {
627         struct rt6_info *rt, *match;
628         int mpri = -1;
629
630         match = NULL;
631         for (rt = rr_head; rt && rt->rt6i_metric == metric;
632              rt = rt->dst.rt6_next)
633                 match = find_match(rt, oif, strict, &mpri, match);
634         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
635              rt = rt->dst.rt6_next)
636                 match = find_match(rt, oif, strict, &mpri, match);
637
638         return match;
639 }
640
641 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
642 {
643         struct rt6_info *match, *rt0;
644         struct net *net;
645
646         rt0 = fn->rr_ptr;
647         if (!rt0)
648                 fn->rr_ptr = rt0 = fn->leaf;
649
650         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
651
652         if (!match &&
653             (strict & RT6_LOOKUP_F_REACHABLE)) {
654                 struct rt6_info *next = rt0->dst.rt6_next;
655
656                 /* no entries matched; do round-robin */
657                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
658                         next = fn->leaf;
659
660                 if (next != rt0)
661                         fn->rr_ptr = next;
662         }
663
664         net = dev_net(rt0->dst.dev);
665         return match ? match : net->ipv6.ip6_null_entry;
666 }
667
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * A zero lifetime removes an existing matching route; a non-zero lifetime
 * adds the route if missing or refreshes the preference bits of an existing
 * one, then sets or clears its expiry.
 *
 * Returns 0 on success, or -EINVAL for a short option, an inconsistent
 * length/prefix_len pair or an invalid preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *)opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Use the prefix in place when the option length is 3. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Lifetime 0 withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (rt) {
		if (addrconf_finite_timeout(lifetime))
			rt6_set_expires(rt, jiffies + HZ * lifetime);
		else
			rt6_clean_expires(rt);

		dst_release(&rt->dst);
	}
	return 0;
}
#endif
740
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed lookup.
 *
 * Expands in a function that provides:
 *   - local variables `rt` (struct rt6_info *) and `fn` (struct fib6_node *)
 *   - labels `out` and `restart`
 *
 * If `rt` is the namespace's ip6_null_entry, move `fn` towards the root:
 * jump to `out` once the tree root (RTN_TL_ROOT) is reached, or to
 * `restart` as soon as a node carrying route info (RTN_RTINFO) is found.
 * When the parent has a subtree other than the current node, the subtree
 * is searched for @saddr instead of stepping to the parent directly.
 *
 * Both macro parameters are parenthesised so that arbitrary expressions
 * can be passed safely.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, (saddr));	\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
758
759 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
760                                              struct fib6_table *table,
761                                              struct flowi6 *fl6, int flags)
762 {
763         struct fib6_node *fn;
764         struct rt6_info *rt;
765
766         read_lock_bh(&table->tb6_lock);
767         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
768 restart:
769         rt = fn->leaf;
770         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
771         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
772                 rt = rt6_multipath_select(rt, fl6);
773         BACKTRACK(net, &fl6->saddr);
774 out:
775         dst_use(&rt->dst, jiffies);
776         read_unlock_bh(&table->tb6_lock);
777         return rt;
778
779 }
780
781 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
782                                     int flags)
783 {
784         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
785 }
786 EXPORT_SYMBOL_GPL(ip6_route_lookup);
787
788 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
789                             const struct in6_addr *saddr, int oif, int strict)
790 {
791         struct flowi6 fl6 = {
792                 .flowi6_oif = oif,
793                 .daddr = *daddr,
794         };
795         struct dst_entry *dst;
796         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
797
798         if (saddr) {
799                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
800                 flags |= RT6_LOOKUP_F_HAS_SADDR;
801         }
802
803         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
804         if (dst->error == 0)
805                 return (struct rt6_info *) dst;
806
807         dst_release(dst);
808
809         return NULL;
810 }
811
812 EXPORT_SYMBOL(rt6_lookup);
813
814 /* ip6_ins_rt is called with FREE table->tb6_lock.
815    It takes new route entry, the addition fails by any reason the
816    route is freed. In any case, if caller does not hold it, it may
817    be destroyed.
818  */
819
820 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
821 {
822         int err;
823         struct fib6_table *table;
824
825         table = rt->rt6i_table;
826         write_lock_bh(&table->tb6_lock);
827         err = fib6_add(&table->tb6_root, rt, info);
828         write_unlock_bh(&table->tb6_lock);
829
830         return err;
831 }
832
833 int ip6_ins_rt(struct rt6_info *rt)
834 {
835         struct nl_info info = {
836                 .nl_net = dev_net(rt->dst.dev),
837         };
838         return __ip6_ins_rt(rt, &info);
839 }
840
841 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
842                                       const struct in6_addr *daddr,
843                                       const struct in6_addr *saddr)
844 {
845         struct rt6_info *rt;
846
847         /*
848          *      Clone the route.
849          */
850
851         rt = ip6_rt_copy(ort, daddr);
852
853         if (rt) {
854                 int attempts = !in_softirq();
855
856                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
857                         if (ort->rt6i_dst.plen != 128 &&
858                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
859                                 rt->rt6i_flags |= RTF_ANYCAST;
860                         rt->rt6i_gateway = *daddr;
861                 }
862
863                 rt->rt6i_flags |= RTF_CACHE;
864
865 #ifdef CONFIG_IPV6_SUBTREES
866                 if (rt->rt6i_src.plen && saddr) {
867                         rt->rt6i_src.addr = *saddr;
868                         rt->rt6i_src.plen = 128;
869                 }
870 #endif
871
872         retry:
873                 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
874                         struct net *net = dev_net(rt->dst.dev);
875                         int saved_rt_min_interval =
876                                 net->ipv6.sysctl.ip6_rt_gc_min_interval;
877                         int saved_rt_elasticity =
878                                 net->ipv6.sysctl.ip6_rt_gc_elasticity;
879
880                         if (attempts-- > 0) {
881                                 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
882                                 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
883
884                                 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
885
886                                 net->ipv6.sysctl.ip6_rt_gc_elasticity =
887                                         saved_rt_elasticity;
888                                 net->ipv6.sysctl.ip6_rt_gc_min_interval =
889                                         saved_rt_min_interval;
890                                 goto retry;
891                         }
892
893                         net_warn_ratelimited("Neighbour table overflow\n");
894                         dst_free(&rt->dst);
895                         return NULL;
896                 }
897         }
898
899         return rt;
900 }
901
902 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
903                                         const struct in6_addr *daddr)
904 {
905         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
906
907         if (rt) {
908                 rt->rt6i_flags |= RTF_CACHE;
909                 rt->n = neigh_clone(ort->n);
910         }
911         return rt;
912 }
913
/*
 * ip6_pol_route - look up a route for a flow in one FIB table, creating
 * and inserting a cached per-destination route when needed.
 *
 * @net:   network namespace
 * @table: FIB table to search
 * @oif:   interface index passed to rt6_select(); multipath selection is
 *         only attempted when it is 0
 * @fl6:   flow being routed; daddr/saddr are the lookup keys
 * @flags: lookup flags; only RT6_LOOKUP_F_IFACE is consulted here
 *
 * The lookup first runs with RT6_LOOKUP_F_REACHABLE set (unless
 * devconf_all->forwarding is non-zero).  The first time control reaches
 * the "out" label with that flag still set, the flag is cleared and the
 * tree lookup is repeated from "restart_2".
 *
 * If the selected route is neither the null entry nor an RTF_CACHE
 * route, the table lock is dropped and:
 *   - a route without a neighbour and without RTF_NONEXTHOP is copied
 *     with rt6_alloc_cow();
 *   - otherwise a non-DST_HOST route is copied with rt6_alloc_clone();
 *   - otherwise the route is returned as is.
 * The copy is inserted with ip6_ins_rt().  If allocation or insertion
 * fails, the whole lookup is retried, up to 3 attempts in total; after
 * the last attempt the current rt (the copy, or the null entry when the
 * allocation failed) is returned.
 *
 * Every return path has taken a reference with dst_hold() on the
 * returned route, and updates its dst.lastuse and dst.__use.
 *
 * NOTE(review): BACKTRACK() is a macro defined outside this function; it
 * presumably relies on the local names and labels used here (fn, rt,
 * restart, out) - confirm against its definition before renaming any.
 */
914 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
915                                       struct flowi6 *fl6, int flags)
916 {
917         struct fib6_node *fn;
918         struct rt6_info *rt, *nrt;
919         int strict = 0;
920         int attempts = 3;
921         int err;
922         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
923
924         strict |= flags & RT6_LOOKUP_F_IFACE;
925
926 relookup:
927         read_lock_bh(&table->tb6_lock);
928
929 restart_2:
930         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
931
932 restart:
933         rt = rt6_select(fn, oif, strict | reachable);
934         if (rt->rt6i_nsiblings && oif == 0)
935                 rt = rt6_multipath_select(rt, fl6);
936         BACKTRACK(net, &fl6->saddr);
937         if (rt == net->ipv6.ip6_null_entry ||
938             rt->rt6i_flags & RTF_CACHE)
939                 goto out;
940
            /* Pin the route before dropping the table lock. */
941         dst_hold(&rt->dst);
942         read_unlock_bh(&table->tb6_lock);
943
944         if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
945                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
946         else if (!(rt->dst.flags & DST_HOST))
947                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
948         else
949                 goto out2;
950
            /* From here on rt is the copy, or the null entry if allocation failed. */
951         dst_release(&rt->dst);
952         rt = nrt ? : net->ipv6.ip6_null_entry;
953
954         dst_hold(&rt->dst);
955         if (nrt) {
956                 err = ip6_ins_rt(nrt);
957                 if (!err)
958                         goto out2;
959         }
960
961         if (--attempts <= 0)
962                 goto out2;
963
964         /*
965          * Race condition! In the gap, when table->tb6_lock was
966          * released someone could insert this route.  Relookup.
967          */
968         dst_release(&rt->dst);
969         goto relookup;
970
971 out:
            /* First time here with REACHABLE still set: redo the lookup without it. */
972         if (reachable) {
973                 reachable = 0;
974                 goto restart_2;
975         }
976         dst_hold(&rt->dst);
977         read_unlock_bh(&table->tb6_lock);
978 out2:
979         rt->dst.lastuse = jiffies;
980         rt->dst.__use++;
981
982         return rt;
983 }
984
985 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
986                                             struct flowi6 *fl6, int flags)
987 {
988         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
989 }
990
991 static struct dst_entry *ip6_route_input_lookup(struct net *net,
992                                                 struct net_device *dev,
993                                                 struct flowi6 *fl6, int flags)
994 {
995         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
996                 flags |= RT6_LOOKUP_F_IFACE;
997
998         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
999 }
1000
1001 void ip6_route_input(struct sk_buff *skb)
1002 {
1003         const struct ipv6hdr *iph = ipv6_hdr(skb);
1004         struct net *net = dev_net(skb->dev);
1005         int flags = RT6_LOOKUP_F_HAS_SADDR;
1006         struct flowi6 fl6 = {
1007                 .flowi6_iif = skb->dev->ifindex,
1008                 .daddr = iph->daddr,
1009                 .saddr = iph->saddr,
1010                 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
1011                 .flowi6_mark = skb->mark,
1012                 .flowi6_proto = iph->nexthdr,
1013         };
1014
1015         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1016 }
1017
1018 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1019                                              struct flowi6 *fl6, int flags)
1020 {
1021         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1022 }
1023
1024 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1025                                     struct flowi6 *fl6)
1026 {
1027         int flags = 0;
1028
1029         fl6->flowi6_iif = LOOPBACK_IFINDEX;
1030
1031         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1032                 flags |= RT6_LOOKUP_F_IFACE;
1033
1034         if (!ipv6_addr_any(&fl6->saddr))
1035                 flags |= RT6_LOOKUP_F_HAS_SADDR;
1036         else if (sk)
1037                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1038
1039         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1040 }
1041
1042 EXPORT_SYMBOL(ip6_route_output);
1043
1044 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1045 {
1046         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1047         struct dst_entry *new = NULL;
1048
1049         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1050         if (rt) {
1051                 new = &rt->dst;
1052
1053                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1054                 rt6_init_peer(rt, net->ipv6.peers);
1055
1056                 new->__use = 1;
1057                 new->input = dst_discard;
1058                 new->output = dst_discard;
1059
1060                 if (dst_metrics_read_only(&ort->dst))
1061                         new->_metrics = ort->dst._metrics;
1062                 else
1063                         dst_copy_metrics(new, &ort->dst);
1064                 rt->rt6i_idev = ort->rt6i_idev;
1065                 if (rt->rt6i_idev)
1066                         in6_dev_hold(rt->rt6i_idev);
1067
1068                 rt->rt6i_gateway = ort->rt6i_gateway;
1069                 rt->rt6i_flags = ort->rt6i_flags;
1070                 rt6_clean_expires(rt);
1071                 rt->rt6i_metric = 0;
1072
1073                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1074 #ifdef CONFIG_IPV6_SUBTREES
1075                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1076 #endif
1077
1078                 dst_free(new);
1079         }
1080
1081         dst_release(dst_orig);
1082         return new ? new : ERR_PTR(-ENOMEM);
1083 }
1084
1085 /*
1086  *      Destination cache support functions
1087  */
1088
1089 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1090 {
1091         struct rt6_info *rt;
1092
1093         rt = (struct rt6_info *) dst;
1094
1095         /* All IPV6 dsts are created with ->obsolete set to the value
1096          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1097          * into this function always.
1098          */
1099         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1100                 return NULL;
1101
1102         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1103                 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1104                         if (!rt6_has_peer(rt))
1105                                 rt6_bind_peer(rt, 0);
1106                         rt->rt6i_peer_genid = rt6_peer_genid();
1107                 }
1108                 return dst;
1109         }
1110         return NULL;
1111 }
1112
1113 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1114 {
1115         struct rt6_info *rt = (struct rt6_info *) dst;
1116
1117         if (rt) {
1118                 if (rt->rt6i_flags & RTF_CACHE) {
1119                         if (rt6_check_expired(rt)) {
1120                                 ip6_del_rt(rt);
1121                                 dst = NULL;
1122                         }
1123                 } else {
1124                         dst_release(dst);
1125                         dst = NULL;
1126                 }
1127         }
1128         return dst;
1129 }
1130
1131 static void ip6_link_failure(struct sk_buff *skb)
1132 {
1133         struct rt6_info *rt;
1134
1135         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1136
1137         rt = (struct rt6_info *) skb_dst(skb);
1138         if (rt) {
1139                 if (rt->rt6i_flags & RTF_CACHE)
1140                         rt6_update_expires(rt, 0);
1141                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1142                         rt->rt6i_node->fn_sernum = -1;
1143         }
1144 }
1145
1146 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1147                                struct sk_buff *skb, u32 mtu)
1148 {
1149         struct rt6_info *rt6 = (struct rt6_info*)dst;
1150
1151         dst_confirm(dst);
1152         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1153                 struct net *net = dev_net(dst->dev);
1154
1155                 rt6->rt6i_flags |= RTF_MODIFIED;
1156                 if (mtu < IPV6_MIN_MTU) {
1157                         u32 features = dst_metric(dst, RTAX_FEATURES);
1158                         mtu = IPV6_MIN_MTU;
1159                         features |= RTAX_FEATURE_ALLFRAG;
1160                         dst_metric_set(dst, RTAX_FEATURES, features);
1161                 }
1162                 dst_metric_set(dst, RTAX_MTU, mtu);
1163                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1164         }
1165 }
1166
1167 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1168                      int oif, u32 mark)
1169 {
1170         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1171         struct dst_entry *dst;
1172         struct flowi6 fl6;
1173
1174         memset(&fl6, 0, sizeof(fl6));
1175         fl6.flowi6_oif = oif;
1176         fl6.flowi6_mark = mark;
1177         fl6.flowi6_flags = 0;
1178         fl6.daddr = iph->daddr;
1179         fl6.saddr = iph->saddr;
1180         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1181
1182         dst = ip6_route_output(net, NULL, &fl6);
1183         if (!dst->error)
1184                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1185         dst_release(dst);
1186 }
1187 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1188
1189 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1190 {
1191         ip6_update_pmtu(skb, sock_net(sk), mtu,
1192                         sk->sk_bound_dev_if, sk->sk_mark);
1193 }
1194 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1195
1196 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1197 {
1198         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1199         struct dst_entry *dst;
1200         struct flowi6 fl6;
1201
1202         memset(&fl6, 0, sizeof(fl6));
1203         fl6.flowi6_oif = oif;
1204         fl6.flowi6_mark = mark;
1205         fl6.flowi6_flags = 0;
1206         fl6.daddr = iph->daddr;
1207         fl6.saddr = iph->saddr;
1208         fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1209
1210         dst = ip6_route_output(net, NULL, &fl6);
1211         if (!dst->error)
1212                 rt6_do_redirect(dst, NULL, skb);
1213         dst_release(dst);
1214 }
1215 EXPORT_SYMBOL_GPL(ip6_redirect);
1216
1217 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1218 {
1219         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1220 }
1221 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1222
1223 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1224 {
1225         struct net_device *dev = dst->dev;
1226         unsigned int mtu = dst_mtu(dst);
1227         struct net *net = dev_net(dev);
1228
1229         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1230
1231         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1232                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1233
1234         /*
1235          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1236          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1237          * IPV6_MAXPLEN is also valid and means: "any MSS,
1238          * rely only on pmtu discovery"
1239          */
1240         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1241                 mtu = IPV6_MAXPLEN;
1242         return mtu;
1243 }
1244
1245 static unsigned int ip6_mtu(const struct dst_entry *dst)
1246 {
1247         struct inet6_dev *idev;
1248         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1249
1250         if (mtu)
1251                 return mtu;
1252
1253         mtu = IPV6_MIN_MTU;
1254
1255         rcu_read_lock();
1256         idev = __in6_dev_get(dst->dev);
1257         if (idev)
1258                 mtu = idev->cnf.mtu6;
1259         rcu_read_unlock();
1260
1261         return mtu;
1262 }
1263
1264 static struct dst_entry *icmp6_dst_gc_list;
1265 static DEFINE_SPINLOCK(icmp6_dst_lock);
1266
1267 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1268                                   struct neighbour *neigh,
1269                                   struct flowi6 *fl6)
1270 {
1271         struct dst_entry *dst;
1272         struct rt6_info *rt;
1273         struct inet6_dev *idev = in6_dev_get(dev);
1274         struct net *net = dev_net(dev);
1275
1276         if (unlikely(!idev))
1277                 return ERR_PTR(-ENODEV);
1278
1279         rt = ip6_dst_alloc(net, dev, 0, NULL);
1280         if (unlikely(!rt)) {
1281                 in6_dev_put(idev);
1282                 dst = ERR_PTR(-ENOMEM);
1283                 goto out;
1284         }
1285
1286         if (neigh)
1287                 neigh_hold(neigh);
1288         else {
1289                 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1290                 if (IS_ERR(neigh)) {
1291                         in6_dev_put(idev);
1292                         dst_free(&rt->dst);
1293                         return ERR_CAST(neigh);
1294                 }
1295         }
1296
1297         rt->dst.flags |= DST_HOST;
1298         rt->dst.output  = ip6_output;
1299         rt->n = neigh;
1300         atomic_set(&rt->dst.__refcnt, 1);
1301         rt->rt6i_dst.addr = fl6->daddr;
1302         rt->rt6i_dst.plen = 128;
1303         rt->rt6i_idev     = idev;
1304         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1305
1306         spin_lock_bh(&icmp6_dst_lock);
1307         rt->dst.next = icmp6_dst_gc_list;
1308         icmp6_dst_gc_list = &rt->dst;
1309         spin_unlock_bh(&icmp6_dst_lock);
1310
1311         fib6_force_start_gc(net);
1312
1313         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1314
1315 out:
1316         return dst;
1317 }
1318
1319 int icmp6_dst_gc(void)
1320 {
1321         struct dst_entry *dst, **pprev;
1322         int more = 0;
1323
1324         spin_lock_bh(&icmp6_dst_lock);
1325         pprev = &icmp6_dst_gc_list;
1326
1327         while ((dst = *pprev) != NULL) {
1328                 if (!atomic_read(&dst->__refcnt)) {
1329                         *pprev = dst->next;
1330                         dst_free(dst);
1331                 } else {
1332                         pprev = &dst->next;
1333                         ++more;
1334                 }
1335         }
1336
1337         spin_unlock_bh(&icmp6_dst_lock);
1338
1339         return more;
1340 }
1341
1342 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1343                             void *arg)
1344 {
1345         struct dst_entry *dst, **pprev;
1346
1347         spin_lock_bh(&icmp6_dst_lock);
1348         pprev = &icmp6_dst_gc_list;
1349         while ((dst = *pprev) != NULL) {
1350                 struct rt6_info *rt = (struct rt6_info *) dst;
1351                 if (func(rt, arg)) {
1352                         *pprev = dst->next;
1353                         dst_free(dst);
1354                 } else {
1355                         pprev = &dst->next;
1356                 }
1357         }
1358         spin_unlock_bh(&icmp6_dst_lock);
1359 }
1360
1361 static int ip6_dst_gc(struct dst_ops *ops)
1362 {
1363         unsigned long now = jiffies;
1364         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1365         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1366         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1367         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1368         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1369         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1370         int entries;
1371
1372         entries = dst_entries_get_fast(ops);
1373         if (time_after(rt_last_gc + rt_min_interval, now) &&
1374             entries <= rt_max_size)
1375                 goto out;
1376
1377         net->ipv6.ip6_rt_gc_expire++;
1378         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1379         net->ipv6.ip6_rt_last_gc = now;
1380         entries = dst_entries_get_slow(ops);
1381         if (entries < ops->gc_thresh)
1382                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1383 out:
1384         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1385         return entries > rt_max_size;
1386 }
1387
1388 /* Clean host part of a prefix. Not necessary in radix tree,
1389    but results in cleaner routing tables.
1390
1391    Remove it only when all the things will work!
1392  */
1393
1394 int ip6_dst_hoplimit(struct dst_entry *dst)
1395 {
1396         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1397         if (hoplimit == 0) {
1398                 struct net_device *dev = dst->dev;
1399                 struct inet6_dev *idev;
1400
1401                 rcu_read_lock();
1402                 idev = __in6_dev_get(dev);
1403                 if (idev)
1404                         hoplimit = idev->cnf.hop_limit;
1405                 else
1406                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1407                 rcu_read_unlock();
1408         }
1409         return hoplimit;
1410 }
1411 EXPORT_SYMBOL(ip6_dst_hoplimit);
1412
1413 /*
1414  *
1415  */
1416
/*
 * ip6_route_add - build a route from a fib6_config and insert it into
 * its FIB table.
 *
 * @cfg: description of the route.  This function writes to it:
 *       fc_metric becomes IP6_RT_PRIO_USER when it was 0, fc_protocol
 *       becomes RTPROT_BOOT when it was RTPROT_UNSPEC, and
 *       fc_nlinfo.nl_net is overwritten with the namespace of the
 *       route's device just before insertion.
 *
 * Outline:
 *   1. Validate prefix lengths; resolve fc_ifindex (if given) to a held
 *      device and inet6_dev.
 *   2. Find or create the table.  When a netlink header is present
 *      without NLM_F_CREATE and the table does not exist, a warning is
 *      printed and the table is created anyway.
 *   3. Allocate the route and fill in expiry, protocol, input/output
 *      handlers, destination (and, with CONFIG_IPV6_SUBTREES, source)
 *      prefix and metric.
 *   4. Reject routes (RTF_REJECT, or a non-loopback destination routed
 *      via a loopback device without RTF_LOCAL) are bound to the
 *      loopback device, get discard handlers and an error chosen from
 *      fc_type, and skip straight to installation.
 *   5. Gateway routes: a gateway that is not link-local unicast must be
 *      unicast and reachable through an existing non-gateway route on
 *      the requested device (or that route's device is adopted when no
 *      device was given).
 *   6. Validate the preferred source address, bind a neighbour for
 *      RTF_GATEWAY/RTF_NONEXTHOP routes, copy the flags.
 *   7. Apply the netlink metric attributes and insert the route.
 *
 * Returns the result of __ip6_ins_rt() when insertion is attempted,
 * otherwise a negative errno:
 *   -EINVAL        prefix length above 128; source prefix given without
 *                  CONFIG_IPV6_SUBTREES; non-unicast gateway; gateway
 *                  route without a device or on a loopback device;
 *                  preferred source rejected by ipv6_chk_addr(); metric
 *                  attribute type above RTAX_MAX
 *   -ENODEV        fc_ifindex does not name a device with an inet6_dev;
 *                  no device could be determined; loopback device has
 *                  no inet6_dev
 *   -ENOBUFS       no table could be obtained
 *   -ENOMEM        route or metrics allocation failed
 *   -EHOSTUNREACH  no route to a non-link-local gateway, that route is
 *                  on a different device than requested, or it is itself
 *                  a gateway route
 *   (other)        whatever rt6_bind_neighbour() returned
 *
 * On the "out" error path the device and inet6_dev references taken
 * here are dropped and the partially built route is freed.
 */
1417 int ip6_route_add(struct fib6_config *cfg)
1418 {
1419         int err;
1420         struct net *net = cfg->fc_nlinfo.nl_net;
1421         struct rt6_info *rt = NULL;
1422         struct net_device *dev = NULL;
1423         struct inet6_dev *idev = NULL;
1424         struct fib6_table *table;
1425         int addr_type;
1426
1427         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1428                 return -EINVAL;
1429 #ifndef CONFIG_IPV6_SUBTREES
1430         if (cfg->fc_src_len)
1431                 return -EINVAL;
1432 #endif
1433         if (cfg->fc_ifindex) {
1434                 err = -ENODEV;
1435                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1436                 if (!dev)
1437                         goto out;
1438                 idev = in6_dev_get(dev);
1439                 if (!idev)
1440                         goto out;
1441         }
1442
1443         if (cfg->fc_metric == 0)
1444                 cfg->fc_metric = IP6_RT_PRIO_USER;
1445
1446         err = -ENOBUFS;
1447         if (cfg->fc_nlinfo.nlh &&
1448             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1449                 table = fib6_get_table(net, cfg->fc_table);
1450                 if (!table) {
1451                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1452                         table = fib6_new_table(net, cfg->fc_table);
1453                 }
1454         } else {
1455                 table = fib6_new_table(net, cfg->fc_table);
1456         }
1457
1458         if (!table)
1459                 goto out;
1460
1461         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1462
1463         if (!rt) {
1464                 err = -ENOMEM;
1465                 goto out;
1466         }
1467
1468         if (cfg->fc_flags & RTF_EXPIRES)
1469                 rt6_set_expires(rt, jiffies +
1470                                 clock_t_to_jiffies(cfg->fc_expires));
1471         else
1472                 rt6_clean_expires(rt);
1473
1474         if (cfg->fc_protocol == RTPROT_UNSPEC)
1475                 cfg->fc_protocol = RTPROT_BOOT;
1476         rt->rt6i_protocol = cfg->fc_protocol;
1477
1478         addr_type = ipv6_addr_type(&cfg->fc_dst);
1479
1480         if (addr_type & IPV6_ADDR_MULTICAST)
1481                 rt->dst.input = ip6_mc_input;
1482         else if (cfg->fc_flags & RTF_LOCAL)
1483                 rt->dst.input = ip6_input;
1484         else
1485                 rt->dst.input = ip6_forward;
1486
1487         rt->dst.output = ip6_output;
1488
1489         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1490         rt->rt6i_dst.plen = cfg->fc_dst_len;
1491         if (rt->rt6i_dst.plen == 128)
1492                rt->dst.flags |= DST_HOST;
1493
             /* Non-host routes with metric attributes get their own zeroed metrics array. */
1494         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1495                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1496                 if (!metrics) {
1497                         err = -ENOMEM;
1498                         goto out;
1499                 }
1500                 dst_init_metrics(&rt->dst, metrics, 0);
1501         }
1502 #ifdef CONFIG_IPV6_SUBTREES
1503         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1504         rt->rt6i_src.plen = cfg->fc_src_len;
1505 #endif
1506
1507         rt->rt6i_metric = cfg->fc_metric;
1508
1509         /* We cannot add true routes via loopback here,
1510            they would result in kernel looping; promote them to reject routes
1511          */
1512         if ((cfg->fc_flags & RTF_REJECT) ||
1513             (dev && (dev->flags & IFF_LOOPBACK) &&
1514              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1515              !(cfg->fc_flags & RTF_LOCAL))) {
1516                 /* hold loopback dev/idev if we haven't done so. */
1517                 if (dev != net->loopback_dev) {
1518                         if (dev) {
1519                                 dev_put(dev);
1520                                 in6_dev_put(idev);
1521                         }
1522                         dev = net->loopback_dev;
1523                         dev_hold(dev);
1524                         idev = in6_dev_get(dev);
1525                         if (!idev) {
1526                                 err = -ENODEV;
1527                                 goto out;
1528                         }
1529                 }
1530                 rt->dst.output = ip6_pkt_discard_out;
1531                 rt->dst.input = ip6_pkt_discard;
1532                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1533                 switch (cfg->fc_type) {
1534                 case RTN_BLACKHOLE:
1535                         rt->dst.error = -EINVAL;
1536                         break;
1537                 case RTN_PROHIBIT:
1538                         rt->dst.error = -EACCES;
1539                         break;
1540                 case RTN_THROW:
1541                         rt->dst.error = -EAGAIN;
1542                         break;
1543                 default:
1544                         rt->dst.error = -ENETUNREACH;
1545                         break;
1546                 }
1547                 goto install_route;
1548         }
1549
1550         if (cfg->fc_flags & RTF_GATEWAY) {
1551                 const struct in6_addr *gw_addr;
1552                 int gwa_type;
1553
1554                 gw_addr = &cfg->fc_gateway;
1555                 rt->rt6i_gateway = *gw_addr;
1556                 gwa_type = ipv6_addr_type(gw_addr);
1557
1558                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1559                         struct rt6_info *grt;
1560
1561                         /* IPv6 strictly inhibits using not link-local
1562                            addresses as nexthop address.
1563                            Otherwise, router will not able to send redirects.
1564                            It is very good, but in some (rare!) circumstances
1565                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1566                            some exceptions. --ANK
1567                          */
1568                         err = -EINVAL;
1569                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1570                                 goto out;
1571
1572                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1573
1574                         err = -EHOSTUNREACH;
1575                         if (!grt)
1576                                 goto out;
1577                         if (dev) {
1578                                 if (dev != grt->dst.dev) {
1579                                         dst_release(&grt->dst);
1580                                         goto out;
1581                                 }
1582                         } else {
                                     /* No device requested: adopt the gateway route's device, taking our own references. */
1583                                 dev = grt->dst.dev;
1584                                 idev = grt->rt6i_idev;
1585                                 dev_hold(dev);
1586                                 in6_dev_hold(grt->rt6i_idev);
1587                         }
                             /* err stays -EHOSTUNREACH if the gateway is itself reached via a gateway. */
1588                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1589                                 err = 0;
1590                         dst_release(&grt->dst);
1591
1592                         if (err)
1593                                 goto out;
1594                 }
1595                 err = -EINVAL;
1596                 if (!dev || (dev->flags & IFF_LOOPBACK))
1597                         goto out;
1598         }
1599
1600         err = -ENODEV;
1601         if (!dev)
1602                 goto out;
1603
1604         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1605                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1606                         err = -EINVAL;
1607                         goto out;
1608                 }
1609                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1610                 rt->rt6i_prefsrc.plen = 128;
1611         } else
1612                 rt->rt6i_prefsrc.plen = 0;
1613
1614         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1615                 err = rt6_bind_neighbour(rt, dev);
1616                 if (err)
1617                         goto out;
1618         }
1619
1620         rt->rt6i_flags = cfg->fc_flags;
1621
1622 install_route:
1623         if (cfg->fc_mx) {
1624                 struct nlattr *nla;
1625                 int remaining;
1626
1627                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1628                         int type = nla_type(nla);
1629
                             /* Attributes of type 0 are skipped. */
1630                         if (type) {
1631                                 if (type > RTAX_MAX) {
1632                                         err = -EINVAL;
1633                                         goto out;
1634                                 }
1635
1636                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1637                         }
1638                 }
1639         }
1640
             /* The dev/idev references held in the locals are handed to the route here. */
1641         rt->dst.dev = dev;
1642         rt->rt6i_idev = idev;
1643         rt->rt6i_table = table;
1644
1645         cfg->fc_nlinfo.nl_net = dev_net(dev);
1646
1647         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1648
1649 out:
1650         if (dev)
1651                 dev_put(dev);
1652         if (idev)
1653                 in6_dev_put(idev);
1654         if (rt)
1655                 dst_free(&rt->dst);
1656         return err;
1657 }
1658
1659 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1660 {
1661         int err;
1662         struct fib6_table *table;
1663         struct net *net = dev_net(rt->dst.dev);
1664
1665         if (rt == net->ipv6.ip6_null_entry) {
1666                 err = -ENOENT;
1667                 goto out;
1668         }
1669
1670         table = rt->rt6i_table;
1671         write_lock_bh(&table->tb6_lock);
1672         err = fib6_del(rt, info);
1673         write_unlock_bh(&table->tb6_lock);
1674
1675 out:
1676         dst_release(&rt->dst);
1677         return err;
1678 }
1679
1680 int ip6_del_rt(struct rt6_info *rt)
1681 {
1682         struct nl_info info = {
1683                 .nl_net = dev_net(rt->dst.dev),
1684         };
1685         return __ip6_del_rt(rt, &info);
1686 }
1687
1688 static int ip6_route_del(struct fib6_config *cfg)
1689 {
1690         struct fib6_table *table;
1691         struct fib6_node *fn;
1692         struct rt6_info *rt;
1693         int err = -ESRCH;
1694
1695         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1696         if (!table)
1697                 return err;
1698
1699         read_lock_bh(&table->tb6_lock);
1700
1701         fn = fib6_locate(&table->tb6_root,
1702                          &cfg->fc_dst, cfg->fc_dst_len,
1703                          &cfg->fc_src, cfg->fc_src_len);
1704
1705         if (fn) {
1706                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1707                         if (cfg->fc_ifindex &&
1708                             (!rt->dst.dev ||
1709                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1710                                 continue;
1711                         if (cfg->fc_flags & RTF_GATEWAY &&
1712                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1713                                 continue;
1714                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1715                                 continue;
1716                         dst_hold(&rt->dst);
1717                         read_unlock_bh(&table->tb6_lock);
1718
1719                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1720                 }
1721         }
1722         read_unlock_bh(&table->tb6_lock);
1723
1724         return err;
1725 }
1726
1727 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1728 {
1729         struct net *net = dev_net(skb->dev);
1730         struct netevent_redirect netevent;
1731         struct rt6_info *rt, *nrt = NULL;
1732         const struct in6_addr *target;
1733         struct ndisc_options ndopts;
1734         const struct in6_addr *dest;
1735         struct neighbour *old_neigh;
1736         struct inet6_dev *in6_dev;
1737         struct neighbour *neigh;
1738         struct icmp6hdr *icmph;
1739         int optlen, on_link;
1740         u8 *lladdr;
1741
1742         optlen = skb->tail - skb->transport_header;
1743         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1744
1745         if (optlen < 0) {
1746                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1747                 return;
1748         }
1749
1750         icmph = icmp6_hdr(skb);
1751         target = (const struct in6_addr *) (icmph + 1);
1752         dest = target + 1;
1753
1754         if (ipv6_addr_is_multicast(dest)) {
1755                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1756                 return;
1757         }
1758
1759         on_link = 0;
1760         if (ipv6_addr_equal(dest, target)) {
1761                 on_link = 1;
1762         } else if (ipv6_addr_type(target) !=
1763                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1764                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1765                 return;
1766         }
1767
1768         in6_dev = __in6_dev_get(skb->dev);
1769         if (!in6_dev)
1770                 return;
1771         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1772                 return;
1773
1774         /* RFC2461 8.1:
1775          *      The IP source address of the Redirect MUST be the same as the current
1776          *      first-hop router for the specified ICMP Destination Address.
1777          */
1778
1779         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1780                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1781                 return;
1782         }
1783
1784         lladdr = NULL;
1785         if (ndopts.nd_opts_tgt_lladdr) {
1786                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1787                                              skb->dev);
1788                 if (!lladdr) {
1789                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1790                         return;
1791                 }
1792         }
1793
1794         rt = (struct rt6_info *) dst;
1795         if (rt == net->ipv6.ip6_null_entry) {
1796                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1797                 return;
1798         }
1799
1800         /* Redirect received -> path was valid.
1801          * Look, redirects are sent only in response to data packets,
1802          * so that this nexthop apparently is reachable. --ANK
1803          */
1804         dst_confirm(&rt->dst);
1805
1806         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1807         if (!neigh)
1808                 return;
1809
1810         /* Duplicate redirect: silently ignore. */
1811         old_neigh = rt->n;
1812         if (neigh == old_neigh)
1813                 goto out;
1814
1815         /*
1816          *      We have finally decided to accept it.
1817          */
1818
1819         neigh_update(neigh, lladdr, NUD_STALE,
1820                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1821                      NEIGH_UPDATE_F_OVERRIDE|
1822                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1823                                      NEIGH_UPDATE_F_ISROUTER))
1824                      );
1825
1826         nrt = ip6_rt_copy(rt, dest);
1827         if (!nrt)
1828                 goto out;
1829
1830         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1831         if (on_link)
1832                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1833
1834         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1835         nrt->n = neigh_clone(neigh);
1836
1837         if (ip6_ins_rt(nrt))
1838                 goto out;
1839
1840         netevent.old = &rt->dst;
1841         netevent.old_neigh = old_neigh;
1842         netevent.new = &nrt->dst;
1843         netevent.new_neigh = neigh;
1844         netevent.daddr = dest;
1845         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1846
1847         if (rt->rt6i_flags & RTF_CACHE) {
1848                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1849                 ip6_del_rt(rt);
1850         }
1851
1852 out:
1853         neigh_release(neigh);
1854 }
1855
1856 /*
1857  *      Misc support functions
1858  */
1859
1860 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1861                                     const struct in6_addr *dest)
1862 {
1863         struct net *net = dev_net(ort->dst.dev);
1864         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1865                                             ort->rt6i_table);
1866
1867         if (rt) {
1868                 rt->dst.input = ort->dst.input;
1869                 rt->dst.output = ort->dst.output;
1870                 rt->dst.flags |= DST_HOST;
1871
1872                 rt->rt6i_dst.addr = *dest;
1873                 rt->rt6i_dst.plen = 128;
1874                 dst_copy_metrics(&rt->dst, &ort->dst);
1875                 rt->dst.error = ort->dst.error;
1876                 rt->rt6i_idev = ort->rt6i_idev;
1877                 if (rt->rt6i_idev)
1878                         in6_dev_hold(rt->rt6i_idev);
1879                 rt->dst.lastuse = jiffies;
1880
1881                 rt->rt6i_gateway = ort->rt6i_gateway;
1882                 rt->rt6i_flags = ort->rt6i_flags;
1883                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1884                     (RTF_DEFAULT | RTF_ADDRCONF))
1885                         rt6_set_from(rt, ort);
1886                 else
1887                         rt6_clean_expires(rt);
1888                 rt->rt6i_metric = 0;
1889
1890 #ifdef CONFIG_IPV6_SUBTREES
1891                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1892 #endif
1893                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1894                 rt->rt6i_table = ort->rt6i_table;
1895         }
1896         return rt;
1897 }
1898
1899 #ifdef CONFIG_IPV6_ROUTE_INFO
1900 static struct rt6_info *rt6_get_route_info(struct net *net,
1901                                            const struct in6_addr *prefix, int prefixlen,
1902                                            const struct in6_addr *gwaddr, int ifindex)
1903 {
1904         struct fib6_node *fn;
1905         struct rt6_info *rt = NULL;
1906         struct fib6_table *table;
1907
1908         table = fib6_get_table(net, RT6_TABLE_INFO);
1909         if (!table)
1910                 return NULL;
1911
1912         read_lock_bh(&table->tb6_lock);
1913         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1914         if (!fn)
1915                 goto out;
1916
1917         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1918                 if (rt->dst.dev->ifindex != ifindex)
1919                         continue;
1920                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1921                         continue;
1922                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1923                         continue;
1924                 dst_hold(&rt->dst);
1925                 break;
1926         }
1927 out:
1928         read_unlock_bh(&table->tb6_lock);
1929         return rt;
1930 }
1931
1932 static struct rt6_info *rt6_add_route_info(struct net *net,
1933                                            const struct in6_addr *prefix, int prefixlen,
1934                                            const struct in6_addr *gwaddr, int ifindex,
1935                                            unsigned int pref)
1936 {
1937         struct fib6_config cfg = {
1938                 .fc_table       = RT6_TABLE_INFO,
1939                 .fc_metric      = IP6_RT_PRIO_USER,
1940                 .fc_ifindex     = ifindex,
1941                 .fc_dst_len     = prefixlen,
1942                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1943                                   RTF_UP | RTF_PREF(pref),
1944                 .fc_nlinfo.portid = 0,
1945                 .fc_nlinfo.nlh = NULL,
1946                 .fc_nlinfo.nl_net = net,
1947         };
1948
1949         cfg.fc_dst = *prefix;
1950         cfg.fc_gateway = *gwaddr;
1951
1952         /* We should treat it as a default route if prefix length is 0. */
1953         if (!prefixlen)
1954                 cfg.fc_flags |= RTF_DEFAULT;
1955
1956         ip6_route_add(&cfg);
1957
1958         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1959 }
1960 #endif
1961
1962 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1963 {
1964         struct rt6_info *rt;
1965         struct fib6_table *table;
1966
1967         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1968         if (!table)
1969                 return NULL;
1970
1971         read_lock_bh(&table->tb6_lock);
1972         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1973                 if (dev == rt->dst.dev &&
1974                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1975                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1976                         break;
1977         }
1978         if (rt)
1979                 dst_hold(&rt->dst);
1980         read_unlock_bh(&table->tb6_lock);
1981         return rt;
1982 }
1983
1984 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1985                                      struct net_device *dev,
1986                                      unsigned int pref)
1987 {
1988         struct fib6_config cfg = {
1989                 .fc_table       = RT6_TABLE_DFLT,
1990                 .fc_metric      = IP6_RT_PRIO_USER,
1991                 .fc_ifindex     = dev->ifindex,
1992                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1993                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1994                 .fc_nlinfo.portid = 0,
1995                 .fc_nlinfo.nlh = NULL,
1996                 .fc_nlinfo.nl_net = dev_net(dev),
1997         };
1998
1999         cfg.fc_gateway = *gwaddr;
2000
2001         ip6_route_add(&cfg);
2002
2003         return rt6_get_dflt_router(gwaddr, dev);
2004 }
2005
2006 void rt6_purge_dflt_routers(struct net *net)
2007 {
2008         struct rt6_info *rt;
2009         struct fib6_table *table;
2010
2011         /* NOTE: Keep consistent with rt6_get_dflt_router */
2012         table = fib6_get_table(net, RT6_TABLE_DFLT);
2013         if (!table)
2014                 return;
2015
2016 restart:
2017         read_lock_bh(&table->tb6_lock);
2018         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2019                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
2020                         dst_hold(&rt->dst);
2021                         read_unlock_bh(&table->tb6_lock);
2022                         ip6_del_rt(rt);
2023                         goto restart;
2024                 }
2025         }
2026         read_unlock_bh(&table->tb6_lock);
2027 }
2028
2029 static void rtmsg_to_fib6_config(struct net *net,
2030                                  struct in6_rtmsg *rtmsg,
2031                                  struct fib6_config *cfg)
2032 {
2033         memset(cfg, 0, sizeof(*cfg));
2034
2035         cfg->fc_table = RT6_TABLE_MAIN;
2036         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2037         cfg->fc_metric = rtmsg->rtmsg_metric;
2038         cfg->fc_expires = rtmsg->rtmsg_info;
2039         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2040         cfg->fc_src_len = rtmsg->rtmsg_src_len;
2041         cfg->fc_flags = rtmsg->rtmsg_flags;
2042
2043         cfg->fc_nlinfo.nl_net = net;
2044
2045         cfg->fc_dst = rtmsg->rtmsg_dst;
2046         cfg->fc_src = rtmsg->rtmsg_src;
2047         cfg->fc_gateway = rtmsg->rtmsg_gateway;
2048 }
2049
2050 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2051 {
2052         struct fib6_config cfg;
2053         struct in6_rtmsg rtmsg;
2054         int err;
2055
2056         switch(cmd) {
2057         case SIOCADDRT:         /* Add a route */
2058         case SIOCDELRT:         /* Delete a route */
2059                 if (!capable(CAP_NET_ADMIN))
2060                         return -EPERM;
2061                 err = copy_from_user(&rtmsg, arg,
2062                                      sizeof(struct in6_rtmsg));
2063                 if (err)
2064                         return -EFAULT;
2065
2066                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2067
2068                 rtnl_lock();
2069                 switch (cmd) {
2070                 case SIOCADDRT:
2071                         err = ip6_route_add(&cfg);
2072                         break;
2073                 case SIOCDELRT:
2074                         err = ip6_route_del(&cfg);
2075                         break;
2076                 default:
2077                         err = -EINVAL;
2078                 }
2079                 rtnl_unlock();
2080
2081                 return err;
2082         }
2083
2084         return -EINVAL;
2085 }
2086
2087 /*
2088  *      Drop the packet on the floor
2089  */
2090
2091 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2092 {
2093         int type;
2094         struct dst_entry *dst = skb_dst(skb);
2095         switch (ipstats_mib_noroutes) {
2096         case IPSTATS_MIB_INNOROUTES:
2097                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2098                 if (type == IPV6_ADDR_ANY) {
2099                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2100                                       IPSTATS_MIB_INADDRERRORS);
2101                         break;
2102                 }
2103                 /* FALLTHROUGH */
2104         case IPSTATS_MIB_OUTNOROUTES:
2105                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2106                               ipstats_mib_noroutes);
2107                 break;
2108         }
2109         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2110         kfree_skb(skb);
2111         return 0;
2112 }
2113
2114 static int ip6_pkt_discard(struct sk_buff *skb)
2115 {
2116         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2117 }
2118
2119 static int ip6_pkt_discard_out(struct sk_buff *skb)
2120 {
2121         skb->dev = skb_dst(skb)->dev;
2122         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2123 }
2124
2125 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2126
2127 static int ip6_pkt_prohibit(struct sk_buff *skb)
2128 {
2129         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2130 }
2131
2132 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2133 {
2134         skb->dev = skb_dst(skb)->dev;
2135         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2136 }
2137
2138 #endif
2139
2140 /*
2141  *      Allocate a dst for local (unicast / anycast) address.
2142  */
2143
2144 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2145                                     const struct in6_addr *addr,
2146                                     bool anycast)
2147 {
2148         struct net *net = dev_net(idev->dev);
2149         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2150         int err;
2151
2152         if (!rt) {
2153                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2154                 return ERR_PTR(-ENOMEM);
2155         }
2156
2157         in6_dev_hold(idev);
2158
2159         rt->dst.flags |= DST_HOST;
2160         rt->dst.input = ip6_input;
2161         rt->dst.output = ip6_output;
2162         rt->rt6i_idev = idev;
2163
2164         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2165         if (anycast)
2166                 rt->rt6i_flags |= RTF_ANYCAST;
2167         else
2168                 rt->rt6i_flags |= RTF_LOCAL;
2169         err = rt6_bind_neighbour(rt, rt->dst.dev);
2170         if (err) {
2171                 dst_free(&rt->dst);
2172                 return ERR_PTR(err);
2173         }
2174
2175         rt->rt6i_dst.addr = *addr;
2176         rt->rt6i_dst.plen = 128;
2177         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2178
2179         atomic_set(&rt->dst.__refcnt, 1);
2180
2181         return rt;
2182 }
2183
2184 int ip6_route_get_saddr(struct net *net,
2185                         struct rt6_info *rt,
2186                         const struct in6_addr *daddr,
2187                         unsigned int prefs,
2188                         struct in6_addr *saddr)
2189 {
2190         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2191         int err = 0;
2192         if (rt->rt6i_prefsrc.plen)
2193                 *saddr = rt->rt6i_prefsrc.addr;
2194         else
2195                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2196                                          daddr, prefs, saddr);
2197         return err;
2198 }
2199
2200 /* remove deleted ip from prefsrc entries */
/*
 * Argument bundle handed to fib6_remove_prefsrc() through
 * fib6_clean_all():
 *      dev  - only routes on this device are touched; NULL matches any
 *      net  - namespace whose ip6_null_entry is skipped
 *      addr - address compared against each route's preferred source
 */
struct arg_dev_net_ip {
        struct net_device *dev;
        struct net *net;
        struct in6_addr *addr;
};
2206
2207 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2208 {
2209         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2210         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2211         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2212
2213         if (((void *)rt->dst.dev == dev || !dev) &&
2214             rt != net->ipv6.ip6_null_entry &&
2215             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2216                 /* remove prefsrc entry */
2217                 rt->rt6i_prefsrc.plen = 0;
2218         }
2219         return 0;
2220 }
2221
2222 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2223 {
2224         struct net *net = dev_net(ifp->idev->dev);
2225         struct arg_dev_net_ip adni = {
2226                 .dev = ifp->idev->dev,
2227                 .net = net,
2228                 .addr = &ifp->addr,
2229         };
2230         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2231 }
2232
/*
 * Argument bundle handed to fib6_ifdown():
 *      dev - device whose routes are selected; NULL selects every device
 *      net - namespace whose ip6_null_entry is never selected
 */
struct arg_dev_net {
        struct net_device *dev;
        struct net *net;
};
2237
2238 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2239 {
2240         const struct arg_dev_net *adn = arg;
2241         const struct net_device *dev = adn->dev;
2242
2243         if ((rt->dst.dev == dev || !dev) &&
2244             rt != adn->net->ipv6.ip6_null_entry)
2245                 return -1;
2246
2247         return 0;
2248 }
2249
2250 void rt6_ifdown(struct net *net, struct net_device *dev)
2251 {
2252         struct arg_dev_net adn = {
2253                 .dev = dev,
2254                 .net = net,
2255         };
2256
2257         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2258         icmp6_clean_all(fib6_ifdown, &adn);
2259 }
2260
/*
 * Argument bundle handed to rt6_mtu_change_route():
 *      dev - device whose MTU changed
 *      mtu - the device's new MTU
 */
struct rt6_mtu_change_arg {
        struct net_device *dev;
        unsigned int mtu;
};
2265
2266 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2267 {
2268         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2269         struct inet6_dev *idev;
2270
2271         /* In IPv6 pmtu discovery is not optional,
2272            so that RTAX_MTU lock cannot disable it.
2273            We still use this lock to block changes
2274            caused by addrconf/ndisc.
2275         */
2276
2277         idev = __in6_dev_get(arg->dev);
2278         if (!idev)
2279                 return 0;
2280
2281         /* For administrative MTU increase, there is no way to discover
2282            IPv6 PMTU increase, so PMTU increase should be updated here.
2283            Since RFC 1981 doesn't include administrative MTU increase
2284            update PMTU increase is a MUST. (i.e. jumbo frame)
2285          */
2286         /*
2287            If new MTU is less than route PMTU, this new MTU will be the
2288            lowest MTU in the path, update the route PMTU to reflect PMTU
2289            decreases; if new MTU is greater than route PMTU, and the
2290            old MTU is the lowest MTU in the path, update the route PMTU
2291            to reflect the increase. In this case if the other nodes' MTU
2292            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2293            PMTU discouvery.
2294          */
2295         if (rt->dst.dev == arg->dev &&
2296             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2297             (dst_mtu(&rt->dst) >= arg->mtu ||
2298              (dst_mtu(&rt->dst) < arg->mtu &&
2299               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2300                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2301         }
2302         return 0;
2303 }
2304
2305 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2306 {
2307         struct rt6_mtu_change_arg arg = {
2308                 .dev = dev,
2309                 .mtu = mtu,
2310         };
2311
2312         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2313 }
2314
2315 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2316         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2317         [RTA_OIF]               = { .type = NLA_U32 },
2318         [RTA_IIF]               = { .type = NLA_U32 },
2319         [RTA_PRIORITY]          = { .type = NLA_U32 },
2320         [RTA_METRICS]           = { .type = NLA_NESTED },
2321         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2322 };
2323
2324 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2325                               struct fib6_config *cfg)
2326 {
2327         struct rtmsg *rtm;
2328         struct nlattr *tb[RTA_MAX+1];
2329         int err;
2330
2331         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2332         if (err < 0)
2333                 goto errout;
2334
2335         err = -EINVAL;
2336         rtm = nlmsg_data(nlh);
2337         memset(cfg, 0, sizeof(*cfg));
2338
2339         cfg->fc_table = rtm->rtm_table;
2340         cfg->fc_dst_len = rtm->rtm_dst_len;
2341         cfg->fc_src_len = rtm->rtm_src_len;
2342         cfg->fc_flags = RTF_UP;
2343         cfg->fc_protocol = rtm->rtm_protocol;
2344         cfg->fc_type = rtm->rtm_type;
2345
2346         if (rtm->rtm_type == RTN_UNREACHABLE ||
2347             rtm->rtm_type == RTN_BLACKHOLE ||
2348             rtm->rtm_type == RTN_PROHIBIT ||
2349             rtm->rtm_type == RTN_THROW)
2350                 cfg->fc_flags |= RTF_REJECT;
2351
2352         if (rtm->rtm_type == RTN_LOCAL)
2353                 cfg->fc_flags |= RTF_LOCAL;
2354
2355         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2356         cfg->fc_nlinfo.nlh = nlh;
2357         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2358
2359         if (tb[RTA_GATEWAY]) {
2360                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2361                 cfg->fc_flags |= RTF_GATEWAY;
2362         }
2363
2364         if (tb[RTA_DST]) {
2365                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2366
2367                 if (nla_len(tb[RTA_DST]) < plen)
2368                         goto errout;
2369
2370                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2371         }
2372
2373         if (tb[RTA_SRC]) {
2374                 int plen = (rtm->rtm_src_len + 7) >> 3;
2375
2376                 if (nla_len(tb[RTA_SRC]) < plen)
2377                         goto errout;
2378
2379                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2380         }
2381
2382         if (tb[RTA_PREFSRC])
2383                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2384
2385         if (tb[RTA_OIF])
2386                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2387
2388         if (tb[RTA_PRIORITY])
2389                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2390
2391         if (tb[RTA_METRICS]) {
2392                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2393                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2394         }
2395
2396         if (tb[RTA_TABLE])
2397                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2398
2399         if (tb[RTA_MULTIPATH]) {
2400                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2401                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2402         }
2403
2404         err = 0;
2405 errout:
2406         return err;
2407 }
2408
2409 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2410 {
2411         struct fib6_config r_cfg;
2412         struct rtnexthop *rtnh;
2413         int remaining;
2414         int attrlen;
2415         int err = 0, last_err = 0;
2416
2417 beginning:
2418         rtnh = (struct rtnexthop *)cfg->fc_mp;
2419         remaining = cfg->fc_mp_len;
2420
2421         /* Parse a Multipath Entry */
2422         while (rtnh_ok(rtnh, remaining)) {
2423                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2424                 if (rtnh->rtnh_ifindex)
2425                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2426
2427                 attrlen = rtnh_attrlen(rtnh);
2428                 if (attrlen > 0) {
2429                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2430
2431                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2432                         if (nla) {
2433                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2434                                 r_cfg.fc_flags |= RTF_GATEWAY;
2435                         }
2436                 }
2437                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2438                 if (err) {
2439                         last_err = err;
2440                         /* If we are trying to remove a route, do not stop the
2441                          * loop when ip6_route_del() fails (because next hop is
2442                          * already gone), we should try to remove all next hops.
2443                          */
2444                         if (add) {
2445                                 /* If add fails, we should try to delete all
2446                                  * next hops that have been already added.
2447                                  */
2448                                 add = 0;
2449                                 goto beginning;
2450                         }
2451                 }
2452                 /* Because each route is added like a single route we remove
2453                  * this flag after the first nexthop (if there is a collision,
2454                  * we have already fail to add the first nexthop:
2455                  * fib6_add_rt2node() has reject it).
2456                  */
2457                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2458                 rtnh = rtnh_next(rtnh, &remaining);
2459         }
2460
2461         return last_err;
2462 }
2463
2464 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2465 {
2466         struct fib6_config cfg;
2467         int err;
2468
2469         err = rtm_to_fib6_config(skb, nlh, &cfg);
2470         if (err < 0)
2471                 return err;
2472
2473         if (cfg.fc_mp)
2474                 return ip6_route_multipath(&cfg, 0);
2475         else
2476                 return ip6_route_del(&cfg);
2477 }
2478
2479 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2480 {
2481         struct fib6_config cfg;
2482         int err;
2483
2484         err = rtm_to_fib6_config(skb, nlh, &cfg);
2485         if (err < 0)
2486                 return err;
2487
2488         if (cfg.fc_mp)
2489                 return ip6_route_multipath(&cfg, 1);
2490         else
2491                 return ip6_route_add(&cfg);
2492 }
2493
2494 static inline size_t rt6_nlmsg_size(void)
2495 {
2496         return NLMSG_ALIGN(sizeof(struct rtmsg))
2497                + nla_total_size(16) /* RTA_SRC */
2498                + nla_total_size(16) /* RTA_DST */
2499                + nla_total_size(16) /* RTA_GATEWAY */
2500                + nla_total_size(16) /* RTA_PREFSRC */
2501                + nla_total_size(4) /* RTA_TABLE */
2502                + nla_total_size(4) /* RTA_IIF */
2503                + nla_total_size(4) /* RTA_OIF */
2504                + nla_total_size(4) /* RTA_PRIORITY */
2505                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2506                + nla_total_size(sizeof(struct rta_cacheinfo));
2507 }
2508
2509 static int rt6_fill_node(struct net *net,
2510                          struct sk_buff *skb, struct rt6_info *rt,
2511                          struct in6_addr *dst, struct in6_addr *src,
2512                          int iif, int type, u32 portid, u32 seq,
2513                          int prefix, int nowait, unsigned int flags)
2514 {
2515         struct rtmsg *rtm;
2516         struct nlmsghdr *nlh;
2517         long expires;
2518         u32 table;
2519         struct neighbour *n;
2520
2521         if (prefix) {   /* user wants prefix routes only */
2522                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2523                         /* success since this is not a prefix route */
2524                         return 1;
2525                 }
2526         }
2527
2528         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2529         if (!nlh)
2530                 return -EMSGSIZE;
2531
2532         rtm = nlmsg_data(nlh);
2533         rtm->rtm_family = AF_INET6;
2534         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2535         rtm->rtm_src_len = rt->rt6i_src.plen;
2536         rtm->rtm_tos = 0;
2537         if (rt->rt6i_table)
2538                 table = rt->rt6i_table->tb6_id;
2539         else
2540                 table = RT6_TABLE_UNSPEC;
2541         rtm->rtm_table = table;
2542         if (nla_put_u32(skb, RTA_TABLE, table))
2543                 goto nla_put_failure;
2544         if (rt->rt6i_flags & RTF_REJECT) {
2545                 switch (rt->dst.error) {
2546                 case -EINVAL:
2547                         rtm->rtm_type = RTN_BLACKHOLE;
2548                         break;
2549                 case -EACCES:
2550                         rtm->rtm_type = RTN_PROHIBIT;
2551                         break;
2552                 case -EAGAIN:
2553                         rtm->rtm_type = RTN_THROW;
2554                         break;
2555                 default:
2556                         rtm->rtm_type = RTN_UNREACHABLE;
2557                         break;
2558                 }
2559         }
2560         else if (rt->rt6i_flags & RTF_LOCAL)
2561                 rtm->rtm_type = RTN_LOCAL;
2562         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2563                 rtm->rtm_type = RTN_LOCAL;
2564         else
2565                 rtm->rtm_type = RTN_UNICAST;
2566         rtm->rtm_flags = 0;
2567         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2568         rtm->rtm_protocol = rt->rt6i_protocol;
2569         if (rt->rt6i_flags & RTF_DYNAMIC)
2570                 rtm->rtm_protocol = RTPROT_REDIRECT;
2571         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2572                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2573                         rtm->rtm_protocol = RTPROT_RA;
2574                 else
2575                         rtm->rtm_protocol = RTPROT_KERNEL;
2576         }
2577
2578         if (rt->rt6i_flags & RTF_CACHE)
2579                 rtm->rtm_flags |= RTM_F_CLONED;
2580
2581         if (dst) {
2582                 if (nla_put(skb, RTA_DST, 16, dst))
2583                         goto nla_put_failure;
2584                 rtm->rtm_dst_len = 128;
2585         } else if (rtm->rtm_dst_len)
2586                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2587                         goto nla_put_failure;
2588 #ifdef CONFIG_IPV6_SUBTREES
2589         if (src) {
2590                 if (nla_put(skb, RTA_SRC, 16, src))
2591                         goto nla_put_failure;
2592                 rtm->rtm_src_len = 128;
2593         } else if (rtm->rtm_src_len &&
2594                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2595                 goto nla_put_failure;
2596 #endif
2597         if (iif) {
2598 #ifdef CONFIG_IPV6_MROUTE
2599                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2600                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2601                         if (err <= 0) {
2602                                 if (!nowait) {
2603                                         if (err == 0)
2604                                                 return 0;
2605                                         goto nla_put_failure;
2606                                 } else {
2607                                         if (err == -EMSGSIZE)
2608                                                 goto nla_put_failure;
2609                                 }
2610                         }
2611                 } else
2612 #endif
2613                         if (nla_put_u32(skb, RTA_IIF, iif))
2614                                 goto nla_put_failure;
2615         } else if (dst) {
2616                 struct in6_addr saddr_buf;
2617                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2618                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2619                         goto nla_put_failure;
2620         }
2621
2622         if (rt->rt6i_prefsrc.plen) {
2623                 struct in6_addr saddr_buf;
2624                 saddr_buf = rt->rt6i_prefsrc.addr;
2625                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2626                         goto nla_put_failure;
2627         }
2628
2629         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2630                 goto nla_put_failure;
2631
2632         n = rt->n;
2633         if (n) {
2634                 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2635                         goto nla_put_failure;
2636         }
2637
2638         if (rt->dst.dev &&
2639             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2640                 goto nla_put_failure;
2641         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2642                 goto nla_put_failure;
2643
2644         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2645
2646         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2647                 goto nla_put_failure;
2648
2649         return nlmsg_end(skb, nlh);
2650
2651 nla_put_failure:
2652         nlmsg_cancel(skb, nlh);
2653         return -EMSGSIZE;
2654 }
2655
2656 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2657 {
2658         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2659         int prefix;
2660
2661         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2662                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2663                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2664         } else
2665                 prefix = 0;
2666
2667         return rt6_fill_node(arg->net,
2668                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2669                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2670                      prefix, 0, NLM_F_MULTI);
2671 }
2672
2673 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2674 {
2675         struct net *net = sock_net(in_skb->sk);
2676         struct nlattr *tb[RTA_MAX+1];
2677         struct rt6_info *rt;
2678         struct sk_buff *skb;
2679         struct rtmsg *rtm;
2680         struct flowi6 fl6;
2681         int err, iif = 0, oif = 0;
2682
2683         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2684         if (err < 0)
2685                 goto errout;
2686
2687         err = -EINVAL;
2688         memset(&fl6, 0, sizeof(fl6));
2689
2690         if (tb[RTA_SRC]) {
2691                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2692                         goto errout;
2693
2694                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2695         }
2696
2697         if (tb[RTA_DST]) {
2698                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2699                         goto errout;
2700
2701                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2702         }
2703
2704         if (tb[RTA_IIF])
2705                 iif = nla_get_u32(tb[RTA_IIF]);
2706
2707         if (tb[RTA_OIF])
2708                 oif = nla_get_u32(tb[RTA_OIF]);
2709
2710         if (iif) {
2711                 struct net_device *dev;
2712                 int flags = 0;
2713
2714                 dev = __dev_get_by_index(net, iif);
2715                 if (!dev) {
2716                         err = -ENODEV;
2717                         goto errout;
2718                 }
2719
2720                 fl6.flowi6_iif = iif;
2721
2722                 if (!ipv6_addr_any(&fl6.saddr))
2723                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2724
2725                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2726                                                                flags);
2727         } else {
2728                 fl6.flowi6_oif = oif;
2729
2730                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2731         }
2732
2733         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2734         if (!skb) {
2735                 dst_release(&rt->dst);
2736                 err = -ENOBUFS;
2737                 goto errout;
2738         }
2739
2740         /* Reserve room for dummy headers, this skb can pass
2741            through good chunk of routing engine.
2742          */
2743         skb_reset_mac_header(skb);
2744         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2745
2746         skb_dst_set(skb, &rt->dst);
2747
2748         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2749                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2750                             nlh->nlmsg_seq, 0, 0, 0);
2751         if (err < 0) {
2752                 kfree_skb(skb);
2753                 goto errout;
2754         }
2755
2756         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2757 errout:
2758         return err;
2759 }
2760
2761 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2762 {
2763         struct sk_buff *skb;
2764         struct net *net = info->nl_net;
2765         u32 seq;
2766         int err;
2767
2768         err = -ENOBUFS;
2769         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2770
2771         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2772         if (!skb)
2773                 goto errout;
2774
2775         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2776                                 event, info->portid, seq, 0, 0, 0);
2777         if (err < 0) {
2778                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2779                 WARN_ON(err == -EMSGSIZE);
2780                 kfree_skb(skb);
2781                 goto errout;
2782         }
2783         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2784                     info->nlh, gfp_any());
2785         return;
2786 errout:
2787         if (err < 0)
2788                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2789 }
2790
2791 static int ip6_route_dev_notify(struct notifier_block *this,
2792                                 unsigned long event, void *data)
2793 {
2794         struct net_device *dev = (struct net_device *)data;
2795         struct net *net = dev_net(dev);
2796
2797         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2798                 net->ipv6.ip6_null_entry->dst.dev = dev;
2799                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2800 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2801                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2802                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2803                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2804                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2805 #endif
2806         }
2807
2808         return NOTIFY_OK;
2809 }
2810
2811 /*
2812  *      /proc
2813  */
2814
2815 #ifdef CONFIG_PROC_FS
2816
/*
 * Buffer bookkeeping record for /proc output.
 * NOTE(review): nothing in the nearby /proc code (which is seq_file based)
 * references this structure - check whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2825
2826 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2827 {
2828         struct seq_file *m = p_arg;
2829         struct neighbour *n;
2830
2831         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2832
2833 #ifdef CONFIG_IPV6_SUBTREES
2834         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2835 #else
2836         seq_puts(m, "00000000000000000000000000000000 00 ");
2837 #endif
2838         n = rt->n;
2839         if (n) {
2840                 seq_printf(m, "%pi6", n->primary_key);
2841         } else {
2842                 seq_puts(m, "00000000000000000000000000000000");
2843         }
2844         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2845                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2846                    rt->dst.__use, rt->rt6i_flags,
2847                    rt->dst.dev ? rt->dst.dev->name : "");
2848         return 0;
2849 }
2850
2851 static int ipv6_route_show(struct seq_file *m, void *v)
2852 {
2853         struct net *net = (struct net *)m->private;
2854         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2855         return 0;
2856 }
2857
2858 static int ipv6_route_open(struct inode *inode, struct file *file)
2859 {
2860         return single_open_net(inode, file, ipv6_route_show);
2861 }
2862
2863 static const struct file_operations ipv6_route_proc_fops = {
2864         .owner          = THIS_MODULE,
2865         .open           = ipv6_route_open,
2866         .read           = seq_read,
2867         .llseek         = seq_lseek,
2868         .release        = single_release_net,
2869 };
2870
2871 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2872 {
2873         struct net *net = (struct net *)seq->private;
2874         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2875                    net->ipv6.rt6_stats->fib_nodes,
2876                    net->ipv6.rt6_stats->fib_route_nodes,
2877                    net->ipv6.rt6_stats->fib_rt_alloc,
2878                    net->ipv6.rt6_stats->fib_rt_entries,
2879                    net->ipv6.rt6_stats->fib_rt_cache,
2880                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2881                    net->ipv6.rt6_stats->fib_discarded_routes);
2882
2883         return 0;
2884 }
2885
2886 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2887 {
2888         return single_open_net(inode, file, rt6_stats_seq_show);
2889 }
2890
2891 static const struct file_operations rt6_stats_seq_fops = {
2892         .owner   = THIS_MODULE,
2893         .open    = rt6_stats_seq_open,
2894         .read    = seq_read,
2895         .llseek  = seq_lseek,
2896         .release = single_release_net,
2897 };
2898 #endif  /* CONFIG_PROC_FS */
2899
2900 #ifdef CONFIG_SYSCTL
2901
2902 static
2903 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2904                               void __user *buffer, size_t *lenp, loff_t *ppos)
2905 {
2906         struct net *net;
2907         int delay;
2908         if (!write)
2909                 return -EINVAL;
2910
2911         net = (struct net *)ctl->extra1;
2912         delay = net->ipv6.sysctl.flush_delay;
2913         proc_dointvec(ctl, write, buffer, lenp, ppos);
2914         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2915         return 0;
2916 }
2917
2918 ctl_table ipv6_route_table_template[] = {
2919         {
2920                 .procname       =       "flush",
2921                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2922                 .maxlen         =       sizeof(int),
2923                 .mode           =       0200,
2924                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2925         },
2926         {
2927                 .procname       =       "gc_thresh",
2928                 .data           =       &ip6_dst_ops_template.gc_thresh,
2929                 .maxlen         =       sizeof(int),
2930                 .mode           =       0644,
2931                 .proc_handler   =       proc_dointvec,
2932         },
2933         {
2934                 .procname       =       "max_size",
2935                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2936                 .maxlen         =       sizeof(int),
2937                 .mode           =       0644,
2938                 .proc_handler   =       proc_dointvec,
2939         },
2940         {
2941                 .procname       =       "gc_min_interval",
2942                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2943                 .maxlen         =       sizeof(int),
2944                 .mode           =       0644,
2945                 .proc_handler   =       proc_dointvec_jiffies,
2946         },
2947         {
2948                 .procname       =       "gc_timeout",
2949                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2950                 .maxlen         =       sizeof(int),
2951                 .mode           =       0644,
2952                 .proc_handler   =       proc_dointvec_jiffies,
2953         },
2954         {
2955                 .procname       =       "gc_interval",
2956                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2957                 .maxlen         =       sizeof(int),
2958                 .mode           =       0644,
2959                 .proc_handler   =       proc_dointvec_jiffies,
2960         },
2961         {
2962                 .procname       =       "gc_elasticity",
2963                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2964                 .maxlen         =       sizeof(int),
2965                 .mode           =       0644,
2966                 .proc_handler   =       proc_dointvec,
2967         },
2968         {
2969                 .procname       =       "mtu_expires",
2970                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2971                 .maxlen         =       sizeof(int),
2972                 .mode           =       0644,
2973                 .proc_handler   =       proc_dointvec_jiffies,
2974         },
2975         {
2976                 .procname       =       "min_adv_mss",
2977                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2978                 .maxlen         =       sizeof(int),
2979                 .mode           =       0644,
2980                 .proc_handler   =       proc_dointvec,
2981         },
2982         {
2983                 .procname       =       "gc_min_interval_ms",
2984                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2985                 .maxlen         =       sizeof(int),
2986                 .mode           =       0644,
2987                 .proc_handler   =       proc_dointvec_ms_jiffies,
2988         },
2989         { }
2990 };
2991
2992 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2993 {
2994         struct ctl_table *table;
2995
2996         table = kmemdup(ipv6_route_table_template,
2997                         sizeof(ipv6_route_table_template),
2998                         GFP_KERNEL);
2999
3000         if (table) {
3001                 table[0].data = &net->ipv6.sysctl.flush_delay;
3002                 table[0].extra1 = net;
3003                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
3004                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3005                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3006                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3007                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3008                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3009                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3010                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
3011                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3012         }
3013
3014         return table;
3015 }
3016 #endif
3017
3018 static int __net_init ip6_route_net_init(struct net *net)
3019 {
3020         int ret = -ENOMEM;
3021
3022         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3023                sizeof(net->ipv6.ip6_dst_ops));
3024
3025         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3026                 goto out_ip6_dst_ops;
3027
3028         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3029                                            sizeof(*net->ipv6.ip6_null_entry),
3030                                            GFP_KERNEL);
3031         if (!net->ipv6.ip6_null_entry)
3032                 goto out_ip6_dst_entries;
3033         net->ipv6.ip6_null_entry->dst.path =
3034                 (struct dst_entry *)net->ipv6.ip6_null_entry;
3035         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3036         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3037                          ip6_template_metrics, true);
3038
3039 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3040         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3041                                                sizeof(*net->ipv6.ip6_prohibit_entry),
3042                                                GFP_KERNEL);
3043         if (!net->ipv6.ip6_prohibit_entry)
3044                 goto out_ip6_null_entry;
3045         net->ipv6.ip6_prohibit_entry->dst.path =
3046                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3047         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3048         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3049                          ip6_template_metrics, true);
3050
3051         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3052                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
3053                                                GFP_KERNEL);
3054         if (!net->ipv6.ip6_blk_hole_entry)
3055                 goto out_ip6_prohibit_entry;
3056         net->ipv6.ip6_blk_hole_entry->dst.path =
3057                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3058         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3059         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3060                          ip6_template_metrics, true);
3061 #endif
3062
3063         net->ipv6.sysctl.flush_delay = 0;
3064         net->ipv6.sysctl.ip6_rt_max_size = 4096;
3065         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3066         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3067         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3068         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3069         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3070         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3071
3072         net->ipv6.ip6_rt_gc_expire = 30*HZ;
3073
3074         ret = 0;
3075 out:
3076         return ret;
3077
3078 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3079 out_ip6_prohibit_entry:
3080         kfree(net->ipv6.ip6_prohibit_entry);
3081 out_ip6_null_entry:
3082         kfree(net->ipv6.ip6_null_entry);
3083 #endif
3084 out_ip6_dst_entries:
3085         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3086 out_ip6_dst_ops:
3087         goto out;
3088 }
3089
3090 static void __net_exit ip6_route_net_exit(struct net *net)
3091 {
3092         kfree(net->ipv6.ip6_null_entry);
3093 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3094         kfree(net->ipv6.ip6_prohibit_entry);
3095         kfree(net->ipv6.ip6_blk_hole_entry);
3096 #endif
3097         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3098 }
3099
3100 static int __net_init ip6_route_net_init_late(struct net *net)
3101 {
3102 #ifdef CONFIG_PROC_FS
3103         proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
3104         proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
3105 #endif
3106         return 0;
3107 }
3108
3109 static void __net_exit ip6_route_net_exit_late(struct net *net)
3110 {
3111 #ifdef CONFIG_PROC_FS
3112         proc_net_remove(net, "ipv6_route");
3113         proc_net_remove(net, "rt6_stats");
3114 #endif
3115 }
3116
3117 static struct pernet_operations ip6_route_net_ops = {
3118         .init = ip6_route_net_init,
3119         .exit = ip6_route_net_exit,
3120 };
3121
3122 static int __net_init ipv6_inetpeer_init(struct net *net)
3123 {
3124         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3125
3126         if (!bp)
3127                 return -ENOMEM;
3128         inet_peer_base_init(bp);
3129         net->ipv6.peers = bp;
3130         return 0;
3131 }
3132
3133 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3134 {
3135         struct inet_peer_base *bp = net->ipv6.peers;
3136
3137         net->ipv6.peers = NULL;
3138         inetpeer_invalidate_tree(bp);
3139         kfree(bp);
3140 }
3141
3142 static struct pernet_operations ipv6_inetpeer_ops = {
3143         .init   =       ipv6_inetpeer_init,
3144         .exit   =       ipv6_inetpeer_exit,
3145 };
3146
3147 static struct pernet_operations ip6_route_net_late_ops = {
3148         .init = ip6_route_net_init_late,
3149         .exit = ip6_route_net_exit_late,
3150 };
3151
3152 static struct notifier_block ip6_route_dev_notifier = {
3153         .notifier_call = ip6_route_dev_notify,
3154         .priority = 0,
3155 };
3156
3157 int __init ip6_route_init(void)
3158 {
3159         int ret;
3160
3161         ret = -ENOMEM;
3162         ip6_dst_ops_template.kmem_cachep =
3163                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3164                                   SLAB_HWCACHE_ALIGN, NULL);
3165         if (!ip6_dst_ops_template.kmem_cachep)
3166                 goto out;
3167
3168         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3169         if (ret)
3170                 goto out_kmem_cache;
3171
3172         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3173         if (ret)
3174                 goto out_dst_entries;
3175
3176         ret = register_pernet_subsys(&ip6_route_net_ops);
3177         if (ret)
3178                 goto out_register_inetpeer;
3179
3180         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3181
3182         /* Registering of the loopback is done before this portion of code,
3183          * the loopback reference in rt6_info will not be taken, do it
3184          * manually for init_net */
3185         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3186         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3187   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3188         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3189         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3190         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3191         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3192   #endif
3193         ret = fib6_init();
3194         if (ret)
3195                 goto out_register_subsys;
3196
3197         ret = xfrm6_init();
3198         if (ret)
3199                 goto out_fib6_init;
3200
3201         ret = fib6_rules_init();
3202         if (ret)
3203                 goto xfrm6_init;
3204
3205         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3206         if (ret)
3207                 goto fib6_rules_init;
3208
3209         ret = -ENOBUFS;
3210         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3211             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3212             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3213                 goto out_register_late_subsys;
3214
3215         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3216         if (ret)
3217                 goto out_register_late_subsys;
3218
3219 out:
3220         return ret;
3221
3222 out_register_late_subsys:
3223         unregister_pernet_subsys(&ip6_route_net_late_ops);
3224 fib6_rules_init:
3225         fib6_rules_cleanup();
3226 xfrm6_init:
3227         xfrm6_fini();
3228 out_fib6_init:
3229         fib6_gc_cleanup();
3230 out_register_subsys:
3231         unregister_pernet_subsys(&ip6_route_net_ops);
3232 out_register_inetpeer:
3233         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3234 out_dst_entries:
3235         dst_entries_destroy(&ip6_dst_blackhole_ops);
3236 out_kmem_cache:
3237         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3238         goto out;
3239 }
3240
3241 void ip6_route_cleanup(void)
3242 {
3243         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3244         unregister_pernet_subsys(&ip6_route_net_late_ops);
3245         fib6_rules_cleanup();
3246         xfrm6_fini();
3247         fib6_gc_cleanup();
3248         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3249         unregister_pernet_subsys(&ip6_route_net_ops);
3250         dst_entries_destroy(&ip6_dst_blackhole_ops);
3251         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3252 }