Pileus Git - ~andy/linux/blob - net/ipv6/route.c
regulator: lp8788: Implement list_voltage for lp8788_ldo_voltage_fixed_ops
[~andy/linux] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13
14 /*      Changes:
15  *
16  *      YOSHIFUJI Hideaki @USAGI
17  *              reworked default router selection.
18  *              - respect outgoing interface
19  *              - select from (probably) reachable routers (i.e.
20  *              routers in REACHABLE, STALE, DELAY or PROBE states).
21  *              - always select the same router if it is (probably)
22  *              reachable.  otherwise, round-robin the list.
23  *      Ville Nuorvala
24  *              Fixed routing subtrees.
25  */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Forward declarations of file-local helpers that are referenced further
 * down (by the dst_ops tables, the route-entry templates and the route
 * cloning helpers) before any definition of them appears.
 */
68 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
69                                     const struct in6_addr *dest);
70 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
71 static unsigned int      ip6_default_advmss(const struct dst_entry *dst);
72 static unsigned int      ip6_mtu(const struct dst_entry *dst);
73 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
74 static void             ip6_dst_destroy(struct dst_entry *);
75 static void             ip6_dst_ifdown(struct dst_entry *,
76                                        struct net_device *dev, int how);
77 static int               ip6_dst_gc(struct dst_ops *ops);
78
79 static int              ip6_pkt_discard(struct sk_buff *skb);
80 static int              ip6_pkt_discard_out(struct sk_buff *skb);
81 static void             ip6_link_failure(struct sk_buff *skb);
82 static void             ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
83                                            struct sk_buff *skb, u32 mtu);
84 static void             rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
85                                         struct sk_buff *skb);
86
/* Route Information option helpers; used by rt6_route_rcv() below. */
87 #ifdef CONFIG_IPV6_ROUTE_INFO
88 static struct rt6_info *rt6_add_route_info(struct net *net,
89                                            const struct in6_addr *prefix, int prefixlen,
90                                            const struct in6_addr *gwaddr, int ifindex,
91                                            unsigned int pref);
92 static struct rt6_info *rt6_get_route_info(struct net *net,
93                                            const struct in6_addr *prefix, int prefixlen,
94                                            const struct in6_addr *gwaddr, int ifindex);
95 #endif
96
97 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
98 {
99         struct rt6_info *rt = (struct rt6_info *) dst;
100         struct inet_peer *peer;
101         u32 *p = NULL;
102
103         if (!(rt->dst.flags & DST_HOST))
104                 return NULL;
105
106         peer = rt6_get_peer_create(rt);
107         if (peer) {
108                 u32 *old_p = __DST_METRICS_PTR(old);
109                 unsigned long prev, new;
110
111                 p = peer->metrics;
112                 if (inet_metrics_new(peer))
113                         memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
114
115                 new = (unsigned long) p;
116                 prev = cmpxchg(&dst->_metrics, old, new);
117
118                 if (prev != old) {
119                         p = __DST_METRICS_PTR(prev);
120                         if (prev & DST_METRICS_READ_ONLY)
121                                 p = NULL;
122                 }
123         }
124         return p;
125 }
126
127 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
128                                              struct sk_buff *skb,
129                                              const void *daddr)
130 {
131         struct in6_addr *p = &rt->rt6i_gateway;
132
133         if (!ipv6_addr_any(p))
134                 return (const void *) p;
135         else if (skb)
136                 return &ipv6_hdr(skb)->daddr;
137         return daddr;
138 }
139
140 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
141                                           struct sk_buff *skb,
142                                           const void *daddr)
143 {
144         struct rt6_info *rt = (struct rt6_info *) dst;
145         struct neighbour *n;
146
147         daddr = choose_neigh_daddr(rt, skb, daddr);
148         n = __ipv6_neigh_lookup(dst->dev, daddr);
149         if (n)
150                 return n;
151         return neigh_create(&nd_tbl, daddr, dst->dev);
152 }
153
/*
 * dst_ops for regular IPv6 routes: wires the generic dst layer to the
 * IPv6-specific handlers declared/defined in this file.
 * NOTE(review): named "template" and ip6_dst_alloc() allocates through
 * net->ipv6.ip6_dst_ops, so this is presumably copied per network
 * namespace at init time -- confirm in the netns setup code.
 */
154 static struct dst_ops ip6_dst_ops_template = {
155         .family                 =       AF_INET6,
156         .protocol               =       cpu_to_be16(ETH_P_IPV6),
157         .gc                     =       ip6_dst_gc,
158         .gc_thresh              =       1024,
159         .check                  =       ip6_dst_check,
160         .default_advmss         =       ip6_default_advmss,
161         .mtu                    =       ip6_mtu,
162         .cow_metrics            =       ipv6_cow_metrics,
163         .destroy                =       ip6_dst_destroy,
164         .ifdown                 =       ip6_dst_ifdown,
165         .negative_advice        =       ip6_negative_advice,
166         .link_failure           =       ip6_link_failure,
167         .update_pmtu            =       ip6_rt_update_pmtu,
168         .redirect               =       rt6_do_redirect,
169         .local_out              =       __ip6_local_out,
170         .neigh_lookup           =       ip6_neigh_lookup,
171 };
172
173 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
174 {
175         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
176
177         return mtu ? : dst->dev->mtu;
178 }
179
/*
 * dst_ops->update_pmtu handler for blackhole dsts: deliberately a no-op,
 * all arguments are ignored.
 */
180 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
181                                          struct sk_buff *skb, u32 mtu)
182 {
183 }
184
/*
 * dst_ops->redirect handler for blackhole dsts: deliberately a no-op,
 * all arguments are ignored.
 */
185 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
186                                       struct sk_buff *skb)
187 {
188 }
189
/*
 * dst_ops->cow_metrics handler for blackhole dsts: never provides a
 * writable metrics array, always returns NULL.
 */
190 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
191                                          unsigned long old)
192 {
193         return NULL;
194 }
195
/*
 * dst_ops for blackhole dsts.  Shares destroy/check/default_advmss/
 * neigh_lookup with the regular IPv6 ops, but uses the blackhole MTU
 * helper and the no-op update_pmtu/redirect/cow_metrics stubs above.
 */
196 static struct dst_ops ip6_dst_blackhole_ops = {
197         .family                 =       AF_INET6,
198         .protocol               =       cpu_to_be16(ETH_P_IPV6),
199         .destroy                =       ip6_dst_destroy,
200         .check                  =       ip6_dst_check,
201         .mtu                    =       ip6_blackhole_mtu,
202         .default_advmss         =       ip6_default_advmss,
203         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
204         .redirect               =       ip6_rt_blackhole_redirect,
205         .cow_metrics            =       ip6_rt_blackhole_cow_metrics,
206         .neigh_lookup           =       ip6_neigh_lookup,
207 };
208
/*
 * Constant metrics array with every slot zero; the hop-limit slot is
 * spelled out explicitly.
 * NOTE(review): presumably installed as the read-only default metrics of
 * the template route entries -- confirm where it is referenced.
 */
209 static const u32 ip6_template_metrics[RTAX_MAX] = {
210         [RTAX_HOPLIMIT - 1] = 0,
211 };
212
/*
 * Template for the "no route" entry: a reject route whose dst carries
 * -ENETUNREACH and whose input/output handlers discard the packet.
 * The lookup code in this file compares against and returns
 * net->ipv6.ip6_null_entry; NOTE(review): that per-netns entry is
 * presumably a copy of this template -- confirm in the netns init code.
 */
213 static const struct rt6_info ip6_null_entry_template = {
214         .dst = {
215                 .__refcnt       = ATOMIC_INIT(1),
216                 .__use          = 1,
217                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
218                 .error          = -ENETUNREACH,
219                 .input          = ip6_pkt_discard,
220                 .output         = ip6_pkt_discard_out,
221         },
222         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
223         .rt6i_protocol  = RTPROT_KERNEL,
224         .rt6i_metric    = ~(u32) 0,
225         .rt6i_ref       = ATOMIC_INIT(1),
226 };
227
228 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
229
230 static int ip6_pkt_prohibit(struct sk_buff *skb);
231 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
232
/*
 * Template for the "prohibit" reject entry (only built with
 * CONFIG_IPV6_MULTIPLE_TABLES): same shape as the null entry, but the dst
 * error is -EACCES and packets go through the ip6_pkt_prohibit handlers.
 */
233 static const struct rt6_info ip6_prohibit_entry_template = {
234         .dst = {
235                 .__refcnt       = ATOMIC_INIT(1),
236                 .__use          = 1,
237                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
238                 .error          = -EACCES,
239                 .input          = ip6_pkt_prohibit,
240                 .output         = ip6_pkt_prohibit_out,
241         },
242         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
243         .rt6i_protocol  = RTPROT_KERNEL,
244         .rt6i_metric    = ~(u32) 0,
245         .rt6i_ref       = ATOMIC_INIT(1),
246 };
247
/*
 * Template for the "blackhole" reject entry (only built with
 * CONFIG_IPV6_MULTIPLE_TABLES): dst error is -EINVAL and both input and
 * output use the generic dst_discard handler.
 */
248 static const struct rt6_info ip6_blk_hole_entry_template = {
249         .dst = {
250                 .__refcnt       = ATOMIC_INIT(1),
251                 .__use          = 1,
252                 .obsolete       = DST_OBSOLETE_FORCE_CHK,
253                 .error          = -EINVAL,
254                 .input          = dst_discard,
255                 .output         = dst_discard,
256         },
257         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
258         .rt6i_protocol  = RTPROT_KERNEL,
259         .rt6i_metric    = ~(u32) 0,
260         .rt6i_ref       = ATOMIC_INIT(1),
261 };
262
263 #endif
264
265 /* allocate dst with ip6_dst_ops */
266 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
267                                              struct net_device *dev,
268                                              int flags,
269                                              struct fib6_table *table)
270 {
271         struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
272                                         0, DST_OBSOLETE_FORCE_CHK, flags);
273
274         if (rt) {
275                 struct dst_entry *dst = &rt->dst;
276
277                 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
278                 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
279                 rt->rt6i_genid = rt_genid(net);
280                 INIT_LIST_HEAD(&rt->rt6i_siblings);
281                 rt->rt6i_nsiblings = 0;
282         }
283         return rt;
284 }
285
286 static void ip6_dst_destroy(struct dst_entry *dst)
287 {
288         struct rt6_info *rt = (struct rt6_info *)dst;
289         struct inet6_dev *idev = rt->rt6i_idev;
290         struct dst_entry *from = dst->from;
291
292         if (!(rt->dst.flags & DST_HOST))
293                 dst_destroy_metrics_generic(dst);
294
295         if (idev) {
296                 rt->rt6i_idev = NULL;
297                 in6_dev_put(idev);
298         }
299
300         dst->from = NULL;
301         dst_release(from);
302
303         if (rt6_has_peer(rt)) {
304                 struct inet_peer *peer = rt6_peer_ptr(rt);
305                 inet_putpeer(peer);
306         }
307 }
308
309 void rt6_bind_peer(struct rt6_info *rt, int create)
310 {
311         struct inet_peer_base *base;
312         struct inet_peer *peer;
313
314         base = inetpeer_base_ptr(rt->_rt6i_peer);
315         if (!base)
316                 return;
317
318         peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
319         if (peer) {
320                 if (!rt6_set_peer(rt, peer))
321                         inet_putpeer(peer);
322         }
323 }
324
325 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
326                            int how)
327 {
328         struct rt6_info *rt = (struct rt6_info *)dst;
329         struct inet6_dev *idev = rt->rt6i_idev;
330         struct net_device *loopback_dev =
331                 dev_net(dev)->loopback_dev;
332
333         if (dev != loopback_dev) {
334                 if (idev && idev->dev == dev) {
335                         struct inet6_dev *loopback_idev =
336                                 in6_dev_get(loopback_dev);
337                         if (loopback_idev) {
338                                 rt->rt6i_idev = loopback_idev;
339                                 in6_dev_put(idev);
340                         }
341                 }
342         }
343 }
344
345 static bool rt6_check_expired(const struct rt6_info *rt)
346 {
347         if (rt->rt6i_flags & RTF_EXPIRES) {
348                 if (time_after(jiffies, rt->dst.expires))
349                         return true;
350         } else if (rt->dst.from) {
351                 return rt6_check_expired((struct rt6_info *) rt->dst.from);
352         }
353         return false;
354 }
355
356 static bool rt6_need_strict(const struct in6_addr *daddr)
357 {
358         return ipv6_addr_type(daddr) &
359                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
360 }
361
362 /* Multipath route selection:
363  *   Hash based function using packet header and flowlabel.
364  * Adapted from fib_info_hashfn()
365  */
366 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
367                                const struct flowi6 *fl6)
368 {
369         unsigned int val = fl6->flowi6_proto;
370
371         val ^= ipv6_addr_hash(&fl6->daddr);
372         val ^= ipv6_addr_hash(&fl6->saddr);
373
374         /* Work only if this not encapsulated */
375         switch (fl6->flowi6_proto) {
376         case IPPROTO_UDP:
377         case IPPROTO_TCP:
378         case IPPROTO_SCTP:
379                 val ^= (__force u16)fl6->fl6_sport;
380                 val ^= (__force u16)fl6->fl6_dport;
381                 break;
382
383         case IPPROTO_ICMPV6:
384                 val ^= (__force u16)fl6->fl6_icmp_type;
385                 val ^= (__force u16)fl6->fl6_icmp_code;
386                 break;
387         }
388         /* RFC6438 recommands to use flowlabel */
389         val ^= (__force u32)fl6->flowlabel;
390
391         /* Perhaps, we need to tune, this function? */
392         val = val ^ (val >> 7) ^ (val >> 12);
393         return val % candidate_count;
394 }
395
396 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
397                                              struct flowi6 *fl6)
398 {
399         struct rt6_info *sibling, *next_sibling;
400         int route_choosen;
401
402         route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
403         /* Don't change the route, if route_choosen == 0
404          * (siblings does not include ourself)
405          */
406         if (route_choosen)
407                 list_for_each_entry_safe(sibling, next_sibling,
408                                 &match->rt6i_siblings, rt6i_siblings) {
409                         route_choosen--;
410                         if (route_choosen == 0) {
411                                 match = sibling;
412                                 break;
413                         }
414                 }
415         return match;
416 }
417
418 /*
419  *      Route lookup. Any table->tb6_lock is implied.
420  */
421
422 static inline struct rt6_info *rt6_device_match(struct net *net,
423                                                     struct rt6_info *rt,
424                                                     const struct in6_addr *saddr,
425                                                     int oif,
426                                                     int flags)
427 {
428         struct rt6_info *local = NULL;
429         struct rt6_info *sprt;
430
431         if (!oif && ipv6_addr_any(saddr))
432                 goto out;
433
434         for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
435                 struct net_device *dev = sprt->dst.dev;
436
437                 if (oif) {
438                         if (dev->ifindex == oif)
439                                 return sprt;
440                         if (dev->flags & IFF_LOOPBACK) {
441                                 if (!sprt->rt6i_idev ||
442                                     sprt->rt6i_idev->dev->ifindex != oif) {
443                                         if (flags & RT6_LOOKUP_F_IFACE && oif)
444                                                 continue;
445                                         if (local && (!oif ||
446                                                       local->rt6i_idev->dev->ifindex == oif))
447                                                 continue;
448                                 }
449                                 local = sprt;
450                         }
451                 } else {
452                         if (ipv6_chk_addr(net, saddr, dev,
453                                           flags & RT6_LOOKUP_F_IFACE))
454                                 return sprt;
455                 }
456         }
457
458         if (oif) {
459                 if (local)
460                         return local;
461
462                 if (flags & RT6_LOOKUP_F_IFACE)
463                         return net->ipv6.ip6_null_entry;
464         }
465 out:
466         return rt;
467 }
468
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing for a gateway route.
 *
 * Sends a Neighbour Solicitation for the route's gateway (to its
 * solicited-node multicast address) when either no neighbour entry exists,
 * or the entry is not NUD_VALID and was last updated more than
 * rtr_probe_interval ago.  In the latter case the entry's "updated" stamp
 * is refreshed first, which is what rate-limits the probes.
 * A NULL @rt or a route without RTF_GATEWAY is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
        struct neighbour *neigh;
        bool send_probe = true;

        if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
                return;

        rcu_read_lock_bh();
        neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
        if (neigh) {
                write_lock(&neigh->lock);
                if (neigh->nud_state & NUD_VALID)
                        send_probe = false;
                else if (!time_after(jiffies, neigh->updated +
                                     rt->rt6i_idev->cnf.rtr_probe_interval))
                        send_probe = false;
                else
                        neigh->updated = jiffies;
                write_unlock(&neigh->lock);
        }

        if (send_probe) {
                struct in6_addr mcaddr;
                struct in6_addr *target;

                target = (struct in6_addr *)&rt->rt6i_gateway;
                addrconf_addr_solict_mult(target, &mcaddr);
                ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
        }
        rcu_read_unlock_bh();
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
515
516 /*
517  * Default Router Selection (RFC 2461 6.3.6)
518  */
519 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
520 {
521         struct net_device *dev = rt->dst.dev;
522         if (!oif || dev->ifindex == oif)
523                 return 2;
524         if ((dev->flags & IFF_LOOPBACK) &&
525             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
526                 return 1;
527         return 0;
528 }
529
530 static inline bool rt6_check_neigh(struct rt6_info *rt)
531 {
532         struct neighbour *neigh;
533         bool ret = false;
534
535         if (rt->rt6i_flags & RTF_NONEXTHOP ||
536             !(rt->rt6i_flags & RTF_GATEWAY))
537                 return true;
538
539         rcu_read_lock_bh();
540         neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
541         if (neigh) {
542                 read_lock(&neigh->lock);
543                 if (neigh->nud_state & NUD_VALID)
544                         ret = true;
545 #ifdef CONFIG_IPV6_ROUTER_PREF
546                 else if (!(neigh->nud_state & NUD_FAILED))
547                         ret = true;
548 #endif
549                 read_unlock(&neigh->lock);
550         }
551         rcu_read_unlock_bh();
552
553         return ret;
554 }
555
556 static int rt6_score_route(struct rt6_info *rt, int oif,
557                            int strict)
558 {
559         int m;
560
561         m = rt6_check_dev(rt, oif);
562         if (!m && (strict & RT6_LOOKUP_F_IFACE))
563                 return -1;
564 #ifdef CONFIG_IPV6_ROUTER_PREF
565         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
566 #endif
567         if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
568                 return -1;
569         return m;
570 }
571
572 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
573                                    int *mpri, struct rt6_info *match)
574 {
575         int m;
576
577         if (rt6_check_expired(rt))
578                 goto out;
579
580         m = rt6_score_route(rt, oif, strict);
581         if (m < 0)
582                 goto out;
583
584         if (m > *mpri) {
585                 if (strict & RT6_LOOKUP_F_REACHABLE)
586                         rt6_probe(match);
587                 *mpri = m;
588                 match = rt;
589         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
590                 rt6_probe(rt);
591         }
592
593 out:
594         return match;
595 }
596
597 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
598                                      struct rt6_info *rr_head,
599                                      u32 metric, int oif, int strict)
600 {
601         struct rt6_info *rt, *match;
602         int mpri = -1;
603
604         match = NULL;
605         for (rt = rr_head; rt && rt->rt6i_metric == metric;
606              rt = rt->dst.rt6_next)
607                 match = find_match(rt, oif, strict, &mpri, match);
608         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
609              rt = rt->dst.rt6_next)
610                 match = find_match(rt, oif, strict, &mpri, match);
611
612         return match;
613 }
614
615 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
616 {
617         struct rt6_info *match, *rt0;
618         struct net *net;
619
620         rt0 = fn->rr_ptr;
621         if (!rt0)
622                 fn->rr_ptr = rt0 = fn->leaf;
623
624         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
625
626         if (!match &&
627             (strict & RT6_LOOKUP_F_REACHABLE)) {
628                 struct rt6_info *next = rt0->dst.rt6_next;
629
630                 /* no entries matched; do round-robin */
631                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
632                         next = fn->leaf;
633
634                 if (next != rt0)
635                         fn->rr_ptr = next;
636         }
637
638         net = dev_net(rt0->dst.dev);
639         return match ? match : net->ipv6.ip6_null_entry;
640 }
641
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received from router @gwaddr on @dev.
 *
 * @opt/@len describe the raw option.  After validating the option length
 * against the prefix length and rejecting an invalid preference, the
 * matching route-info route is looked up and then
 *   - deleted when the advertised lifetime is zero,
 *   - created when it does not exist and the lifetime is non-zero,
 *   - otherwise updated with the advertised preference;
 * finally its expiry is set from the lifetime (or cleared for an infinite
 * lifetime) and the reference obtained here is dropped.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                  const struct in6_addr *gwaddr)
{
        struct net *net = dev_net(dev);
        struct route_info *rinfo = (struct route_info *) opt;
        struct in6_addr prefix_buf, *prefix;
        unsigned int pref;
        unsigned long lifetime;
        struct rt6_info *rt;

        if (len < sizeof(struct route_info))
                return -EINVAL;

        /* Sanity check for prefix_len and length */
        if (rinfo->length > 3 || rinfo->prefix_len > 128)
                return -EINVAL;
        if (rinfo->prefix_len > 64 && rinfo->length < 2)
                return -EINVAL;
        if (rinfo->prefix_len > 0 && rinfo->length < 1)
                return -EINVAL;

        pref = rinfo->route_pref;
        if (pref == ICMPV6_ROUTER_PREF_INVALID)
                return -EINVAL;

        lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

        if (rinfo->length == 3) {
                /* option carries the full 128-bit prefix */
                prefix = (struct in6_addr *)rinfo->prefix;
        } else {
                /* copy only prefix_len bits into a zero-padded buffer */
                ipv6_addr_prefix(&prefix_buf,
                                 (struct in6_addr *)rinfo->prefix,
                                 rinfo->prefix_len);
                prefix = &prefix_buf;
        }

        rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
                                dev->ifindex);

        if (rt && !lifetime) {
                ip6_del_rt(rt);
                rt = NULL;
        }

        if (rt) {
                rt->rt6i_flags = RTF_ROUTEINFO |
                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
        } else if (lifetime) {
                rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
                                        dev->ifindex, pref);
        }

        if (!rt)
                return 0;

        if (addrconf_finite_timeout(lifetime))
                rt6_set_expires(rt, jiffies + HZ * lifetime);
        else
                rt6_clean_expires(rt);

        ip6_rt_put(rt);
        return 0;
}
#endif
714
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed selection.
 *
 * Only acts when the caller's local "rt" is the namespace's null entry.
 * It then walks the caller's local "fn" towards the root: at each step it
 * moves to the parent, or, when the parent has a source subtree that "fn"
 * is not itself the root of, looks @saddr up in that subtree.  As soon as
 * it lands on a node flagged RTN_RTINFO it jumps to the caller's "restart"
 * label; if it reaches a node flagged RTN_TL_ROOT first it jumps to the
 * caller's "out" label.
 *
 * The expanding function must therefore provide locals named "rt" and "fn"
 * and labels named "restart" and "out".
 */
715 #define BACKTRACK(__net, saddr)                 \
716 do { \
717         if (rt == __net->ipv6.ip6_null_entry) { \
718                 struct fib6_node *pn; \
719                 while (1) { \
720                         if (fn->fn_flags & RTN_TL_ROOT) \
721                                 goto out; \
722                         pn = fn->parent; \
723                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
724                                 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
725                         else \
726                                 fn = pn; \
727                         if (fn->fn_flags & RTN_RTINFO) \
728                                 goto restart; \
729                 } \
730         } \
731 } while (0)
732
/*
 * Per-table lookup callback handed to fib6_rule_lookup() by
 * ip6_route_lookup() and rt6_lookup().
 *
 * With table->tb6_lock read-held, finds the fib6 node for the flow's
 * addresses, narrows its leaf list by device/source address, applies
 * multipath selection when the route has siblings and no output interface
 * was requested, and backtracks up the tree while the result is the null
 * entry.  dst_use() is called on the result before the lock is dropped.
 *
 * The "restart" and "out" labels are the jump targets used by BACKTRACK().
 */
733 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
734                                              struct fib6_table *table,
735                                              struct flowi6 *fl6, int flags)
736 {
737         struct fib6_node *fn;
738         struct rt6_info *rt;
739
740         read_lock_bh(&table->tb6_lock);
741         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
742 restart:
743         rt = fn->leaf;
744         rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
745         if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
746                 rt = rt6_multipath_select(rt, fl6);
747         BACKTRACK(net, &fl6->saddr);
748 out:
749         dst_use(&rt->dst, jiffies);
750         read_unlock_bh(&table->tb6_lock);
751         return rt;
752
753 }
754
/*
 * Exported (GPL-only) route lookup: runs ip6_pol_route_lookup() through
 * fib6_rule_lookup() for flow @fl6 with lookup @flags and returns the
 * resulting dst_entry.
 */
755 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
756                                     int flags)
757 {
758         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
759 }
760 EXPORT_SYMBOL_GPL(ip6_route_lookup);
761
762 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
763                             const struct in6_addr *saddr, int oif, int strict)
764 {
765         struct flowi6 fl6 = {
766                 .flowi6_oif = oif,
767                 .daddr = *daddr,
768         };
769         struct dst_entry *dst;
770         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
771
772         if (saddr) {
773                 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
774                 flags |= RT6_LOOKUP_F_HAS_SADDR;
775         }
776
777         dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
778         if (dst->error == 0)
779                 return (struct rt6_info *) dst;
780
781         dst_release(dst);
782
783         return NULL;
784 }
785
786 EXPORT_SYMBOL(rt6_lookup);
787
788 /* ip6_ins_rt is called with table->tb6_lock NOT held.
789    It takes a new route entry; if the addition fails for any reason the
790    route is freed. In any case, if the caller does not hold a reference
791    to it, it may be destroyed.
792  */
793
794 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
795 {
796         int err;
797         struct fib6_table *table;
798
799         table = rt->rt6i_table;
800         write_lock_bh(&table->tb6_lock);
801         err = fib6_add(&table->tb6_root, rt, info);
802         write_unlock_bh(&table->tb6_lock);
803
804         return err;
805 }
806
807 int ip6_ins_rt(struct rt6_info *rt)
808 {
809         struct nl_info info = {
810                 .nl_net = dev_net(rt->dst.dev),
811         };
812         return __ip6_ins_rt(rt, &info);
813 }
814
815 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
816                                       const struct in6_addr *daddr,
817                                       const struct in6_addr *saddr)
818 {
819         struct rt6_info *rt;
820
821         /*
822          *      Clone the route.
823          */
824
825         rt = ip6_rt_copy(ort, daddr);
826
827         if (rt) {
828                 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
829                         if (ort->rt6i_dst.plen != 128 &&
830                             ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
831                                 rt->rt6i_flags |= RTF_ANYCAST;
832                         rt->rt6i_gateway = *daddr;
833                 }
834
835                 rt->rt6i_flags |= RTF_CACHE;
836
837 #ifdef CONFIG_IPV6_SUBTREES
838                 if (rt->rt6i_src.plen && saddr) {
839                         rt->rt6i_src.addr = *saddr;
840                         rt->rt6i_src.plen = 128;
841                 }
842 #endif
843         }
844
845         return rt;
846 }
847
848 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
849                                         const struct in6_addr *daddr)
850 {
851         struct rt6_info *rt = ip6_rt_copy(ort, daddr);
852
853         if (rt)
854                 rt->rt6i_flags |= RTF_CACHE;
855         return rt;
856 }
857
/*
 * Core per-table route resolution for flow @fl6, with @oif as the
 * interface to match against.
 *
 * 1. Under table->tb6_lock (read), look the flow up and pick a route with
 *    rt6_select(); RT6_LOOKUP_F_REACHABLE is requested on the first pass
 *    unless forwarding is enabled in devconf_all.  Multipath selection is
 *    applied when the route has siblings and @oif is 0, and BACKTRACK()
 *    climbs the tree while the result is the null entry.
 * 2. If the result is the null entry or already an RTF_CACHE route, control
 *    goes to "out": when the reachable requirement was still set it is
 *    cleared and the lookup is redone once from restart_2; otherwise a
 *    reference is taken and the lock dropped.
 * 3. Otherwise a reference is taken, the lock dropped, and a per-destination
 *    copy is made: rt6_alloc_cow() for routes with neither RTF_NONEXTHOP nor
 *    RTF_GATEWAY, rt6_alloc_clone() for other non-DST_HOST routes; DST_HOST
 *    routes are used as they are.  The copy is inserted with ip6_ins_rt();
 *    if the copy or the insert fails the whole lookup is retried, for at
 *    most three attempts in total.  A failed copy is replaced by the null
 *    entry.
 *
 * The returned route is referenced and has its lastuse/__use updated.
 */
858 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
859                                       struct flowi6 *fl6, int flags)
860 {
861         struct fib6_node *fn;
862         struct rt6_info *rt, *nrt;
863         int strict = 0;
864         int attempts = 3;
865         int err;
866         int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
867
868         strict |= flags & RT6_LOOKUP_F_IFACE;
869
870 relookup:
871         read_lock_bh(&table->tb6_lock);
872
873 restart_2:
874         fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
875
876 restart:
877         rt = rt6_select(fn, oif, strict | reachable);
878         if (rt->rt6i_nsiblings && oif == 0)
879                 rt = rt6_multipath_select(rt, fl6);
880         BACKTRACK(net, &fl6->saddr);
881         if (rt == net->ipv6.ip6_null_entry ||
882             rt->rt6i_flags & RTF_CACHE)
883                 goto out;
884
        /* Pin the selected route before dropping the table lock. */
885         dst_hold(&rt->dst);
886         read_unlock_bh(&table->tb6_lock);
887
888         if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
889                 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
890         else if (!(rt->dst.flags & DST_HOST))
891                 nrt = rt6_alloc_clone(rt, &fl6->daddr);
892         else
893                 goto out2;
894
        /* Done with the original; continue with the copy (or the null entry). */
895         ip6_rt_put(rt);
896         rt = nrt ? : net->ipv6.ip6_null_entry;
897
898         dst_hold(&rt->dst);
899         if (nrt) {
900                 err = ip6_ins_rt(nrt);
901                 if (!err)
902                         goto out2;
903         }
904
905         if (--attempts <= 0)
906                 goto out2;
907
908         /*
909          * Race condition! In the gap, when table->tb6_lock was
910          * released someone could insert this route.  Relookup.
911          */
912         ip6_rt_put(rt);
913         goto relookup;
914
915 out:
        /* First pass required reachability: retry once without it. */
916         if (reachable) {
917                 reachable = 0;
918                 goto restart_2;
919         }
920         dst_hold(&rt->dst);
921         read_unlock_bh(&table->tb6_lock);
922 out2:
923         rt->dst.lastuse = jiffies;
924         rt->dst.__use++;
925
926         return rt;
927 }
928
929 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
930                                             struct flowi6 *fl6, int flags)
931 {
932         return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
933 }
934
935 static struct dst_entry *ip6_route_input_lookup(struct net *net,
936                                                 struct net_device *dev,
937                                                 struct flowi6 *fl6, int flags)
938 {
939         if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
940                 flags |= RT6_LOOKUP_F_IFACE;
941
942         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
943 }
944
945 void ip6_route_input(struct sk_buff *skb)
946 {
947         const struct ipv6hdr *iph = ipv6_hdr(skb);
948         struct net *net = dev_net(skb->dev);
949         int flags = RT6_LOOKUP_F_HAS_SADDR;
950         struct flowi6 fl6 = {
951                 .flowi6_iif = skb->dev->ifindex,
952                 .daddr = iph->daddr,
953                 .saddr = iph->saddr,
954                 .flowlabel = ip6_flowinfo(iph),
955                 .flowi6_mark = skb->mark,
956                 .flowi6_proto = iph->nexthdr,
957         };
958
959         skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
960 }
961
962 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
963                                              struct flowi6 *fl6, int flags)
964 {
965         return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
966 }
967
968 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
969                                     struct flowi6 *fl6)
970 {
971         int flags = 0;
972
973         fl6->flowi6_iif = LOOPBACK_IFINDEX;
974
975         if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
976                 flags |= RT6_LOOKUP_F_IFACE;
977
978         if (!ipv6_addr_any(&fl6->saddr))
979                 flags |= RT6_LOOKUP_F_HAS_SADDR;
980         else if (sk)
981                 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
982
983         return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
984 }
985
986 EXPORT_SYMBOL(ip6_route_output);
987
988 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
989 {
990         struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
991         struct dst_entry *new = NULL;
992
993         rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
994         if (rt) {
995                 new = &rt->dst;
996
997                 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
998                 rt6_init_peer(rt, net->ipv6.peers);
999
1000                 new->__use = 1;
1001                 new->input = dst_discard;
1002                 new->output = dst_discard;
1003
1004                 if (dst_metrics_read_only(&ort->dst))
1005                         new->_metrics = ort->dst._metrics;
1006                 else
1007                         dst_copy_metrics(new, &ort->dst);
1008                 rt->rt6i_idev = ort->rt6i_idev;
1009                 if (rt->rt6i_idev)
1010                         in6_dev_hold(rt->rt6i_idev);
1011
1012                 rt->rt6i_gateway = ort->rt6i_gateway;
1013                 rt->rt6i_flags = ort->rt6i_flags;
1014                 rt->rt6i_metric = 0;
1015
1016                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1017 #ifdef CONFIG_IPV6_SUBTREES
1018                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1019 #endif
1020
1021                 dst_free(new);
1022         }
1023
1024         dst_release(dst_orig);
1025         return new ? new : ERR_PTR(-ENOMEM);
1026 }
1027
1028 /*
1029  *      Destination cache support functions
1030  */
1031
1032 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1033 {
1034         struct rt6_info *rt;
1035
1036         rt = (struct rt6_info *) dst;
1037
1038         /* All IPV6 dsts are created with ->obsolete set to the value
1039          * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1040          * into this function always.
1041          */
1042         if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
1043                 return NULL;
1044
1045         if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1046                 return dst;
1047
1048         return NULL;
1049 }
1050
1051 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1052 {
1053         struct rt6_info *rt = (struct rt6_info *) dst;
1054
1055         if (rt) {
1056                 if (rt->rt6i_flags & RTF_CACHE) {
1057                         if (rt6_check_expired(rt)) {
1058                                 ip6_del_rt(rt);
1059                                 dst = NULL;
1060                         }
1061                 } else {
1062                         dst_release(dst);
1063                         dst = NULL;
1064                 }
1065         }
1066         return dst;
1067 }
1068
1069 static void ip6_link_failure(struct sk_buff *skb)
1070 {
1071         struct rt6_info *rt;
1072
1073         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1074
1075         rt = (struct rt6_info *) skb_dst(skb);
1076         if (rt) {
1077                 if (rt->rt6i_flags & RTF_CACHE)
1078                         rt6_update_expires(rt, 0);
1079                 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1080                         rt->rt6i_node->fn_sernum = -1;
1081         }
1082 }
1083
1084 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1085                                struct sk_buff *skb, u32 mtu)
1086 {
1087         struct rt6_info *rt6 = (struct rt6_info*)dst;
1088
1089         dst_confirm(dst);
1090         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1091                 struct net *net = dev_net(dst->dev);
1092
1093                 rt6->rt6i_flags |= RTF_MODIFIED;
1094                 if (mtu < IPV6_MIN_MTU) {
1095                         u32 features = dst_metric(dst, RTAX_FEATURES);
1096                         mtu = IPV6_MIN_MTU;
1097                         features |= RTAX_FEATURE_ALLFRAG;
1098                         dst_metric_set(dst, RTAX_FEATURES, features);
1099                 }
1100                 dst_metric_set(dst, RTAX_MTU, mtu);
1101                 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1102         }
1103 }
1104
1105 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1106                      int oif, u32 mark)
1107 {
1108         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1109         struct dst_entry *dst;
1110         struct flowi6 fl6;
1111
1112         memset(&fl6, 0, sizeof(fl6));
1113         fl6.flowi6_oif = oif;
1114         fl6.flowi6_mark = mark;
1115         fl6.flowi6_flags = 0;
1116         fl6.daddr = iph->daddr;
1117         fl6.saddr = iph->saddr;
1118         fl6.flowlabel = ip6_flowinfo(iph);
1119
1120         dst = ip6_route_output(net, NULL, &fl6);
1121         if (!dst->error)
1122                 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1123         dst_release(dst);
1124 }
1125 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1126
1127 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1128 {
1129         ip6_update_pmtu(skb, sock_net(sk), mtu,
1130                         sk->sk_bound_dev_if, sk->sk_mark);
1131 }
1132 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1133
1134 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1135 {
1136         const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1137         struct dst_entry *dst;
1138         struct flowi6 fl6;
1139
1140         memset(&fl6, 0, sizeof(fl6));
1141         fl6.flowi6_oif = oif;
1142         fl6.flowi6_mark = mark;
1143         fl6.flowi6_flags = 0;
1144         fl6.daddr = iph->daddr;
1145         fl6.saddr = iph->saddr;
1146         fl6.flowlabel = ip6_flowinfo(iph);
1147
1148         dst = ip6_route_output(net, NULL, &fl6);
1149         if (!dst->error)
1150                 rt6_do_redirect(dst, NULL, skb);
1151         dst_release(dst);
1152 }
1153 EXPORT_SYMBOL_GPL(ip6_redirect);
1154
1155 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1156 {
1157         ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1158 }
1159 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1160
1161 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1162 {
1163         struct net_device *dev = dst->dev;
1164         unsigned int mtu = dst_mtu(dst);
1165         struct net *net = dev_net(dev);
1166
1167         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1168
1169         if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1170                 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1171
1172         /*
1173          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1174          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1175          * IPV6_MAXPLEN is also valid and means: "any MSS,
1176          * rely only on pmtu discovery"
1177          */
1178         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1179                 mtu = IPV6_MAXPLEN;
1180         return mtu;
1181 }
1182
1183 static unsigned int ip6_mtu(const struct dst_entry *dst)
1184 {
1185         struct inet6_dev *idev;
1186         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1187
1188         if (mtu)
1189                 return mtu;
1190
1191         mtu = IPV6_MIN_MTU;
1192
1193         rcu_read_lock();
1194         idev = __in6_dev_get(dst->dev);
1195         if (idev)
1196                 mtu = idev->cnf.mtu6;
1197         rcu_read_unlock();
1198
1199         return mtu;
1200 }
1201
/*
 *      Singly linked list (chained through dst->next) of the dst entries
 *      created by icmp6_dst_alloc().  icmp6_dst_lock is taken around every
 *      walk or update of the list in this file.
 */
1202 static struct dst_entry *icmp6_dst_gc_list;
1203 static DEFINE_SPINLOCK(icmp6_dst_lock);
1204
1205 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1206                                   struct flowi6 *fl6)
1207 {
1208         struct dst_entry *dst;
1209         struct rt6_info *rt;
1210         struct inet6_dev *idev = in6_dev_get(dev);
1211         struct net *net = dev_net(dev);
1212
1213         if (unlikely(!idev))
1214                 return ERR_PTR(-ENODEV);
1215
1216         rt = ip6_dst_alloc(net, dev, 0, NULL);
1217         if (unlikely(!rt)) {
1218                 in6_dev_put(idev);
1219                 dst = ERR_PTR(-ENOMEM);
1220                 goto out;
1221         }
1222
1223         rt->dst.flags |= DST_HOST;
1224         rt->dst.output  = ip6_output;
1225         atomic_set(&rt->dst.__refcnt, 1);
1226         rt->rt6i_dst.addr = fl6->daddr;
1227         rt->rt6i_dst.plen = 128;
1228         rt->rt6i_idev     = idev;
1229         dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1230
1231         spin_lock_bh(&icmp6_dst_lock);
1232         rt->dst.next = icmp6_dst_gc_list;
1233         icmp6_dst_gc_list = &rt->dst;
1234         spin_unlock_bh(&icmp6_dst_lock);
1235
1236         fib6_force_start_gc(net);
1237
1238         dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1239
1240 out:
1241         return dst;
1242 }
1243
1244 int icmp6_dst_gc(void)
1245 {
1246         struct dst_entry *dst, **pprev;
1247         int more = 0;
1248
1249         spin_lock_bh(&icmp6_dst_lock);
1250         pprev = &icmp6_dst_gc_list;
1251
1252         while ((dst = *pprev) != NULL) {
1253                 if (!atomic_read(&dst->__refcnt)) {
1254                         *pprev = dst->next;
1255                         dst_free(dst);
1256                 } else {
1257                         pprev = &dst->next;
1258                         ++more;
1259                 }
1260         }
1261
1262         spin_unlock_bh(&icmp6_dst_lock);
1263
1264         return more;
1265 }
1266
1267 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1268                             void *arg)
1269 {
1270         struct dst_entry *dst, **pprev;
1271
1272         spin_lock_bh(&icmp6_dst_lock);
1273         pprev = &icmp6_dst_gc_list;
1274         while ((dst = *pprev) != NULL) {
1275                 struct rt6_info *rt = (struct rt6_info *) dst;
1276                 if (func(rt, arg)) {
1277                         *pprev = dst->next;
1278                         dst_free(dst);
1279                 } else {
1280                         pprev = &dst->next;
1281                 }
1282         }
1283         spin_unlock_bh(&icmp6_dst_lock);
1284 }
1285
1286 static int ip6_dst_gc(struct dst_ops *ops)
1287 {
1288         unsigned long now = jiffies;
1289         struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1290         int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1291         int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1292         int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1293         int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1294         unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1295         int entries;
1296
1297         entries = dst_entries_get_fast(ops);
1298         if (time_after(rt_last_gc + rt_min_interval, now) &&
1299             entries <= rt_max_size)
1300                 goto out;
1301
1302         net->ipv6.ip6_rt_gc_expire++;
1303         fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1304         net->ipv6.ip6_rt_last_gc = now;
1305         entries = dst_entries_get_slow(ops);
1306         if (entries < ops->gc_thresh)
1307                 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1308 out:
1309         net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1310         return entries > rt_max_size;
1311 }
1312
1313 int ip6_dst_hoplimit(struct dst_entry *dst)
1314 {
1315         int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1316         if (hoplimit == 0) {
1317                 struct net_device *dev = dst->dev;
1318                 struct inet6_dev *idev;
1319
1320                 rcu_read_lock();
1321                 idev = __in6_dev_get(dev);
1322                 if (idev)
1323                         hoplimit = idev->cnf.hop_limit;
1324                 else
1325                         hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1326                 rcu_read_unlock();
1327         }
1328         return hoplimit;
1329 }
1330 EXPORT_SYMBOL(ip6_dst_hoplimit);
1331
1332 /*
1333  *      Route configuration: adding and deleting routes
1334  */
1335
/*
 *      Build a route from @cfg and insert it into its fib6 table.
 *
 *      Returns the result of __ip6_ins_rt() on the success path, or a
 *      negative errno when validation or allocation fails before the
 *      insert.  On those early failures the device, inet6_dev and route
 *      acquired so far are released at the "out" label.
 *
 *      Side effects on @cfg: a zero fc_metric is replaced by
 *      IP6_RT_PRIO_USER, an RTPROT_UNSPEC fc_protocol by RTPROT_BOOT, and
 *      fc_nlinfo.nl_net is set to the namespace of the chosen device.
 */
1336 int ip6_route_add(struct fib6_config *cfg)
1337 {
1338         int err;
1339         struct net *net = cfg->fc_nlinfo.nl_net;
1340         struct rt6_info *rt = NULL;
1341         struct net_device *dev = NULL;
1342         struct inet6_dev *idev = NULL;
1343         struct fib6_table *table;
1344         int addr_type;
1345
1346         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1347                 return -EINVAL;
             /* Source-prefix routes are only accepted with subtree support. */
1348 #ifndef CONFIG_IPV6_SUBTREES
1349         if (cfg->fc_src_len)
1350                 return -EINVAL;
1351 #endif
             /* Take references on the requested device and its inet6_dev. */
1352         if (cfg->fc_ifindex) {
1353                 err = -ENODEV;
1354                 dev = dev_get_by_index(net, cfg->fc_ifindex);
1355                 if (!dev)
1356                         goto out;
1357                 idev = in6_dev_get(dev);
1358                 if (!idev)
1359                         goto out;
1360         }
1361
1362         if (cfg->fc_metric == 0)
1363                 cfg->fc_metric = IP6_RT_PRIO_USER;
1364
             /*
              * A netlink request without NLM_F_CREATE first looks for an
              * existing table; if none exists a warning is printed and the
              * table is created anyway.
              */
1365         err = -ENOBUFS;
1366         if (cfg->fc_nlinfo.nlh &&
1367             !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1368                 table = fib6_get_table(net, cfg->fc_table);
1369                 if (!table) {
1370                         pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1371                         table = fib6_new_table(net, cfg->fc_table);
1372                 }
1373         } else {
1374                 table = fib6_new_table(net, cfg->fc_table);
1375         }
1376
1377         if (!table)
1378                 goto out;
1379
1380         rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1381
1382         if (!rt) {
1383                 err = -ENOMEM;
1384                 goto out;
1385         }
1386
1387         if (cfg->fc_flags & RTF_EXPIRES)
1388                 rt6_set_expires(rt, jiffies +
1389                                 clock_t_to_jiffies(cfg->fc_expires));
1390         else
1391                 rt6_clean_expires(rt);
1392
1393         if (cfg->fc_protocol == RTPROT_UNSPEC)
1394                 cfg->fc_protocol = RTPROT_BOOT;
1395         rt->rt6i_protocol = cfg->fc_protocol;
1396
1397         addr_type = ipv6_addr_type(&cfg->fc_dst);
1398
             /* Pick the input handler from the destination type and RTF_LOCAL. */
1399         if (addr_type & IPV6_ADDR_MULTICAST)
1400                 rt->dst.input = ip6_mc_input;
1401         else if (cfg->fc_flags & RTF_LOCAL)
1402                 rt->dst.input = ip6_input;
1403         else
1404                 rt->dst.input = ip6_forward;
1405
1406         rt->dst.output = ip6_output;
1407
1408         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1409         rt->rt6i_dst.plen = cfg->fc_dst_len;
1410         if (rt->rt6i_dst.plen == 128)
1411                rt->dst.flags |= DST_HOST;
1412
             /* Non-host routes that carry metrics get a private, zeroed metrics array. */
1413         if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1414                 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1415                 if (!metrics) {
1416                         err = -ENOMEM;
1417                         goto out;
1418                 }
1419                 dst_init_metrics(&rt->dst, metrics, 0);
1420         }
1421 #ifdef CONFIG_IPV6_SUBTREES
1422         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1423         rt->rt6i_src.plen = cfg->fc_src_len;
1424 #endif
1425
1426         rt->rt6i_metric = cfg->fc_metric;
1427
1428         /* We cannot add true routes via loopback here: they would
1429            result in kernel looping.  Promote them to reject routes.
1430          */
1431         if ((cfg->fc_flags & RTF_REJECT) ||
1432             (dev && (dev->flags & IFF_LOOPBACK) &&
1433              !(addr_type & IPV6_ADDR_LOOPBACK) &&
1434              !(cfg->fc_flags & RTF_LOCAL))) {
1435                 /* Hold the loopback dev/idev if we haven't done so. */
1436                 if (dev != net->loopback_dev) {
1437                         if (dev) {
1438                                 dev_put(dev);
1439                                 in6_dev_put(idev);
1440                         }
1441                         dev = net->loopback_dev;
1442                         dev_hold(dev);
1443                         idev = in6_dev_get(dev);
1444                         if (!idev) {
1445                                 err = -ENODEV;
1446                                 goto out;
1447                         }
1448                 }
1449                 rt->dst.output = ip6_pkt_discard_out;
1450                 rt->dst.input = ip6_pkt_discard;
1451                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
                     /* The route type selects the error stored in the dst. */
1452                 switch (cfg->fc_type) {
1453                 case RTN_BLACKHOLE:
1454                         rt->dst.error = -EINVAL;
1455                         break;
1456                 case RTN_PROHIBIT:
1457                         rt->dst.error = -EACCES;
1458                         break;
1459                 case RTN_THROW:
1460                         rt->dst.error = -EAGAIN;
1461                         break;
1462                 default:
1463                         rt->dst.error = -ENETUNREACH;
1464                         break;
1465                 }
1466                 goto install_route;
1467         }
1468
1469         if (cfg->fc_flags & RTF_GATEWAY) {
1470                 const struct in6_addr *gw_addr;
1471                 int gwa_type;
1472
1473                 gw_addr = &cfg->fc_gateway;
1474                 rt->rt6i_gateway = *gw_addr;
1475                 gwa_type = ipv6_addr_type(gw_addr);
1476
1477                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1478                         struct rt6_info *grt;
1479
1480                         /* IPv6 strictly forbids using non-link-local
1481                            addresses as the nexthop address; otherwise
1482                            the router would not be able to send redirects.
1483                            That is very good, but in some (rare!) circumstances
1484                            (SIT, PtP, NBMA NOARP links) it is handy to allow
1485                            some exceptions. --ANK
1486                          */
1487                         err = -EINVAL;
1488                         if (!(gwa_type & IPV6_ADDR_UNICAST))
1489                                 goto out;
1490
1491                         grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1492
1493                         err = -EHOSTUNREACH;
1494                         if (!grt)
1495                                 goto out;
                             /*
                              * The gateway's route must use the requested device;
                              * with no device requested, adopt (and hold) the one
                              * the gateway is reached through.
                              */
1496                         if (dev) {
1497                                 if (dev != grt->dst.dev) {
1498                                         ip6_rt_put(grt);
1499                                         goto out;
1500                                 }
1501                         } else {
1502                                 dev = grt->dst.dev;
1503                                 idev = grt->rt6i_idev;
1504                                 dev_hold(dev);
1505                                 in6_dev_hold(grt->rt6i_idev);
1506                         }
                             /* Accept only when the gateway's own route is not via another gateway. */
1507                         if (!(grt->rt6i_flags & RTF_GATEWAY))
1508                                 err = 0;
1509                         ip6_rt_put(grt);
1510
1511                         if (err)
1512                                 goto out;
1513                 }
1514                 err = -EINVAL;
1515                 if (!dev || (dev->flags & IFF_LOOPBACK))
1516                         goto out;
1517         }
1518
1519         err = -ENODEV;
1520         if (!dev)
1521                 goto out;
1522
             /* A preferred source address must pass ipv6_chk_addr() for the device. */
1523         if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1524                 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1525                         err = -EINVAL;
1526                         goto out;
1527                 }
1528                 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1529                 rt->rt6i_prefsrc.plen = 128;
1530         } else
1531                 rt->rt6i_prefsrc.plen = 0;
1532
1533         rt->rt6i_flags = cfg->fc_flags;
1534
1535 install_route:
             /* Copy the user-supplied metrics; attribute type 0 is skipped. */
1536         if (cfg->fc_mx) {
1537                 struct nlattr *nla;
1538                 int remaining;
1539
1540                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1541                         int type = nla_type(nla);
1542
1543                         if (type) {
1544                                 if (type > RTAX_MAX) {
1545                                         err = -EINVAL;
1546                                         goto out;
1547                                 }
1548
1549                                 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1550                         }
1551                 }
1552         }
1553
             /* The dev/idev references taken above are handed over to the route. */
1554         rt->dst.dev = dev;
1555         rt->rt6i_idev = idev;
1556         rt->rt6i_table = table;
1557
1558         cfg->fc_nlinfo.nl_net = dev_net(dev);
1559
1560         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1561
1562 out:
1563         if (dev)
1564                 dev_put(dev);
1565         if (idev)
1566                 in6_dev_put(idev);
1567         if (rt)
1568                 dst_free(&rt->dst);
1569         return err;
1570 }
1571
1572 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1573 {
1574         int err;
1575         struct fib6_table *table;
1576         struct net *net = dev_net(rt->dst.dev);
1577
1578         if (rt == net->ipv6.ip6_null_entry) {
1579                 err = -ENOENT;
1580                 goto out;
1581         }
1582
1583         table = rt->rt6i_table;
1584         write_lock_bh(&table->tb6_lock);
1585         err = fib6_del(rt, info);
1586         write_unlock_bh(&table->tb6_lock);
1587
1588 out:
1589         ip6_rt_put(rt);
1590         return err;
1591 }
1592
1593 int ip6_del_rt(struct rt6_info *rt)
1594 {
1595         struct nl_info info = {
1596                 .nl_net = dev_net(rt->dst.dev),
1597         };
1598         return __ip6_del_rt(rt, &info);
1599 }
1600
1601 static int ip6_route_del(struct fib6_config *cfg)
1602 {
1603         struct fib6_table *table;
1604         struct fib6_node *fn;
1605         struct rt6_info *rt;
1606         int err = -ESRCH;
1607
1608         table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1609         if (!table)
1610                 return err;
1611
1612         read_lock_bh(&table->tb6_lock);
1613
1614         fn = fib6_locate(&table->tb6_root,
1615                          &cfg->fc_dst, cfg->fc_dst_len,
1616                          &cfg->fc_src, cfg->fc_src_len);
1617
1618         if (fn) {
1619                 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1620                         if (cfg->fc_ifindex &&
1621                             (!rt->dst.dev ||
1622                              rt->dst.dev->ifindex != cfg->fc_ifindex))
1623                                 continue;
1624                         if (cfg->fc_flags & RTF_GATEWAY &&
1625                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1626                                 continue;
1627                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1628                                 continue;
1629                         dst_hold(&rt->dst);
1630                         read_unlock_bh(&table->tb6_lock);
1631
1632                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1633                 }
1634         }
1635         read_unlock_bh(&table->tb6_lock);
1636
1637         return err;
1638 }
1639
/*
 *      Handle the redirect message carried in @skb for the route @dst.
 *
 *      After validating the message (length, non-multicast destination,
 *      target either equal to the destination or link-local unicast,
 *      redirects accepted and forwarding off on the receiving device,
 *      well-formed ND options), the target's neighbour entry is updated
 *      and a host copy of the route to the redirected destination, with
 *      the target as gateway, is inserted.  A netevent is raised and,
 *      if the old route was a cached one, it is deleted.
 *
 *      @sk is not used in this function.  Every failure simply returns.
 */
1640 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1641 {
1642         struct net *net = dev_net(skb->dev);
1643         struct netevent_redirect netevent;
1644         struct rt6_info *rt, *nrt = NULL;
1645         struct ndisc_options ndopts;
1646         struct inet6_dev *in6_dev;
1647         struct neighbour *neigh;
1648         struct rd_msg *msg;
1649         int optlen, on_link;
1650         u8 *lladdr;
1651
             /* Bytes of ND options that follow the fixed redirect header. */
1652         optlen = skb->tail - skb->transport_header;
1653         optlen -= sizeof(*msg);
1654
1655         if (optlen < 0) {
1656                 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1657                 return;
1658         }
1659
1660         msg = (struct rd_msg *)icmp6_hdr(skb);
1661
1662         if (ipv6_addr_is_multicast(&msg->dest)) {
1663                 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1664                 return;
1665         }
1666
             /* target == dest means the destination is itself on-link. */
1667         on_link = 0;
1668         if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1669                 on_link = 1;
1670         } else if (ipv6_addr_type(&msg->target) !=
1671                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1672                 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1673                 return;
1674         }
1675
1676         in6_dev = __in6_dev_get(skb->dev);
1677         if (!in6_dev)
1678                 return;
             /* Ignore redirects when forwarding, or when they are disabled on the device. */
1679         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1680                 return;
1681
1682         /* RFC2461 8.1:
1683          *      The IP source address of the Redirect MUST be the same as the current
1684          *      first-hop router for the specified ICMP Destination Address.
1685          */
1686
1687         if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1688                 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1689                 return;
1690         }
1691
             /* The target link-layer address option is optional; lladdr stays NULL without it. */
1692         lladdr = NULL;
1693         if (ndopts.nd_opts_tgt_lladdr) {
1694                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1695                                              skb->dev);
1696                 if (!lladdr) {
1697                         net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1698                         return;
1699                 }
1700         }
1701
1702         rt = (struct rt6_info *) dst;
1703         if (rt == net->ipv6.ip6_null_entry) {
1704                 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1705                 return;
1706         }
1707
1708         /* Redirect received -> path was valid.
1709          * Redirects are sent only in response to data packets,
1710          * so this nexthop is apparently reachable. --ANK
1711          */
1712         dst_confirm(&rt->dst);
1713
1714         neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1715         if (!neigh)
1716                 return;
1717
1718         /*
1719          *      We have finally decided to accept it.
1720          */
1721
             /* Router flags are only forced on the neighbour for off-link targets. */
1722         neigh_update(neigh, lladdr, NUD_STALE,
1723                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1724                      NEIGH_UPDATE_F_OVERRIDE|
1725                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1726                                      NEIGH_UPDATE_F_ISROUTER))
1727                      );
1728
1729         nrt = ip6_rt_copy(rt, &msg->dest);
1730         if (!nrt)
1731                 goto out;
1732
1733         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1734         if (on_link)
1735                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1736
             /* The new gateway is the neighbour's key, i.e. the redirect target looked up above. */
1737         nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1738
1739         if (ip6_ins_rt(nrt))
1740                 goto out;
1741
1742         netevent.old = &rt->dst;
1743         netevent.new = &nrt->dst;
1744         netevent.daddr = &msg->dest;
1745         netevent.neigh = neigh;
1746         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1747
             /*
              * Drop the superseded cached route.  An extra reference is
              * taken first because the delete path puts one (see
              * __ip6_del_rt()).
              */
1748         if (rt->rt6i_flags & RTF_CACHE) {
1749                 rt = (struct rt6_info *) dst_clone(&rt->dst);
1750                 ip6_del_rt(rt);
1751         }
1752
1753 out:
1754         neigh_release(neigh);
1755 }
1756
1757 /*
1758  *      Misc support functions
1759  */
1760
1761 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1762                                     const struct in6_addr *dest)
1763 {
1764         struct net *net = dev_net(ort->dst.dev);
1765         struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1766                                             ort->rt6i_table);
1767
1768         if (rt) {
1769                 rt->dst.input = ort->dst.input;
1770                 rt->dst.output = ort->dst.output;
1771                 rt->dst.flags |= DST_HOST;
1772
1773                 rt->rt6i_dst.addr = *dest;
1774                 rt->rt6i_dst.plen = 128;
1775                 dst_copy_metrics(&rt->dst, &ort->dst);
1776                 rt->dst.error = ort->dst.error;
1777                 rt->rt6i_idev = ort->rt6i_idev;
1778                 if (rt->rt6i_idev)
1779                         in6_dev_hold(rt->rt6i_idev);
1780                 rt->dst.lastuse = jiffies;
1781
1782                 rt->rt6i_gateway = ort->rt6i_gateway;
1783                 rt->rt6i_flags = ort->rt6i_flags;
1784                 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1785                     (RTF_DEFAULT | RTF_ADDRCONF))
1786                         rt6_set_from(rt, ort);
1787                 rt->rt6i_metric = 0;
1788
1789 #ifdef CONFIG_IPV6_SUBTREES
1790                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1791 #endif
1792                 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1793                 rt->rt6i_table = ort->rt6i_table;
1794         }
1795         return rt;
1796 }
1797
1798 #ifdef CONFIG_IPV6_ROUTE_INFO
1799 static struct rt6_info *rt6_get_route_info(struct net *net,
1800                                            const struct in6_addr *prefix, int prefixlen,
1801                                            const struct in6_addr *gwaddr, int ifindex)
1802 {
1803         struct fib6_node *fn;
1804         struct rt6_info *rt = NULL;
1805         struct fib6_table *table;
1806
1807         table = fib6_get_table(net, RT6_TABLE_INFO);
1808         if (!table)
1809                 return NULL;
1810
1811         read_lock_bh(&table->tb6_lock);
1812         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1813         if (!fn)
1814                 goto out;
1815
1816         for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1817                 if (rt->dst.dev->ifindex != ifindex)
1818                         continue;
1819                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1820                         continue;
1821                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1822                         continue;
1823                 dst_hold(&rt->dst);
1824                 break;
1825         }
1826 out:
1827         read_unlock_bh(&table->tb6_lock);
1828         return rt;
1829 }
1830
1831 static struct rt6_info *rt6_add_route_info(struct net *net,
1832                                            const struct in6_addr *prefix, int prefixlen,
1833                                            const struct in6_addr *gwaddr, int ifindex,
1834                                            unsigned int pref)
1835 {
1836         struct fib6_config cfg = {
1837                 .fc_table       = RT6_TABLE_INFO,
1838                 .fc_metric      = IP6_RT_PRIO_USER,
1839                 .fc_ifindex     = ifindex,
1840                 .fc_dst_len     = prefixlen,
1841                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1842                                   RTF_UP | RTF_PREF(pref),
1843                 .fc_nlinfo.portid = 0,
1844                 .fc_nlinfo.nlh = NULL,
1845                 .fc_nlinfo.nl_net = net,
1846         };
1847
1848         cfg.fc_dst = *prefix;
1849         cfg.fc_gateway = *gwaddr;
1850
1851         /* We should treat it as a default route if prefix length is 0. */
1852         if (!prefixlen)
1853                 cfg.fc_flags |= RTF_DEFAULT;
1854
1855         ip6_route_add(&cfg);
1856
1857         return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1858 }
1859 #endif
1860
1861 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1862 {
1863         struct rt6_info *rt;
1864         struct fib6_table *table;
1865
1866         table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1867         if (!table)
1868                 return NULL;
1869
1870         read_lock_bh(&table->tb6_lock);
1871         for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1872                 if (dev == rt->dst.dev &&
1873                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1874                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1875                         break;
1876         }
1877         if (rt)
1878                 dst_hold(&rt->dst);
1879         read_unlock_bh(&table->tb6_lock);
1880         return rt;
1881 }
1882
1883 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1884                                      struct net_device *dev,
1885                                      unsigned int pref)
1886 {
1887         struct fib6_config cfg = {
1888                 .fc_table       = RT6_TABLE_DFLT,
1889                 .fc_metric      = IP6_RT_PRIO_USER,
1890                 .fc_ifindex     = dev->ifindex,
1891                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1892                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1893                 .fc_nlinfo.portid = 0,
1894                 .fc_nlinfo.nlh = NULL,
1895                 .fc_nlinfo.nl_net = dev_net(dev),
1896         };
1897
1898         cfg.fc_gateway = *gwaddr;
1899
1900         ip6_route_add(&cfg);
1901
1902         return rt6_get_dflt_router(gwaddr, dev);
1903 }
1904
1905 void rt6_purge_dflt_routers(struct net *net)
1906 {
1907         struct rt6_info *rt;
1908         struct fib6_table *table;
1909
1910         /* NOTE: Keep consistent with rt6_get_dflt_router */
1911         table = fib6_get_table(net, RT6_TABLE_DFLT);
1912         if (!table)
1913                 return;
1914
1915 restart:
1916         read_lock_bh(&table->tb6_lock);
1917         for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1918                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1919                         dst_hold(&rt->dst);
1920                         read_unlock_bh(&table->tb6_lock);
1921                         ip6_del_rt(rt);
1922                         goto restart;
1923                 }
1924         }
1925         read_unlock_bh(&table->tb6_lock);
1926 }
1927
1928 static void rtmsg_to_fib6_config(struct net *net,
1929                                  struct in6_rtmsg *rtmsg,
1930                                  struct fib6_config *cfg)
1931 {
1932         memset(cfg, 0, sizeof(*cfg));
1933
1934         cfg->fc_table = RT6_TABLE_MAIN;
1935         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1936         cfg->fc_metric = rtmsg->rtmsg_metric;
1937         cfg->fc_expires = rtmsg->rtmsg_info;
1938         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1939         cfg->fc_src_len = rtmsg->rtmsg_src_len;
1940         cfg->fc_flags = rtmsg->rtmsg_flags;
1941
1942         cfg->fc_nlinfo.nl_net = net;
1943
1944         cfg->fc_dst = rtmsg->rtmsg_dst;
1945         cfg->fc_src = rtmsg->rtmsg_src;
1946         cfg->fc_gateway = rtmsg->rtmsg_gateway;
1947 }
1948
1949 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1950 {
1951         struct fib6_config cfg;
1952         struct in6_rtmsg rtmsg;
1953         int err;
1954
1955         switch(cmd) {
1956         case SIOCADDRT:         /* Add a route */
1957         case SIOCDELRT:         /* Delete a route */
1958                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1959                         return -EPERM;
1960                 err = copy_from_user(&rtmsg, arg,
1961                                      sizeof(struct in6_rtmsg));
1962                 if (err)
1963                         return -EFAULT;
1964
1965                 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1966
1967                 rtnl_lock();
1968                 switch (cmd) {
1969                 case SIOCADDRT:
1970                         err = ip6_route_add(&cfg);
1971                         break;
1972                 case SIOCDELRT:
1973                         err = ip6_route_del(&cfg);
1974                         break;
1975                 default:
1976                         err = -EINVAL;
1977                 }
1978                 rtnl_unlock();
1979
1980                 return err;
1981         }
1982
1983         return -EINVAL;
1984 }
1985
1986 /*
1987  *      Drop the packet on the floor
1988  */
1989
1990 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1991 {
1992         int type;
1993         struct dst_entry *dst = skb_dst(skb);
1994         switch (ipstats_mib_noroutes) {
1995         case IPSTATS_MIB_INNOROUTES:
1996                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1997                 if (type == IPV6_ADDR_ANY) {
1998                         IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1999                                       IPSTATS_MIB_INADDRERRORS);
2000                         break;
2001                 }
2002                 /* FALLTHROUGH */
2003         case IPSTATS_MIB_OUTNOROUTES:
2004                 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2005                               ipstats_mib_noroutes);
2006                 break;
2007         }
2008         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2009         kfree_skb(skb);
2010         return 0;
2011 }
2012
2013 static int ip6_pkt_discard(struct sk_buff *skb)
2014 {
2015         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2016 }
2017
2018 static int ip6_pkt_discard_out(struct sk_buff *skb)
2019 {
2020         skb->dev = skb_dst(skb)->dev;
2021         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2022 }
2023
2024 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2025
2026 static int ip6_pkt_prohibit(struct sk_buff *skb)
2027 {
2028         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2029 }
2030
2031 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2032 {
2033         skb->dev = skb_dst(skb)->dev;
2034         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2035 }
2036
2037 #endif
2038
2039 /*
2040  *      Allocate a dst for local (unicast / anycast) address.
2041  */
2042
2043 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2044                                     const struct in6_addr *addr,
2045                                     bool anycast)
2046 {
2047         struct net *net = dev_net(idev->dev);
2048         struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2049
2050         if (!rt) {
2051                 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2052                 return ERR_PTR(-ENOMEM);
2053         }
2054
2055         in6_dev_hold(idev);
2056
2057         rt->dst.flags |= DST_HOST;
2058         rt->dst.input = ip6_input;
2059         rt->dst.output = ip6_output;
2060         rt->rt6i_idev = idev;
2061
2062         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2063         if (anycast)
2064                 rt->rt6i_flags |= RTF_ANYCAST;
2065         else
2066                 rt->rt6i_flags |= RTF_LOCAL;
2067
2068         rt->rt6i_dst.addr = *addr;
2069         rt->rt6i_dst.plen = 128;
2070         rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2071
2072         atomic_set(&rt->dst.__refcnt, 1);
2073
2074         return rt;
2075 }
2076
2077 int ip6_route_get_saddr(struct net *net,
2078                         struct rt6_info *rt,
2079                         const struct in6_addr *daddr,
2080                         unsigned int prefs,
2081                         struct in6_addr *saddr)
2082 {
2083         struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2084         int err = 0;
2085         if (rt->rt6i_prefsrc.plen)
2086                 *saddr = rt->rt6i_prefsrc.addr;
2087         else
2088                 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2089                                          daddr, prefs, saddr);
2090         return err;
2091 }
2092
/*
 * Remove a deleted IP from prefsrc entries.
 *
 * Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2099
2100 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2101 {
2102         struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2103         struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2104         struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2105
2106         if (((void *)rt->dst.dev == dev || !dev) &&
2107             rt != net->ipv6.ip6_null_entry &&
2108             ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2109                 /* remove prefsrc entry */
2110                 rt->rt6i_prefsrc.plen = 0;
2111         }
2112         return 0;
2113 }
2114
2115 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2116 {
2117         struct net *net = dev_net(ifp->idev->dev);
2118         struct arg_dev_net_ip adni = {
2119                 .dev = ifp->idev->dev,
2120                 .net = net,
2121                 .addr = &ifp->addr,
2122         };
2123         fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2124 }
2125
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
};
2130
2131 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2132 {
2133         const struct arg_dev_net *adn = arg;
2134         const struct net_device *dev = adn->dev;
2135
2136         if ((rt->dst.dev == dev || !dev) &&
2137             rt != adn->net->ipv6.ip6_null_entry)
2138                 return -1;
2139
2140         return 0;
2141 }
2142
2143 void rt6_ifdown(struct net *net, struct net_device *dev)
2144 {
2145         struct arg_dev_net adn = {
2146                 .dev = dev,
2147                 .net = net,
2148         };
2149
2150         fib6_clean_all(net, fib6_ifdown, 0, &adn);
2151         icmp6_clean_all(fib6_ifdown, &adn);
2152 }
2153
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2158
2159 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2160 {
2161         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2162         struct inet6_dev *idev;
2163
2164         /* In IPv6 pmtu discovery is not optional,
2165            so that RTAX_MTU lock cannot disable it.
2166            We still use this lock to block changes
2167            caused by addrconf/ndisc.
2168         */
2169
2170         idev = __in6_dev_get(arg->dev);
2171         if (!idev)
2172                 return 0;
2173
2174         /* For administrative MTU increase, there is no way to discover
2175            IPv6 PMTU increase, so PMTU increase should be updated here.
2176            Since RFC 1981 doesn't include administrative MTU increase
2177            update PMTU increase is a MUST. (i.e. jumbo frame)
2178          */
2179         /*
2180            If new MTU is less than route PMTU, this new MTU will be the
2181            lowest MTU in the path, update the route PMTU to reflect PMTU
2182            decreases; if new MTU is greater than route PMTU, and the
2183            old MTU is the lowest MTU in the path, update the route PMTU
2184            to reflect the increase. In this case if the other nodes' MTU
2185            also have the lowest MTU, TOO BIG MESSAGE will be lead to
2186            PMTU discouvery.
2187          */
2188         if (rt->dst.dev == arg->dev &&
2189             !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2190             (dst_mtu(&rt->dst) >= arg->mtu ||
2191              (dst_mtu(&rt->dst) < arg->mtu &&
2192               dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2193                 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2194         }
2195         return 0;
2196 }
2197
2198 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2199 {
2200         struct rt6_mtu_change_arg arg = {
2201                 .dev = dev,
2202                 .mtu = mtu,
2203         };
2204
2205         fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2206 }
2207
2208 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2209         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2210         [RTA_OIF]               = { .type = NLA_U32 },
2211         [RTA_IIF]               = { .type = NLA_U32 },
2212         [RTA_PRIORITY]          = { .type = NLA_U32 },
2213         [RTA_METRICS]           = { .type = NLA_NESTED },
2214         [RTA_MULTIPATH]         = { .len = sizeof(struct rtnexthop) },
2215 };
2216
2217 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2218                               struct fib6_config *cfg)
2219 {
2220         struct rtmsg *rtm;
2221         struct nlattr *tb[RTA_MAX+1];
2222         int err;
2223
2224         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2225         if (err < 0)
2226                 goto errout;
2227
2228         err = -EINVAL;
2229         rtm = nlmsg_data(nlh);
2230         memset(cfg, 0, sizeof(*cfg));
2231
2232         cfg->fc_table = rtm->rtm_table;
2233         cfg->fc_dst_len = rtm->rtm_dst_len;
2234         cfg->fc_src_len = rtm->rtm_src_len;
2235         cfg->fc_flags = RTF_UP;
2236         cfg->fc_protocol = rtm->rtm_protocol;
2237         cfg->fc_type = rtm->rtm_type;
2238
2239         if (rtm->rtm_type == RTN_UNREACHABLE ||
2240             rtm->rtm_type == RTN_BLACKHOLE ||
2241             rtm->rtm_type == RTN_PROHIBIT ||
2242             rtm->rtm_type == RTN_THROW)
2243                 cfg->fc_flags |= RTF_REJECT;
2244
2245         if (rtm->rtm_type == RTN_LOCAL)
2246                 cfg->fc_flags |= RTF_LOCAL;
2247
2248         cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2249         cfg->fc_nlinfo.nlh = nlh;
2250         cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2251
2252         if (tb[RTA_GATEWAY]) {
2253                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2254                 cfg->fc_flags |= RTF_GATEWAY;
2255         }
2256
2257         if (tb[RTA_DST]) {
2258                 int plen = (rtm->rtm_dst_len + 7) >> 3;
2259
2260                 if (nla_len(tb[RTA_DST]) < plen)
2261                         goto errout;
2262
2263                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2264         }
2265
2266         if (tb[RTA_SRC]) {
2267                 int plen = (rtm->rtm_src_len + 7) >> 3;
2268
2269                 if (nla_len(tb[RTA_SRC]) < plen)
2270                         goto errout;
2271
2272                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2273         }
2274
2275         if (tb[RTA_PREFSRC])
2276                 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2277
2278         if (tb[RTA_OIF])
2279                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2280
2281         if (tb[RTA_PRIORITY])
2282                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2283
2284         if (tb[RTA_METRICS]) {
2285                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2286                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2287         }
2288
2289         if (tb[RTA_TABLE])
2290                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2291
2292         if (tb[RTA_MULTIPATH]) {
2293                 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2294                 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2295         }
2296
2297         err = 0;
2298 errout:
2299         return err;
2300 }
2301
2302 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2303 {
2304         struct fib6_config r_cfg;
2305         struct rtnexthop *rtnh;
2306         int remaining;
2307         int attrlen;
2308         int err = 0, last_err = 0;
2309
2310 beginning:
2311         rtnh = (struct rtnexthop *)cfg->fc_mp;
2312         remaining = cfg->fc_mp_len;
2313
2314         /* Parse a Multipath Entry */
2315         while (rtnh_ok(rtnh, remaining)) {
2316                 memcpy(&r_cfg, cfg, sizeof(*cfg));
2317                 if (rtnh->rtnh_ifindex)
2318                         r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2319
2320                 attrlen = rtnh_attrlen(rtnh);
2321                 if (attrlen > 0) {
2322                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2323
2324                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2325                         if (nla) {
2326                                 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2327                                 r_cfg.fc_flags |= RTF_GATEWAY;
2328                         }
2329                 }
2330                 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2331                 if (err) {
2332                         last_err = err;
2333                         /* If we are trying to remove a route, do not stop the
2334                          * loop when ip6_route_del() fails (because next hop is
2335                          * already gone), we should try to remove all next hops.
2336                          */
2337                         if (add) {
2338                                 /* If add fails, we should try to delete all
2339                                  * next hops that have been already added.
2340                                  */
2341                                 add = 0;
2342                                 goto beginning;
2343                         }
2344                 }
2345                 /* Because each route is added like a single route we remove
2346                  * this flag after the first nexthop (if there is a collision,
2347                  * we have already fail to add the first nexthop:
2348                  * fib6_add_rt2node() has reject it).
2349                  */
2350                 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2351                 rtnh = rtnh_next(rtnh, &remaining);
2352         }
2353
2354         return last_err;
2355 }
2356
2357 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2358 {
2359         struct fib6_config cfg;
2360         int err;
2361
2362         err = rtm_to_fib6_config(skb, nlh, &cfg);
2363         if (err < 0)
2364                 return err;
2365
2366         if (cfg.fc_mp)
2367                 return ip6_route_multipath(&cfg, 0);
2368         else
2369                 return ip6_route_del(&cfg);
2370 }
2371
2372 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2373 {
2374         struct fib6_config cfg;
2375         int err;
2376
2377         err = rtm_to_fib6_config(skb, nlh, &cfg);
2378         if (err < 0)
2379                 return err;
2380
2381         if (cfg.fc_mp)
2382                 return ip6_route_multipath(&cfg, 1);
2383         else
2384                 return ip6_route_add(&cfg);
2385 }
2386
2387 static inline size_t rt6_nlmsg_size(void)
2388 {
2389         return NLMSG_ALIGN(sizeof(struct rtmsg))
2390                + nla_total_size(16) /* RTA_SRC */
2391                + nla_total_size(16) /* RTA_DST */
2392                + nla_total_size(16) /* RTA_GATEWAY */
2393                + nla_total_size(16) /* RTA_PREFSRC */
2394                + nla_total_size(4) /* RTA_TABLE */
2395                + nla_total_size(4) /* RTA_IIF */
2396                + nla_total_size(4) /* RTA_OIF */
2397                + nla_total_size(4) /* RTA_PRIORITY */
2398                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2399                + nla_total_size(sizeof(struct rta_cacheinfo));
2400 }
2401
2402 static int rt6_fill_node(struct net *net,
2403                          struct sk_buff *skb, struct rt6_info *rt,
2404                          struct in6_addr *dst, struct in6_addr *src,
2405                          int iif, int type, u32 portid, u32 seq,
2406                          int prefix, int nowait, unsigned int flags)
2407 {
2408         struct rtmsg *rtm;
2409         struct nlmsghdr *nlh;
2410         long expires;
2411         u32 table;
2412
2413         if (prefix) {   /* user wants prefix routes only */
2414                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2415                         /* success since this is not a prefix route */
2416                         return 1;
2417                 }
2418         }
2419
2420         nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2421         if (!nlh)
2422                 return -EMSGSIZE;
2423
2424         rtm = nlmsg_data(nlh);
2425         rtm->rtm_family = AF_INET6;
2426         rtm->rtm_dst_len = rt->rt6i_dst.plen;
2427         rtm->rtm_src_len = rt->rt6i_src.plen;
2428         rtm->rtm_tos = 0;
2429         if (rt->rt6i_table)
2430                 table = rt->rt6i_table->tb6_id;
2431         else
2432                 table = RT6_TABLE_UNSPEC;
2433         rtm->rtm_table = table;
2434         if (nla_put_u32(skb, RTA_TABLE, table))
2435                 goto nla_put_failure;
2436         if (rt->rt6i_flags & RTF_REJECT) {
2437                 switch (rt->dst.error) {
2438                 case -EINVAL:
2439                         rtm->rtm_type = RTN_BLACKHOLE;
2440                         break;
2441                 case -EACCES:
2442                         rtm->rtm_type = RTN_PROHIBIT;
2443                         break;
2444                 case -EAGAIN:
2445                         rtm->rtm_type = RTN_THROW;
2446                         break;
2447                 default:
2448                         rtm->rtm_type = RTN_UNREACHABLE;
2449                         break;
2450                 }
2451         }
2452         else if (rt->rt6i_flags & RTF_LOCAL)
2453                 rtm->rtm_type = RTN_LOCAL;
2454         else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2455                 rtm->rtm_type = RTN_LOCAL;
2456         else
2457                 rtm->rtm_type = RTN_UNICAST;
2458         rtm->rtm_flags = 0;
2459         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2460         rtm->rtm_protocol = rt->rt6i_protocol;
2461         if (rt->rt6i_flags & RTF_DYNAMIC)
2462                 rtm->rtm_protocol = RTPROT_REDIRECT;
2463         else if (rt->rt6i_flags & RTF_ADDRCONF) {
2464                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2465                         rtm->rtm_protocol = RTPROT_RA;
2466                 else
2467                         rtm->rtm_protocol = RTPROT_KERNEL;
2468         }
2469
2470         if (rt->rt6i_flags & RTF_CACHE)
2471                 rtm->rtm_flags |= RTM_F_CLONED;
2472
2473         if (dst) {
2474                 if (nla_put(skb, RTA_DST, 16, dst))
2475                         goto nla_put_failure;
2476                 rtm->rtm_dst_len = 128;
2477         } else if (rtm->rtm_dst_len)
2478                 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2479                         goto nla_put_failure;
2480 #ifdef CONFIG_IPV6_SUBTREES
2481         if (src) {
2482                 if (nla_put(skb, RTA_SRC, 16, src))
2483                         goto nla_put_failure;
2484                 rtm->rtm_src_len = 128;
2485         } else if (rtm->rtm_src_len &&
2486                    nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2487                 goto nla_put_failure;
2488 #endif
2489         if (iif) {
2490 #ifdef CONFIG_IPV6_MROUTE
2491                 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2492                         int err = ip6mr_get_route(net, skb, rtm, nowait);
2493                         if (err <= 0) {
2494                                 if (!nowait) {
2495                                         if (err == 0)
2496                                                 return 0;
2497                                         goto nla_put_failure;
2498                                 } else {
2499                                         if (err == -EMSGSIZE)
2500                                                 goto nla_put_failure;
2501                                 }
2502                         }
2503                 } else
2504 #endif
2505                         if (nla_put_u32(skb, RTA_IIF, iif))
2506                                 goto nla_put_failure;
2507         } else if (dst) {
2508                 struct in6_addr saddr_buf;
2509                 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2510                     nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2511                         goto nla_put_failure;
2512         }
2513
2514         if (rt->rt6i_prefsrc.plen) {
2515                 struct in6_addr saddr_buf;
2516                 saddr_buf = rt->rt6i_prefsrc.addr;
2517                 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2518                         goto nla_put_failure;
2519         }
2520
2521         if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2522                 goto nla_put_failure;
2523
2524         if (rt->rt6i_flags & RTF_GATEWAY) {
2525                 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2526                         goto nla_put_failure;
2527         }
2528
2529         if (rt->dst.dev &&
2530             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2531                 goto nla_put_failure;
2532         if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2533                 goto nla_put_failure;
2534
2535         expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2536
2537         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2538                 goto nla_put_failure;
2539
2540         return nlmsg_end(skb, nlh);
2541
2542 nla_put_failure:
2543         nlmsg_cancel(skb, nlh);
2544         return -EMSGSIZE;
2545 }
2546
2547 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2548 {
2549         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2550         int prefix;
2551
2552         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2553                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2554                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2555         } else
2556                 prefix = 0;
2557
2558         return rt6_fill_node(arg->net,
2559                      arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2560                      NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2561                      prefix, 0, NLM_F_MULTI);
2562 }
2563
2564 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2565 {
2566         struct net *net = sock_net(in_skb->sk);
2567         struct nlattr *tb[RTA_MAX+1];
2568         struct rt6_info *rt;
2569         struct sk_buff *skb;
2570         struct rtmsg *rtm;
2571         struct flowi6 fl6;
2572         int err, iif = 0, oif = 0;
2573
2574         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2575         if (err < 0)
2576                 goto errout;
2577
2578         err = -EINVAL;
2579         memset(&fl6, 0, sizeof(fl6));
2580
2581         if (tb[RTA_SRC]) {
2582                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2583                         goto errout;
2584
2585                 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2586         }
2587
2588         if (tb[RTA_DST]) {
2589                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2590                         goto errout;
2591
2592                 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2593         }
2594
2595         if (tb[RTA_IIF])
2596                 iif = nla_get_u32(tb[RTA_IIF]);
2597
2598         if (tb[RTA_OIF])
2599                 oif = nla_get_u32(tb[RTA_OIF]);
2600
2601         if (iif) {
2602                 struct net_device *dev;
2603                 int flags = 0;
2604
2605                 dev = __dev_get_by_index(net, iif);
2606                 if (!dev) {
2607                         err = -ENODEV;
2608                         goto errout;
2609                 }
2610
2611                 fl6.flowi6_iif = iif;
2612
2613                 if (!ipv6_addr_any(&fl6.saddr))
2614                         flags |= RT6_LOOKUP_F_HAS_SADDR;
2615
2616                 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2617                                                                flags);
2618         } else {
2619                 fl6.flowi6_oif = oif;
2620
2621                 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2622         }
2623
2624         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2625         if (!skb) {
2626                 ip6_rt_put(rt);
2627                 err = -ENOBUFS;
2628                 goto errout;
2629         }
2630
2631         /* Reserve room for dummy headers, this skb can pass
2632            through good chunk of routing engine.
2633          */
2634         skb_reset_mac_header(skb);
2635         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2636
2637         skb_dst_set(skb, &rt->dst);
2638
2639         err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2640                             RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2641                             nlh->nlmsg_seq, 0, 0, 0);
2642         if (err < 0) {
2643                 kfree_skb(skb);
2644                 goto errout;
2645         }
2646
2647         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2648 errout:
2649         return err;
2650 }
2651
2652 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2653 {
2654         struct sk_buff *skb;
2655         struct net *net = info->nl_net;
2656         u32 seq;
2657         int err;
2658
2659         err = -ENOBUFS;
2660         seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2661
2662         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2663         if (!skb)
2664                 goto errout;
2665
2666         err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2667                                 event, info->portid, seq, 0, 0, 0);
2668         if (err < 0) {
2669                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2670                 WARN_ON(err == -EMSGSIZE);
2671                 kfree_skb(skb);
2672                 goto errout;
2673         }
2674         rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2675                     info->nlh, gfp_any());
2676         return;
2677 errout:
2678         if (err < 0)
2679                 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2680 }
2681
2682 static int ip6_route_dev_notify(struct notifier_block *this,
2683                                 unsigned long event, void *data)
2684 {
2685         struct net_device *dev = (struct net_device *)data;
2686         struct net *net = dev_net(dev);
2687
2688         if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2689                 net->ipv6.ip6_null_entry->dst.dev = dev;
2690                 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2691 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2692                 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2693                 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2694                 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2695                 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2696 #endif
2697         }
2698
2699         return NOTIFY_OK;
2700 }
2701
2702 /*
2703  *      /proc
2704  */
2705
2706 #ifdef CONFIG_PROC_FS
2707
/*
 * Buffer/cursor bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; the meaning of the individual counters should be confirmed
 * against its users, if any remain.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2716
2717 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2718 {
2719         struct seq_file *m = p_arg;
2720
2721         seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2722
2723 #ifdef CONFIG_IPV6_SUBTREES
2724         seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2725 #else
2726         seq_puts(m, "00000000000000000000000000000000 00 ");
2727 #endif
2728         if (rt->rt6i_flags & RTF_GATEWAY) {
2729                 seq_printf(m, "%pi6", &rt->rt6i_gateway);
2730         } else {
2731                 seq_puts(m, "00000000000000000000000000000000");
2732         }
2733         seq_printf(m, " %08x %08x %08x %08x %8s\n",
2734                    rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2735                    rt->dst.__use, rt->rt6i_flags,
2736                    rt->dst.dev ? rt->dst.dev->name : "");
2737         return 0;
2738 }
2739
2740 static int ipv6_route_show(struct seq_file *m, void *v)
2741 {
2742         struct net *net = (struct net *)m->private;
2743         fib6_clean_all_ro(net, rt6_info_route, 0, m);
2744         return 0;
2745 }
2746
2747 static int ipv6_route_open(struct inode *inode, struct file *file)
2748 {
2749         return single_open_net(inode, file, ipv6_route_show);
2750 }
2751
2752 static const struct file_operations ipv6_route_proc_fops = {
2753         .owner          = THIS_MODULE,
2754         .open           = ipv6_route_open,
2755         .read           = seq_read,
2756         .llseek         = seq_lseek,
2757         .release        = single_release_net,
2758 };
2759
2760 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2761 {
2762         struct net *net = (struct net *)seq->private;
2763         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2764                    net->ipv6.rt6_stats->fib_nodes,
2765                    net->ipv6.rt6_stats->fib_route_nodes,
2766                    net->ipv6.rt6_stats->fib_rt_alloc,
2767                    net->ipv6.rt6_stats->fib_rt_entries,
2768                    net->ipv6.rt6_stats->fib_rt_cache,
2769                    dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2770                    net->ipv6.rt6_stats->fib_discarded_routes);
2771
2772         return 0;
2773 }
2774
2775 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2776 {
2777         return single_open_net(inode, file, rt6_stats_seq_show);
2778 }
2779
2780 static const struct file_operations rt6_stats_seq_fops = {
2781         .owner   = THIS_MODULE,
2782         .open    = rt6_stats_seq_open,
2783         .read    = seq_read,
2784         .llseek  = seq_lseek,
2785         .release = single_release_net,
2786 };
2787 #endif  /* CONFIG_PROC_FS */
2788
2789 #ifdef CONFIG_SYSCTL
2790
2791 static
2792 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2793                               void __user *buffer, size_t *lenp, loff_t *ppos)
2794 {
2795         struct net *net;
2796         int delay;
2797         if (!write)
2798                 return -EINVAL;
2799
2800         net = (struct net *)ctl->extra1;
2801         delay = net->ipv6.sysctl.flush_delay;
2802         proc_dointvec(ctl, write, buffer, lenp, ppos);
2803         fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2804         return 0;
2805 }
2806
2807 ctl_table ipv6_route_table_template[] = {
2808         {
2809                 .procname       =       "flush",
2810                 .data           =       &init_net.ipv6.sysctl.flush_delay,
2811                 .maxlen         =       sizeof(int),
2812                 .mode           =       0200,
2813                 .proc_handler   =       ipv6_sysctl_rtcache_flush
2814         },
2815         {
2816                 .procname       =       "gc_thresh",
2817                 .data           =       &ip6_dst_ops_template.gc_thresh,
2818                 .maxlen         =       sizeof(int),
2819                 .mode           =       0644,
2820                 .proc_handler   =       proc_dointvec,
2821         },
2822         {
2823                 .procname       =       "max_size",
2824                 .data           =       &init_net.ipv6.sysctl.ip6_rt_max_size,
2825                 .maxlen         =       sizeof(int),
2826                 .mode           =       0644,
2827                 .proc_handler   =       proc_dointvec,
2828         },
2829         {
2830                 .procname       =       "gc_min_interval",
2831                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2832                 .maxlen         =       sizeof(int),
2833                 .mode           =       0644,
2834                 .proc_handler   =       proc_dointvec_jiffies,
2835         },
2836         {
2837                 .procname       =       "gc_timeout",
2838                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2839                 .maxlen         =       sizeof(int),
2840                 .mode           =       0644,
2841                 .proc_handler   =       proc_dointvec_jiffies,
2842         },
2843         {
2844                 .procname       =       "gc_interval",
2845                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2846                 .maxlen         =       sizeof(int),
2847                 .mode           =       0644,
2848                 .proc_handler   =       proc_dointvec_jiffies,
2849         },
2850         {
2851                 .procname       =       "gc_elasticity",
2852                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2853                 .maxlen         =       sizeof(int),
2854                 .mode           =       0644,
2855                 .proc_handler   =       proc_dointvec,
2856         },
2857         {
2858                 .procname       =       "mtu_expires",
2859                 .data           =       &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2860                 .maxlen         =       sizeof(int),
2861                 .mode           =       0644,
2862                 .proc_handler   =       proc_dointvec_jiffies,
2863         },
2864         {
2865                 .procname       =       "min_adv_mss",
2866                 .data           =       &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2867                 .maxlen         =       sizeof(int),
2868                 .mode           =       0644,
2869                 .proc_handler   =       proc_dointvec,
2870         },
2871         {
2872                 .procname       =       "gc_min_interval_ms",
2873                 .data           =       &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2874                 .maxlen         =       sizeof(int),
2875                 .mode           =       0644,
2876                 .proc_handler   =       proc_dointvec_ms_jiffies,
2877         },
2878         { }
2879 };
2880
2881 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2882 {
2883         struct ctl_table *table;
2884
2885         table = kmemdup(ipv6_route_table_template,
2886                         sizeof(ipv6_route_table_template),
2887                         GFP_KERNEL);
2888
2889         if (table) {
2890                 table[0].data = &net->ipv6.sysctl.flush_delay;
2891                 table[0].extra1 = net;
2892                 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2893                 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2894                 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2895                 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2896                 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2897                 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2898                 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2899                 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2900                 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2901
2902                 /* Don't export sysctls to unprivileged users */
2903                 if (net->user_ns != &init_user_ns)
2904                         table[0].procname = NULL;
2905         }
2906
2907         return table;
2908 }
2909 #endif
2910
2911 static int __net_init ip6_route_net_init(struct net *net)
2912 {
2913         int ret = -ENOMEM;
2914
2915         memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2916                sizeof(net->ipv6.ip6_dst_ops));
2917
2918         if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2919                 goto out_ip6_dst_ops;
2920
2921         net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2922                                            sizeof(*net->ipv6.ip6_null_entry),
2923                                            GFP_KERNEL);
2924         if (!net->ipv6.ip6_null_entry)
2925                 goto out_ip6_dst_entries;
2926         net->ipv6.ip6_null_entry->dst.path =
2927                 (struct dst_entry *)net->ipv6.ip6_null_entry;
2928         net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2929         dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2930                          ip6_template_metrics, true);
2931
2932 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2933         net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2934                                                sizeof(*net->ipv6.ip6_prohibit_entry),
2935                                                GFP_KERNEL);
2936         if (!net->ipv6.ip6_prohibit_entry)
2937                 goto out_ip6_null_entry;
2938         net->ipv6.ip6_prohibit_entry->dst.path =
2939                 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2940         net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2941         dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2942                          ip6_template_metrics, true);
2943
2944         net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2945                                                sizeof(*net->ipv6.ip6_blk_hole_entry),
2946                                                GFP_KERNEL);
2947         if (!net->ipv6.ip6_blk_hole_entry)
2948                 goto out_ip6_prohibit_entry;
2949         net->ipv6.ip6_blk_hole_entry->dst.path =
2950                 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2951         net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2952         dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2953                          ip6_template_metrics, true);
2954 #endif
2955
2956         net->ipv6.sysctl.flush_delay = 0;
2957         net->ipv6.sysctl.ip6_rt_max_size = 4096;
2958         net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2959         net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2960         net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2961         net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2962         net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2963         net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2964
2965         net->ipv6.ip6_rt_gc_expire = 30*HZ;
2966
2967         ret = 0;
2968 out:
2969         return ret;
2970
2971 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2972 out_ip6_prohibit_entry:
2973         kfree(net->ipv6.ip6_prohibit_entry);
2974 out_ip6_null_entry:
2975         kfree(net->ipv6.ip6_null_entry);
2976 #endif
2977 out_ip6_dst_entries:
2978         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2979 out_ip6_dst_ops:
2980         goto out;
2981 }
2982
2983 static void __net_exit ip6_route_net_exit(struct net *net)
2984 {
2985         kfree(net->ipv6.ip6_null_entry);
2986 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2987         kfree(net->ipv6.ip6_prohibit_entry);
2988         kfree(net->ipv6.ip6_blk_hole_entry);
2989 #endif
2990         dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2991 }
2992
2993 static int __net_init ip6_route_net_init_late(struct net *net)
2994 {
2995 #ifdef CONFIG_PROC_FS
2996         proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
2997         proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
2998 #endif
2999         return 0;
3000 }
3001
3002 static void __net_exit ip6_route_net_exit_late(struct net *net)
3003 {
3004 #ifdef CONFIG_PROC_FS
3005         remove_proc_entry("ipv6_route", net->proc_net);
3006         remove_proc_entry("rt6_stats", net->proc_net);
3007 #endif
3008 }
3009
3010 static struct pernet_operations ip6_route_net_ops = {
3011         .init = ip6_route_net_init,
3012         .exit = ip6_route_net_exit,
3013 };
3014
3015 static int __net_init ipv6_inetpeer_init(struct net *net)
3016 {
3017         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3018
3019         if (!bp)
3020                 return -ENOMEM;
3021         inet_peer_base_init(bp);
3022         net->ipv6.peers = bp;
3023         return 0;
3024 }
3025
3026 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3027 {
3028         struct inet_peer_base *bp = net->ipv6.peers;
3029
3030         net->ipv6.peers = NULL;
3031         inetpeer_invalidate_tree(bp);
3032         kfree(bp);
3033 }
3034
3035 static struct pernet_operations ipv6_inetpeer_ops = {
3036         .init   =       ipv6_inetpeer_init,
3037         .exit   =       ipv6_inetpeer_exit,
3038 };
3039
3040 static struct pernet_operations ip6_route_net_late_ops = {
3041         .init = ip6_route_net_init_late,
3042         .exit = ip6_route_net_exit_late,
3043 };
3044
3045 static struct notifier_block ip6_route_dev_notifier = {
3046         .notifier_call = ip6_route_dev_notify,
3047         .priority = 0,
3048 };
3049
3050 int __init ip6_route_init(void)
3051 {
3052         int ret;
3053
3054         ret = -ENOMEM;
3055         ip6_dst_ops_template.kmem_cachep =
3056                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3057                                   SLAB_HWCACHE_ALIGN, NULL);
3058         if (!ip6_dst_ops_template.kmem_cachep)
3059                 goto out;
3060
3061         ret = dst_entries_init(&ip6_dst_blackhole_ops);
3062         if (ret)
3063                 goto out_kmem_cache;
3064
3065         ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3066         if (ret)
3067                 goto out_dst_entries;
3068
3069         ret = register_pernet_subsys(&ip6_route_net_ops);
3070         if (ret)
3071                 goto out_register_inetpeer;
3072
3073         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3074
3075         /* Registering of the loopback is done before this portion of code,
3076          * the loopback reference in rt6_info will not be taken, do it
3077          * manually for init_net */
3078         init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3079         init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3080   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3081         init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3082         init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3083         init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3084         init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3085   #endif
3086         ret = fib6_init();
3087         if (ret)
3088                 goto out_register_subsys;
3089
3090         ret = xfrm6_init();
3091         if (ret)
3092                 goto out_fib6_init;
3093
3094         ret = fib6_rules_init();
3095         if (ret)
3096                 goto xfrm6_init;
3097
3098         ret = register_pernet_subsys(&ip6_route_net_late_ops);
3099         if (ret)
3100                 goto fib6_rules_init;
3101
3102         ret = -ENOBUFS;
3103         if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3104             __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3105             __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3106                 goto out_register_late_subsys;
3107
3108         ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3109         if (ret)
3110                 goto out_register_late_subsys;
3111
3112 out:
3113         return ret;
3114
3115 out_register_late_subsys:
3116         unregister_pernet_subsys(&ip6_route_net_late_ops);
3117 fib6_rules_init:
3118         fib6_rules_cleanup();
3119 xfrm6_init:
3120         xfrm6_fini();
3121 out_fib6_init:
3122         fib6_gc_cleanup();
3123 out_register_subsys:
3124         unregister_pernet_subsys(&ip6_route_net_ops);
3125 out_register_inetpeer:
3126         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3127 out_dst_entries:
3128         dst_entries_destroy(&ip6_dst_blackhole_ops);
3129 out_kmem_cache:
3130         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3131         goto out;
3132 }
3133
3134 void ip6_route_cleanup(void)
3135 {
3136         unregister_netdevice_notifier(&ip6_route_dev_notifier);
3137         unregister_pernet_subsys(&ip6_route_net_late_ops);
3138         fib6_rules_cleanup();
3139         xfrm6_fini();
3140         fib6_gc_cleanup();
3141         unregister_pernet_subsys(&ipv6_inetpeer_ops);
3142         unregister_pernet_subsys(&ip6_route_net_ops);
3143         dst_entries_destroy(&ip6_dst_blackhole_ops);
3144         kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3145 }