]> Pileus Git - ~andy/linux/blobdiff - net/ipv6/route.c
net: Add initial_ref arg to dst_alloc().
[~andy/linux] / net / ipv6 / route.c
index 7659d6f16e6bae4b7e8a1701161181f627c0e41f..7946b53692da1ae28d5c882c8b06e913b684cd5e 100644 (file)
 #define RT6_TRACE(x...) do { ; } while (0)
 #endif
 
-#define CLONE_OFFLINK_ROUTE 0
-
 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
 static struct dst_entry        *ip6_dst_check(struct dst_entry *dst, u32 cookie);
+static unsigned int     ip6_default_advmss(const struct dst_entry *dst);
+static unsigned int     ip6_default_mtu(const struct dst_entry *dst);
 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 static void            ip6_dst_destroy(struct dst_entry *);
 static void            ip6_dst_ifdown(struct dst_entry *,
@@ -97,12 +97,45 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
                                           struct in6_addr *gwaddr, int ifindex);
 #endif
 
+static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+       struct rt6_info *rt = (struct rt6_info *) dst;
+       struct inet_peer *peer;
+       u32 *p = NULL;
+
+       if (!rt->rt6i_peer)
+               rt6_bind_peer(rt, 1);
+
+       peer = rt->rt6i_peer;
+       if (peer) {
+               u32 *old_p = __DST_METRICS_PTR(old);
+               unsigned long prev, new;
+
+               p = peer->metrics;
+               if (inet_metrics_new(peer))
+                       memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+               new = (unsigned long) p;
+               prev = cmpxchg(&dst->_metrics, old, new);
+
+               if (prev != old) {
+                       p = __DST_METRICS_PTR(prev);
+                       if (prev & DST_METRICS_READ_ONLY)
+                               p = NULL;
+               }
+       }
+       return p;
+}
+
 static struct dst_ops ip6_dst_ops_template = {
        .family                 =       AF_INET6,
        .protocol               =       cpu_to_be16(ETH_P_IPV6),
        .gc                     =       ip6_dst_gc,
        .gc_thresh              =       1024,
        .check                  =       ip6_dst_check,
+       .default_advmss         =       ip6_default_advmss,
+       .default_mtu            =       ip6_default_mtu,
+       .cow_metrics            =       ipv6_cow_metrics,
        .destroy                =       ip6_dst_destroy,
        .ifdown                 =       ip6_dst_ifdown,
        .negative_advice        =       ip6_negative_advice,
@@ -111,6 +144,11 @@ static struct dst_ops ip6_dst_ops_template = {
        .local_out              =       __ip6_local_out,
 };
 
+static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
+{
+       return 0;
+}
+
 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 }
@@ -120,16 +158,20 @@ static struct dst_ops ip6_dst_blackhole_ops = {
        .protocol               =       cpu_to_be16(ETH_P_IPV6),
        .destroy                =       ip6_dst_destroy,
        .check                  =       ip6_dst_check,
+       .default_mtu            =       ip6_blackhole_default_mtu,
        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
 };
 
+static const u32 ip6_template_metrics[RTAX_MAX] = {
+       [RTAX_HOPLIMIT - 1] = 255,
+};
+
 static struct rt6_info ip6_null_entry_template = {
        .dst = {
                .__refcnt       = ATOMIC_INIT(1),
                .__use          = 1,
                .obsolete       = -1,
                .error          = -ENETUNREACH,
-               .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
                .input          = ip6_pkt_discard,
                .output         = ip6_pkt_discard_out,
        },
@@ -150,7 +192,6 @@ static struct rt6_info ip6_prohibit_entry_template = {
                .__use          = 1,
                .obsolete       = -1,
                .error          = -EACCES,
-               .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
                .input          = ip6_pkt_prohibit,
                .output         = ip6_pkt_prohibit_out,
        },
@@ -166,7 +207,6 @@ static struct rt6_info ip6_blk_hole_entry_template = {
                .__use          = 1,
                .obsolete       = -1,
                .error          = -EINVAL,
-               .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
                .input          = dst_discard,
                .output         = dst_discard,
        },
@@ -181,18 +221,41 @@ static struct rt6_info ip6_blk_hole_entry_template = {
 /* allocate dst with ip6_dst_ops */
 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
 {
-       return (struct rt6_info *)dst_alloc(ops);
+       return (struct rt6_info *)dst_alloc(ops, 0);
 }
 
 static void ip6_dst_destroy(struct dst_entry *dst)
 {
        struct rt6_info *rt = (struct rt6_info *)dst;
        struct inet6_dev *idev = rt->rt6i_idev;
+       struct inet_peer *peer = rt->rt6i_peer;
 
        if (idev != NULL) {
                rt->rt6i_idev = NULL;
                in6_dev_put(idev);
        }
+       if (peer) {
+               rt->rt6i_peer = NULL;
+               inet_putpeer(peer);
+       }
+}
+
+static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
+
+static u32 rt6_peer_genid(void)
+{
+       return atomic_read(&__rt6_peer_genid);
+}
+
+void rt6_bind_peer(struct rt6_info *rt, int create)
+{
+       struct inet_peer *peer;
+
+       peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
+       if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
+               inet_putpeer(peer);
+       else
+               rt->rt6i_peer_genid = rt6_peer_genid();
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -558,11 +621,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
 {
        struct flowi fl = {
                .oif = oif,
-               .nl_u = {
-                       .ip6_u = {
-                               .daddr = *daddr,
-                       },
-               },
+               .fl6_dst = *daddr,
        };
        struct dst_entry *dst;
        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
@@ -723,13 +782,8 @@ restart:
 
        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
                nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
-       else {
-#if CLONE_OFFLINK_ROUTE
+       else
                nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
-#else
-               goto out2;
-#endif
-       }
 
        dst_release(&rt->dst);
        rt = nrt ? : net->ipv6.ip6_null_entry;
@@ -778,13 +832,9 @@ void ip6_route_input(struct sk_buff *skb)
        int flags = RT6_LOOKUP_F_HAS_SADDR;
        struct flowi fl = {
                .iif = skb->dev->ifindex,
-               .nl_u = {
-                       .ip6_u = {
-                               .daddr = iph->daddr,
-                               .saddr = iph->saddr,
-                               .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
-                       },
-               },
+               .fl6_dst = iph->daddr,
+               .fl6_src = iph->saddr,
+               .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
                .mark = skb->mark,
                .proto = iph->nexthdr,
        };
@@ -823,18 +873,17 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
 {
        struct rt6_info *ort = (struct rt6_info *) *dstp;
        struct rt6_info *rt = (struct rt6_info *)
-               dst_alloc(&ip6_dst_blackhole_ops);
+               dst_alloc(&ip6_dst_blackhole_ops, 1);
        struct dst_entry *new = NULL;
 
        if (rt) {
                new = &rt->dst;
 
-               atomic_set(&new->__refcnt, 1);
                new->__use = 1;
                new->input = dst_discard;
                new->output = dst_discard;
 
-               memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+               dst_copy_metrics(new, &ort->dst);
                new->dev = ort->dst.dev;
                if (new->dev)
                        dev_hold(new->dev);
@@ -871,9 +920,14 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
        rt = (struct rt6_info *) dst;
 
-       if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
+       if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
+               if (rt->rt6i_peer_genid != rt6_peer_genid()) {
+                       if (!rt->rt6i_peer)
+                               rt6_bind_peer(rt, 0);
+                       rt->rt6i_peer_genid = rt6_peer_genid();
+               }
                return dst;
-
+       }
        return NULL;
 }
 
@@ -918,18 +972,21 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
        if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
                rt6->rt6i_flags |= RTF_MODIFIED;
                if (mtu < IPV6_MIN_MTU) {
+                       u32 features = dst_metric(dst, RTAX_FEATURES);
                        mtu = IPV6_MIN_MTU;
-                       dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+                       features |= RTAX_FEATURE_ALLFRAG;
+                       dst_metric_set(dst, RTAX_FEATURES, features);
                }
-               dst->metrics[RTAX_MTU-1] = mtu;
-               call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
+               dst_metric_set(dst, RTAX_MTU, mtu);
        }
 }
 
-static int ipv6_get_mtu(struct net_device *dev);
-
-static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
+static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 {
+       struct net_device *dev = dst->dev;
+       unsigned int mtu = dst_mtu(dst);
+       struct net *net = dev_net(dev);
+
        mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
 
        if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
@@ -946,6 +1003,20 @@ static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
        return mtu;
 }
 
+static unsigned int ip6_default_mtu(const struct dst_entry *dst)
+{
+       unsigned int mtu = IPV6_MIN_MTU;
+       struct inet6_dev *idev;
+
+       rcu_read_lock();
+       idev = __in6_dev_get(dst->dev);
+       if (idev)
+               mtu = idev->cnf.mtu6;
+       rcu_read_unlock();
+
+       return mtu;
+}
+
 static struct dst_entry *icmp6_dst_gc_list;
 static DEFINE_SPINLOCK(icmp6_dst_lock);
 
@@ -979,9 +1050,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
        rt->rt6i_idev     = idev;
        rt->rt6i_nexthop  = neigh;
        atomic_set(&rt->dst.__refcnt, 1);
-       rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
-       rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-       rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+       dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
        rt->dst.output  = ip6_output;
 
 #if 0  /* there's no chance to use these for ndisc */
@@ -1080,23 +1149,10 @@ out:
    Remove it only when all the things will work!
  */
 
-static int ipv6_get_mtu(struct net_device *dev)
-{
-       int mtu = IPV6_MIN_MTU;
-       struct inet6_dev *idev;
-
-       rcu_read_lock();
-       idev = __in6_dev_get(dev);
-       if (idev)
-               mtu = idev->cnf.mtu6;
-       rcu_read_unlock();
-       return mtu;
-}
-
 int ip6_dst_hoplimit(struct dst_entry *dst)
 {
-       int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
-       if (hoplimit < 0) {
+       int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
+       if (hoplimit == 0) {
                struct net_device *dev = dst->dev;
                struct inet6_dev *idev;
 
@@ -1110,6 +1166,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
        }
        return hoplimit;
 }
+EXPORT_SYMBOL(ip6_dst_hoplimit);
 
 /*
  *
@@ -1295,17 +1352,11 @@ install_route:
                                        goto out;
                                }
 
-                               rt->dst.metrics[type - 1] = nla_get_u32(nla);
+                               dst_metric_set(&rt->dst, type, nla_get_u32(nla));
                        }
                }
        }
 
-       if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
-               rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
-       if (!dst_mtu(&rt->dst))
-               rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
-       if (!dst_metric(&rt->dst, RTAX_ADVMSS))
-               rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
        rt->dst.dev = dev;
        rt->rt6i_idev = idev;
        rt->rt6i_table = table;
@@ -1463,12 +1514,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
        struct ip6rd_flowi rdfl = {
                .fl = {
                        .oif = dev->ifindex,
-                       .nl_u = {
-                               .ip6_u = {
-                                       .daddr = *dest,
-                                       .saddr = *src,
-                               },
-                       },
+                       .fl6_dst = *dest,
+                       .fl6_src = *src,
                },
        };
 
@@ -1534,10 +1581,6 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 
        ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
        nrt->rt6i_nexthop = neigh_clone(neigh);
-       /* Reset pmtu, it may be better */
-       nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
-       nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
-                                                       dst_mtu(&nrt->dst));
 
        if (ip6_ins_rt(nrt))
                goto out;
@@ -1601,9 +1644,12 @@ again:
           would return automatically.
         */
        if (rt->rt6i_flags & RTF_CACHE) {
-               rt->dst.metrics[RTAX_MTU-1] = pmtu;
-               if (allfrag)
-                       rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+               dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
+               if (allfrag) {
+                       u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
+                       features |= RTAX_FEATURE_ALLFRAG;
+                       dst_metric_set(&rt->dst, RTAX_FEATURES, features);
+               }
                dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
                rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
                goto out;
@@ -1620,9 +1666,12 @@ again:
                nrt = rt6_alloc_clone(rt, daddr);
 
        if (nrt) {
-               nrt->dst.metrics[RTAX_MTU-1] = pmtu;
-               if (allfrag)
-                       nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+               dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
+               if (allfrag) {
+                       u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
+                       features |= RTAX_FEATURE_ALLFRAG;
+                       dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
+               }
 
                /* According to RFC 1981, detecting PMTU increase shouldn't be
                 * happened within 5 mins, the recommended timer is 10 mins.
@@ -1673,7 +1722,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
                rt->dst.input = ort->dst.input;
                rt->dst.output = ort->dst.output;
 
-               memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+               dst_copy_metrics(&rt->dst, &ort->dst);
                rt->dst.error = ort->dst.error;
                rt->dst.dev = ort->dst.dev;
                if (rt->dst.dev)
@@ -1965,9 +2014,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
        rt->dst.output = ip6_output;
        rt->rt6i_dev = net->loopback_dev;
        rt->rt6i_idev = idev;
-       rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-       rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
-       rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+       dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
        rt->dst.obsolete = -1;
 
        rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
@@ -2004,11 +2051,11 @@ struct arg_dev_net {
 
 static int fib6_ifdown(struct rt6_info *rt, void *arg)
 {
-       struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
-       struct net *net = ((struct arg_dev_net *)arg)->net;
+       const struct arg_dev_net *adn = arg;
+       const struct net_device *dev = adn->dev;
 
-       if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
-           rt != net->ipv6.ip6_null_entry) {
+       if ((rt->rt6i_dev == dev || dev == NULL) &&
+           rt != adn->net->ipv6.ip6_null_entry) {
                RT6_TRACE("deleted by ifdown %p\n", rt);
                return -1;
        }
@@ -2036,7 +2083,6 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 {
        struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
        struct inet6_dev *idev;
-       struct net *net = dev_net(arg->dev);
 
        /* In IPv6 pmtu discovery is not optional,
           so that RTAX_MTU lock cannot disable it.
@@ -2067,8 +2113,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
            (dst_mtu(&rt->dst) >= arg->mtu ||
             (dst_mtu(&rt->dst) < arg->mtu &&
              dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
-               rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
-               rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
+               dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
        }
        return 0;
 }
@@ -2294,7 +2339,7 @@ static int rt6_fill_node(struct net *net,
                        NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
        }
 
-       if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
+       if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
                goto nla_put_failure;
 
        if (rt->dst.neighbour)
@@ -2470,8 +2515,6 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 
 #ifdef CONFIG_PROC_FS
 
-#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
-
 struct rt6_proc_arg
 {
        char *buffer;
@@ -2687,6 +2730,8 @@ static int __net_init ip6_route_net_init(struct net *net)
        net->ipv6.ip6_null_entry->dst.path =
                (struct dst_entry *)net->ipv6.ip6_null_entry;
        net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+       dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
+                        ip6_template_metrics, true);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2697,6 +2742,8 @@ static int __net_init ip6_route_net_init(struct net *net)
        net->ipv6.ip6_prohibit_entry->dst.path =
                (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
        net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+       dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
+                        ip6_template_metrics, true);
 
        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2706,6 +2753,8 @@ static int __net_init ip6_route_net_init(struct net *net)
        net->ipv6.ip6_blk_hole_entry->dst.path =
                (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
        net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
+       dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
+                        ip6_template_metrics, true);
 #endif
 
        net->ipv6.sysctl.flush_delay = 0;