/* net/ipv4/ip_tunnel.c */
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
/* Install @dst as the cached route in @idst and drop the reference to
 * whatever was cached before.  Routes flagged DST_NOCACHE must not be
 * cached, so NULL is stored instead.  The xchg() makes the pointer swap
 * atomic with respect to concurrent updaters of the same slot.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	if (dst) {
		if (dst->flags & DST_NOCACHE)
			dst = NULL;
		else
			dst_clone(dst);	/* take a reference owned by the cache */
	}
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);	/* dst_release(NULL) is a no-op */
}
85
/* Cache @dst in the current CPU's slot of the tunnel's dst cache. */
static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
{
	__tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
}
90
/* Invalidate the current CPU's cached route for tunnel @t. */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL);
}
95
96 static void tunnel_dst_reset_all(struct ip_tunnel *t)
97 {
98         int i;
99
100         for_each_possible_cpu(i)
101                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102 }
103
/* Return the route cached on the current CPU with a reference held, or
 * NULL if nothing usable is cached.  An obsolete entry whose ->check()
 * fails is purged from the cache so the caller re-routes.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
	if (dst) {
		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
			/* Stale route: drop it and report a cache miss. */
			rcu_read_unlock();
			tunnel_dst_reset(t);
			return NULL;
		}
		dst_hold(dst);	/* reference handed to the caller */
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
121
122 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
123                                 __be16 flags, __be32 key)
124 {
125         if (p->i_flags & TUNNEL_KEY) {
126                 if (flags & TUNNEL_KEY)
127                         return key == p->i_key;
128                 else
129                         /* key expected, none present */
130                         return false;
131         } else
132                 return !(flags & TUNNEL_KEY);
133 }
134
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched to a configured keyless tunnel,
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
146 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
147                                    int link, __be16 flags,
148                                    __be32 remote, __be32 local,
149                                    __be32 key)
150 {
151         unsigned int hash;
152         struct ip_tunnel *t, *cand = NULL;
153         struct hlist_head *head;
154
155         hash = ip_tunnel_hash(key, remote);
156         head = &itn->tunnels[hash];
157
158         hlist_for_each_entry_rcu(t, head, hash_node) {
159                 if (local != t->parms.iph.saddr ||
160                     remote != t->parms.iph.daddr ||
161                     !(t->dev->flags & IFF_UP))
162                         continue;
163
164                 if (!ip_tunnel_key_match(&t->parms, flags, key))
165                         continue;
166
167                 if (t->parms.link == link)
168                         return t;
169                 else
170                         cand = t;
171         }
172
173         hlist_for_each_entry_rcu(t, head, hash_node) {
174                 if (remote != t->parms.iph.daddr ||
175                     !(t->dev->flags & IFF_UP))
176                         continue;
177
178                 if (!ip_tunnel_key_match(&t->parms, flags, key))
179                         continue;
180
181                 if (t->parms.link == link)
182                         return t;
183                 else if (!cand)
184                         cand = t;
185         }
186
187         hash = ip_tunnel_hash(key, 0);
188         head = &itn->tunnels[hash];
189
190         hlist_for_each_entry_rcu(t, head, hash_node) {
191                 if ((local != t->parms.iph.saddr &&
192                      (local != t->parms.iph.daddr ||
193                       !ipv4_is_multicast(local))) ||
194                     !(t->dev->flags & IFF_UP))
195                         continue;
196
197                 if (!ip_tunnel_key_match(&t->parms, flags, key))
198                         continue;
199
200                 if (t->parms.link == link)
201                         return t;
202                 else if (!cand)
203                         cand = t;
204         }
205
206         if (flags & TUNNEL_NO_KEY)
207                 goto skip_key_lookup;
208
209         hlist_for_each_entry_rcu(t, head, hash_node) {
210                 if (t->parms.i_key != key ||
211                     !(t->dev->flags & IFF_UP))
212                         continue;
213
214                 if (t->parms.link == link)
215                         return t;
216                 else if (!cand)
217                         cand = t;
218         }
219
220 skip_key_lookup:
221         if (cand)
222                 return cand;
223
224         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
225                 return netdev_priv(itn->fb_tunnel_dev);
226
227
228         return NULL;
229 }
230 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
231
232 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
233                                     struct ip_tunnel_parm *parms)
234 {
235         unsigned int h;
236         __be32 remote;
237
238         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
239                 remote = parms->iph.daddr;
240         else
241                 remote = 0;
242
243         h = ip_tunnel_hash(parms->i_key, remote);
244         return &itn->tunnels[h];
245 }
246
247 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
248 {
249         struct hlist_head *head = ip_bucket(itn, &t->parms);
250
251         hlist_add_head_rcu(&t->hash_node, head);
252 }
253
/* Unlink tunnel @t from its hash bucket (RCU-safe for readers). */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
258
259 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
260                                         struct ip_tunnel_parm *parms,
261                                         int type)
262 {
263         __be32 remote = parms->iph.daddr;
264         __be32 local = parms->iph.saddr;
265         __be32 key = parms->i_key;
266         int link = parms->link;
267         struct ip_tunnel *t = NULL;
268         struct hlist_head *head = ip_bucket(itn, parms);
269
270         hlist_for_each_entry_rcu(t, head, hash_node) {
271                 if (local == t->parms.iph.saddr &&
272                     remote == t->parms.iph.daddr &&
273                     key == t->parms.i_key &&
274                     link == t->parms.link &&
275                     type == t->dev->type)
276                         break;
277         }
278         return t;
279 }
280
281 static struct net_device *__ip_tunnel_create(struct net *net,
282                                              const struct rtnl_link_ops *ops,
283                                              struct ip_tunnel_parm *parms)
284 {
285         int err;
286         struct ip_tunnel *tunnel;
287         struct net_device *dev;
288         char name[IFNAMSIZ];
289
290         if (parms->name[0])
291                 strlcpy(name, parms->name, IFNAMSIZ);
292         else {
293                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
294                         err = -E2BIG;
295                         goto failed;
296                 }
297                 strlcpy(name, ops->kind, IFNAMSIZ);
298                 strncat(name, "%d", 2);
299         }
300
301         ASSERT_RTNL();
302         dev = alloc_netdev(ops->priv_size, name, ops->setup);
303         if (!dev) {
304                 err = -ENOMEM;
305                 goto failed;
306         }
307         dev_net_set(dev, net);
308
309         dev->rtnl_link_ops = ops;
310
311         tunnel = netdev_priv(dev);
312         tunnel->parms = *parms;
313         tunnel->net = net;
314
315         err = register_netdevice(dev);
316         if (err)
317                 goto failed_free;
318
319         return dev;
320
321 failed_free:
322         free_netdev(dev);
323 failed:
324         return ERR_PTR(err);
325 }
326
327 static inline void init_tunnel_flow(struct flowi4 *fl4,
328                                     int proto,
329                                     __be32 daddr, __be32 saddr,
330                                     __be32 key, __u8 tos, int oif)
331 {
332         memset(fl4, 0, sizeof(*fl4));
333         fl4->flowi4_oif = oif;
334         fl4->daddr = daddr;
335         fl4->saddr = saddr;
336         fl4->flowi4_tos = tos;
337         fl4->flowi4_proto = proto;
338         fl4->fl4_gre_key = key;
339 }
340
/* Determine the underlying output device for @dev so a sensible MTU and
 * needed_headroom can be derived, priming the per-cpu route cache when
 * the tunnel has a fixed destination.  Returns the MTU the tunnel
 * device should use, clamped to at least 68 (the minimum IPv4 MTU).
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			tunnel_dst_set(tunnel, &rt->dst);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route found: fall back to the explicitly bound link, if any. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Reserve room for both the underlay's and our own headers. */
	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
388
389 static struct ip_tunnel *ip_tunnel_create(struct net *net,
390                                           struct ip_tunnel_net *itn,
391                                           struct ip_tunnel_parm *parms)
392 {
393         struct ip_tunnel *nt, *fbt;
394         struct net_device *dev;
395
396         BUG_ON(!itn->fb_tunnel_dev);
397         fbt = netdev_priv(itn->fb_tunnel_dev);
398         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
399         if (IS_ERR(dev))
400                 return NULL;
401
402         dev->mtu = ip_tunnel_bind_dev(dev);
403
404         nt = netdev_priv(dev);
405         ip_tunnel_add(itn, nt);
406         return nt;
407 }
408
/* Common receive path for IP tunnels: validate the decapsulated packet
 * against the tunnel's checksum/sequence expectations, decapsulate ECN,
 * update per-cpu stats and hand the skb to GRO.  Always consumes the
 * skb and returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence must agree between packet and tunnel config. */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		/* Drop packets missing a sequence number or arriving out
		 * of order; the s32 cast keeps the comparison safe across
		 * sequence-number wraparound.
		 */
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		/* err > 1 signals the packet must be dropped, not just logged */
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub skb state when the packet crosses a netns boundary. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
478
/* Enforce path MTU on a packet about to be tunnelled over route @rt.
 * Updates the inner dst's PMTU and, when the (non-GSO) packet exceeds
 * the tunnel MTU, sends the appropriate "too big" ICMP/ICMPv6 error
 * back to the sender and returns -E2BIG; returns 0 otherwise.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		/* DF set: MTU is the route's MTU minus our encapsulation */
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the reduced MTU on host routes (or fixed-endpoint
		 * tunnels) so later IPv6 traffic uses it.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
526
/* Common transmit path for IP tunnels: resolve the outer destination
 * (including NBMA tunnels whose destination comes from the inner
 * packet), route the encapsulated packet, enforce PMTU, compute the
 * outer TOS/TTL/DF fields and emit via iptunnel_xmit().  Consumes the
 * skb on all paths.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected = true;		/* may the dst cache be used? */

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination from the
		 * inner packet's routing information.
		 */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only v4-compatible IPv6 addresses embed a usable
			 * IPv4 destination in their low 32 bits.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit set means "inherit TOS from the inner packet",
		 * which makes the route per-packet, not cacheable.
		 */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst);
	}

	/* Routing back out of ourselves would recurse: drop. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Briefly bounce packets after an ICMP error on this tunnel. */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* TTL 0 means "inherit from the inner packet". */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
686
/* Apply new parameters @p to existing tunnel @t.  The tunnel is
 * unhashed first and re-added afterwards because changing the
 * endpoints or key moves it to a different hash bucket.  The per-cpu
 * route cache is flushed since any cached route may now be wrong.
 * Caller holds RTNL.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as their
		 * device/broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
719
/* Legacy ioctl interface for tunnel management (SIOC{GET,ADD,CHG,DEL}TUNNEL).
 * @p is the kernel-space copy of the user's ip_tunnel_parm; for
 * SIOCGETTUNNEL it is filled in with the tunnel's parameters.
 * Returns 0 or a negative errno.  Caller holds RTNL.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device the user names the tunnel via @p;
		 * otherwise report the queried device itself.
		 */
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A finite TTL implies PMTU discovery, hence DF. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		/* Ignore keys the user did not actually enable. */
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters already belong to another device */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				/* Changing a tunnel must not flip its
				 * broadcast/point-to-point nature.
				 */
				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself cannot be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
813
814 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
815 {
816         struct ip_tunnel *tunnel = netdev_priv(dev);
817         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
818
819         if (new_mtu < 68 ||
820             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
821                 return -EINVAL;
822         dev->mtu = new_mtu;
823         return 0;
824 }
825 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
826
/* net_device destructor: release the tunnel's auxiliary per-cpu state
 * (GRO cells, dst cache, stats) before freeing the device itself.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
836
837 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
838 {
839         struct ip_tunnel *tunnel = netdev_priv(dev);
840         struct ip_tunnel_net *itn;
841
842         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
843
844         if (itn->fb_tunnel_dev != dev) {
845                 ip_tunnel_del(netdev_priv(dev));
846                 unregister_netdevice_queue(dev, head);
847         }
848 }
849 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
850
/* Per-netns init for a tunnel type: initialise the hash table and,
 * when @ops is given, create the fallback device named @devname.
 * The fallback device is pinned to its namespace (NETIF_F_NETNS_LOCAL)
 * because there is exactly one per netns and moving it would be unsafe.
 * Returns 0 or a negative errno from fallback-device creation.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		/* Tunnel type without a fallback device (e.g. collect-md) */
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
884
/* Queue every tunnel device belonging to @itn for unregistration on @head.
 *
 * Two passes are needed because a tunnel hashed in @itn may live in a
 * different netns than the one being torn down (NOTE(review): presumably
 * after a cross-netns create; the hash table is keyed per family, not per
 * device netns):
 *   1) all devices of this netns whose rtnl ops match @ops;
 *   2) any hashed tunnel whose device sits in a foreign netns.
 * Caller must hold the rtnl lock.
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
909
/* Per-netns exit helper: unregister all tunnel devices of @itn in one
 * batch.  Collecting them on a local list and calling
 * unregister_netdevice_many() once avoids a per-device rtnl round trip.
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
920
921 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
922                       struct ip_tunnel_parm *p)
923 {
924         struct ip_tunnel *nt;
925         struct net *net = dev_net(dev);
926         struct ip_tunnel_net *itn;
927         int mtu;
928         int err;
929
930         nt = netdev_priv(dev);
931         itn = net_generic(net, nt->ip_tnl_net_id);
932
933         if (ip_tunnel_find(itn, p, dev->type))
934                 return -EEXIST;
935
936         nt->net = net;
937         nt->parms = *p;
938         err = register_netdevice(dev);
939         if (err)
940                 goto out;
941
942         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
943                 eth_hw_addr_random(dev);
944
945         mtu = ip_tunnel_bind_dev(dev);
946         if (!tb[IFLA_MTU])
947                 dev->mtu = mtu;
948
949         ip_tunnel_add(itn, nt);
950
951 out:
952         return err;
953 }
954 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
955
/* rtnl changelink handler shared by ip_tunnel users.
 *
 * Reconfigures @dev with the parameters in @p.  Rejects changes to the
 * per-netns fallback device, and rejects parameters that collide with a
 * different existing tunnel (-EEXIST).  When no IFLA_MTU attribute is
 * present, ip_tunnel_update() is told to recompute the MTU.
 *
 * Returns 0 on success or a negative errno.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	/* The fallback device's parameters are immutable. */
	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		/* @p matches an existing tunnel; only allowed if it is
		 * this very device (i.e. a no-op key change).
		 */
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			/* The new destination implies a link flavor
			 * (broadcast for multicast daddr, ptp for a
			 * unicast daddr); refuse a change that would
			 * flip IFF_BROADCAST/IFF_POINTOPOINT on a live
			 * device.
			 */
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
993
994 int ip_tunnel_init(struct net_device *dev)
995 {
996         struct ip_tunnel *tunnel = netdev_priv(dev);
997         struct iphdr *iph = &tunnel->parms.iph;
998         int i, err;
999
1000         dev->destructor = ip_tunnel_dev_free;
1001         dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
1002         if (!dev->tstats)
1003                 return -ENOMEM;
1004
1005         for_each_possible_cpu(i) {
1006                 struct pcpu_sw_netstats *ipt_stats;
1007                 ipt_stats = per_cpu_ptr(dev->tstats, i);
1008                 u64_stats_init(&ipt_stats->syncp);
1009         }
1010
1011         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1012         if (!tunnel->dst_cache) {
1013                 free_percpu(dev->tstats);
1014                 return -ENOMEM;
1015         }
1016
1017         err = gro_cells_init(&tunnel->gro_cells, dev);
1018         if (err) {
1019                 free_percpu(tunnel->dst_cache);
1020                 free_percpu(dev->tstats);
1021                 return err;
1022         }
1023
1024         tunnel->dev = dev;
1025         tunnel->net = dev_net(dev);
1026         strcpy(tunnel->parms.name, dev->name);
1027         iph->version            = 4;
1028         iph->ihl                = 5;
1029
1030         return 0;
1031 }
1032 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1033
1034 void ip_tunnel_uninit(struct net_device *dev)
1035 {
1036         struct ip_tunnel *tunnel = netdev_priv(dev);
1037         struct net *net = tunnel->net;
1038         struct ip_tunnel_net *itn;
1039
1040         itn = net_generic(net, tunnel->ip_tnl_net_id);
1041         /* fb_tunnel_dev will be unregisted in net-exit call. */
1042         if (itn->fb_tunnel_dev != dev)
1043                 ip_tunnel_del(netdev_priv(dev));
1044
1045         tunnel_dst_reset_all(tunnel);
1046 }
1047 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1048
1049 /* Do least required initialization, rest of init is done in tunnel_init call */
1050 void ip_tunnel_setup(struct net_device *dev, int net_id)
1051 {
1052         struct ip_tunnel *tunnel = netdev_priv(dev);
1053         tunnel->ip_tnl_net_id = net_id;
1054 }
1055 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1056
1057 MODULE_LICENSE("GPL");