]> Pileus Git - ~andy/linux/blob - net/ipv4/ip_tunnel.c
78a89e61925d6ae27937098f906145d2d8c48f5d
[~andy/linux] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
/* Install @dst as the cached route in slot @idst, releasing whatever
 * reference the slot previously held.
 *
 * Routes flagged DST_NOCACHE must not be cached, so they are replaced
 * by NULL; otherwise a reference is taken (dst_clone) before the entry
 * is published.  xchg() makes the swap atomic with respect to other
 * writers of the same per-cpu slot.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	if (dst) {
		if (dst->flags & DST_NOCACHE)
			dst = NULL;
		else
			dst_clone(dst);
	}
	/* Atomically publish the new entry and fetch the old one. */
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);	/* dst_release(NULL) is a no-op */
}
85
86 static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
87 {
88         __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
89 }
90
91 static void tunnel_dst_reset(struct ip_tunnel *t)
92 {
93         tunnel_dst_set(t, NULL);
94 }
95
96 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
97 {
98         int i;
99
100         for_each_possible_cpu(i)
101                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102 }
103 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
104
/* Return this CPU's cached route for @t with a reference held, or NULL
 * when there is no (valid) cached entry and the caller must do a fresh
 * route lookup.  The caller releases the route via ip_rt_put().
 *
 * An obsolete entry is re-validated through dst->ops->check(); if the
 * check fails, the current CPU's slot is reset before returning NULL.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
	if (dst) {
		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
			/* Stale route: forget it and force a new lookup. */
			rcu_read_unlock();
			tunnel_dst_reset(t);
			return NULL;
		}
		/* NOTE(review): dst_hold() under rcu_read_lock assumes the
		 * cached slot still owns a reference here — confirm against
		 * the dst refcounting rules for this kernel version.
		 */
		dst_hold(dst);
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
122
123 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
124                                 __be16 flags, __be32 key)
125 {
126         if (p->i_flags & TUNNEL_KEY) {
127                 if (flags & TUNNEL_KEY)
128                         return key == p->i_key;
129                 else
130                         /* key expected, none present */
131                         return false;
132         } else
133                 return !(flags & TUNNEL_KEY);
134 }
135
136 /* Fallback tunnel: no source, no destination, no key, no options
137
138    Tunnel hash table:
139    We require exact key match i.e. if a key is present in packet
140    it will match only tunnel with the same key; if it is not present,
141    it will match only keyless tunnel.
142
   All keyless packets, if not matched against configured keyless
   tunnels, will match the fallback tunnel.
145    Given src, dst and key, find appropriate for input tunnel.
146 */
/* Find the best input tunnel for a received packet, from most to least
 * specific:
 *   1. exact (local, remote) match,
 *   2. remote-only match,
 *   3. local / multicast match in the wildcard-remote bucket,
 *   4. key-only match (skipped when the packet carries no key),
 * and finally the fallback device.  Within each pass a tunnel bound to
 * the ingress @link wins outright; otherwise the first acceptable
 * tunnel is remembered in @cand.  Caller must hold rcu_read_lock().
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: exact match on both local and remote address. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: same bucket, match on remote address only. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Passes 3 and 4 scan the wildcard-remote bucket. */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: local address match, or multicast destination that is
	 * the tunnel's remote endpoint.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr &&
		     (local != t->parms.iph.daddr ||
		      !ipv4_is_multicast(local))) ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: match on the key alone. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* Nothing specific matched: fall back to the per-netns device. */
	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);


	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
232
233 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
234                                     struct ip_tunnel_parm *parms)
235 {
236         unsigned int h;
237         __be32 remote;
238
239         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
240                 remote = parms->iph.daddr;
241         else
242                 remote = 0;
243
244         h = ip_tunnel_hash(parms->i_key, remote);
245         return &itn->tunnels[h];
246 }
247
248 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
249 {
250         struct hlist_head *head = ip_bucket(itn, &t->parms);
251
252         hlist_add_head_rcu(&t->hash_node, head);
253 }
254
/* Unlink tunnel @t from its hash bucket; concurrent RCU readers may
 * keep traversing it until a grace period elapses.
 */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
259
260 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
261                                         struct ip_tunnel_parm *parms,
262                                         int type)
263 {
264         __be32 remote = parms->iph.daddr;
265         __be32 local = parms->iph.saddr;
266         __be32 key = parms->i_key;
267         int link = parms->link;
268         struct ip_tunnel *t = NULL;
269         struct hlist_head *head = ip_bucket(itn, parms);
270
271         hlist_for_each_entry_rcu(t, head, hash_node) {
272                 if (local == t->parms.iph.saddr &&
273                     remote == t->parms.iph.daddr &&
274                     key == t->parms.i_key &&
275                     link == t->parms.link &&
276                     type == t->dev->type)
277                         break;
278         }
279         return t;
280 }
281
282 static struct net_device *__ip_tunnel_create(struct net *net,
283                                              const struct rtnl_link_ops *ops,
284                                              struct ip_tunnel_parm *parms)
285 {
286         int err;
287         struct ip_tunnel *tunnel;
288         struct net_device *dev;
289         char name[IFNAMSIZ];
290
291         if (parms->name[0])
292                 strlcpy(name, parms->name, IFNAMSIZ);
293         else {
294                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
295                         err = -E2BIG;
296                         goto failed;
297                 }
298                 strlcpy(name, ops->kind, IFNAMSIZ);
299                 strncat(name, "%d", 2);
300         }
301
302         ASSERT_RTNL();
303         dev = alloc_netdev(ops->priv_size, name, ops->setup);
304         if (!dev) {
305                 err = -ENOMEM;
306                 goto failed;
307         }
308         dev_net_set(dev, net);
309
310         dev->rtnl_link_ops = ops;
311
312         tunnel = netdev_priv(dev);
313         tunnel->parms = *parms;
314         tunnel->net = net;
315
316         err = register_netdevice(dev);
317         if (err)
318                 goto failed_free;
319
320         return dev;
321
322 failed_free:
323         free_netdev(dev);
324 failed:
325         return ERR_PTR(err);
326 }
327
328 static inline void init_tunnel_flow(struct flowi4 *fl4,
329                                     int proto,
330                                     __be32 daddr, __be32 saddr,
331                                     __be32 key, __u8 tos, int oif)
332 {
333         memset(fl4, 0, sizeof(*fl4));
334         fl4->flowi4_oif = oif;
335         fl4->daddr = daddr;
336         fl4->saddr = saddr;
337         fl4->flowi4_tos = tos;
338         fl4->flowi4_proto = proto;
339         fl4->fl4_gre_key = key;
340 }
341
/* Guess the lower (physical) device that tunnel traffic will leave
 * through and derive a suitable MTU and needed_headroom for @dev.
 *
 * Returns the suggested MTU (never below 68, the IPv4 minimum).
 * Side effects: updates dev->needed_headroom, dev->iflink, possibly
 * dev->flags, and may prime the per-cpu route cache.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* Prime the route cache while we hold a fresh route. */
			tunnel_dst_set(tunnel, &rt->dst);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route found: fall back to the explicitly configured link. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Reserve room for the outer IP header plus the tunnel header. */
	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
389
390 static struct ip_tunnel *ip_tunnel_create(struct net *net,
391                                           struct ip_tunnel_net *itn,
392                                           struct ip_tunnel_parm *parms)
393 {
394         struct ip_tunnel *nt, *fbt;
395         struct net_device *dev;
396
397         BUG_ON(!itn->fb_tunnel_dev);
398         fbt = netdev_priv(itn->fb_tunnel_dev);
399         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
400         if (IS_ERR(dev))
401                 return NULL;
402
403         dev->mtu = ip_tunnel_bind_dev(dev);
404
405         nt = netdev_priv(dev);
406         ip_tunnel_add(itn, nt);
407         return nt;
408 }
409
/* Common receive path for IPv4 tunnels: called by protocol handlers
 * (GRE, IPIP, ...) once @skb has been matched to @tunnel and the
 * tunnel header has been parsed into @tpi.
 *
 * Validates checksum/sequence expectations against the tunnel config,
 * decapsulates ECN, updates rx statistics and hands the packet to the
 * GRO cell for delivery.  Always consumes @skb; returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* The packet must carry a checksum iff the tunnel is configured
	 * with TUNNEL_CSUM; either mismatch counts as a CRC error.
	 */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		/* Drop packets without a sequence number, and stale
		 * packets (serial-number comparison on the 32-bit seq).
		 */
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* Propagate the outer header's ECN bits into the inner packet;
	 * err > 1 flags an invalid combination that must be dropped.
	 */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Drop state from the other netns if the packet crossed one. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
479
/* Path-MTU handling on the tunnel transmit path.
 *
 * Computes the usable inner MTU (outer route MTU minus the link and
 * encapsulation headers when DF is set), propagates it to the inner
 * dst, and when the packet does not fit, notifies the sender with
 * ICMP FRAG_NEEDED (IPv4) or ICMPV6_PKT_TOOBIG (IPv6).
 *
 * Returns 0 when the packet may proceed, -E2BIG when an ICMP error
 * was generated and the packet must be dropped by the caller.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	/* Size of the inner packet as it will appear on the wire. */
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the smaller MTU on the inner IPv6 route, but only
		 * for host routes or tunnels with a fixed unicast endpoint.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
527
/* Common transmit path for IPv4 tunnels.
 *
 * Resolves the outer destination (@tnl_params->daddr, or per-packet
 * for NBMA tunnels), finds the outer route (using the per-cpu route
 * cache when the flow is "connected", i.e. fully determined by tunnel
 * config), handles PMTU, then builds the outer IP header and sends via
 * iptunnel_xmit().  Always consumes @skb.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	/* "connected" means the outer flow depends only on the tunnel
	 * configuration, so the cached route may be used.
	 */
	bool connected = true;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		/* Derive the outer destination from the inner packet's
		 * next hop (IPv4) or neighbour entry (IPv6-compat).
		 */
		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible addresses (::a.b.c.d) embed
			 * an IPv4 endpoint we can tunnel to.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	/* Low TOS bit set means "inherit TOS from the inner packet". */
	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		/* Cache the route only for connected flows. */
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst);
	}

	/* Routing back through ourselves would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Recent ICMP errors from the far end: signal link failure for a
	 * limited number of packets within the error window.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	/* TTL 0 means "inherit from the inner packet" (or the route for
	 * non-IP payloads).
	 */
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	/* Copy the inner DF bit to the outer header for IPv4 payloads. */
	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
687
/* Apply new parameters @p to an existing tunnel @t (config path).
 *
 * The tunnel is removed and re-inserted because saddr/daddr/i_key
 * determine its hash bucket.  For non-Ethernet tunnels dev_addr and
 * broadcast mirror the endpoint addresses.  A link change re-binds
 * the lower device (and optionally the MTU), and the per-cpu route
 * cache is always flushed since cached routes may now be wrong.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	/* Unhash before changing the fields that pick the bucket. */
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
720
/* Legacy ioctl-based tunnel configuration (SIOCGETTUNNEL,
 * SIOCADDTUNNEL, SIOCCHGTUNNEL, SIOCDELTUNNEL), shared by the tunnel
 * modules.  @p is the user-supplied parameter block, already copied in
 * by the caller; on GET it is filled with the tunnel's parameters.
 *
 * Returns 0 or a negative errno.  ADD/CHG/DEL require CAP_NET_ADMIN
 * in the netns's user namespace.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, look up the tunnel described by
		 * @p; on a specific device, report that device itself.
		 */
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* Normalise the request: a TTL implies DF, and keys are
		 * meaningless without the corresponding TUNNEL_KEY flag.
		 */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* @p matches a different device: refuse. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Changing the endpoints must not change the
				 * device's broadcast/point-to-point nature.
				 */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself cannot be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
814
815 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
816 {
817         struct ip_tunnel *tunnel = netdev_priv(dev);
818         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
819
820         if (new_mtu < 68 ||
821             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
822                 return -EINVAL;
823         dev->mtu = new_mtu;
824         return 0;
825 }
826 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
827
/* netdev destructor: release everything the tunnel device owns.
 * Order matters — per-tunnel resources (gro cells, route cache,
 * stats) must go before free_netdev() frees the private area itself.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
837
838 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
839 {
840         struct ip_tunnel *tunnel = netdev_priv(dev);
841         struct ip_tunnel_net *itn;
842
843         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
844
845         if (itn->fb_tunnel_dev != dev) {
846                 ip_tunnel_del(netdev_priv(dev));
847                 unregister_netdevice_queue(dev, head);
848         }
849 }
850 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
851
/* Per-netns initialisation for a tunnel module: set up the hash table
 * and, unless @ops is NULL, create the fallback device named @devname.
 *
 * Returns 0 on success, or the error from fallback device creation.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	/* No link ops: the caller wants a hash table but no fallback dev. */
	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	/* On failure itn->fb_tunnel_dev holds the ERR_PTR; propagate it. */
	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
885
886 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
887                               struct rtnl_link_ops *ops)
888 {
889         struct net *net = dev_net(itn->fb_tunnel_dev);
890         struct net_device *dev, *aux;
891         int h;
892
893         for_each_netdev_safe(net, dev, aux)
894                 if (dev->rtnl_link_ops == ops)
895                         unregister_netdevice_queue(dev, head);
896
897         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
898                 struct ip_tunnel *t;
899                 struct hlist_node *n;
900                 struct hlist_head *thead = &itn->tunnels[h];
901
902                 hlist_for_each_entry_safe(t, n, thead, hash_node)
903                         /* If dev is in the same netns, it has already
904                          * been added to the list by the previous loop.
905                          */
906                         if (!net_eq(dev_net(t->dev), net))
907                                 unregister_netdevice_queue(t->dev, head);
908         }
909 }
910
911 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
912 {
913         LIST_HEAD(list);
914
915         rtnl_lock();
916         ip_tunnel_destroy(itn, &list, ops);
917         unregister_netdevice_many(&list);
918         rtnl_unlock();
919 }
920 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
921
922 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
923                       struct ip_tunnel_parm *p)
924 {
925         struct ip_tunnel *nt;
926         struct net *net = dev_net(dev);
927         struct ip_tunnel_net *itn;
928         int mtu;
929         int err;
930
931         nt = netdev_priv(dev);
932         itn = net_generic(net, nt->ip_tnl_net_id);
933
934         if (ip_tunnel_find(itn, p, dev->type))
935                 return -EEXIST;
936
937         nt->net = net;
938         nt->parms = *p;
939         err = register_netdevice(dev);
940         if (err)
941                 goto out;
942
943         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
944                 eth_hw_addr_random(dev);
945
946         mtu = ip_tunnel_bind_dev(dev);
947         if (!tb[IFLA_MTU])
948                 dev->mtu = mtu;
949
950         ip_tunnel_add(itn, nt);
951
952 out:
953         return err;
954 }
955 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
956
957 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
958                          struct ip_tunnel_parm *p)
959 {
960         struct ip_tunnel *t;
961         struct ip_tunnel *tunnel = netdev_priv(dev);
962         struct net *net = tunnel->net;
963         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
964
965         if (dev == itn->fb_tunnel_dev)
966                 return -EINVAL;
967
968         t = ip_tunnel_find(itn, p, dev->type);
969
970         if (t) {
971                 if (t->dev != dev)
972                         return -EEXIST;
973         } else {
974                 t = tunnel;
975
976                 if (dev->type != ARPHRD_ETHER) {
977                         unsigned int nflags = 0;
978
979                         if (ipv4_is_multicast(p->iph.daddr))
980                                 nflags = IFF_BROADCAST;
981                         else if (p->iph.daddr)
982                                 nflags = IFF_POINTOPOINT;
983
984                         if ((dev->flags ^ nflags) &
985                             (IFF_POINTOPOINT | IFF_BROADCAST))
986                                 return -EINVAL;
987                 }
988         }
989
990         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
991         return 0;
992 }
993 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
994
995 int ip_tunnel_init(struct net_device *dev)
996 {
997         struct ip_tunnel *tunnel = netdev_priv(dev);
998         struct iphdr *iph = &tunnel->parms.iph;
999         int i, err;
1000
1001         dev->destructor = ip_tunnel_dev_free;
1002         dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
1003         if (!dev->tstats)
1004                 return -ENOMEM;
1005
1006         for_each_possible_cpu(i) {
1007                 struct pcpu_sw_netstats *ipt_stats;
1008                 ipt_stats = per_cpu_ptr(dev->tstats, i);
1009                 u64_stats_init(&ipt_stats->syncp);
1010         }
1011
1012         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1013         if (!tunnel->dst_cache) {
1014                 free_percpu(dev->tstats);
1015                 return -ENOMEM;
1016         }
1017
1018         err = gro_cells_init(&tunnel->gro_cells, dev);
1019         if (err) {
1020                 free_percpu(tunnel->dst_cache);
1021                 free_percpu(dev->tstats);
1022                 return err;
1023         }
1024
1025         tunnel->dev = dev;
1026         tunnel->net = dev_net(dev);
1027         strcpy(tunnel->parms.name, dev->name);
1028         iph->version            = 4;
1029         iph->ihl                = 5;
1030
1031         return 0;
1032 }
1033 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1034
1035 void ip_tunnel_uninit(struct net_device *dev)
1036 {
1037         struct ip_tunnel *tunnel = netdev_priv(dev);
1038         struct net *net = tunnel->net;
1039         struct ip_tunnel_net *itn;
1040
1041         itn = net_generic(net, tunnel->ip_tnl_net_id);
1042         /* fb_tunnel_dev will be unregisted in net-exit call. */
1043         if (itn->fb_tunnel_dev != dev)
1044                 ip_tunnel_del(netdev_priv(dev));
1045
1046         ip_tunnel_dst_reset_all(tunnel);
1047 }
1048 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1049
1050 /* Do least required initialization, rest of init is done in tunnel_init call */
1051 void ip_tunnel_setup(struct net_device *dev, int net_id)
1052 {
1053         struct ip_tunnel *tunnel = netdev_priv(dev);
1054         tunnel->ip_tnl_net_id = net_id;
1055 }
1056 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1057
1058 MODULE_LICENSE("GPL");