/* net/ipv4/ip_tunnel.c — snapshot from the ~andy/linux tree (Pileus Git),
 * at commit "tunnels: harmonize cleanup done on skb on xmit path".
 */
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57
58 #if IS_ENABLED(CONFIG_IPV6)
59 #include <net/ipv6.h>
60 #include <net/ip6_fib.h>
61 #include <net/ip6_route.h>
62 #endif
63
64 static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65                                    __be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
/* Often modified stats are per cpu, other are shared (netdev->stats).
 *
 * Sum the per-cpu rx/tx packet and byte counters into @tot, then copy
 * the less frequently updated error counters from the shared
 * netdev->stats.  Returns @tot so callers can chain the result.
 */
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		/* Seqcount retry loop: re-sample if a writer updated the
		 * stats concurrently, so the four values form a consistent
		 * snapshot (needed for 64-bit counters on 32-bit hosts).
		 */
		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes   += rx_bytes;
		tot->tx_bytes   += tx_bytes;
	}

	/* Error counters are only bumped in slow paths and live in the
	 * shared netdev stats; copy them over verbatim.
	 */
	tot->multicast = dev->stats.multicast;

	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;
	tot->rx_errors = dev->stats.rx_errors;

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	tot->collisions  = dev->stats.collisions;

	return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
115
116 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117                                 __be16 flags, __be32 key)
118 {
119         if (p->i_flags & TUNNEL_KEY) {
120                 if (flags & TUNNEL_KEY)
121                         return key == p->i_key;
122                 else
123                         /* key expected, none present */
124                         return false;
125         } else
126                 return !(flags & TUNNEL_KEY);
127 }
128
129 /* Fallback tunnel: no source, no destination, no key, no options
130
131    Tunnel hash table:
132    We require exact key match i.e. if a key is present in packet
133    it will match only tunnel with the same key; if it is not present,
134    it will match only keyless tunnel.
135
136    All keysless packets, if not matched configured keyless tunnels
137    will match fallback tunnel.
138    Given src, dst and key, find appropriate for input tunnel.
139 */
140 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
141                                    int link, __be16 flags,
142                                    __be32 remote, __be32 local,
143                                    __be32 key)
144 {
145         unsigned int hash;
146         struct ip_tunnel *t, *cand = NULL;
147         struct hlist_head *head;
148
149         hash = ip_tunnel_hash(itn, key, remote);
150         head = &itn->tunnels[hash];
151
152         hlist_for_each_entry_rcu(t, head, hash_node) {
153                 if (local != t->parms.iph.saddr ||
154                     remote != t->parms.iph.daddr ||
155                     !(t->dev->flags & IFF_UP))
156                         continue;
157
158                 if (!ip_tunnel_key_match(&t->parms, flags, key))
159                         continue;
160
161                 if (t->parms.link == link)
162                         return t;
163                 else
164                         cand = t;
165         }
166
167         hlist_for_each_entry_rcu(t, head, hash_node) {
168                 if (remote != t->parms.iph.daddr ||
169                     !(t->dev->flags & IFF_UP))
170                         continue;
171
172                 if (!ip_tunnel_key_match(&t->parms, flags, key))
173                         continue;
174
175                 if (t->parms.link == link)
176                         return t;
177                 else if (!cand)
178                         cand = t;
179         }
180
181         hash = ip_tunnel_hash(itn, key, 0);
182         head = &itn->tunnels[hash];
183
184         hlist_for_each_entry_rcu(t, head, hash_node) {
185                 if ((local != t->parms.iph.saddr &&
186                      (local != t->parms.iph.daddr ||
187                       !ipv4_is_multicast(local))) ||
188                     !(t->dev->flags & IFF_UP))
189                         continue;
190
191                 if (!ip_tunnel_key_match(&t->parms, flags, key))
192                         continue;
193
194                 if (t->parms.link == link)
195                         return t;
196                 else if (!cand)
197                         cand = t;
198         }
199
200         if (flags & TUNNEL_NO_KEY)
201                 goto skip_key_lookup;
202
203         hlist_for_each_entry_rcu(t, head, hash_node) {
204                 if (t->parms.i_key != key ||
205                     !(t->dev->flags & IFF_UP))
206                         continue;
207
208                 if (t->parms.link == link)
209                         return t;
210                 else if (!cand)
211                         cand = t;
212         }
213
214 skip_key_lookup:
215         if (cand)
216                 return cand;
217
218         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
219                 return netdev_priv(itn->fb_tunnel_dev);
220
221
222         return NULL;
223 }
224 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
225
226 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
227                                     struct ip_tunnel_parm *parms)
228 {
229         unsigned int h;
230         __be32 remote;
231
232         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
233                 remote = parms->iph.daddr;
234         else
235                 remote = 0;
236
237         h = ip_tunnel_hash(itn, parms->i_key, remote);
238         return &itn->tunnels[h];
239 }
240
241 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
242 {
243         struct hlist_head *head = ip_bucket(itn, &t->parms);
244
245         hlist_add_head_rcu(&t->hash_node, head);
246 }
247
/* Unlink tunnel @t from its hash bucket.  hlist_del_init_rcu() leaves the
 * node re-addable and lets concurrent RCU readers still traversing the
 * bucket complete safely.
 */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
252
253 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
254                                         struct ip_tunnel_parm *parms,
255                                         int type)
256 {
257         __be32 remote = parms->iph.daddr;
258         __be32 local = parms->iph.saddr;
259         __be32 key = parms->i_key;
260         int link = parms->link;
261         struct ip_tunnel *t = NULL;
262         struct hlist_head *head = ip_bucket(itn, parms);
263
264         hlist_for_each_entry_rcu(t, head, hash_node) {
265                 if (local == t->parms.iph.saddr &&
266                     remote == t->parms.iph.daddr &&
267                     key == t->parms.i_key &&
268                     link == t->parms.link &&
269                     type == t->dev->type)
270                         break;
271         }
272         return t;
273 }
274
/* Allocate and register a tunnel net_device in @net with parameters
 * @parms, using @ops for sizing and setup.  Returns the new device or an
 * ERR_PTR on failure.  Must be called under RTNL.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* No explicit name: build "<kind>%d" so the core picks a
		 * free index.  Need room for "%d" plus the terminator.
		 */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	/* Seed the private area before registration so ndo callbacks see
	 * valid parameters immediately.
	 */
	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
320
/* Build a flowi4 for the tunnel's outer header and resolve a route for it.
 * memset() is deliberate: flowi4 is used as a flow hash key, so padding
 * and all unset members must be zero.
 */
static inline struct rtable *ip_route_output_tunnel(struct net *net,
						    struct flowi4 *fl4,
						    int proto,
						    __be32 daddr, __be32 saddr,
						    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
	return ip_route_output_key(net, fl4);
}
336
/* Guess the underlying output device for the tunnel and derive a suitable
 * MTU and needed_headroom from it.  Returns the MTU to use for @dev
 * (clamped to a minimum of 68, the IPv4 minimum).
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_tunnel(tunnel->net, &fl4,
					    tunnel->parms.iph.protocol,
					    iph->daddr, iph->saddr,
					    tunnel->parms.o_key,
					    RT_TOS(iph->tos),
					    tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route (or no daddr): fall back to the explicitly bound link. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Reserve room for the tunnel header plus the lower device's. */
	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
384
385 static struct ip_tunnel *ip_tunnel_create(struct net *net,
386                                           struct ip_tunnel_net *itn,
387                                           struct ip_tunnel_parm *parms)
388 {
389         struct ip_tunnel *nt, *fbt;
390         struct net_device *dev;
391
392         BUG_ON(!itn->fb_tunnel_dev);
393         fbt = netdev_priv(itn->fb_tunnel_dev);
394         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
395         if (IS_ERR(dev))
396                 return NULL;
397
398         dev->mtu = ip_tunnel_bind_dev(dev);
399
400         nt = netdev_priv(dev);
401         ip_tunnel_add(itn, nt);
402         return nt;
403 }
404
/* Receive path for a decapsulated tunnel packet.  Validates checksum and
 * sequence-number expectations against the tunnel's configured i_flags,
 * decapsulates ECN, updates per-cpu stats and hands the skb to GRO.
 * Always returns 0; on any error the skb is dropped (consumed) after the
 * relevant error counters are bumped.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_tstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* The packet's checksum flag must agree with the tunnel's: a
	 * mismatch in either direction is counted as a CRC error.
	 */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Sequencing enabled: reject packets without a sequence number or
	 * with one older than the last accepted (signed wraparound-safe
	 * comparison).
	 */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* err > 1 means the decapsulation result is invalid and the packet
	 * must be dropped; err == 1 is only worth a rate-limited log.
	 */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	/* Crossing a netns boundary: strip state that must not leak. */
	if (!net_eq(tunnel->net, dev_net(tunnel->dev)))
		skb_scrub_packet(skb, true);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
475
/* Update path MTU state for a packet about to be tunnelled over @rt.
 * @df is the outer header's frag_off.  Returns 0 to continue transmission
 * or -E2BIG after sending the appropriate "too big" error (ICMP
 * FRAG_NEEDED / ICMPv6 PKT_TOOBIG) back to the sender.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	/* Payload size as seen by the inner protocol (tunnel overhead
	 * excluded).
	 */
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		/* DF set: effective MTU is the route MTU minus our
		 * encapsulation overhead.
		 */
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Pin the lowered MTU on the IPv6 route when the tunnel is
		 * point-to-point or the route is host-specific (/128).
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
523
/* Transmit path: encapsulate @skb with an outer IPv4 header built from
 * @tnl_params and send it via iptunnel_xmit().  Resolves the outer
 * destination (including NBMA tunnels with no fixed daddr), tos/ttl
 * inheritance, PMTU handling and headroom expansion.  On error the skb is
 * freed and the appropriate tx error counter bumped; never returns a
 * status to the caller.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: no configured remote, derive the outer
		 * destination from the inner packet's routing state.
		 */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible IPv6 addresses embed a usable
			 * IPv4 destination in their low 32 bits.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	/* Low bit of configured tos means "inherit from inner packet". */
	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}

	rt = ip_route_output_tunnel(tunnel->net, &fl4,
				    protocol,
				    dst, tnl_params->saddr,
				    tunnel->parms.o_key,
				    RT_TOS(tos),
				    tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	/* Routing back to ourselves would recurse. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* While recent ICMP errors are pending, signal link failure to the
	 * sender instead of suppressing them; still transmit the packet.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/* ttl == 0 means "inherit": copy from the inner header, or use the
	 * route's hop limit for non-IP payloads.
	 */
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	/* Propagate the inner DF bit into the outer header. */
	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom) {
		dev->needed_headroom = max_headroom;
		if (skb_cow_head(skb, dev->needed_headroom)) {
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return;
		}
	}

	/* iptunnel_xmit() consumes both the skb and the route reference. */
	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
			    ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df,
			    !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
670
/* Apply new parameters @p to an existing tunnel @t.  The tunnel is
 * unhashed and re-hashed around the address/key update because those
 * fields determine its bucket.  When the bound link changes, the MTU and
 * headroom are recomputed (and the MTU applied only if @set_mtu).
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as the link
		 * layer addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	/* These fields do not affect hashing; update in place. */
	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	netdev_state_change(dev);
}
702
/* Generic tunnel ioctl handler: SIOCGETTUNNEL queries parameters,
 * SIOCADD/SIOCCHGTUNNEL create or update a tunnel (CAP_NET_ADMIN
 * required), SIOCDELTUNNEL removes one.  @p is the user-supplied
 * parameter block, already copied into kernel space by the caller.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, look up the tunnel described by
		 * @p; on a real tunnel device, report its own parameters.
		 */
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A nonzero ttl forces DF; keys without the KEY flag are
		 * meaningless and cleared for consistent matching.
		 */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* @p matches a different device: refuse to
				 * create a duplicate configuration.
				 */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Changing this device's own parameters: the
				 * broadcast/p-t-p nature must stay the same.
				 */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			/* Deleting via the fallback device: resolve the
			 * target tunnel, but never the fallback itself.
			 */
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
796
797 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
798 {
799         struct ip_tunnel *tunnel = netdev_priv(dev);
800         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
801
802         if (new_mtu < 68 ||
803             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
804                 return -EINVAL;
805         dev->mtu = new_mtu;
806         return 0;
807 }
808 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
809
/* net_device destructor: release tunnel-private resources (GRO cells,
 * per-cpu stats) before freeing the device itself.  Order matters: the
 * private area lives inside the netdev allocation.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
818
819 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
820 {
821         struct ip_tunnel *tunnel = netdev_priv(dev);
822         struct ip_tunnel_net *itn;
823
824         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
825
826         if (itn->fb_tunnel_dev != dev) {
827                 ip_tunnel_del(netdev_priv(dev));
828                 unregister_netdevice_queue(dev, head);
829         }
830 }
831 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
832
/* Per-netns init: initialize the hash table and, when @ops is given,
 * create the namespace's fallback tunnel device named @devname.  Returns
 * 0 on success or the error from fallback device creation.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	/* No link ops means the caller manages devices itself; no fallback
	 * device is created.
	 */
	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev))
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
	rtnl_unlock();

	return PTR_RET(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
864
865 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
866                               struct rtnl_link_ops *ops)
867 {
868         struct net *net = dev_net(itn->fb_tunnel_dev);
869         struct net_device *dev, *aux;
870         int h;
871
872         for_each_netdev_safe(net, dev, aux)
873                 if (dev->rtnl_link_ops == ops)
874                         unregister_netdevice_queue(dev, head);
875
876         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
877                 struct ip_tunnel *t;
878                 struct hlist_node *n;
879                 struct hlist_head *thead = &itn->tunnels[h];
880
881                 hlist_for_each_entry_safe(t, n, thead, hash_node)
882                         /* If dev is in the same netns, it has already
883                          * been added to the list by the previous loop.
884                          */
885                         if (!net_eq(dev_net(t->dev), net))
886                                 unregister_netdevice_queue(t->dev, head);
887         }
888         if (itn->fb_tunnel_dev)
889                 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
890 }
891
/* Tear down all tunnels of one protocol family in a netns: collect
 * every matching device on a local list under RTNL, then unregister
 * them in a single batch via unregister_netdevice_many().
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
902
903 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
904                       struct ip_tunnel_parm *p)
905 {
906         struct ip_tunnel *nt;
907         struct net *net = dev_net(dev);
908         struct ip_tunnel_net *itn;
909         int mtu;
910         int err;
911
912         nt = netdev_priv(dev);
913         itn = net_generic(net, nt->ip_tnl_net_id);
914
915         if (ip_tunnel_find(itn, p, dev->type))
916                 return -EEXIST;
917
918         nt->net = net;
919         nt->parms = *p;
920         err = register_netdevice(dev);
921         if (err)
922                 goto out;
923
924         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
925                 eth_hw_addr_random(dev);
926
927         mtu = ip_tunnel_bind_dev(dev);
928         if (!tb[IFLA_MTU])
929                 dev->mtu = mtu;
930
931         ip_tunnel_add(itn, nt);
932
933 out:
934         return err;
935 }
936 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
937
938 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
939                          struct ip_tunnel_parm *p)
940 {
941         struct ip_tunnel *t;
942         struct ip_tunnel *tunnel = netdev_priv(dev);
943         struct net *net = tunnel->net;
944         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
945
946         if (dev == itn->fb_tunnel_dev)
947                 return -EINVAL;
948
949         t = ip_tunnel_find(itn, p, dev->type);
950
951         if (t) {
952                 if (t->dev != dev)
953                         return -EEXIST;
954         } else {
955                 t = tunnel;
956
957                 if (dev->type != ARPHRD_ETHER) {
958                         unsigned int nflags = 0;
959
960                         if (ipv4_is_multicast(p->iph.daddr))
961                                 nflags = IFF_BROADCAST;
962                         else if (p->iph.daddr)
963                                 nflags = IFF_POINTOPOINT;
964
965                         if ((dev->flags ^ nflags) &
966                             (IFF_POINTOPOINT | IFF_BROADCAST))
967                                 return -EINVAL;
968                 }
969         }
970
971         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
972         return 0;
973 }
974 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
975
976 int ip_tunnel_init(struct net_device *dev)
977 {
978         struct ip_tunnel *tunnel = netdev_priv(dev);
979         struct iphdr *iph = &tunnel->parms.iph;
980         int err;
981
982         dev->destructor = ip_tunnel_dev_free;
983         dev->tstats = alloc_percpu(struct pcpu_tstats);
984         if (!dev->tstats)
985                 return -ENOMEM;
986
987         err = gro_cells_init(&tunnel->gro_cells, dev);
988         if (err) {
989                 free_percpu(dev->tstats);
990                 return err;
991         }
992
993         tunnel->dev = dev;
994         tunnel->net = dev_net(dev);
995         strcpy(tunnel->parms.name, dev->name);
996         iph->version            = 4;
997         iph->ihl                = 5;
998
999         return 0;
1000 }
1001 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1002
1003 void ip_tunnel_uninit(struct net_device *dev)
1004 {
1005         struct ip_tunnel *tunnel = netdev_priv(dev);
1006         struct net *net = tunnel->net;
1007         struct ip_tunnel_net *itn;
1008
1009         itn = net_generic(net, tunnel->ip_tnl_net_id);
1010         /* fb_tunnel_dev will be unregisted in net-exit call. */
1011         if (itn->fb_tunnel_dev != dev)
1012                 ip_tunnel_del(netdev_priv(dev));
1013 }
1014 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1015
/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	/* Record the pernet subsystem id so later callbacks can look up
	 * this device's ip_tunnel_net via net_generic().
	 */
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1023
1024 MODULE_LICENSE("GPL");