]> Pileus Git - ~andy/linux/blob - net/ipv4/ip_tunnel.c
ASoC: txx9aclc_ac97: Fix kernel crash on probe
[~andy/linux] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
71 static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72                              struct dst_entry *dst)
73 {
74         struct dst_entry *old_dst;
75
76         if (dst) {
77                 if (dst->flags & DST_NOCACHE)
78                         dst = NULL;
79                 else
80                         dst_clone(dst);
81         }
82         old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
83         dst_release(old_dst);
84 }
85
86 static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
87 {
88         __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
89 }
90
91 static void tunnel_dst_reset(struct ip_tunnel *t)
92 {
93         tunnel_dst_set(t, NULL);
94 }
95
96 static void tunnel_dst_reset_all(struct ip_tunnel *t)
97 {
98         int i;
99
100         for_each_possible_cpu(i)
101                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102 }
103
104 static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
105 {
106         struct dst_entry *dst;
107
108         rcu_read_lock();
109         dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
110         if (dst)
111                 dst_hold(dst);
112         rcu_read_unlock();
113         return dst;
114 }
115
116 static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
117 {
118         struct dst_entry *dst = tunnel_dst_get(t);
119
120         if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
121                 tunnel_dst_reset(t);
122                 return NULL;
123         }
124
125         return dst;
126 }
127
128 /* Often modified stats are per cpu, other are shared (netdev->stats) */
129 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
130                                                 struct rtnl_link_stats64 *tot)
131 {
132         int i;
133
134         for_each_possible_cpu(i) {
135                 const struct pcpu_sw_netstats *tstats =
136                                                    per_cpu_ptr(dev->tstats, i);
137                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
138                 unsigned int start;
139
140                 do {
141                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
142                         rx_packets = tstats->rx_packets;
143                         tx_packets = tstats->tx_packets;
144                         rx_bytes = tstats->rx_bytes;
145                         tx_bytes = tstats->tx_bytes;
146                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
147
148                 tot->rx_packets += rx_packets;
149                 tot->tx_packets += tx_packets;
150                 tot->rx_bytes   += rx_bytes;
151                 tot->tx_bytes   += tx_bytes;
152         }
153
154         tot->multicast = dev->stats.multicast;
155
156         tot->rx_crc_errors = dev->stats.rx_crc_errors;
157         tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
158         tot->rx_length_errors = dev->stats.rx_length_errors;
159         tot->rx_frame_errors = dev->stats.rx_frame_errors;
160         tot->rx_errors = dev->stats.rx_errors;
161
162         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
163         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
164         tot->tx_dropped = dev->stats.tx_dropped;
165         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
166         tot->tx_errors = dev->stats.tx_errors;
167
168         tot->collisions  = dev->stats.collisions;
169
170         return tot;
171 }
172 EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
173
174 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
175                                 __be16 flags, __be32 key)
176 {
177         if (p->i_flags & TUNNEL_KEY) {
178                 if (flags & TUNNEL_KEY)
179                         return key == p->i_key;
180                 else
181                         /* key expected, none present */
182                         return false;
183         } else
184                 return !(flags & TUNNEL_KEY);
185 }
186
187 /* Fallback tunnel: no source, no destination, no key, no options
188
189    Tunnel hash table:
190    We require exact key match i.e. if a key is present in packet
191    it will match only tunnel with the same key; if it is not present,
192    it will match only keyless tunnel.
193
194    All keysless packets, if not matched configured keyless tunnels
195    will match fallback tunnel.
196    Given src, dst and key, find appropriate for input tunnel.
197 */
198 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
199                                    int link, __be16 flags,
200                                    __be32 remote, __be32 local,
201                                    __be32 key)
202 {
203         unsigned int hash;
204         struct ip_tunnel *t, *cand = NULL;
205         struct hlist_head *head;
206
207         hash = ip_tunnel_hash(key, remote);
208         head = &itn->tunnels[hash];
209
210         hlist_for_each_entry_rcu(t, head, hash_node) {
211                 if (local != t->parms.iph.saddr ||
212                     remote != t->parms.iph.daddr ||
213                     !(t->dev->flags & IFF_UP))
214                         continue;
215
216                 if (!ip_tunnel_key_match(&t->parms, flags, key))
217                         continue;
218
219                 if (t->parms.link == link)
220                         return t;
221                 else
222                         cand = t;
223         }
224
225         hlist_for_each_entry_rcu(t, head, hash_node) {
226                 if (remote != t->parms.iph.daddr ||
227                     !(t->dev->flags & IFF_UP))
228                         continue;
229
230                 if (!ip_tunnel_key_match(&t->parms, flags, key))
231                         continue;
232
233                 if (t->parms.link == link)
234                         return t;
235                 else if (!cand)
236                         cand = t;
237         }
238
239         hash = ip_tunnel_hash(key, 0);
240         head = &itn->tunnels[hash];
241
242         hlist_for_each_entry_rcu(t, head, hash_node) {
243                 if ((local != t->parms.iph.saddr &&
244                      (local != t->parms.iph.daddr ||
245                       !ipv4_is_multicast(local))) ||
246                     !(t->dev->flags & IFF_UP))
247                         continue;
248
249                 if (!ip_tunnel_key_match(&t->parms, flags, key))
250                         continue;
251
252                 if (t->parms.link == link)
253                         return t;
254                 else if (!cand)
255                         cand = t;
256         }
257
258         if (flags & TUNNEL_NO_KEY)
259                 goto skip_key_lookup;
260
261         hlist_for_each_entry_rcu(t, head, hash_node) {
262                 if (t->parms.i_key != key ||
263                     !(t->dev->flags & IFF_UP))
264                         continue;
265
266                 if (t->parms.link == link)
267                         return t;
268                 else if (!cand)
269                         cand = t;
270         }
271
272 skip_key_lookup:
273         if (cand)
274                 return cand;
275
276         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
277                 return netdev_priv(itn->fb_tunnel_dev);
278
279
280         return NULL;
281 }
282 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
283
284 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
285                                     struct ip_tunnel_parm *parms)
286 {
287         unsigned int h;
288         __be32 remote;
289
290         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
291                 remote = parms->iph.daddr;
292         else
293                 remote = 0;
294
295         h = ip_tunnel_hash(parms->i_key, remote);
296         return &itn->tunnels[h];
297 }
298
299 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
300 {
301         struct hlist_head *head = ip_bucket(itn, &t->parms);
302
303         hlist_add_head_rcu(&t->hash_node, head);
304 }
305
306 static void ip_tunnel_del(struct ip_tunnel *t)
307 {
308         hlist_del_init_rcu(&t->hash_node);
309 }
310
311 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
312                                         struct ip_tunnel_parm *parms,
313                                         int type)
314 {
315         __be32 remote = parms->iph.daddr;
316         __be32 local = parms->iph.saddr;
317         __be32 key = parms->i_key;
318         int link = parms->link;
319         struct ip_tunnel *t = NULL;
320         struct hlist_head *head = ip_bucket(itn, parms);
321
322         hlist_for_each_entry_rcu(t, head, hash_node) {
323                 if (local == t->parms.iph.saddr &&
324                     remote == t->parms.iph.daddr &&
325                     key == t->parms.i_key &&
326                     link == t->parms.link &&
327                     type == t->dev->type)
328                         break;
329         }
330         return t;
331 }
332
333 static struct net_device *__ip_tunnel_create(struct net *net,
334                                              const struct rtnl_link_ops *ops,
335                                              struct ip_tunnel_parm *parms)
336 {
337         int err;
338         struct ip_tunnel *tunnel;
339         struct net_device *dev;
340         char name[IFNAMSIZ];
341
342         if (parms->name[0])
343                 strlcpy(name, parms->name, IFNAMSIZ);
344         else {
345                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
346                         err = -E2BIG;
347                         goto failed;
348                 }
349                 strlcpy(name, ops->kind, IFNAMSIZ);
350                 strncat(name, "%d", 2);
351         }
352
353         ASSERT_RTNL();
354         dev = alloc_netdev(ops->priv_size, name, ops->setup);
355         if (!dev) {
356                 err = -ENOMEM;
357                 goto failed;
358         }
359         dev_net_set(dev, net);
360
361         dev->rtnl_link_ops = ops;
362
363         tunnel = netdev_priv(dev);
364         tunnel->parms = *parms;
365         tunnel->net = net;
366
367         err = register_netdevice(dev);
368         if (err)
369                 goto failed_free;
370
371         return dev;
372
373 failed_free:
374         free_netdev(dev);
375 failed:
376         return ERR_PTR(err);
377 }
378
379 static inline void init_tunnel_flow(struct flowi4 *fl4,
380                                     int proto,
381                                     __be32 daddr, __be32 saddr,
382                                     __be32 key, __u8 tos, int oif)
383 {
384         memset(fl4, 0, sizeof(*fl4));
385         fl4->flowi4_oif = oif;
386         fl4->daddr = daddr;
387         fl4->saddr = saddr;
388         fl4->flowi4_tos = tos;
389         fl4->flowi4_proto = proto;
390         fl4->fl4_gre_key = key;
391 }
392
393 static int ip_tunnel_bind_dev(struct net_device *dev)
394 {
395         struct net_device *tdev = NULL;
396         struct ip_tunnel *tunnel = netdev_priv(dev);
397         const struct iphdr *iph;
398         int hlen = LL_MAX_HEADER;
399         int mtu = ETH_DATA_LEN;
400         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
401
402         iph = &tunnel->parms.iph;
403
404         /* Guess output device to choose reasonable mtu and needed_headroom */
405         if (iph->daddr) {
406                 struct flowi4 fl4;
407                 struct rtable *rt;
408
409                 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
410                                  iph->saddr, tunnel->parms.o_key,
411                                  RT_TOS(iph->tos), tunnel->parms.link);
412                 rt = ip_route_output_key(tunnel->net, &fl4);
413
414                 if (!IS_ERR(rt)) {
415                         tdev = rt->dst.dev;
416                         tunnel_dst_set(tunnel, &rt->dst);
417                         ip_rt_put(rt);
418                 }
419                 if (dev->type != ARPHRD_ETHER)
420                         dev->flags |= IFF_POINTOPOINT;
421         }
422
423         if (!tdev && tunnel->parms.link)
424                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
425
426         if (tdev) {
427                 hlen = tdev->hard_header_len + tdev->needed_headroom;
428                 mtu = tdev->mtu;
429         }
430         dev->iflink = tunnel->parms.link;
431
432         dev->needed_headroom = t_hlen + hlen;
433         mtu -= (dev->hard_header_len + t_hlen);
434
435         if (mtu < 68)
436                 mtu = 68;
437
438         return mtu;
439 }
440
441 static struct ip_tunnel *ip_tunnel_create(struct net *net,
442                                           struct ip_tunnel_net *itn,
443                                           struct ip_tunnel_parm *parms)
444 {
445         struct ip_tunnel *nt, *fbt;
446         struct net_device *dev;
447
448         BUG_ON(!itn->fb_tunnel_dev);
449         fbt = netdev_priv(itn->fb_tunnel_dev);
450         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
451         if (IS_ERR(dev))
452                 return NULL;
453
454         dev->mtu = ip_tunnel_bind_dev(dev);
455
456         nt = netdev_priv(dev);
457         ip_tunnel_add(itn, nt);
458         return nt;
459 }
460
461 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
462                   const struct tnl_ptk_info *tpi, bool log_ecn_error)
463 {
464         struct pcpu_sw_netstats *tstats;
465         const struct iphdr *iph = ip_hdr(skb);
466         int err;
467
468 #ifdef CONFIG_NET_IPGRE_BROADCAST
469         if (ipv4_is_multicast(iph->daddr)) {
470                 /* Looped back packet, drop it! */
471                 if (rt_is_output_route(skb_rtable(skb)))
472                         goto drop;
473                 tunnel->dev->stats.multicast++;
474                 skb->pkt_type = PACKET_BROADCAST;
475         }
476 #endif
477
478         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
479              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
480                 tunnel->dev->stats.rx_crc_errors++;
481                 tunnel->dev->stats.rx_errors++;
482                 goto drop;
483         }
484
485         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
486                 if (!(tpi->flags&TUNNEL_SEQ) ||
487                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
488                         tunnel->dev->stats.rx_fifo_errors++;
489                         tunnel->dev->stats.rx_errors++;
490                         goto drop;
491                 }
492                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
493         }
494
495         err = IP_ECN_decapsulate(iph, skb);
496         if (unlikely(err)) {
497                 if (log_ecn_error)
498                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
499                                         &iph->saddr, iph->tos);
500                 if (err > 1) {
501                         ++tunnel->dev->stats.rx_frame_errors;
502                         ++tunnel->dev->stats.rx_errors;
503                         goto drop;
504                 }
505         }
506
507         tstats = this_cpu_ptr(tunnel->dev->tstats);
508         u64_stats_update_begin(&tstats->syncp);
509         tstats->rx_packets++;
510         tstats->rx_bytes += skb->len;
511         u64_stats_update_end(&tstats->syncp);
512
513         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
514
515         if (tunnel->dev->type == ARPHRD_ETHER) {
516                 skb->protocol = eth_type_trans(skb, tunnel->dev);
517                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
518         } else {
519                 skb->dev = tunnel->dev;
520         }
521
522         gro_cells_receive(&tunnel->gro_cells, skb);
523         return 0;
524
525 drop:
526         kfree_skb(skb);
527         return 0;
528 }
529 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
530
531 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
532                             struct rtable *rt, __be16 df)
533 {
534         struct ip_tunnel *tunnel = netdev_priv(dev);
535         int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
536         int mtu;
537
538         if (df)
539                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
540                                         - sizeof(struct iphdr) - tunnel->hlen;
541         else
542                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
543
544         if (skb_dst(skb))
545                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
546
547         if (skb->protocol == htons(ETH_P_IP)) {
548                 if (!skb_is_gso(skb) &&
549                     (df & htons(IP_DF)) && mtu < pkt_size) {
550                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
551                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
552                         return -E2BIG;
553                 }
554         }
555 #if IS_ENABLED(CONFIG_IPV6)
556         else if (skb->protocol == htons(ETH_P_IPV6)) {
557                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
558
559                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
560                            mtu >= IPV6_MIN_MTU) {
561                         if ((tunnel->parms.iph.daddr &&
562                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
563                             rt6->rt6i_dst.plen == 128) {
564                                 rt6->rt6i_flags |= RTF_MODIFIED;
565                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
566                         }
567                 }
568
569                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
570                                         mtu < pkt_size) {
571                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
572                         return -E2BIG;
573                 }
574         }
575 #endif
576         return 0;
577 }
578
579 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
580                     const struct iphdr *tnl_params, const u8 protocol)
581 {
582         struct ip_tunnel *tunnel = netdev_priv(dev);
583         const struct iphdr *inner_iph;
584         struct flowi4 fl4;
585         u8     tos, ttl;
586         __be16 df;
587         struct rtable *rt = NULL;       /* Route to the other host */
588         unsigned int max_headroom;      /* The extra header space needed */
589         __be32 dst;
590         int err;
591         bool connected = true;
592
593         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
594
595         dst = tnl_params->daddr;
596         if (dst == 0) {
597                 /* NBMA tunnel */
598
599                 if (skb_dst(skb) == NULL) {
600                         dev->stats.tx_fifo_errors++;
601                         goto tx_error;
602                 }
603
604                 if (skb->protocol == htons(ETH_P_IP)) {
605                         rt = skb_rtable(skb);
606                         dst = rt_nexthop(rt, inner_iph->daddr);
607                 }
608 #if IS_ENABLED(CONFIG_IPV6)
609                 else if (skb->protocol == htons(ETH_P_IPV6)) {
610                         const struct in6_addr *addr6;
611                         struct neighbour *neigh;
612                         bool do_tx_error_icmp;
613                         int addr_type;
614
615                         neigh = dst_neigh_lookup(skb_dst(skb),
616                                                  &ipv6_hdr(skb)->daddr);
617                         if (neigh == NULL)
618                                 goto tx_error;
619
620                         addr6 = (const struct in6_addr *)&neigh->primary_key;
621                         addr_type = ipv6_addr_type(addr6);
622
623                         if (addr_type == IPV6_ADDR_ANY) {
624                                 addr6 = &ipv6_hdr(skb)->daddr;
625                                 addr_type = ipv6_addr_type(addr6);
626                         }
627
628                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
629                                 do_tx_error_icmp = true;
630                         else {
631                                 do_tx_error_icmp = false;
632                                 dst = addr6->s6_addr32[3];
633                         }
634                         neigh_release(neigh);
635                         if (do_tx_error_icmp)
636                                 goto tx_error_icmp;
637                 }
638 #endif
639                 else
640                         goto tx_error;
641
642                 connected = false;
643         }
644
645         tos = tnl_params->tos;
646         if (tos & 0x1) {
647                 tos &= ~0x1;
648                 if (skb->protocol == htons(ETH_P_IP)) {
649                         tos = inner_iph->tos;
650                         connected = false;
651                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
652                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
653                         connected = false;
654                 }
655         }
656
657         init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
658                          tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
659
660         if (connected)
661                 rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
662
663         if (!rt) {
664                 rt = ip_route_output_key(tunnel->net, &fl4);
665
666                 if (IS_ERR(rt)) {
667                         dev->stats.tx_carrier_errors++;
668                         goto tx_error;
669                 }
670                 if (connected)
671                         tunnel_dst_set(tunnel, &rt->dst);
672         }
673
674         if (rt->dst.dev == dev) {
675                 ip_rt_put(rt);
676                 dev->stats.collisions++;
677                 goto tx_error;
678         }
679
680         if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
681                 ip_rt_put(rt);
682                 goto tx_error;
683         }
684
685         if (tunnel->err_count > 0) {
686                 if (time_before(jiffies,
687                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
688                         tunnel->err_count--;
689
690                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
691                         dst_link_failure(skb);
692                 } else
693                         tunnel->err_count = 0;
694         }
695
696         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
697         ttl = tnl_params->ttl;
698         if (ttl == 0) {
699                 if (skb->protocol == htons(ETH_P_IP))
700                         ttl = inner_iph->ttl;
701 #if IS_ENABLED(CONFIG_IPV6)
702                 else if (skb->protocol == htons(ETH_P_IPV6))
703                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
704 #endif
705                 else
706                         ttl = ip4_dst_hoplimit(&rt->dst);
707         }
708
709         df = tnl_params->frag_off;
710         if (skb->protocol == htons(ETH_P_IP))
711                 df |= (inner_iph->frag_off&htons(IP_DF));
712
713         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
714                         + rt->dst.header_len;
715         if (max_headroom > dev->needed_headroom)
716                 dev->needed_headroom = max_headroom;
717
718         if (skb_cow_head(skb, dev->needed_headroom)) {
719                 dev->stats.tx_dropped++;
720                 kfree_skb(skb);
721                 return;
722         }
723
724         err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
725                             tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
726         iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
727
728         return;
729
730 #if IS_ENABLED(CONFIG_IPV6)
731 tx_error_icmp:
732         dst_link_failure(skb);
733 #endif
734 tx_error:
735         dev->stats.tx_errors++;
736         kfree_skb(skb);
737 }
738 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
739
740 static void ip_tunnel_update(struct ip_tunnel_net *itn,
741                              struct ip_tunnel *t,
742                              struct net_device *dev,
743                              struct ip_tunnel_parm *p,
744                              bool set_mtu)
745 {
746         ip_tunnel_del(t);
747         t->parms.iph.saddr = p->iph.saddr;
748         t->parms.iph.daddr = p->iph.daddr;
749         t->parms.i_key = p->i_key;
750         t->parms.o_key = p->o_key;
751         if (dev->type != ARPHRD_ETHER) {
752                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
753                 memcpy(dev->broadcast, &p->iph.daddr, 4);
754         }
755         ip_tunnel_add(itn, t);
756
757         t->parms.iph.ttl = p->iph.ttl;
758         t->parms.iph.tos = p->iph.tos;
759         t->parms.iph.frag_off = p->iph.frag_off;
760
761         if (t->parms.link != p->link) {
762                 int mtu;
763
764                 t->parms.link = p->link;
765                 mtu = ip_tunnel_bind_dev(dev);
766                 if (set_mtu)
767                         dev->mtu = mtu;
768         }
769         tunnel_dst_reset_all(t);
770         netdev_state_change(dev);
771 }
772
773 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
774 {
775         int err = 0;
776         struct ip_tunnel *t;
777         struct net *net = dev_net(dev);
778         struct ip_tunnel *tunnel = netdev_priv(dev);
779         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
780
781         BUG_ON(!itn->fb_tunnel_dev);
782         switch (cmd) {
783         case SIOCGETTUNNEL:
784                 t = NULL;
785                 if (dev == itn->fb_tunnel_dev)
786                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
787                 if (t == NULL)
788                         t = netdev_priv(dev);
789                 memcpy(p, &t->parms, sizeof(*p));
790                 break;
791
792         case SIOCADDTUNNEL:
793         case SIOCCHGTUNNEL:
794                 err = -EPERM;
795                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
796                         goto done;
797                 if (p->iph.ttl)
798                         p->iph.frag_off |= htons(IP_DF);
799                 if (!(p->i_flags&TUNNEL_KEY))
800                         p->i_key = 0;
801                 if (!(p->o_flags&TUNNEL_KEY))
802                         p->o_key = 0;
803
804                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
805
806                 if (!t && (cmd == SIOCADDTUNNEL))
807                         t = ip_tunnel_create(net, itn, p);
808
809                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
810                         if (t != NULL) {
811                                 if (t->dev != dev) {
812                                         err = -EEXIST;
813                                         break;
814                                 }
815                         } else {
816                                 unsigned int nflags = 0;
817
818                                 if (ipv4_is_multicast(p->iph.daddr))
819                                         nflags = IFF_BROADCAST;
820                                 else if (p->iph.daddr)
821                                         nflags = IFF_POINTOPOINT;
822
823                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
824                                         err = -EINVAL;
825                                         break;
826                                 }
827
828                                 t = netdev_priv(dev);
829                         }
830                 }
831
832                 if (t) {
833                         err = 0;
834                         ip_tunnel_update(itn, t, dev, p, true);
835                 } else
836                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
837                 break;
838
839         case SIOCDELTUNNEL:
840                 err = -EPERM;
841                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
842                         goto done;
843
844                 if (dev == itn->fb_tunnel_dev) {
845                         err = -ENOENT;
846                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
847                         if (t == NULL)
848                                 goto done;
849                         err = -EPERM;
850                         if (t == netdev_priv(itn->fb_tunnel_dev))
851                                 goto done;
852                         dev = t->dev;
853                 }
854                 unregister_netdevice(dev);
855                 err = 0;
856                 break;
857
858         default:
859                 err = -EINVAL;
860         }
861
862 done:
863         return err;
864 }
865 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
866
867 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
868 {
869         struct ip_tunnel *tunnel = netdev_priv(dev);
870         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
871
872         if (new_mtu < 68 ||
873             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
874                 return -EINVAL;
875         dev->mtu = new_mtu;
876         return 0;
877 }
878 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
879
880 static void ip_tunnel_dev_free(struct net_device *dev)
881 {
882         struct ip_tunnel *tunnel = netdev_priv(dev);
883
884         gro_cells_destroy(&tunnel->gro_cells);
885         free_percpu(tunnel->dst_cache);
886         free_percpu(dev->tstats);
887         free_netdev(dev);
888 }
889
890 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
891 {
892         struct ip_tunnel *tunnel = netdev_priv(dev);
893         struct ip_tunnel_net *itn;
894
895         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
896
897         if (itn->fb_tunnel_dev != dev) {
898                 ip_tunnel_del(netdev_priv(dev));
899                 unregister_netdevice_queue(dev, head);
900         }
901 }
902 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
903
904 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
905                                   struct rtnl_link_ops *ops, char *devname)
906 {
907         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
908         struct ip_tunnel_parm parms;
909         unsigned int i;
910
911         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
912                 INIT_HLIST_HEAD(&itn->tunnels[i]);
913
914         if (!ops) {
915                 itn->fb_tunnel_dev = NULL;
916                 return 0;
917         }
918
919         memset(&parms, 0, sizeof(parms));
920         if (devname)
921                 strlcpy(parms.name, devname, IFNAMSIZ);
922
923         rtnl_lock();
924         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
925         /* FB netdevice is special: we have one, and only one per netns.
926          * Allowing to move it to another netns is clearly unsafe.
927          */
928         if (!IS_ERR(itn->fb_tunnel_dev)) {
929                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
930                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
931         }
932         rtnl_unlock();
933
934         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
935 }
936 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
937
938 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
939                               struct rtnl_link_ops *ops)
940 {
941         struct net *net = dev_net(itn->fb_tunnel_dev);
942         struct net_device *dev, *aux;
943         int h;
944
945         for_each_netdev_safe(net, dev, aux)
946                 if (dev->rtnl_link_ops == ops)
947                         unregister_netdevice_queue(dev, head);
948
949         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
950                 struct ip_tunnel *t;
951                 struct hlist_node *n;
952                 struct hlist_head *thead = &itn->tunnels[h];
953
954                 hlist_for_each_entry_safe(t, n, thead, hash_node)
955                         /* If dev is in the same netns, it has already
956                          * been added to the list by the previous loop.
957                          */
958                         if (!net_eq(dev_net(t->dev), net))
959                                 unregister_netdevice_queue(t->dev, head);
960         }
961 }
962
963 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
964 {
965         LIST_HEAD(list);
966
967         rtnl_lock();
968         ip_tunnel_destroy(itn, &list, ops);
969         unregister_netdevice_many(&list);
970         rtnl_unlock();
971 }
972 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
973
974 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
975                       struct ip_tunnel_parm *p)
976 {
977         struct ip_tunnel *nt;
978         struct net *net = dev_net(dev);
979         struct ip_tunnel_net *itn;
980         int mtu;
981         int err;
982
983         nt = netdev_priv(dev);
984         itn = net_generic(net, nt->ip_tnl_net_id);
985
986         if (ip_tunnel_find(itn, p, dev->type))
987                 return -EEXIST;
988
989         nt->net = net;
990         nt->parms = *p;
991         err = register_netdevice(dev);
992         if (err)
993                 goto out;
994
995         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
996                 eth_hw_addr_random(dev);
997
998         mtu = ip_tunnel_bind_dev(dev);
999         if (!tb[IFLA_MTU])
1000                 dev->mtu = mtu;
1001
1002         ip_tunnel_add(itn, nt);
1003
1004 out:
1005         return err;
1006 }
1007 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1008
1009 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1010                          struct ip_tunnel_parm *p)
1011 {
1012         struct ip_tunnel *t;
1013         struct ip_tunnel *tunnel = netdev_priv(dev);
1014         struct net *net = tunnel->net;
1015         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1016
1017         if (dev == itn->fb_tunnel_dev)
1018                 return -EINVAL;
1019
1020         t = ip_tunnel_find(itn, p, dev->type);
1021
1022         if (t) {
1023                 if (t->dev != dev)
1024                         return -EEXIST;
1025         } else {
1026                 t = tunnel;
1027
1028                 if (dev->type != ARPHRD_ETHER) {
1029                         unsigned int nflags = 0;
1030
1031                         if (ipv4_is_multicast(p->iph.daddr))
1032                                 nflags = IFF_BROADCAST;
1033                         else if (p->iph.daddr)
1034                                 nflags = IFF_POINTOPOINT;
1035
1036                         if ((dev->flags ^ nflags) &
1037                             (IFF_POINTOPOINT | IFF_BROADCAST))
1038                                 return -EINVAL;
1039                 }
1040         }
1041
1042         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1043         return 0;
1044 }
1045 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1046
1047 int ip_tunnel_init(struct net_device *dev)
1048 {
1049         struct ip_tunnel *tunnel = netdev_priv(dev);
1050         struct iphdr *iph = &tunnel->parms.iph;
1051         int i, err;
1052
1053         dev->destructor = ip_tunnel_dev_free;
1054         dev->tstats = alloc_percpu(struct pcpu_sw_netstats);
1055         if (!dev->tstats)
1056                 return -ENOMEM;
1057
1058         for_each_possible_cpu(i) {
1059                 struct pcpu_sw_netstats *ipt_stats;
1060                 ipt_stats = per_cpu_ptr(dev->tstats, i);
1061                 u64_stats_init(&ipt_stats->syncp);
1062         }
1063
1064         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1065         if (!tunnel->dst_cache) {
1066                 free_percpu(dev->tstats);
1067                 return -ENOMEM;
1068         }
1069
1070         err = gro_cells_init(&tunnel->gro_cells, dev);
1071         if (err) {
1072                 free_percpu(tunnel->dst_cache);
1073                 free_percpu(dev->tstats);
1074                 return err;
1075         }
1076
1077         tunnel->dev = dev;
1078         tunnel->net = dev_net(dev);
1079         strcpy(tunnel->parms.name, dev->name);
1080         iph->version            = 4;
1081         iph->ihl                = 5;
1082
1083         return 0;
1084 }
1085 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1086
1087 void ip_tunnel_uninit(struct net_device *dev)
1088 {
1089         struct ip_tunnel *tunnel = netdev_priv(dev);
1090         struct net *net = tunnel->net;
1091         struct ip_tunnel_net *itn;
1092
1093         itn = net_generic(net, tunnel->ip_tnl_net_id);
1094         /* fb_tunnel_dev will be unregisted in net-exit call. */
1095         if (itn->fb_tunnel_dev != dev)
1096                 ip_tunnel_del(netdev_priv(dev));
1097
1098         tunnel_dst_reset_all(tunnel);
1099 }
1100 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1101
1102 /* Do least required initialization, rest of init is done in tunnel_init call */
1103 void ip_tunnel_setup(struct net_device *dev, int net_id)
1104 {
1105         struct ip_tunnel *tunnel = netdev_priv(dev);
1106         tunnel->ip_tnl_net_id = net_id;
1107 }
1108 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1109
1110 MODULE_LICENSE("GPL");