]> Pileus Git - ~andy/linux/blob - net/ipv4/ip_tunnel.c
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/shemminger...
[~andy/linux] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57
58 #if IS_ENABLED(CONFIG_IPV6)
59 #include <net/ipv6.h>
60 #include <net/ip6_fib.h>
61 #include <net/ip6_route.h>
62 #endif
63
64 static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65                                    __be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
71 /* Often modified stats are per cpu, other are shared (netdev->stats) */
72 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
73                                                 struct rtnl_link_stats64 *tot)
74 {
75         int i;
76
77         for_each_possible_cpu(i) {
78                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
79                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
80                 unsigned int start;
81
82                 do {
83                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
84                         rx_packets = tstats->rx_packets;
85                         tx_packets = tstats->tx_packets;
86                         rx_bytes = tstats->rx_bytes;
87                         tx_bytes = tstats->tx_bytes;
88                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
89
90                 tot->rx_packets += rx_packets;
91                 tot->tx_packets += tx_packets;
92                 tot->rx_bytes   += rx_bytes;
93                 tot->tx_bytes   += tx_bytes;
94         }
95
96         tot->multicast = dev->stats.multicast;
97
98         tot->rx_crc_errors = dev->stats.rx_crc_errors;
99         tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
100         tot->rx_length_errors = dev->stats.rx_length_errors;
101         tot->rx_frame_errors = dev->stats.rx_frame_errors;
102         tot->rx_errors = dev->stats.rx_errors;
103
104         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
105         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
106         tot->tx_dropped = dev->stats.tx_dropped;
107         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
108         tot->tx_errors = dev->stats.tx_errors;
109
110         tot->collisions  = dev->stats.collisions;
111
112         return tot;
113 }
114 EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
115
116 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117                                 __be16 flags, __be32 key)
118 {
119         if (p->i_flags & TUNNEL_KEY) {
120                 if (flags & TUNNEL_KEY)
121                         return key == p->i_key;
122                 else
123                         /* key expected, none present */
124                         return false;
125         } else
126                 return !(flags & TUNNEL_KEY);
127 }
128
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
140 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
141                                    int link, __be16 flags,
142                                    __be32 remote, __be32 local,
143                                    __be32 key)
144 {
145         unsigned int hash;
146         struct ip_tunnel *t, *cand = NULL;
147         struct hlist_head *head;
148
149         hash = ip_tunnel_hash(itn, key, remote);
150         head = &itn->tunnels[hash];
151
152         hlist_for_each_entry_rcu(t, head, hash_node) {
153                 if (local != t->parms.iph.saddr ||
154                     remote != t->parms.iph.daddr ||
155                     !(t->dev->flags & IFF_UP))
156                         continue;
157
158                 if (!ip_tunnel_key_match(&t->parms, flags, key))
159                         continue;
160
161                 if (t->parms.link == link)
162                         return t;
163                 else
164                         cand = t;
165         }
166
167         hlist_for_each_entry_rcu(t, head, hash_node) {
168                 if (remote != t->parms.iph.daddr ||
169                     !(t->dev->flags & IFF_UP))
170                         continue;
171
172                 if (!ip_tunnel_key_match(&t->parms, flags, key))
173                         continue;
174
175                 if (t->parms.link == link)
176                         return t;
177                 else if (!cand)
178                         cand = t;
179         }
180
181         hash = ip_tunnel_hash(itn, key, 0);
182         head = &itn->tunnels[hash];
183
184         hlist_for_each_entry_rcu(t, head, hash_node) {
185                 if ((local != t->parms.iph.saddr &&
186                      (local != t->parms.iph.daddr ||
187                       !ipv4_is_multicast(local))) ||
188                     !(t->dev->flags & IFF_UP))
189                         continue;
190
191                 if (!ip_tunnel_key_match(&t->parms, flags, key))
192                         continue;
193
194                 if (t->parms.link == link)
195                         return t;
196                 else if (!cand)
197                         cand = t;
198         }
199
200         if (flags & TUNNEL_NO_KEY)
201                 goto skip_key_lookup;
202
203         hlist_for_each_entry_rcu(t, head, hash_node) {
204                 if (t->parms.i_key != key ||
205                     !(t->dev->flags & IFF_UP))
206                         continue;
207
208                 if (t->parms.link == link)
209                         return t;
210                 else if (!cand)
211                         cand = t;
212         }
213
214 skip_key_lookup:
215         if (cand)
216                 return cand;
217
218         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
219                 return netdev_priv(itn->fb_tunnel_dev);
220
221
222         return NULL;
223 }
224 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
225
226 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
227                                     struct ip_tunnel_parm *parms)
228 {
229         unsigned int h;
230         __be32 remote;
231
232         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
233                 remote = parms->iph.daddr;
234         else
235                 remote = 0;
236
237         h = ip_tunnel_hash(itn, parms->i_key, remote);
238         return &itn->tunnels[h];
239 }
240
241 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
242 {
243         struct hlist_head *head = ip_bucket(itn, &t->parms);
244
245         hlist_add_head_rcu(&t->hash_node, head);
246 }
247
248 static void ip_tunnel_del(struct ip_tunnel *t)
249 {
250         hlist_del_init_rcu(&t->hash_node);
251 }
252
253 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
254                                         struct ip_tunnel_parm *parms,
255                                         int type)
256 {
257         __be32 remote = parms->iph.daddr;
258         __be32 local = parms->iph.saddr;
259         __be32 key = parms->i_key;
260         int link = parms->link;
261         struct ip_tunnel *t = NULL;
262         struct hlist_head *head = ip_bucket(itn, parms);
263
264         hlist_for_each_entry_rcu(t, head, hash_node) {
265                 if (local == t->parms.iph.saddr &&
266                     remote == t->parms.iph.daddr &&
267                     key == t->parms.i_key &&
268                     link == t->parms.link &&
269                     type == t->dev->type)
270                         break;
271         }
272         return t;
273 }
274
275 static struct net_device *__ip_tunnel_create(struct net *net,
276                                              const struct rtnl_link_ops *ops,
277                                              struct ip_tunnel_parm *parms)
278 {
279         int err;
280         struct ip_tunnel *tunnel;
281         struct net_device *dev;
282         char name[IFNAMSIZ];
283
284         if (parms->name[0])
285                 strlcpy(name, parms->name, IFNAMSIZ);
286         else {
287                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
288                         err = -E2BIG;
289                         goto failed;
290                 }
291                 strlcpy(name, ops->kind, IFNAMSIZ);
292                 strncat(name, "%d", 2);
293         }
294
295         ASSERT_RTNL();
296         dev = alloc_netdev(ops->priv_size, name, ops->setup);
297         if (!dev) {
298                 err = -ENOMEM;
299                 goto failed;
300         }
301         dev_net_set(dev, net);
302
303         dev->rtnl_link_ops = ops;
304
305         tunnel = netdev_priv(dev);
306         tunnel->parms = *parms;
307         tunnel->net = net;
308
309         err = register_netdevice(dev);
310         if (err)
311                 goto failed_free;
312
313         return dev;
314
315 failed_free:
316         free_netdev(dev);
317 failed:
318         return ERR_PTR(err);
319 }
320
321 static inline struct rtable *ip_route_output_tunnel(struct net *net,
322                                                     struct flowi4 *fl4,
323                                                     int proto,
324                                                     __be32 daddr, __be32 saddr,
325                                                     __be32 key, __u8 tos, int oif)
326 {
327         memset(fl4, 0, sizeof(*fl4));
328         fl4->flowi4_oif = oif;
329         fl4->daddr = daddr;
330         fl4->saddr = saddr;
331         fl4->flowi4_tos = tos;
332         fl4->flowi4_proto = proto;
333         fl4->fl4_gre_key = key;
334         return ip_route_output_key(net, fl4);
335 }
336
337 static int ip_tunnel_bind_dev(struct net_device *dev)
338 {
339         struct net_device *tdev = NULL;
340         struct ip_tunnel *tunnel = netdev_priv(dev);
341         const struct iphdr *iph;
342         int hlen = LL_MAX_HEADER;
343         int mtu = ETH_DATA_LEN;
344         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
345
346         iph = &tunnel->parms.iph;
347
348         /* Guess output device to choose reasonable mtu and needed_headroom */
349         if (iph->daddr) {
350                 struct flowi4 fl4;
351                 struct rtable *rt;
352
353                 rt = ip_route_output_tunnel(dev_net(dev), &fl4,
354                                             tunnel->parms.iph.protocol,
355                                             iph->daddr, iph->saddr,
356                                             tunnel->parms.o_key,
357                                             RT_TOS(iph->tos),
358                                             tunnel->parms.link);
359                 if (!IS_ERR(rt)) {
360                         tdev = rt->dst.dev;
361                         ip_rt_put(rt);
362                 }
363                 if (dev->type != ARPHRD_ETHER)
364                         dev->flags |= IFF_POINTOPOINT;
365         }
366
367         if (!tdev && tunnel->parms.link)
368                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
369
370         if (tdev) {
371                 hlen = tdev->hard_header_len + tdev->needed_headroom;
372                 mtu = tdev->mtu;
373         }
374         dev->iflink = tunnel->parms.link;
375
376         dev->needed_headroom = t_hlen + hlen;
377         mtu -= (dev->hard_header_len + t_hlen);
378
379         if (mtu < 68)
380                 mtu = 68;
381
382         return mtu;
383 }
384
385 static struct ip_tunnel *ip_tunnel_create(struct net *net,
386                                           struct ip_tunnel_net *itn,
387                                           struct ip_tunnel_parm *parms)
388 {
389         struct ip_tunnel *nt, *fbt;
390         struct net_device *dev;
391
392         BUG_ON(!itn->fb_tunnel_dev);
393         fbt = netdev_priv(itn->fb_tunnel_dev);
394         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
395         if (IS_ERR(dev))
396                 return NULL;
397
398         dev->mtu = ip_tunnel_bind_dev(dev);
399
400         nt = netdev_priv(dev);
401         ip_tunnel_add(itn, nt);
402         return nt;
403 }
404
405 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
406                   const struct tnl_ptk_info *tpi, bool log_ecn_error)
407 {
408         struct pcpu_tstats *tstats;
409         const struct iphdr *iph = ip_hdr(skb);
410         int err;
411
412 #ifdef CONFIG_NET_IPGRE_BROADCAST
413         if (ipv4_is_multicast(iph->daddr)) {
414                 /* Looped back packet, drop it! */
415                 if (rt_is_output_route(skb_rtable(skb)))
416                         goto drop;
417                 tunnel->dev->stats.multicast++;
418                 skb->pkt_type = PACKET_BROADCAST;
419         }
420 #endif
421
422         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
423              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
424                 tunnel->dev->stats.rx_crc_errors++;
425                 tunnel->dev->stats.rx_errors++;
426                 goto drop;
427         }
428
429         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
430                 if (!(tpi->flags&TUNNEL_SEQ) ||
431                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
432                         tunnel->dev->stats.rx_fifo_errors++;
433                         tunnel->dev->stats.rx_errors++;
434                         goto drop;
435                 }
436                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
437         }
438
439         err = IP_ECN_decapsulate(iph, skb);
440         if (unlikely(err)) {
441                 if (log_ecn_error)
442                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
443                                         &iph->saddr, iph->tos);
444                 if (err > 1) {
445                         ++tunnel->dev->stats.rx_frame_errors;
446                         ++tunnel->dev->stats.rx_errors;
447                         goto drop;
448                 }
449         }
450
451         tstats = this_cpu_ptr(tunnel->dev->tstats);
452         u64_stats_update_begin(&tstats->syncp);
453         tstats->rx_packets++;
454         tstats->rx_bytes += skb->len;
455         u64_stats_update_end(&tstats->syncp);
456
457         if (tunnel->net != dev_net(tunnel->dev))
458                 skb_scrub_packet(skb);
459
460         if (tunnel->dev->type == ARPHRD_ETHER) {
461                 skb->protocol = eth_type_trans(skb, tunnel->dev);
462                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
463         } else {
464                 skb->dev = tunnel->dev;
465         }
466         gro_cells_receive(&tunnel->gro_cells, skb);
467         return 0;
468
469 drop:
470         kfree_skb(skb);
471         return 0;
472 }
473 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
474
475 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
476                     const struct iphdr *tnl_params, const u8 protocol)
477 {
478         struct ip_tunnel *tunnel = netdev_priv(dev);
479         const struct iphdr *inner_iph;
480         struct flowi4 fl4;
481         u8     tos, ttl;
482         __be16 df;
483         struct rtable *rt;              /* Route to the other host */
484         unsigned int max_headroom;      /* The extra header space needed */
485         __be32 dst;
486         int mtu;
487         int err;
488
489         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
490
491         dst = tnl_params->daddr;
492         if (dst == 0) {
493                 /* NBMA tunnel */
494
495                 if (skb_dst(skb) == NULL) {
496                         dev->stats.tx_fifo_errors++;
497                         goto tx_error;
498                 }
499
500                 if (skb->protocol == htons(ETH_P_IP)) {
501                         rt = skb_rtable(skb);
502                         dst = rt_nexthop(rt, inner_iph->daddr);
503                 }
504 #if IS_ENABLED(CONFIG_IPV6)
505                 else if (skb->protocol == htons(ETH_P_IPV6)) {
506                         const struct in6_addr *addr6;
507                         struct neighbour *neigh;
508                         bool do_tx_error_icmp;
509                         int addr_type;
510
511                         neigh = dst_neigh_lookup(skb_dst(skb),
512                                                  &ipv6_hdr(skb)->daddr);
513                         if (neigh == NULL)
514                                 goto tx_error;
515
516                         addr6 = (const struct in6_addr *)&neigh->primary_key;
517                         addr_type = ipv6_addr_type(addr6);
518
519                         if (addr_type == IPV6_ADDR_ANY) {
520                                 addr6 = &ipv6_hdr(skb)->daddr;
521                                 addr_type = ipv6_addr_type(addr6);
522                         }
523
524                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
525                                 do_tx_error_icmp = true;
526                         else {
527                                 do_tx_error_icmp = false;
528                                 dst = addr6->s6_addr32[3];
529                         }
530                         neigh_release(neigh);
531                         if (do_tx_error_icmp)
532                                 goto tx_error_icmp;
533                 }
534 #endif
535                 else
536                         goto tx_error;
537         }
538
539         tos = tnl_params->tos;
540         if (tos & 0x1) {
541                 tos &= ~0x1;
542                 if (skb->protocol == htons(ETH_P_IP))
543                         tos = inner_iph->tos;
544                 else if (skb->protocol == htons(ETH_P_IPV6))
545                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
546         }
547
548         rt = ip_route_output_tunnel(tunnel->net, &fl4,
549                                     protocol,
550                                     dst, tnl_params->saddr,
551                                     tunnel->parms.o_key,
552                                     RT_TOS(tos),
553                                     tunnel->parms.link);
554         if (IS_ERR(rt)) {
555                 dev->stats.tx_carrier_errors++;
556                 goto tx_error;
557         }
558         if (rt->dst.dev == dev) {
559                 ip_rt_put(rt);
560                 dev->stats.collisions++;
561                 goto tx_error;
562         }
563         df = tnl_params->frag_off;
564
565         if (df)
566                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
567                                         - sizeof(struct iphdr);
568         else
569                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
570
571         if (skb_dst(skb))
572                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
573
574         if (skb->protocol == htons(ETH_P_IP)) {
575                 df |= (inner_iph->frag_off&htons(IP_DF));
576
577                 if (!skb_is_gso(skb) &&
578                     (inner_iph->frag_off&htons(IP_DF)) &&
579                      mtu < ntohs(inner_iph->tot_len)) {
580                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
581                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
582                         ip_rt_put(rt);
583                         goto tx_error;
584                 }
585         }
586 #if IS_ENABLED(CONFIG_IPV6)
587         else if (skb->protocol == htons(ETH_P_IPV6)) {
588                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
589
590                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
591                     mtu >= IPV6_MIN_MTU) {
592                         if ((tunnel->parms.iph.daddr &&
593                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
594                             rt6->rt6i_dst.plen == 128) {
595                                 rt6->rt6i_flags |= RTF_MODIFIED;
596                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
597                         }
598                 }
599
600                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
601                     mtu < skb->len) {
602                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
603                         ip_rt_put(rt);
604                         goto tx_error;
605                 }
606         }
607 #endif
608
609         if (tunnel->net != dev_net(dev))
610                 skb_scrub_packet(skb);
611
612         if (tunnel->err_count > 0) {
613                 if (time_before(jiffies,
614                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
615                         tunnel->err_count--;
616
617                         dst_link_failure(skb);
618                 } else
619                         tunnel->err_count = 0;
620         }
621
622         ttl = tnl_params->ttl;
623         if (ttl == 0) {
624                 if (skb->protocol == htons(ETH_P_IP))
625                         ttl = inner_iph->ttl;
626 #if IS_ENABLED(CONFIG_IPV6)
627                 else if (skb->protocol == htons(ETH_P_IPV6))
628                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
629 #endif
630                 else
631                         ttl = ip4_dst_hoplimit(&rt->dst);
632         }
633
634         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
635                         + rt->dst.header_len;
636         if (max_headroom > dev->needed_headroom) {
637                 dev->needed_headroom = max_headroom;
638                 if (skb_cow_head(skb, dev->needed_headroom)) {
639                         dev->stats.tx_dropped++;
640                         dev_kfree_skb(skb);
641                         return;
642                 }
643         }
644
645         err = iptunnel_xmit(dev_net(dev), rt, skb,
646                             fl4.saddr, fl4.daddr, protocol,
647                             ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df);
648         iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
649
650         return;
651
652 #if IS_ENABLED(CONFIG_IPV6)
653 tx_error_icmp:
654         dst_link_failure(skb);
655 #endif
656 tx_error:
657         dev->stats.tx_errors++;
658         dev_kfree_skb(skb);
659 }
660 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
661
662 static void ip_tunnel_update(struct ip_tunnel_net *itn,
663                              struct ip_tunnel *t,
664                              struct net_device *dev,
665                              struct ip_tunnel_parm *p,
666                              bool set_mtu)
667 {
668         ip_tunnel_del(t);
669         t->parms.iph.saddr = p->iph.saddr;
670         t->parms.iph.daddr = p->iph.daddr;
671         t->parms.i_key = p->i_key;
672         t->parms.o_key = p->o_key;
673         if (dev->type != ARPHRD_ETHER) {
674                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
675                 memcpy(dev->broadcast, &p->iph.daddr, 4);
676         }
677         ip_tunnel_add(itn, t);
678
679         t->parms.iph.ttl = p->iph.ttl;
680         t->parms.iph.tos = p->iph.tos;
681         t->parms.iph.frag_off = p->iph.frag_off;
682
683         if (t->parms.link != p->link) {
684                 int mtu;
685
686                 t->parms.link = p->link;
687                 mtu = ip_tunnel_bind_dev(dev);
688                 if (set_mtu)
689                         dev->mtu = mtu;
690         }
691         netdev_state_change(dev);
692 }
693
694 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
695 {
696         int err = 0;
697         struct ip_tunnel *t;
698         struct net *net = dev_net(dev);
699         struct ip_tunnel *tunnel = netdev_priv(dev);
700         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
701
702         BUG_ON(!itn->fb_tunnel_dev);
703         switch (cmd) {
704         case SIOCGETTUNNEL:
705                 t = NULL;
706                 if (dev == itn->fb_tunnel_dev)
707                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
708                 if (t == NULL)
709                         t = netdev_priv(dev);
710                 memcpy(p, &t->parms, sizeof(*p));
711                 break;
712
713         case SIOCADDTUNNEL:
714         case SIOCCHGTUNNEL:
715                 err = -EPERM;
716                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
717                         goto done;
718                 if (p->iph.ttl)
719                         p->iph.frag_off |= htons(IP_DF);
720                 if (!(p->i_flags&TUNNEL_KEY))
721                         p->i_key = 0;
722                 if (!(p->o_flags&TUNNEL_KEY))
723                         p->o_key = 0;
724
725                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
726
727                 if (!t && (cmd == SIOCADDTUNNEL))
728                         t = ip_tunnel_create(net, itn, p);
729
730                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
731                         if (t != NULL) {
732                                 if (t->dev != dev) {
733                                         err = -EEXIST;
734                                         break;
735                                 }
736                         } else {
737                                 unsigned int nflags = 0;
738
739                                 if (ipv4_is_multicast(p->iph.daddr))
740                                         nflags = IFF_BROADCAST;
741                                 else if (p->iph.daddr)
742                                         nflags = IFF_POINTOPOINT;
743
744                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
745                                         err = -EINVAL;
746                                         break;
747                                 }
748
749                                 t = netdev_priv(dev);
750                         }
751                 }
752
753                 if (t) {
754                         err = 0;
755                         ip_tunnel_update(itn, t, dev, p, true);
756                 } else
757                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
758                 break;
759
760         case SIOCDELTUNNEL:
761                 err = -EPERM;
762                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
763                         goto done;
764
765                 if (dev == itn->fb_tunnel_dev) {
766                         err = -ENOENT;
767                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
768                         if (t == NULL)
769                                 goto done;
770                         err = -EPERM;
771                         if (t == netdev_priv(itn->fb_tunnel_dev))
772                                 goto done;
773                         dev = t->dev;
774                 }
775                 unregister_netdevice(dev);
776                 err = 0;
777                 break;
778
779         default:
780                 err = -EINVAL;
781         }
782
783 done:
784         return err;
785 }
786 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
787
788 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
789 {
790         struct ip_tunnel *tunnel = netdev_priv(dev);
791         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
792
793         if (new_mtu < 68 ||
794             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
795                 return -EINVAL;
796         dev->mtu = new_mtu;
797         return 0;
798 }
799 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
800
801 static void ip_tunnel_dev_free(struct net_device *dev)
802 {
803         struct ip_tunnel *tunnel = netdev_priv(dev);
804
805         gro_cells_destroy(&tunnel->gro_cells);
806         free_percpu(dev->tstats);
807         free_netdev(dev);
808 }
809
810 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
811 {
812         struct net *net = dev_net(dev);
813         struct ip_tunnel *tunnel = netdev_priv(dev);
814         struct ip_tunnel_net *itn;
815
816         itn = net_generic(net, tunnel->ip_tnl_net_id);
817
818         if (itn->fb_tunnel_dev != dev) {
819                 ip_tunnel_del(netdev_priv(dev));
820                 unregister_netdevice_queue(dev, head);
821         }
822 }
823 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
824
825 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
826                                   struct rtnl_link_ops *ops, char *devname)
827 {
828         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
829         struct ip_tunnel_parm parms;
830
831         itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
832         if (!itn->tunnels)
833                 return -ENOMEM;
834
835         if (!ops) {
836                 itn->fb_tunnel_dev = NULL;
837                 return 0;
838         }
839         memset(&parms, 0, sizeof(parms));
840         if (devname)
841                 strlcpy(parms.name, devname, IFNAMSIZ);
842
843         rtnl_lock();
844         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
845         rtnl_unlock();
846         if (IS_ERR(itn->fb_tunnel_dev)) {
847                 kfree(itn->tunnels);
848                 return PTR_ERR(itn->fb_tunnel_dev);
849         }
850
851         return 0;
852 }
853 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
854
855 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
856 {
857         int h;
858
859         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
860                 struct ip_tunnel *t;
861                 struct hlist_node *n;
862                 struct hlist_head *thead = &itn->tunnels[h];
863
864                 hlist_for_each_entry_safe(t, n, thead, hash_node)
865                         unregister_netdevice_queue(t->dev, head);
866         }
867         if (itn->fb_tunnel_dev)
868                 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
869 }
870
871 void ip_tunnel_delete_net(struct ip_tunnel_net *itn)
872 {
873         LIST_HEAD(list);
874
875         rtnl_lock();
876         ip_tunnel_destroy(itn, &list);
877         unregister_netdevice_many(&list);
878         rtnl_unlock();
879         kfree(itn->tunnels);
880 }
881 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
882
883 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
884                       struct ip_tunnel_parm *p)
885 {
886         struct ip_tunnel *nt;
887         struct net *net = dev_net(dev);
888         struct ip_tunnel_net *itn;
889         int mtu;
890         int err;
891
892         nt = netdev_priv(dev);
893         itn = net_generic(net, nt->ip_tnl_net_id);
894
895         if (ip_tunnel_find(itn, p, dev->type))
896                 return -EEXIST;
897
898         nt->net = net;
899         nt->parms = *p;
900         err = register_netdevice(dev);
901         if (err)
902                 goto out;
903
904         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
905                 eth_hw_addr_random(dev);
906
907         mtu = ip_tunnel_bind_dev(dev);
908         if (!tb[IFLA_MTU])
909                 dev->mtu = mtu;
910
911         ip_tunnel_add(itn, nt);
912
913 out:
914         return err;
915 }
916 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
917
918 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
919                          struct ip_tunnel_parm *p)
920 {
921         struct ip_tunnel *t, *nt;
922         struct net *net = dev_net(dev);
923         struct ip_tunnel *tunnel = netdev_priv(dev);
924         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
925
926         if (dev == itn->fb_tunnel_dev)
927                 return -EINVAL;
928
929         nt = netdev_priv(dev);
930
931         t = ip_tunnel_find(itn, p, dev->type);
932
933         if (t) {
934                 if (t->dev != dev)
935                         return -EEXIST;
936         } else {
937                 t = nt;
938
939                 if (dev->type != ARPHRD_ETHER) {
940                         unsigned int nflags = 0;
941
942                         if (ipv4_is_multicast(p->iph.daddr))
943                                 nflags = IFF_BROADCAST;
944                         else if (p->iph.daddr)
945                                 nflags = IFF_POINTOPOINT;
946
947                         if ((dev->flags ^ nflags) &
948                             (IFF_POINTOPOINT | IFF_BROADCAST))
949                                 return -EINVAL;
950                 }
951         }
952
953         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
954         return 0;
955 }
956 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
957
958 int ip_tunnel_init(struct net_device *dev)
959 {
960         struct ip_tunnel *tunnel = netdev_priv(dev);
961         struct iphdr *iph = &tunnel->parms.iph;
962         int err;
963
964         dev->destructor = ip_tunnel_dev_free;
965         dev->tstats = alloc_percpu(struct pcpu_tstats);
966         if (!dev->tstats)
967                 return -ENOMEM;
968
969         err = gro_cells_init(&tunnel->gro_cells, dev);
970         if (err) {
971                 free_percpu(dev->tstats);
972                 return err;
973         }
974
975         tunnel->dev = dev;
976         strcpy(tunnel->parms.name, dev->name);
977         iph->version            = 4;
978         iph->ihl                = 5;
979
980         return 0;
981 }
982 EXPORT_SYMBOL_GPL(ip_tunnel_init);
983
984 void ip_tunnel_uninit(struct net_device *dev)
985 {
986         struct net *net = dev_net(dev);
987         struct ip_tunnel *tunnel = netdev_priv(dev);
988         struct ip_tunnel_net *itn;
989
990         itn = net_generic(net, tunnel->ip_tnl_net_id);
991         /* fb_tunnel_dev will be unregisted in net-exit call. */
992         if (itn->fb_tunnel_dev != dev)
993                 ip_tunnel_del(netdev_priv(dev));
994 }
995 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
996
997 /* Do least required initialization, rest of init is done in tunnel_init call */
998 void ip_tunnel_setup(struct net_device *dev, int net_id)
999 {
1000         struct ip_tunnel *tunnel = netdev_priv(dev);
1001         tunnel->ip_tnl_net_id = net_id;
1002 }
1003 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1004
1005 MODULE_LICENSE("GPL");