/*
 *      Linux NET3:     IP/IP protocol decoder.
 *
 *      Authors:
 *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
 *
 *      Fixes:
 *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
 *                                      a module taking up 2 pages).
 *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
 *                                      to keep ip_forward happy.
 *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
 *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
 *              David Woodhouse :       Perform some basic ICMP handling.
 *                                      IPIP Routing without decapsulation.
 *              Carlos Picoto   :       GRE over IP support
 *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
 *                                      I do not want to merge them together.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 */

/* tunnel.c: an IP tunnel driver

        The purpose of this driver is to provide an IP tunnel through
        which you can tunnel network traffic transparently across subnets.

        This was written by looking at Nick Holloway's dummy driver
        Thanks for the great code!

                -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95

        Minor tweaks:
                Cleaned up the code a little and added some pre-1.3.0 tweaks.
                dev->hard_header/hard_header_len changed to use no headers.
                Comments/bracketing tweaked.
                Made the tunnels use dev->name not tunnel: when error reporting.
                Added tx_dropped stat

                -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95

        Reworked:
                Changed to tunnel to destination gateway in addition to the
                        tunnel's pointopoint address
                Almost completely rewritten
                Note:  There is currently no firewall or ICMP handling done.

                -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96

*/

/* Things I wish I had known when writing the tunnel driver:

        When the tunnel_xmit() function is called, the skb contains the
        packet to be sent (plus a great deal of extra info), and dev
        contains the tunnel device that _we_ are.

        When we are passed a packet, we are expected to fill in the
        source address with our source IP address.

        What is the proper way to allocate, copy and free a buffer?
        After you allocate it, it is a "0 length" chunk of memory
        starting at zero.  If you want to add headers to the buffer
        later, you'll have to call "skb_reserve(skb, amount)" with
        the amount of memory you want reserved.  Then, you call
        "skb_put(skb, amount)" with the amount of space you want in
        the buffer.  skb_put() returns a pointer to the top (#0) of
        that buffer.  skb->len is set to the amount of space you have
        "allocated" with skb_put().  You can then write up to skb->len
        bytes to that buffer.  If you need more, you can call skb_put()
        again with the additional amount of space you need.  You can
        find out how much more space you can allocate by calling
        "skb_tailroom(skb)".
        Now, to add header space, call "skb_push(skb, header_len)".
        This creates space at the beginning of the buffer and returns
        a pointer to this new space.  If later you need to strip a
        header from a buffer, call "skb_pull(skb, header_len)".
        skb_headroom() will return how much space is left at the top
        of the buffer (before the main data).  Remember, this headroom
        space must be reserved before the skb_put() function is called.
        */
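
/* A minimal sketch of the sequence described above (illustration only,
   nothing in this driver runs it; "hdr_len", "data_len", "payload" and
   "header" are made-up names):

        struct sk_buff *skb = alloc_skb(hdr_len + data_len, GFP_ATOMIC);
        if (!skb)
                return NULL;
        skb_reserve(skb, hdr_len);
        memcpy(skb_put(skb, data_len), payload, data_len);
        memcpy(skb_push(skb, hdr_len), header, hdr_len);

   skb_tailroom(skb) and skb_headroom(skb) report how much room remains
   at the tail and head of the buffer after these calls.
        */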

/*
   This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c

   For comments look at net/ipv4/ip_gre.c --ANK
 */


#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#define HASH_SIZE  16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
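
/*
 * With the 0644 permission above, log_ecn_error can also be toggled at
 * runtime via /sys/module/ipip/parameters/log_ecn_error in addition to
 * being set as a parameter when the module is loaded.
 */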

static int ipip_net_id __read_mostly;
struct ipip_net {
        struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
        struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
        struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
        struct ip_tunnel __rcu *tunnels_wc[1];
        struct ip_tunnel __rcu **tunnels[4];

        struct net_device *fb_tunnel_dev;
};

static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);

/*
 * Locking : hash tables are protected by RCU and RTNL
 */

#define for_each_ip_tunnel_rcu(start) \
        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
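
/*
 * Illustrative sketch of the locking rules above (nothing in this file
 * runs this as-is; "do_something" is a made-up callee): readers walk a
 * chain under rcu_read_lock(), e.g.
 *
 *      struct ip_tunnel *t;
 *
 *      rcu_read_lock();
 *      for_each_ip_tunnel_rcu(ipn->tunnels_wc[0])
 *              do_something(t);
 *      rcu_read_unlock();
 *
 * while writers (ipip_tunnel_link()/ipip_tunnel_unlink() below) relink
 * the chain under the RTNL with rtnl_dereference()/rcu_assign_pointer().
 */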

/* often modified stats are per cpu, others are shared (netdev->stats) */
struct pcpu_tstats {
        u64     rx_packets;
        u64     rx_bytes;
        u64     tx_packets;
        u64     tx_bytes;
        struct u64_stats_sync   syncp;
};

static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
                                                  struct rtnl_link_stats64 *tot)
{
        int i;

        for_each_possible_cpu(i) {
                const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
                u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
                unsigned int start;

                do {
                        start = u64_stats_fetch_begin_bh(&tstats->syncp);
                        rx_packets = tstats->rx_packets;
                        tx_packets = tstats->tx_packets;
                        rx_bytes = tstats->rx_bytes;
                        tx_bytes = tstats->tx_bytes;
                } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

                tot->rx_packets += rx_packets;
                tot->tx_packets += tx_packets;
                tot->rx_bytes   += rx_bytes;
                tot->tx_bytes   += tx_bytes;
        }

        tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
        tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
        tot->tx_dropped = dev->stats.tx_dropped;
        tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
        tot->tx_errors = dev->stats.tx_errors;
        tot->collisions = dev->stats.collisions;

        return tot;
}

static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
                __be32 remote, __be32 local)
{
        unsigned int h0 = HASH(remote);
        unsigned int h1 = HASH(local);
        struct ip_tunnel *t;
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
                        return t;

        for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
                if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
                        return t;

        for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
                if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
                        return t;

        t = rcu_dereference(ipn->tunnels_wc[0]);
        if (t && (t->dev->flags&IFF_UP))
                return t;
        return NULL;
}

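/*
 * Pick one of the four hash tables above based on which endpoints are
 * set: prio 0 = neither (wildcard), 1 = local only, 2 = remote only,
 * 3 = both, matching the ipn->tunnels[] layout set up in ipip_init_net().
 */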
static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
                struct ip_tunnel_parm *parms)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        unsigned int h = 0;
        int prio = 0;

        if (remote) {
                prio |= 2;
                h ^= HASH(remote);
        }
        if (local) {
                prio |= 1;
                h ^= HASH(local);
        }
        return &ipn->tunnels[prio][h];
}

static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
                struct ip_tunnel *t)
{
        return __ipip_bucket(ipn, &t->parms);
}

static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
{
        struct ip_tunnel __rcu **tp;
        struct ip_tunnel *iter;

        for (tp = ipip_bucket(ipn, t);
             (iter = rtnl_dereference(*tp)) != NULL;
             tp = &iter->next) {
                if (t == iter) {
                        rcu_assign_pointer(*tp, t->next);
                        break;
                }
        }
}

static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
{
        struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);

        rcu_assign_pointer(t->next, rtnl_dereference(*tp));
        rcu_assign_pointer(*tp, t);
}

static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
                struct ip_tunnel_parm *parms, int create)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        struct ip_tunnel *t, *nt;
        struct ip_tunnel __rcu **tp;
        struct net_device *dev;
        char name[IFNAMSIZ];
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        for (tp = __ipip_bucket(ipn, parms);
                 (t = rtnl_dereference(*tp)) != NULL;
                 tp = &t->next) {
                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
                        return t;
        }
        if (!create)
                return NULL;

        if (parms->name[0])
                strlcpy(name, parms->name, IFNAMSIZ);
        else
                strcpy(name, "tunl%d");

        dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
        if (dev == NULL)
                return NULL;

        dev_net_set(dev, net);

        nt = netdev_priv(dev);
        nt->parms = *parms;

        if (ipip_tunnel_init(dev) < 0)
                goto failed_free;

        if (register_netdevice(dev) < 0)
                goto failed_free;

        strcpy(nt->parms.name, dev->name);

        dev_hold(dev);
        ipip_tunnel_link(ipn, nt);
        return nt;

failed_free:
        ipip_dev_free(dev);
        return NULL;
}

/* called with RTNL */
static void ipip_tunnel_uninit(struct net_device *dev)
{
        struct net *net = dev_net(dev);
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        if (dev == ipn->fb_tunnel_dev)
                RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
        else
                ipip_tunnel_unlink(ipn, netdev_priv(dev));
        dev_put(dev);
}

static int ipip_err(struct sk_buff *skb, u32 info)
{

/* All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
        const struct iphdr *iph = (const struct iphdr *)skb->data;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct ip_tunnel *t;
        int err;

        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                return 0;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return 0;
                default:
                        /* All others are translated to HOST_UNREACH.
                           rfc2003 contains "deep thoughts" about NET_UNREACH,
                           I believe they are just ether pollution. --ANK
                         */
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return 0;
                break;
        case ICMP_REDIRECT:
                break;
        }

        err = -ENOENT;
        t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
        if (t == NULL)
                goto out;

        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
                                 t->dev->ifindex, 0, IPPROTO_IPIP, 0);
                err = 0;
                goto out;
        }

        if (type == ICMP_REDIRECT) {
                ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
                              IPPROTO_IPIP, 0);
                err = 0;
                goto out;
        }

        if (t->parms.iph.daddr == 0)
                goto out;

        err = 0;
        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                goto out;

        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
out:

        return err;
}

static int ipip_rcv(struct sk_buff *skb)
{
        struct ip_tunnel *tunnel;
        const struct iphdr *iph = ip_hdr(skb);
        int err;

        tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
        if (tunnel != NULL) {
                struct pcpu_tstats *tstats;

                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
                        goto drop;

                secpath_reset(skb);

                skb->mac_header = skb->network_header;
                skb_reset_network_header(skb);
                skb->protocol = htons(ETH_P_IP);
                skb->pkt_type = PACKET_HOST;

                __skb_tunnel_rx(skb, tunnel->dev);

                err = IP_ECN_decapsulate(iph, skb);
                if (unlikely(err)) {
                        if (log_ecn_error)
                                net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
                                                     &iph->saddr, iph->tos);
                        if (err > 1) {
                                ++tunnel->dev->stats.rx_frame_errors;
                                ++tunnel->dev->stats.rx_errors;
                                goto drop;
                        }
                }

                tstats = this_cpu_ptr(tunnel->dev->tstats);
                u64_stats_update_begin(&tstats->syncp);
                tstats->rx_packets++;
                tstats->rx_bytes += skb->len;
                u64_stats_update_end(&tstats->syncp);

                netif_rx(skb);
                return 0;
        }

        return -1;

drop:
        kfree_skb(skb);
        return 0;
}

/*
 *      This function assumes it is being called from dev_queue_xmit()
 *      and that skb is filled properly by that function.
 */

static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct pcpu_tstats *tstats;
        const struct iphdr  *tiph = &tunnel->parms.iph;
        u8     tos = tunnel->parms.iph.tos;
        __be16 df = tiph->frag_off;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                /* Device to other host */
        const struct iphdr  *old_iph = ip_hdr(skb);
        struct iphdr  *iph;                     /* Our new IP header */
        unsigned int max_headroom;              /* The extra header space needed */
        __be32 dst = tiph->daddr;
        struct flowi4 fl4;
        int    mtu;

        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;

        if (tos & 1)
                tos = old_iph->tos;

        if (!dst) {
                /* NBMA tunnel */
                if ((rt = skb_rtable(skb)) == NULL) {
                        dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }
                dst = rt_nexthop(rt, old_iph->daddr);
        }

        rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
                                   dst, tiph->saddr,
                                   0, 0,
                                   IPPROTO_IPIP, RT_TOS(tos),
                                   tunnel->parms.link);
        if (IS_ERR(rt)) {
                dev->stats.tx_carrier_errors++;
                goto tx_error_icmp;
        }
        tdev = rt->dst.dev;

        if (tdev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        df |= old_iph->frag_off & htons(IP_DF);

        if (df) {
                mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

                if (mtu < 68) {
                        dev->stats.collisions++;
                        ip_rt_put(rt);
                        goto tx_error;
                }

                if (skb_dst(skb))
                        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

                if ((old_iph->frag_off & htons(IP_DF)) &&
                    mtu < ntohs(old_iph->tot_len)) {
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                                  htonl(mtu));
                        ip_rt_put(rt);
                        goto tx_error;
                }
        }

        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        dev->stats.tx_dropped++;
                        dev_kfree_skb(skb);
                        return NETDEV_TX_OK;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                old_iph = ip_hdr(skb);
        }

        skb->transport_header = skb->network_header;
        skb_push(skb, sizeof(struct iphdr));
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /*
         *      Push down and install the IPIP header.
         */

        iph                     =       ip_hdr(skb);
        iph->version            =       4;
        iph->ihl                =       sizeof(struct iphdr)>>2;
        iph->frag_off           =       df;
        iph->protocol           =       IPPROTO_IPIP;
        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
        iph->daddr              =       fl4.daddr;
        iph->saddr              =       fl4.saddr;

        if ((iph->ttl = tiph->ttl) == 0)
                iph->ttl        =       old_iph->ttl;

        nf_reset(skb);
        tstats = this_cpu_ptr(dev->tstats);
        __IPTUNNEL_XMIT(tstats, &dev->stats);
        return NETDEV_TX_OK;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        dev->stats.tx_errors++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}

static void ipip_tunnel_bind_dev(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel;
        const struct iphdr *iph;

        tunnel = netdev_priv(dev);
        iph = &tunnel->parms.iph;

        if (iph->daddr) {
                struct rtable *rt;
                struct flowi4 fl4;

                rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
                                           iph->daddr, iph->saddr,
                                           0, 0,
                                           IPPROTO_IPIP,
                                           RT_TOS(iph->tos),
                                           tunnel->parms.link);
                if (!IS_ERR(rt)) {
                        tdev = rt->dst.dev;
                        ip_rt_put(rt);
                }
                dev->flags |= IFF_POINTOPOINT;
        }

        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

        if (tdev) {
                dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
                dev->mtu = tdev->mtu - sizeof(struct iphdr);
        }
        dev->iflink = tunnel->parms.link;
}

static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;
        struct net *net = dev_net(dev);
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                if (dev == ipn->fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipip_tunnel_locate(net, &p, 0);
                }
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
                        goto done;
                if (p.iph.ttl)
                        p.iph.frag_off |= htons(IP_DF);

                t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
                                        err = -EINVAL;
                                        break;
                                }
                                t = netdev_priv(dev);
                                ipip_tunnel_unlink(ipn, t);
                                synchronize_net();
                                t->parms.iph.saddr = p.iph.saddr;
                                t->parms.iph.daddr = p.iph.daddr;
                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                                memcpy(dev->broadcast, &p.iph.daddr, 4);
                                ipip_tunnel_link(ipn, t);
                                netdev_state_change(dev);
                        }
                }

                if (t) {
                        err = 0;
                        if (cmd == SIOCCHGTUNNEL) {
                                t->parms.iph.ttl = p.iph.ttl;
                                t->parms.iph.tos = p.iph.tos;
                                t->parms.iph.frag_off = p.iph.frag_off;
                                if (t->parms.link != p.link) {
                                        t->parms.link = p.link;
                                        ipip_tunnel_bind_dev(dev);
                                        netdev_state_change(dev);
                                }
                        }
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                if (dev == ipn->fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
                                goto done;
                        err = -EPERM;
                        if (t->dev == ipn->fb_tunnel_dev)
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}

static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
        if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
                return -EINVAL;
        dev->mtu = new_mtu;
        return 0;
}

static const struct net_device_ops ipip_netdev_ops = {
        .ndo_uninit     = ipip_tunnel_uninit,
        .ndo_start_xmit = ipip_tunnel_xmit,
        .ndo_do_ioctl   = ipip_tunnel_ioctl,
        .ndo_change_mtu = ipip_tunnel_change_mtu,
        .ndo_get_stats64 = ipip_get_stats64,
};

static void ipip_dev_free(struct net_device *dev)
{
        free_percpu(dev->tstats);
        free_netdev(dev);
}

static void ipip_tunnel_setup(struct net_device *dev)
{
        dev->netdev_ops         = &ipip_netdev_ops;
        dev->destructor         = ipip_dev_free;

        dev->type               = ARPHRD_TUNNEL;
        dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
        dev->flags              = IFF_NOARP;
        dev->iflink             = 0;
        dev->addr_len           = 4;
        dev->features           |= NETIF_F_NETNS_LOCAL;
        dev->features           |= NETIF_F_LLTX;
        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
}

static int ipip_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        tunnel->dev = dev;

        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

        ipip_tunnel_bind_dev(dev);

        dev->tstats = alloc_percpu(struct pcpu_tstats);
        if (!dev->tstats)
                return -ENOMEM;

        return 0;
}

static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;
        struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);

        tunnel->dev = dev;
        strcpy(tunnel->parms.name, dev->name);

        iph->version            = 4;
        iph->protocol           = IPPROTO_IPIP;
        iph->ihl                = 5;

        dev->tstats = alloc_percpu(struct pcpu_tstats);
        if (!dev->tstats)
                return -ENOMEM;

        dev_hold(dev);
        rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
        return 0;
}

static struct xfrm_tunnel ipip_handler __read_mostly = {
        .handler        =       ipip_rcv,
        .err_handler    =       ipip_err,
        .priority       =       1,
};

static const char banner[] __initconst =
        KERN_INFO "IPv4 over IPv4 tunneling driver\n";

static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
{
        int prio;

        for (prio = 1; prio < 4; prio++) {
                int h;
                for (h = 0; h < HASH_SIZE; h++) {
                        struct ip_tunnel *t;

                        t = rtnl_dereference(ipn->tunnels[prio][h]);
                        while (t != NULL) {
                                unregister_netdevice_queue(t->dev, head);
                                t = rtnl_dereference(t->next);
                        }
                }
        }
}

static int __net_init ipip_init_net(struct net *net)
{
        struct ipip_net *ipn = net_generic(net, ipip_net_id);
        struct ip_tunnel *t;
        int err;

        ipn->tunnels[0] = ipn->tunnels_wc;
        ipn->tunnels[1] = ipn->tunnels_l;
        ipn->tunnels[2] = ipn->tunnels_r;
        ipn->tunnels[3] = ipn->tunnels_r_l;

        ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
                                           "tunl0",
                                           ipip_tunnel_setup);
        if (!ipn->fb_tunnel_dev) {
                err = -ENOMEM;
                goto err_alloc_dev;
        }
        dev_net_set(ipn->fb_tunnel_dev, net);

        err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
        if (err)
                goto err_reg_dev;

        if ((err = register_netdev(ipn->fb_tunnel_dev)))
                goto err_reg_dev;

        t = netdev_priv(ipn->fb_tunnel_dev);

        strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
        return 0;

err_reg_dev:
        ipip_dev_free(ipn->fb_tunnel_dev);
err_alloc_dev:
        /* nothing */
        return err;
}

static void __net_exit ipip_exit_net(struct net *net)
{
        struct ipip_net *ipn = net_generic(net, ipip_net_id);
        LIST_HEAD(list);

        rtnl_lock();
        ipip_destroy_tunnels(ipn, &list);
        unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
        unregister_netdevice_many(&list);
        rtnl_unlock();
}

static struct pernet_operations ipip_net_ops = {
        .init = ipip_init_net,
        .exit = ipip_exit_net,
        .id   = &ipip_net_id,
        .size = sizeof(struct ipip_net),
};

static int __init ipip_init(void)
{
        int err;

        printk(banner);

        err = register_pernet_device(&ipip_net_ops);
        if (err < 0)
                return err;
        err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
        if (err < 0) {
                unregister_pernet_device(&ipip_net_ops);
                pr_info("%s: can't register tunnel\n", __func__);
        }
        return err;
}

static void __exit ipip_fini(void)
{
        if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
                pr_info("%s: can't deregister tunnel\n", __func__);

        unregister_pernet_device(&ipip_net_ops);
}

module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETDEV("tunl0");
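
/*
 * Example of driving this code from userspace with iproute2 (addresses
 * below are placeholders); the "ip tunnel" commands reach this driver
 * through the SIOCADDTUNNEL/SIOCCHGTUNNEL/SIOCDELTUNNEL ioctls handled
 * above:
 *
 *      ip tunnel add tunl1 mode ipip local 10.0.0.1 remote 10.0.0.2 ttl 64
 *      ip link set tunl1 up
 *      ip addr add 192.168.7.1/30 dev tunl1
 */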