1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non-modular (it's so tiny it's silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph
11  *                                      to keep ip_forward happy).
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name rather than "tunnel:" when reporting errors.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to the destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
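/*
 * Illustrative sketch only (not part of this driver, kept under #if 0):
 * a minimal example of the skb recipe described above.  The function
 * example_build_skb() and its parameters are hypothetical; it assumes
 * <linux/skbuff.h> (included further down) and GFP_ATOMIC allocation.
 */
#if 0
static struct sk_buff *example_build_skb(unsigned int header_len,
					 const void *payload,
					 unsigned int payload_len)
{
	struct sk_buff *skb;

	/* Allocate a zero-length buffer with room for header + payload. */
	skb = alloc_skb(header_len + payload_len, GFP_ATOMIC);
	if (!skb)
		return NULL;

	/* Headroom must be reserved before any skb_put() call. */
	skb_reserve(skb, header_len);

	/* skb_put() extends the data area and returns a pointer to the
	 * newly added space; skb->len grows by payload_len. */
	memcpy(skb_put(skb, payload_len), payload, payload_len);

	/* A header added later would use skb_push(skb, header_len), which
	 * opens up space in front of the data; skb_pull() strips it again
	 * on the receive side. */

	return skb;
}
#endif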
86
87 /*
88    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
89
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
120 #define HASH_SIZE  16
121 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
122
123 static bool log_ecn_error = true;
124 module_param(log_ecn_error, bool, 0644);
125 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126
127 static int ipip_net_id __read_mostly;
128 struct ipip_net {
129         struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
130         struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
131         struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
132         struct ip_tunnel __rcu *tunnels_wc[1];
133         struct ip_tunnel __rcu **tunnels[4];
134
135         struct net_device *fb_tunnel_dev;
136 };
137
138 static int ipip_tunnel_init(struct net_device *dev);
139 static void ipip_tunnel_setup(struct net_device *dev);
140 static void ipip_dev_free(struct net_device *dev);
141 static struct rtnl_link_ops ipip_link_ops __read_mostly;
142
143 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
144                                                   struct rtnl_link_stats64 *tot)
145 {
146         int i;
147
148         for_each_possible_cpu(i) {
149                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
150                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
151                 unsigned int start;
152
153                 do {
154                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
155                         rx_packets = tstats->rx_packets;
156                         tx_packets = tstats->tx_packets;
157                         rx_bytes = tstats->rx_bytes;
158                         tx_bytes = tstats->tx_bytes;
159                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
160
161                 tot->rx_packets += rx_packets;
162                 tot->tx_packets += tx_packets;
163                 tot->rx_bytes   += rx_bytes;
164                 tot->tx_bytes   += tx_bytes;
165         }
166
167         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
168         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
169         tot->tx_dropped = dev->stats.tx_dropped;
170         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
171         tot->tx_errors = dev->stats.tx_errors;
172         tot->collisions = dev->stats.collisions;
173
174         return tot;
175 }
176
177 static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
178                 __be32 remote, __be32 local)
179 {
180         unsigned int h0 = HASH(remote);
181         unsigned int h1 = HASH(local);
182         struct ip_tunnel *t;
183         struct ipip_net *ipn = net_generic(net, ipip_net_id);
184
185         for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
186                 if (local == t->parms.iph.saddr &&
187                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188                         return t;
189
190         for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
191                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
192                         return t;
193
194         for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
195                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
196                         return t;
197
198         t = rcu_dereference(ipn->tunnels_wc[0]);
199         if (t && (t->dev->flags&IFF_UP))
200                 return t;
201         return NULL;
202 }
203
204 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
205                 struct ip_tunnel_parm *parms)
206 {
207         __be32 remote = parms->iph.daddr;
208         __be32 local = parms->iph.saddr;
209         unsigned int h = 0;
210         int prio = 0;
211
212         if (remote) {
213                 prio |= 2;
214                 h ^= HASH(remote);
215         }
216         if (local) {
217                 prio |= 1;
218                 h ^= HASH(local);
219         }
220         return &ipn->tunnels[prio][h];
221 }
222
223 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
224                 struct ip_tunnel *t)
225 {
226         return __ipip_bucket(ipn, &t->parms);
227 }
228
229 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
230 {
231         struct ip_tunnel __rcu **tp;
232         struct ip_tunnel *iter;
233
234         for (tp = ipip_bucket(ipn, t);
235              (iter = rtnl_dereference(*tp)) != NULL;
236              tp = &iter->next) {
237                 if (t == iter) {
238                         rcu_assign_pointer(*tp, t->next);
239                         break;
240                 }
241         }
242 }
243
244 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
245 {
246         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
247
248         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
249         rcu_assign_pointer(*tp, t);
250 }
251
252 static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
253                 struct ip_tunnel_parm *parms, int create)
254 {
255         __be32 remote = parms->iph.daddr;
256         __be32 local = parms->iph.saddr;
257         struct ip_tunnel *t, *nt;
258         struct ip_tunnel __rcu **tp;
259         struct net_device *dev;
260         char name[IFNAMSIZ];
261         struct ipip_net *ipn = net_generic(net, ipip_net_id);
262
263         for (tp = __ipip_bucket(ipn, parms);
264                  (t = rtnl_dereference(*tp)) != NULL;
265                  tp = &t->next) {
266                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
267                         return t;
268         }
269         if (!create)
270                 return NULL;
271
272         if (parms->name[0])
273                 strlcpy(name, parms->name, IFNAMSIZ);
274         else
275                 strcpy(name, "tunl%d");
276
277         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
278         if (dev == NULL)
279                 return NULL;
280
281         dev_net_set(dev, net);
282
283         nt = netdev_priv(dev);
284         nt->parms = *parms;
285
286         if (ipip_tunnel_init(dev) < 0)
287                 goto failed_free;
288
289         if (register_netdevice(dev) < 0)
290                 goto failed_free;
291
292         strcpy(nt->parms.name, dev->name);
293         dev->rtnl_link_ops = &ipip_link_ops;
294
295         dev_hold(dev);
296         ipip_tunnel_link(ipn, nt);
297         return nt;
298
299 failed_free:
300         ipip_dev_free(dev);
301         return NULL;
302 }
303
304 /* called with RTNL */
305 static void ipip_tunnel_uninit(struct net_device *dev)
306 {
307         struct net *net = dev_net(dev);
308         struct ipip_net *ipn = net_generic(net, ipip_net_id);
309
310         if (dev == ipn->fb_tunnel_dev)
311                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
312         else
313                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
314         dev_put(dev);
315 }
316
317 static int ipip_err(struct sk_buff *skb, u32 info)
318 {
319
320 /* All the routers (except for Linux) return only
321    8 bytes of packet payload. This means that precise relaying of
322    ICMP in the real Internet is absolutely infeasible.
323  */
324         const struct iphdr *iph = (const struct iphdr *)skb->data;
325         const int type = icmp_hdr(skb)->type;
326         const int code = icmp_hdr(skb)->code;
327         struct ip_tunnel *t;
328         int err;
329
330         switch (type) {
331         default:
332         case ICMP_PARAMETERPROB:
333                 return 0;
334
335         case ICMP_DEST_UNREACH:
336                 switch (code) {
337                 case ICMP_SR_FAILED:
338                 case ICMP_PORT_UNREACH:
339                         /* Impossible event. */
340                         return 0;
341                 default:
342                         /* All others are translated to HOST_UNREACH.
343                            rfc2003 contains "deep thoughts" about NET_UNREACH;
344                            I believe they are just ether pollution. --ANK
345                          */
346                         break;
347                 }
348                 break;
349         case ICMP_TIME_EXCEEDED:
350                 if (code != ICMP_EXC_TTL)
351                         return 0;
352                 break;
353         case ICMP_REDIRECT:
354                 break;
355         }
356
357         err = -ENOENT;
358         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
359         if (t == NULL)
360                 goto out;
361
362         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
363                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
364                                  t->dev->ifindex, 0, IPPROTO_IPIP, 0);
365                 err = 0;
366                 goto out;
367         }
368
369         if (type == ICMP_REDIRECT) {
370                 ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
371                               IPPROTO_IPIP, 0);
372                 err = 0;
373                 goto out;
374         }
375
376         if (t->parms.iph.daddr == 0)
377                 goto out;
378
379         err = 0;
380         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
381                 goto out;
382
383         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
384                 t->err_count++;
385         else
386                 t->err_count = 1;
387         t->err_time = jiffies;
388 out:
389
390         return err;
391 }
392
393 static int ipip_rcv(struct sk_buff *skb)
394 {
395         struct ip_tunnel *tunnel;
396         const struct iphdr *iph = ip_hdr(skb);
397         int err;
398
399         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
400         if (tunnel != NULL) {
401                 struct pcpu_tstats *tstats;
402
403                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
404                         goto drop;
405
406                 secpath_reset(skb);
407
408                 skb->mac_header = skb->network_header;
409                 skb_reset_network_header(skb);
410                 skb->protocol = htons(ETH_P_IP);
411                 skb->pkt_type = PACKET_HOST;
412
413                 __skb_tunnel_rx(skb, tunnel->dev);
414
415                 err = IP_ECN_decapsulate(iph, skb);
416                 if (unlikely(err)) {
417                         if (log_ecn_error)
418                                 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
419                                                      &iph->saddr, iph->tos);
420                         if (err > 1) {
421                                 ++tunnel->dev->stats.rx_frame_errors;
422                                 ++tunnel->dev->stats.rx_errors;
423                                 goto drop;
424                         }
425                 }
426
427                 tstats = this_cpu_ptr(tunnel->dev->tstats);
428                 u64_stats_update_begin(&tstats->syncp);
429                 tstats->rx_packets++;
430                 tstats->rx_bytes += skb->len;
431                 u64_stats_update_end(&tstats->syncp);
432
433                 netif_rx(skb);
434                 return 0;
435         }
436
437         return -1;
438
439 drop:
440         kfree_skb(skb);
441         return 0;
442 }
443
444 /*
445  *      This function assumes it is being called from dev_queue_xmit()
446  *      and that skb is filled properly by that function.
447  */
448
449 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
450 {
451         struct ip_tunnel *tunnel = netdev_priv(dev);
452         const struct iphdr  *tiph = &tunnel->parms.iph;
453         u8     tos = tunnel->parms.iph.tos;
454         __be16 df = tiph->frag_off;
455         struct rtable *rt;                      /* Route to the other host */
456         struct net_device *tdev;                /* Device to other host */
457         const struct iphdr  *old_iph = ip_hdr(skb);
458         struct iphdr  *iph;                     /* Our new IP header */
459         unsigned int max_headroom;              /* The extra header space needed */
460         __be32 dst = tiph->daddr;
461         struct flowi4 fl4;
462         int    mtu;
463
464         if (skb->protocol != htons(ETH_P_IP))
465                 goto tx_error;
466
467         if (skb->ip_summed == CHECKSUM_PARTIAL &&
468             skb_checksum_help(skb))
469                 goto tx_error;
470
471         if (tos & 1)
472                 tos = old_iph->tos;
473
474         if (!dst) {
475                 /* NBMA tunnel */
476                 if ((rt = skb_rtable(skb)) == NULL) {
477                         dev->stats.tx_fifo_errors++;
478                         goto tx_error;
479                 }
480                 dst = rt_nexthop(rt, old_iph->daddr);
481         }
482
483         rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
484                                    dst, tiph->saddr,
485                                    0, 0,
486                                    IPPROTO_IPIP, RT_TOS(tos),
487                                    tunnel->parms.link);
488         if (IS_ERR(rt)) {
489                 dev->stats.tx_carrier_errors++;
490                 goto tx_error_icmp;
491         }
492         tdev = rt->dst.dev;
493
494         if (tdev == dev) {
495                 ip_rt_put(rt);
496                 dev->stats.collisions++;
497                 goto tx_error;
498         }
499
500         df |= old_iph->frag_off & htons(IP_DF);
501
502         if (df) {
503                 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
504
505                 if (mtu < 68) {
506                         dev->stats.collisions++;
507                         ip_rt_put(rt);
508                         goto tx_error;
509                 }
510
511                 if (skb_dst(skb))
512                         skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
513
514                 if ((old_iph->frag_off & htons(IP_DF)) &&
515                     mtu < ntohs(old_iph->tot_len)) {
516                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
517                                   htonl(mtu));
518                         ip_rt_put(rt);
519                         goto tx_error;
520                 }
521         }
522
523         if (tunnel->err_count > 0) {
524                 if (time_before(jiffies,
525                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
526                         tunnel->err_count--;
527                         dst_link_failure(skb);
528                 } else
529                         tunnel->err_count = 0;
530         }
531
532         /*
533          * Okay, now see if we can stuff it in the buffer as-is.
534          */
535         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
536
537         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
538             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
539                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
540                 if (!new_skb) {
541                         ip_rt_put(rt);
542                         dev->stats.tx_dropped++;
543                         dev_kfree_skb(skb);
544                         return NETDEV_TX_OK;
545                 }
546                 if (skb->sk)
547                         skb_set_owner_w(new_skb, skb->sk);
548                 dev_kfree_skb(skb);
549                 skb = new_skb;
550                 old_iph = ip_hdr(skb);
551         }
552
553         skb->transport_header = skb->network_header;
554         skb_push(skb, sizeof(struct iphdr));
555         skb_reset_network_header(skb);
556         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
557         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
558                               IPSKB_REROUTED);
559         skb_dst_drop(skb);
560         skb_dst_set(skb, &rt->dst);
561
562         /*
563          *      Push down and install the IPIP header.
564          */
565
566         iph                     =       ip_hdr(skb);
567         iph->version            =       4;
568         iph->ihl                =       sizeof(struct iphdr)>>2;
569         iph->frag_off           =       df;
570         iph->protocol           =       IPPROTO_IPIP;
571         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
572         iph->daddr              =       fl4.daddr;
573         iph->saddr              =       fl4.saddr;
574
575         if ((iph->ttl = tiph->ttl) == 0)
576                 iph->ttl        =       old_iph->ttl;
577
578         iptunnel_xmit(skb, dev);
579         return NETDEV_TX_OK;
580
581 tx_error_icmp:
582         dst_link_failure(skb);
583 tx_error:
584         dev->stats.tx_errors++;
585         dev_kfree_skb(skb);
586         return NETDEV_TX_OK;
587 }
588
589 static void ipip_tunnel_bind_dev(struct net_device *dev)
590 {
591         struct net_device *tdev = NULL;
592         struct ip_tunnel *tunnel;
593         const struct iphdr *iph;
594
595         tunnel = netdev_priv(dev);
596         iph = &tunnel->parms.iph;
597
598         if (iph->daddr) {
599                 struct rtable *rt;
600                 struct flowi4 fl4;
601
602                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
603                                            iph->daddr, iph->saddr,
604                                            0, 0,
605                                            IPPROTO_IPIP,
606                                            RT_TOS(iph->tos),
607                                            tunnel->parms.link);
608                 if (!IS_ERR(rt)) {
609                         tdev = rt->dst.dev;
610                         ip_rt_put(rt);
611                 }
612                 dev->flags |= IFF_POINTOPOINT;
613         }
614
615         if (!tdev && tunnel->parms.link)
616                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
617
618         if (tdev) {
619                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
620                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
621         }
622         dev->iflink = tunnel->parms.link;
623 }
624
625 static int
626 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
627 {
628         int err = 0;
629         struct ip_tunnel_parm p;
630         struct ip_tunnel *t;
631         struct net *net = dev_net(dev);
632         struct ipip_net *ipn = net_generic(net, ipip_net_id);
633
634         switch (cmd) {
635         case SIOCGETTUNNEL:
636                 t = NULL;
637                 if (dev == ipn->fb_tunnel_dev) {
638                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
639                                 err = -EFAULT;
640                                 break;
641                         }
642                         t = ipip_tunnel_locate(net, &p, 0);
643                 }
644                 if (t == NULL)
645                         t = netdev_priv(dev);
646                 memcpy(&p, &t->parms, sizeof(p));
647                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
648                         err = -EFAULT;
649                 break;
650
651         case SIOCADDTUNNEL:
652         case SIOCCHGTUNNEL:
653                 err = -EPERM;
654                 if (!capable(CAP_NET_ADMIN))
655                         goto done;
656
657                 err = -EFAULT;
658                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
659                         goto done;
660
661                 err = -EINVAL;
662                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
663                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
664                         goto done;
665                 if (p.iph.ttl)
666                         p.iph.frag_off |= htons(IP_DF);
667
668                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
669
670                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
671                         if (t != NULL) {
672                                 if (t->dev != dev) {
673                                         err = -EEXIST;
674                                         break;
675                                 }
676                         } else {
677                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
678                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
679                                         err = -EINVAL;
680                                         break;
681                                 }
682                                 t = netdev_priv(dev);
683                                 ipip_tunnel_unlink(ipn, t);
684                                 synchronize_net();
685                                 t->parms.iph.saddr = p.iph.saddr;
686                                 t->parms.iph.daddr = p.iph.daddr;
687                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
688                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
689                                 ipip_tunnel_link(ipn, t);
690                                 netdev_state_change(dev);
691                         }
692                 }
693
694                 if (t) {
695                         err = 0;
696                         if (cmd == SIOCCHGTUNNEL) {
697                                 t->parms.iph.ttl = p.iph.ttl;
698                                 t->parms.iph.tos = p.iph.tos;
699                                 t->parms.iph.frag_off = p.iph.frag_off;
700                                 if (t->parms.link != p.link) {
701                                         t->parms.link = p.link;
702                                         ipip_tunnel_bind_dev(dev);
703                                         netdev_state_change(dev);
704                                 }
705                         }
706                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
707                                 err = -EFAULT;
708                 } else
709                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
710                 break;
711
712         case SIOCDELTUNNEL:
713                 err = -EPERM;
714                 if (!capable(CAP_NET_ADMIN))
715                         goto done;
716
717                 if (dev == ipn->fb_tunnel_dev) {
718                         err = -EFAULT;
719                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
720                                 goto done;
721                         err = -ENOENT;
722                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
723                                 goto done;
724                         err = -EPERM;
725                         if (t->dev == ipn->fb_tunnel_dev)
726                                 goto done;
727                         dev = t->dev;
728                 }
729                 unregister_netdevice(dev);
730                 err = 0;
731                 break;
732
733         default:
734                 err = -EINVAL;
735         }
736
737 done:
738         return err;
739 }
740
741 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
742 {
743         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
744                 return -EINVAL;
745         dev->mtu = new_mtu;
746         return 0;
747 }
748
749 static const struct net_device_ops ipip_netdev_ops = {
750         .ndo_uninit     = ipip_tunnel_uninit,
751         .ndo_start_xmit = ipip_tunnel_xmit,
752         .ndo_do_ioctl   = ipip_tunnel_ioctl,
753         .ndo_change_mtu = ipip_tunnel_change_mtu,
754         .ndo_get_stats64 = ipip_get_stats64,
755 };
756
757 static void ipip_dev_free(struct net_device *dev)
758 {
759         free_percpu(dev->tstats);
760         free_netdev(dev);
761 }
762
763 #define IPIP_FEATURES (NETIF_F_SG |             \
764                        NETIF_F_FRAGLIST |       \
765                        NETIF_F_HIGHDMA |        \
766                        NETIF_F_HW_CSUM)
767
768 static void ipip_tunnel_setup(struct net_device *dev)
769 {
770         dev->netdev_ops         = &ipip_netdev_ops;
771         dev->destructor         = ipip_dev_free;
772
773         dev->type               = ARPHRD_TUNNEL;
774         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
775         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
776         dev->flags              = IFF_NOARP;
777         dev->iflink             = 0;
778         dev->addr_len           = 4;
779         dev->features           |= NETIF_F_NETNS_LOCAL;
780         dev->features           |= NETIF_F_LLTX;
781         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
782
783         dev->features           |= IPIP_FEATURES;
784         dev->hw_features        |= IPIP_FEATURES;
785 }
786
787 static int ipip_tunnel_init(struct net_device *dev)
788 {
789         struct ip_tunnel *tunnel = netdev_priv(dev);
790
791         tunnel->dev = dev;
792
793         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
794         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
795
796         ipip_tunnel_bind_dev(dev);
797
798         dev->tstats = alloc_percpu(struct pcpu_tstats);
799         if (!dev->tstats)
800                 return -ENOMEM;
801
802         return 0;
803 }
804
805 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
806 {
807         struct ip_tunnel *tunnel = netdev_priv(dev);
808         struct iphdr *iph = &tunnel->parms.iph;
809         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
810
811         tunnel->dev = dev;
812         strcpy(tunnel->parms.name, dev->name);
813
814         iph->version            = 4;
815         iph->protocol           = IPPROTO_IPIP;
816         iph->ihl                = 5;
817
818         dev->tstats = alloc_percpu(struct pcpu_tstats);
819         if (!dev->tstats)
820                 return -ENOMEM;
821
822         dev_hold(dev);
823         rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
824         return 0;
825 }
826
827 static size_t ipip_get_size(const struct net_device *dev)
828 {
829         return
830                 /* IFLA_IPTUN_LINK */
831                 nla_total_size(4) +
832                 /* IFLA_IPTUN_LOCAL */
833                 nla_total_size(4) +
834                 /* IFLA_IPTUN_REMOTE */
835                 nla_total_size(4) +
836                 /* IFLA_IPTUN_TTL */
837                 nla_total_size(1) +
838                 /* IFLA_IPTUN_TOS */
839                 nla_total_size(1) +
840                 0;
841 }
842
843 static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
844 {
845         struct ip_tunnel *tunnel = netdev_priv(dev);
846         struct ip_tunnel_parm *parm = &tunnel->parms;
847
848         if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
849             nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
850             nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
851             nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
852             nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos))
853                 goto nla_put_failure;
854         return 0;
855
856 nla_put_failure:
857         return -EMSGSIZE;
858 }
859
860 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
861         .kind           = "ipip",
862         .maxtype        = IFLA_IPTUN_MAX,
863         .priv_size      = sizeof(struct ip_tunnel),
864         .get_size       = ipip_get_size,
865         .fill_info      = ipip_fill_info,
866 };
867
868 static struct xfrm_tunnel ipip_handler __read_mostly = {
869         .handler        =       ipip_rcv,
870         .err_handler    =       ipip_err,
871         .priority       =       1,
872 };
873
874 static const char banner[] __initconst =
875         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
876
877 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
878 {
879         int prio;
880
881         for (prio = 1; prio < 4; prio++) {
882                 int h;
883                 for (h = 0; h < HASH_SIZE; h++) {
884                         struct ip_tunnel *t;
885
886                         t = rtnl_dereference(ipn->tunnels[prio][h]);
887                         while (t != NULL) {
888                                 unregister_netdevice_queue(t->dev, head);
889                                 t = rtnl_dereference(t->next);
890                         }
891                 }
892         }
893 }
894
895 static int __net_init ipip_init_net(struct net *net)
896 {
897         struct ipip_net *ipn = net_generic(net, ipip_net_id);
898         struct ip_tunnel *t;
899         int err;
900
901         ipn->tunnels[0] = ipn->tunnels_wc;
902         ipn->tunnels[1] = ipn->tunnels_l;
903         ipn->tunnels[2] = ipn->tunnels_r;
904         ipn->tunnels[3] = ipn->tunnels_r_l;
905
906         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
907                                            "tunl0",
908                                            ipip_tunnel_setup);
909         if (!ipn->fb_tunnel_dev) {
910                 err = -ENOMEM;
911                 goto err_alloc_dev;
912         }
913         dev_net_set(ipn->fb_tunnel_dev, net);
914
915         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
916         if (err)
917                 goto err_reg_dev;
918
919         if ((err = register_netdev(ipn->fb_tunnel_dev)))
920                 goto err_reg_dev;
921
922         t = netdev_priv(ipn->fb_tunnel_dev);
923
924         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
925         return 0;
926
927 err_reg_dev:
928         ipip_dev_free(ipn->fb_tunnel_dev);
929 err_alloc_dev:
930         /* nothing */
931         return err;
932 }
933
934 static void __net_exit ipip_exit_net(struct net *net)
935 {
936         struct ipip_net *ipn = net_generic(net, ipip_net_id);
937         LIST_HEAD(list);
938
939         rtnl_lock();
940         ipip_destroy_tunnels(ipn, &list);
941         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
942         unregister_netdevice_many(&list);
943         rtnl_unlock();
944 }
945
946 static struct pernet_operations ipip_net_ops = {
947         .init = ipip_init_net,
948         .exit = ipip_exit_net,
949         .id   = &ipip_net_id,
950         .size = sizeof(struct ipip_net),
951 };
952
953 static int __init ipip_init(void)
954 {
955         int err;
956
957         printk(banner);
958
959         err = register_pernet_device(&ipip_net_ops);
960         if (err < 0)
961                 return err;
962         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
963         if (err < 0) {
964                 pr_info("%s: can't register tunnel\n", __func__);
965                 goto xfrm_tunnel_failed;
966         }
967         err = rtnl_link_register(&ipip_link_ops);
968         if (err < 0)
969                 goto rtnl_link_failed;
970
971 out:
972         return err;
973
974 rtnl_link_failed:
975         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
976 xfrm_tunnel_failed:
977         unregister_pernet_device(&ipip_net_ops);
978         goto out;
979 }
980
981 static void __exit ipip_fini(void)
982 {
983         rtnl_link_unregister(&ipip_link_ops);
984         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
985                 pr_info("%s: can't deregister tunnel\n", __func__);
986
987         unregister_pernet_device(&ipip_net_ops);
988 }
989
990 module_init(ipip_init);
991 module_exit(ipip_fini);
992 MODULE_LICENSE("GPL");
993 MODULE_ALIAS_NETDEV("tunl0");