/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/if_vlan.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
#include <asm/unaligned.h>
#include <trace/events/napi.h>

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

static struct sk_buff_head skb_pool;

static atomic_t trapped;

DEFINE_STATIC_SRCU(netpoll_srcu);

#define USEC_PER_POLL   50
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2

#define MAX_SKB_SIZE                                                    \
        (sizeof(struct ethhdr) +                                        \
         sizeof(struct iphdr) +                                         \
         sizeof(struct udphdr) +                                        \
         MAX_UDP_CHUNK)

static void zap_completion_queue(void);
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
static void netpoll_async_cleanup(struct work_struct *work);

static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

#define np_info(np, fmt, ...)                           \
        pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)                            \
        pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)                         \
        pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)

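/*
 * Worker for the deferred transmit queue: retry each queued skb with the
 * device's xmit lock held. If the queue is frozen/stopped or the driver
 * rejects the packet, put it back at the head of the queue and reschedule
 * ourselves instead of spinning.
 */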
static void queue_process(struct work_struct *work)
{
        struct netpoll_info *npinfo =
                container_of(work, struct netpoll_info, tx_work.work);
        struct sk_buff *skb;
        unsigned long flags;

        while ((skb = skb_dequeue(&npinfo->txq))) {
                struct net_device *dev = skb->dev;
                const struct net_device_ops *ops = dev->netdev_ops;
                struct netdev_queue *txq;

                if (!netif_device_present(dev) || !netif_running(dev)) {
                        __kfree_skb(skb);
                        continue;
                }

                txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

                local_irq_save(flags);
                __netif_tx_lock(txq, smp_processor_id());
                if (netif_xmit_frozen_or_stopped(txq) ||
                    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
                        skb_queue_head(&npinfo->txq, skb);
                        __netif_tx_unlock(txq);
                        local_irq_restore(flags);

                        schedule_delayed_work(&npinfo->tx_work, HZ/10);
                        return;
                }
                __netif_tx_unlock(txq);
                local_irq_restore(flags);
        }
}

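/*
 * Validate the UDP checksum of a received packet. A zero checksum field
 * is accepted (legal for UDP over IPv4), and hardware-verified results
 * (CHECKSUM_UNNECESSARY/CHECKSUM_COMPLETE) are honoured before falling
 * back to a full software checksum.
 */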
static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
                            unsigned short ulen, __be32 saddr, __be32 daddr)
{
        __wsum psum;

        if (uh->check == 0 || skb_csum_unnecessary(skb))
                return 0;

        psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

        if (skb->ip_summed == CHECKSUM_COMPLETE &&
            !csum_fold(csum_add(psum, skb->csum)))
                return 0;

        skb->csum = psum;

        return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * In cases where there is bi-directional communication, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
static int poll_one_napi(struct netpoll_info *npinfo,
                         struct napi_struct *napi, int budget)
{
        int work;

        /* net_rx_action's ->poll() invocations and ours are
         * synchronized by this test which is only made while
         * holding the napi->poll_lock.
         */
        if (!test_bit(NAPI_STATE_SCHED, &napi->state))
                return budget;

        npinfo->rx_flags |= NETPOLL_RX_DROP;
        atomic_inc(&trapped);
        set_bit(NAPI_STATE_NPSVC, &napi->state);

        work = napi->poll(napi, budget);
        trace_napi_poll(napi);

        clear_bit(NAPI_STATE_NPSVC, &napi->state);
        atomic_dec(&trapped);
        npinfo->rx_flags &= ~NETPOLL_RX_DROP;

        return budget - work;
}

static void poll_napi(struct net_device *dev)
{
        struct napi_struct *napi;
        int budget = 16;

        list_for_each_entry(napi, &dev->napi_list, dev_list) {
                if (napi->poll_owner != smp_processor_id() &&
                    spin_trylock(&napi->poll_lock)) {
                        budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
                                               napi, budget);
                        spin_unlock(&napi->poll_lock);

                        if (!budget)
                                break;
                }
        }
}

static void service_neigh_queue(struct netpoll_info *npi)
{
        if (npi) {
                struct sk_buff *skb;

                while ((skb = skb_dequeue(&npi->neigh_tx)))
                        netpoll_neigh_reply(skb, npi);
        }
}

static void netpoll_poll_dev(struct net_device *dev)
{
        const struct net_device_ops *ops;
        struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);

        /* Don't do any rx activity if the dev_lock mutex is held;
         * the dev_open/close paths use this to block netpoll activity
         * while changing device state.
         */
        if (down_trylock(&ni->dev_lock))
                return;

        if (!netif_running(dev)) {
                up(&ni->dev_lock);
                return;
        }

        ops = dev->netdev_ops;
        if (!ops->ndo_poll_controller) {
                up(&ni->dev_lock);
                return;
        }

        /* Process pending work on NIC */
        ops->ndo_poll_controller(dev);

        poll_napi(dev);

        up(&ni->dev_lock);

        if (dev->flags & IFF_SLAVE) {
                if (ni) {
                        struct net_device *bond_dev;
                        struct sk_buff *skb;
                        struct netpoll_info *bond_ni;

                        bond_dev = netdev_master_upper_dev_get_rcu(dev);
                        bond_ni = rcu_dereference_bh(bond_dev->npinfo);
                        while ((skb = skb_dequeue(&ni->neigh_tx))) {
                                skb->dev = bond_dev;
                                skb_queue_tail(&bond_ni->neigh_tx, skb);
                        }
                }
        }

        service_neigh_queue(ni);

        zap_completion_queue();
}

void netpoll_rx_disable(struct net_device *dev)
{
        struct netpoll_info *ni;
        int idx;

        might_sleep();
        idx = srcu_read_lock(&netpoll_srcu);
        ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
        if (ni)
                down(&ni->dev_lock);
        srcu_read_unlock(&netpoll_srcu, idx);
}
EXPORT_SYMBOL(netpoll_rx_disable);

void netpoll_rx_enable(struct net_device *dev)
{
        struct netpoll_info *ni;

        rcu_read_lock();
        ni = rcu_dereference(dev->npinfo);
        if (ni)
                up(&ni->dev_lock);
        rcu_read_unlock();
}
EXPORT_SYMBOL(netpoll_rx_enable);

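/*
 * Keep the emergency skb pool topped up to MAX_SKBS entries so that a
 * message can still get out when alloc_skb() fails under memory pressure.
 */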
static void refill_skbs(void)
{
        struct sk_buff *skb;
        unsigned long flags;

        spin_lock_irqsave(&skb_pool.lock, flags);
        while (skb_pool.qlen < MAX_SKBS) {
                skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
                if (!skb)
                        break;

                __skb_queue_tail(&skb_pool, skb);
        }
        spin_unlock_irqrestore(&skb_pool.lock, flags);
}

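/*
 * Reclaim skbs sitting on this CPU's completion queue so their memory
 * becomes available again. Packets with a destructor are put back through
 * dev_kfree_skb_any() rather than being freed directly in this context.
 */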
static void zap_completion_queue(void)
{
        unsigned long flags;
        struct softnet_data *sd = &get_cpu_var(softnet_data);

        if (sd->completion_queue) {
                struct sk_buff *clist;

                local_irq_save(flags);
                clist = sd->completion_queue;
                sd->completion_queue = NULL;
                local_irq_restore(flags);

                while (clist != NULL) {
                        struct sk_buff *skb = clist;
                        clist = clist->next;
                        if (skb->destructor) {
                                atomic_inc(&skb->users);
                                dev_kfree_skb_any(skb); /* put this one back */
                        } else {
                                __kfree_skb(skb);
                        }
                }
        }

        put_cpu_var(softnet_data);
}

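/*
 * Allocate an skb for transmission: try a fresh atomic allocation first,
 * fall back to the emergency pool, and as a last resort poll the device
 * a few times in the hope that completed tx buffers are reclaimed.
 */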
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
{
        int count = 0;
        struct sk_buff *skb;

        zap_completion_queue();
        refill_skbs();
repeat:

        skb = alloc_skb(len, GFP_ATOMIC);
        if (!skb)
                skb = skb_dequeue(&skb_pool);

        if (!skb) {
                if (++count < 10) {
                        netpoll_poll_dev(np->dev);
                        goto repeat;
                }
                return NULL;
        }

        atomic_set(&skb->users, 1);
        skb_reserve(skb, reserve);
        return skb;
}

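/*
 * Return non-zero if the current CPU is already inside ->poll() for one
 * of the device's NAPI contexts; netpoll_send_skb_on_dev() uses this to
 * avoid transmit recursion.
 */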
static int netpoll_owner_active(struct net_device *dev)
{
        struct napi_struct *napi;

        list_for_each_entry(napi, &dev->napi_list, dev_list) {
                if (napi->poll_owner == smp_processor_id())
                        return 1;
        }
        return 0;
}

/* call with IRQ disabled */
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
                             struct net_device *dev)
{
        int status = NETDEV_TX_BUSY;
        unsigned long tries;
        const struct net_device_ops *ops = dev->netdev_ops;
        /* It is up to the caller to keep npinfo alive. */
        struct netpoll_info *npinfo;

        WARN_ON_ONCE(!irqs_disabled());

        npinfo = rcu_dereference_bh(np->dev->npinfo);
        if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
                __kfree_skb(skb);
                return;
        }

        /* don't get messages out of order, and no recursion */
        if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
                struct netdev_queue *txq;

                txq = netdev_pick_tx(dev, skb, NULL);

                /* try until next clock tick */
                for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
                     tries > 0; --tries) {
                        if (__netif_tx_trylock(txq)) {
                                if (!netif_xmit_stopped(txq)) {
                                        if (vlan_tx_tag_present(skb) &&
                                            !vlan_hw_offload_capable(netif_skb_features(skb),
                                                                     skb->vlan_proto)) {
                                                skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
                                                if (unlikely(!skb)) {
                                                        /* This is actually a packet drop, but we
                                                         * don't want the code at the end of this
                                                         * function to try and re-queue a NULL skb.
                                                         */
                                                        status = NETDEV_TX_OK;
                                                        goto unlock_txq;
                                                }
                                                skb->vlan_tci = 0;
                                        }

                                        status = ops->ndo_start_xmit(skb, dev);
                                        if (status == NETDEV_TX_OK)
                                                txq_trans_update(txq);
                                }
                        unlock_txq:
                                __netif_tx_unlock(txq);

                                if (status == NETDEV_TX_OK)
                                        break;
                        }

                        /* tickle the device, maybe there is some cleanup */
                        netpoll_poll_dev(np->dev);

                        udelay(USEC_PER_POLL);
                }

                WARN_ONCE(!irqs_disabled(),
                        "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
                        dev->name, ops->ndo_start_xmit);
        }

        if (status != NETDEV_TX_OK) {
                skb_queue_tail(&npinfo->txq, skb);
                schedule_delayed_work(&npinfo->tx_work, 0);
        }
}
EXPORT_SYMBOL(netpoll_send_skb_on_dev);

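/*
 * netpoll_send_udp() below is the entry point used by clients such as
 * netconsole. A minimal caller sketch (hypothetical names; assumes a
 * struct netpoll already configured and registered via netpoll_setup()
 * at init time):
 *
 *      static struct netpoll np;
 *
 *      static void emit_line(const char *msg, int len)
 *      {
 *              netpoll_send_udp(&np, msg, len);
 *      }
 *
 * Callers are expected to keep len within MAX_UDP_CHUNK so that skbs
 * drawn from the emergency pool are large enough for the frame.
 */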
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
        int total_len, ip_len, udp_len;
        struct sk_buff *skb;
        struct udphdr *udph;
        struct iphdr *iph;
        struct ethhdr *eth;
        static atomic_t ip_ident;
        struct ipv6hdr *ip6h;

        udp_len = len + sizeof(*udph);
        if (np->ipv6)
                ip_len = udp_len + sizeof(*ip6h);
        else
                ip_len = udp_len + sizeof(*iph);

        total_len = ip_len + LL_RESERVED_SPACE(np->dev);

        skb = find_skb(np, total_len + np->dev->needed_tailroom,
                       total_len - len);
        if (!skb)
                return;

        skb_copy_to_linear_data(skb, msg, len);
        skb_put(skb, len);

        skb_push(skb, sizeof(*udph));
        skb_reset_transport_header(skb);
        udph = udp_hdr(skb);
        udph->source = htons(np->local_port);
        udph->dest = htons(np->remote_port);
        udph->len = htons(udp_len);

        if (np->ipv6) {
                udph->check = 0;
                udph->check = csum_ipv6_magic(&np->local_ip.in6,
                                              &np->remote_ip.in6,
                                              udp_len, IPPROTO_UDP,
                                              csum_partial(udph, udp_len, 0));
                if (udph->check == 0)
                        udph->check = CSUM_MANGLED_0;

                skb_push(skb, sizeof(*ip6h));
                skb_reset_network_header(skb);
                ip6h = ipv6_hdr(skb);

                /* ip6h->version = 6; ip6h->priority = 0; */
                put_unaligned(0x60, (unsigned char *)ip6h);
                ip6h->flow_lbl[0] = 0;
                ip6h->flow_lbl[1] = 0;
                ip6h->flow_lbl[2] = 0;

                ip6h->payload_len = htons(sizeof(struct udphdr) + len);
                ip6h->nexthdr = IPPROTO_UDP;
                ip6h->hop_limit = 32;
                ip6h->saddr = np->local_ip.in6;
                ip6h->daddr = np->remote_ip.in6;

                eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
                skb_reset_mac_header(skb);
                skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
        } else {
                udph->check = 0;
                udph->check = csum_tcpudp_magic(np->local_ip.ip,
                                                np->remote_ip.ip,
                                                udp_len, IPPROTO_UDP,
                                                csum_partial(udph, udp_len, 0));
                if (udph->check == 0)
                        udph->check = CSUM_MANGLED_0;

                skb_push(skb, sizeof(*iph));
                skb_reset_network_header(skb);
                iph = ip_hdr(skb);

                /* iph->version = 4; iph->ihl = 5; */
                put_unaligned(0x45, (unsigned char *)iph);
                iph->tos      = 0;
                put_unaligned(htons(ip_len), &(iph->tot_len));
                iph->id       = htons(atomic_inc_return(&ip_ident));
                iph->frag_off = 0;
                iph->ttl      = 64;
                iph->protocol = IPPROTO_UDP;
                iph->check    = 0;
                put_unaligned(np->local_ip.ip, &(iph->saddr));
                put_unaligned(np->remote_ip.ip, &(iph->daddr));
                iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

                eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
                skb_reset_mac_header(skb);
                skb->protocol = eth->h_proto = htons(ETH_P_IP);
        }

        ether_addr_copy(eth->h_source, np->dev->dev_addr);
        ether_addr_copy(eth->h_dest, np->remote_mac);

        skb->dev = np->dev;

        netpoll_send_skb(np, skb);
}
EXPORT_SYMBOL(netpoll_send_udp);

static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
{
        int size, type = ARPOP_REPLY;
        __be32 sip, tip;
        unsigned char *sha;
        struct sk_buff *send_skb;
        struct netpoll *np, *tmp;
        unsigned long flags;
        int hlen, tlen;
        int hits = 0, proto;

        if (list_empty(&npinfo->rx_np))
                return;

        /* Before checking the packet, we do some early
         * inspection to see whether this is interesting at all.
         */
        spin_lock_irqsave(&npinfo->rx_lock, flags);
        list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                if (np->dev == skb->dev)
                        hits++;
        }
        spin_unlock_irqrestore(&npinfo->rx_lock, flags);

        /* No netpoll struct is using this dev */
        if (!hits)
                return;

        proto = ntohs(eth_hdr(skb)->h_proto);
        if (proto == ETH_P_ARP) {
                struct arphdr *arp;
                unsigned char *arp_ptr;

                /* No arp on this interface */
                if (skb->dev->flags & IFF_NOARP)
                        return;

                if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
                        return;

                skb_reset_network_header(skb);
                skb_reset_transport_header(skb);
                arp = arp_hdr(skb);

                if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
                     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
                    arp->ar_pro != htons(ETH_P_IP) ||
                    arp->ar_op != htons(ARPOP_REQUEST))
                        return;

                arp_ptr = (unsigned char *)(arp + 1);
                /* save the location of the src hw addr */
                sha = arp_ptr;
                arp_ptr += skb->dev->addr_len;
                memcpy(&sip, arp_ptr, 4);
                arp_ptr += 4;
                /* If we actually cared about dst hw addr,
                 * it would get copied here.
                 */
                arp_ptr += skb->dev->addr_len;
                memcpy(&tip, arp_ptr, 4);

                /* Should we ignore arp? */
                if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
                        return;

                size = arp_hdr_len(skb->dev);

                spin_lock_irqsave(&npinfo->rx_lock, flags);
                list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                        if (tip != np->local_ip.ip)
                                continue;

                        hlen = LL_RESERVED_SPACE(np->dev);
                        tlen = np->dev->needed_tailroom;
                        send_skb = find_skb(np, size + hlen + tlen, hlen);
                        if (!send_skb)
                                continue;

                        skb_reset_network_header(send_skb);
                        arp = (struct arphdr *) skb_put(send_skb, size);
                        send_skb->dev = skb->dev;
                        send_skb->protocol = htons(ETH_P_ARP);

                        /* Fill the device header for the ARP frame */
                        if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
                                            sha, np->dev->dev_addr,
                                            send_skb->len) < 0) {
                                kfree_skb(send_skb);
                                continue;
                        }

                        /*
                         * Fill out the arp protocol part.
                         *
                         * we only support ethernet device type,
                         * which (according to RFC 1390) should
                         * always equal 1 (Ethernet).
                         */

                        arp->ar_hrd = htons(np->dev->type);
                        arp->ar_pro = htons(ETH_P_IP);
                        arp->ar_hln = np->dev->addr_len;
                        arp->ar_pln = 4;
                        arp->ar_op = htons(type);

                        arp_ptr = (unsigned char *)(arp + 1);
                        memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
                        arp_ptr += np->dev->addr_len;
                        memcpy(arp_ptr, &tip, 4);
                        arp_ptr += 4;
                        memcpy(arp_ptr, sha, np->dev->addr_len);
                        arp_ptr += np->dev->addr_len;
                        memcpy(arp_ptr, &sip, 4);

                        netpoll_send_skb(np, send_skb);

                        /* If there are several rx_skb_hooks for the same
                         * address we're fine by sending a single reply
                         */
                        break;
                }
                spin_unlock_irqrestore(&npinfo->rx_lock, flags);
        } else if (proto == ETH_P_IPV6) {
#if IS_ENABLED(CONFIG_IPV6)
                struct nd_msg *msg;
                u8 *lladdr = NULL;
                struct ipv6hdr *hdr;
                struct icmp6hdr *icmp6h;
                const struct in6_addr *saddr;
                const struct in6_addr *daddr;
                struct inet6_dev *in6_dev = NULL;
                struct in6_addr *target;

                in6_dev = in6_dev_get(skb->dev);
                if (!in6_dev || !in6_dev->cnf.accept_ra)
                        return;

                if (!pskb_may_pull(skb, skb->len))
                        return;

                msg = (struct nd_msg *)skb_transport_header(skb);

                __skb_push(skb, skb->data - skb_transport_header(skb));

                if (ipv6_hdr(skb)->hop_limit != 255)
                        return;
                if (msg->icmph.icmp6_code != 0)
                        return;
                if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
                        return;

                saddr = &ipv6_hdr(skb)->saddr;
                daddr = &ipv6_hdr(skb)->daddr;

                size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);

                spin_lock_irqsave(&npinfo->rx_lock, flags);
                list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                        if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
                                continue;

                        hlen = LL_RESERVED_SPACE(np->dev);
                        tlen = np->dev->needed_tailroom;
                        send_skb = find_skb(np, size + hlen + tlen, hlen);
                        if (!send_skb)
                                continue;

                        send_skb->protocol = htons(ETH_P_IPV6);
                        send_skb->dev = skb->dev;

                        skb_reset_network_header(send_skb);
                        hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr));
                        *(__be32 *)hdr = htonl(0x60000000);
                        hdr->payload_len = htons(size);
                        hdr->nexthdr = IPPROTO_ICMPV6;
                        hdr->hop_limit = 255;
                        hdr->saddr = *saddr;
                        hdr->daddr = *daddr;

                        icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr));
                        icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
                        icmp6h->icmp6_router = 0;
                        icmp6h->icmp6_solicited = 1;

                        target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr));
                        *target = msg->target;
                        icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
                                                              IPPROTO_ICMPV6,
                                                              csum_partial(icmp6h,
                                                                           size, 0));

                        if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
                                            lladdr, np->dev->dev_addr,
                                            send_skb->len) < 0) {
                                kfree_skb(send_skb);
                                continue;
                        }

                        netpoll_send_skb(np, send_skb);

                        /* If there are several rx_skb_hooks for the same
                         * address, we're fine by sending a single reply
                         */
                        break;
                }
                spin_unlock_irqrestore(&npinfo->rx_lock, flags);
#endif
        }
}

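/*
 * Return true if the skb is an IPv6 neighbour solicitation. Used by the
 * rx path to queue NS packets for a deferred reply while the stack is
 * trapped.
 */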
static bool pkt_is_ns(struct sk_buff *skb)
{
        struct nd_msg *msg;
        struct ipv6hdr *hdr;

        if (skb->protocol != htons(ETH_P_IPV6))
                return false;
        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
                return false;

        msg = (struct nd_msg *)skb_transport_header(skb);
        __skb_push(skb, skb->data - skb_transport_header(skb));
        hdr = ipv6_hdr(skb);

        if (hdr->nexthdr != IPPROTO_ICMPV6)
                return false;
        if (hdr->hop_limit != 255)
                return false;
        if (msg->icmph.icmp6_code != 0)
                return false;
        if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
                return false;

        return true;
}

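/*
 * Receive hook: validate the IPv4/IPv6 and UDP headers and deliver
 * matching datagrams to every registered rx_skb_hook. Returns 1 when
 * the skb was consumed (delivered, queued, or dropped while trapped)
 * and 0 to let the regular network stack process it.
 */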
int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
{
        int proto, len, ulen, data_len;
        int hits = 0, offset;
        const struct iphdr *iph;
        struct udphdr *uh;
        struct netpoll *np, *tmp;
        uint16_t source;

        if (list_empty(&npinfo->rx_np))
                goto out;

        if (skb->dev->type != ARPHRD_ETHER)
                goto out;

        /* check if netpoll clients need ARP */
        if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
                skb_queue_tail(&npinfo->neigh_tx, skb);
                return 1;
        } else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
                skb_queue_tail(&npinfo->neigh_tx, skb);
                return 1;
        }

        if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
                skb = vlan_untag(skb);
                if (unlikely(!skb))
                        goto out;
        }

        proto = ntohs(eth_hdr(skb)->h_proto);
        if (proto != ETH_P_IP && proto != ETH_P_IPV6)
                goto out;
        if (skb->pkt_type == PACKET_OTHERHOST)
                goto out;
        if (skb_shared(skb))
                goto out;

        if (proto == ETH_P_IP) {
                if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                        goto out;
                iph = (struct iphdr *)skb->data;
                if (iph->ihl < 5 || iph->version != 4)
                        goto out;
                if (!pskb_may_pull(skb, iph->ihl*4))
                        goto out;
                iph = (struct iphdr *)skb->data;
                if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
                        goto out;

                len = ntohs(iph->tot_len);
                if (skb->len < len || len < iph->ihl*4)
                        goto out;

                /*
                 * Our transport medium may have padded the buffer out.
                 * Now we trim to the true length of the frame.
                 */
                if (pskb_trim_rcsum(skb, len))
                        goto out;

                iph = (struct iphdr *)skb->data;
                if (iph->protocol != IPPROTO_UDP)
                        goto out;

                len -= iph->ihl*4;
                uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
                offset = (unsigned char *)(uh + 1) - skb->data;
                ulen = ntohs(uh->len);
                data_len = skb->len - offset;
                source = ntohs(uh->source);

                if (ulen != len)
                        goto out;
                if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
                        goto out;
                list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                        if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
                                continue;
                        if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
                                continue;
                        if (np->local_port && np->local_port != ntohs(uh->dest))
                                continue;

                        np->rx_skb_hook(np, source, skb, offset, data_len);
                        hits++;
                }
        } else {
#if IS_ENABLED(CONFIG_IPV6)
                const struct ipv6hdr *ip6h;

                if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
                        goto out;
                ip6h = (struct ipv6hdr *)skb->data;
                if (ip6h->version != 6)
                        goto out;
                len = ntohs(ip6h->payload_len);
                if (!len)
                        goto out;
                if (len + sizeof(struct ipv6hdr) > skb->len)
                        goto out;
                if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
                        goto out;
                ip6h = ipv6_hdr(skb);
                if (!pskb_may_pull(skb, sizeof(struct udphdr)))
                        goto out;
                uh = udp_hdr(skb);
                offset = (unsigned char *)(uh + 1) - skb->data;
                ulen = ntohs(uh->len);
                data_len = skb->len - offset;
                source = ntohs(uh->source);
                if (ulen != skb->len)
                        goto out;
                if (udp6_csum_init(skb, uh, IPPROTO_UDP))
                        goto out;
                list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
                        if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
                                continue;
                        if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
                                continue;
                        if (np->local_port && np->local_port != ntohs(uh->dest))
                                continue;

                        np->rx_skb_hook(np, source, skb, offset, data_len);
                        hits++;
                }
#endif
        }

        if (!hits)
                goto out;

        kfree_skb(skb);
        return 1;

out:
        if (atomic_read(&trapped)) {
                kfree_skb(skb);
                return 1;
        }

        return 0;
}

void netpoll_print_options(struct netpoll *np)
{
        np_info(np, "local port %d\n", np->local_port);
        if (np->ipv6)
                np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
        else
                np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
        np_info(np, "interface '%s'\n", np->dev_name);
        np_info(np, "remote port %d\n", np->remote_port);
        if (np->ipv6)
                np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
        else
                np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
        np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
EXPORT_SYMBOL(netpoll_print_options);

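/*
 * Parse an IPv4 or IPv6 address. Returns 0 for IPv4, 1 for IPv6 (when
 * CONFIG_IPV6 is enabled), or -1 if the string is not a valid address.
 */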
static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
{
        const char *end;

        if (!strchr(str, ':') &&
            in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
                if (!*end)
                        return 0;
        }
        if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
#if IS_ENABLED(CONFIG_IPV6)
                if (!*end)
                        return 1;
#else
                return -1;
#endif
        }
        return -1;
}

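/*
 * netpoll_parse_options() below accepts the config string format used by
 * netconsole:
 *
 *      [src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]
 *
 * For example (illustrative addresses only):
 *
 *      6665@10.0.0.1/eth0,6666@10.0.0.2/00:11:22:33:44:55
 *
 * Omitted optional fields keep whatever defaults the caller put in *np.
 */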
int netpoll_parse_options(struct netpoll *np, char *opt)
{
        char *cur = opt, *delim;
        int ipv6;
        bool ipversion_set = false;

        if (*cur != '@') {
                if ((delim = strchr(cur, '@')) == NULL)
                        goto parse_failed;
                *delim = 0;
                if (kstrtou16(cur, 10, &np->local_port))
                        goto parse_failed;
                cur = delim;
        }
        cur++;

        if (*cur != '/') {
                ipversion_set = true;
                if ((delim = strchr(cur, '/')) == NULL)
                        goto parse_failed;
                *delim = 0;
                ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
                if (ipv6 < 0)
                        goto parse_failed;
                else
                        np->ipv6 = (bool)ipv6;
                cur = delim;
        }
        cur++;

        if (*cur != ',') {
                /* parse out dev name */
                if ((delim = strchr(cur, ',')) == NULL)
                        goto parse_failed;
                *delim = 0;
                strlcpy(np->dev_name, cur, sizeof(np->dev_name));
                cur = delim;
        }
        cur++;

        if (*cur != '@') {
                /* dst port */
                if ((delim = strchr(cur, '@')) == NULL)
                        goto parse_failed;
                *delim = 0;
                if (*cur == ' ' || *cur == '\t')
                        np_info(np, "warning: whitespace is not allowed\n");
                if (kstrtou16(cur, 10, &np->remote_port))
                        goto parse_failed;
                cur = delim;
        }
        cur++;

        /* dst ip */
        if ((delim = strchr(cur, '/')) == NULL)
                goto parse_failed;
        *delim = 0;
        ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
        if (ipv6 < 0)
                goto parse_failed;
        else if (ipversion_set && np->ipv6 != (bool)ipv6)
                goto parse_failed;
        else
                np->ipv6 = (bool)ipv6;
        cur = delim + 1;

        if (*cur != 0) {
                /* MAC address */
                if (!mac_pton(cur, np->remote_mac))
                        goto parse_failed;
        }

        netpoll_print_options(np);

        return 0;

 parse_failed:
        np_info(np, "couldn't parse config at '%s'!\n", cur);
        return -1;
}
EXPORT_SYMBOL(netpoll_parse_options);

int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
        struct netpoll_info *npinfo;
        const struct net_device_ops *ops;
        unsigned long flags;
        int err;

        np->dev = ndev;
        strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
        INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);

        if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
            !ndev->netdev_ops->ndo_poll_controller) {
                np_err(np, "%s doesn't support polling, aborting\n",
                       np->dev_name);
                err = -ENOTSUPP;
                goto out;
        }

        if (!ndev->npinfo) {
                npinfo = kmalloc(sizeof(*npinfo), gfp);
                if (!npinfo) {
                        err = -ENOMEM;
                        goto out;
                }

                npinfo->rx_flags = 0;
                INIT_LIST_HEAD(&npinfo->rx_np);

                spin_lock_init(&npinfo->rx_lock);
                sema_init(&npinfo->dev_lock, 1);
                skb_queue_head_init(&npinfo->neigh_tx);
                skb_queue_head_init(&npinfo->txq);
                INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

                atomic_set(&npinfo->refcnt, 1);

                ops = np->dev->netdev_ops;
                if (ops->ndo_netpoll_setup) {
                        err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
                        if (err)
                                goto free_npinfo;
                }
        } else {
                npinfo = rtnl_dereference(ndev->npinfo);
                atomic_inc(&npinfo->refcnt);
        }

        npinfo->netpoll = np;

        if (np->rx_skb_hook) {
                spin_lock_irqsave(&npinfo->rx_lock, flags);
                npinfo->rx_flags |= NETPOLL_RX_ENABLED;
                list_add_tail(&np->rx, &npinfo->rx_np);
                spin_unlock_irqrestore(&npinfo->rx_lock, flags);
        }

        /* last thing to do is link it to the net device structure */
        rcu_assign_pointer(ndev->npinfo, npinfo);

        return 0;

free_npinfo:
        kfree(npinfo);
out:
        return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
        struct net_device *ndev = NULL;
        struct in_device *in_dev;
        int err;

        rtnl_lock();
        if (np->dev_name) {
                struct net *net = current->nsproxy->net_ns;
                ndev = __dev_get_by_name(net, np->dev_name);
        }
        if (!ndev) {
                np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
                err = -ENODEV;
                goto unlock;
        }
        dev_hold(ndev);

        if (netdev_master_upper_dev_get(ndev)) {
                np_err(np, "%s is a slave device, aborting\n", np->dev_name);
                err = -EBUSY;
                goto put;
        }

        if (!netif_running(ndev)) {
                unsigned long atmost, atleast;

                np_info(np, "device %s not up yet, forcing it\n", np->dev_name);

                err = dev_open(ndev);
                if (err) {
                        np_err(np, "failed to open %s\n", ndev->name);
                        goto put;
                }

                rtnl_unlock();
                atleast = jiffies + HZ/10;
                atmost = jiffies + carrier_timeout * HZ;
                while (!netif_carrier_ok(ndev)) {
                        if (time_after(jiffies, atmost)) {
                                np_notice(np, "timeout waiting for carrier\n");
                                break;
                        }
                        msleep(1);
                }

                /* If carrier appears to come up instantly, we don't
                 * trust it and pause so that we don't pump all our
                 * queued console messages into the bitbucket.
                 */
                if (time_before(jiffies, atleast)) {
                        np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
                        msleep(4000);
                }
                rtnl_lock();
        }

        if (!np->local_ip.ip) {
                if (!np->ipv6) {
                        in_dev = __in_dev_get_rtnl(ndev);

                        if (!in_dev || !in_dev->ifa_list) {
                                np_err(np, "no IP address for %s, aborting\n",
                                       np->dev_name);
                                err = -EDESTADDRREQ;
                                goto put;
                        }

                        np->local_ip.ip = in_dev->ifa_list->ifa_local;
                        np_info(np, "local IP %pI4\n", &np->local_ip.ip);
                } else {
#if IS_ENABLED(CONFIG_IPV6)
                        struct inet6_dev *idev;

                        err = -EDESTADDRREQ;
                        idev = __in6_dev_get(ndev);
                        if (idev) {
                                struct inet6_ifaddr *ifp;

                                read_lock_bh(&idev->lock);
                                list_for_each_entry(ifp, &idev->addr_list, if_list) {
                                        if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
                                                continue;
                                        np->local_ip.in6 = ifp->addr;
                                        err = 0;
                                        break;
                                }
                                read_unlock_bh(&idev->lock);
                        }
                        if (err) {
                                np_err(np, "no IPv6 address for %s, aborting\n",
                                       np->dev_name);
                                goto put;
                        } else
                                np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
#else
                        np_err(np, "IPv6 is not supported %s, aborting\n",
                               np->dev_name);
                        err = -EINVAL;
                        goto put;
#endif
                }
        }

        /* fill up the skb queue */
        refill_skbs();

        err = __netpoll_setup(np, ndev, GFP_KERNEL);
        if (err)
                goto put;

        rtnl_unlock();
        return 0;

put:
        dev_put(ndev);
unlock:
        rtnl_unlock();
        return err;
}
EXPORT_SYMBOL(netpoll_setup);

static int __init netpoll_init(void)
{
        skb_queue_head_init(&skb_pool);
        return 0;
}
core_initcall(netpoll_init);

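/*
 * RCU callback that finishes tearing down a netpoll_info once all
 * readers are done with it: purge the pending queues, cancel the
 * deferred tx work (no _sync variant, since we may run in softirq
 * context), and free the structure.
 */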
static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
        struct netpoll_info *npinfo =
                        container_of(rcu_head, struct netpoll_info, rcu);

        skb_queue_purge(&npinfo->neigh_tx);
        skb_queue_purge(&npinfo->txq);

        /* we can't call cancel_delayed_work_sync here, as we are in softirq */
        cancel_delayed_work(&npinfo->tx_work);

        /* clean after last, unfinished work */
        __skb_queue_purge(&npinfo->txq);
        /* now cancel it again */
        cancel_delayed_work(&npinfo->tx_work);
        kfree(npinfo);
}

void __netpoll_cleanup(struct netpoll *np)
{
        struct netpoll_info *npinfo;
        unsigned long flags;

        /* rtnl_dereference would be preferable here but
         * rcu_cleanup_netpoll path can put us in here safely without
         * holding the rtnl, so plain rcu_dereference it is
         */
        npinfo = rtnl_dereference(np->dev->npinfo);
        if (!npinfo)
                return;

        if (!list_empty(&npinfo->rx_np)) {
                spin_lock_irqsave(&npinfo->rx_lock, flags);
                list_del(&np->rx);
                if (list_empty(&npinfo->rx_np))
                        npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
                spin_unlock_irqrestore(&npinfo->rx_lock, flags);
        }

        synchronize_srcu(&netpoll_srcu);

        if (atomic_dec_and_test(&npinfo->refcnt)) {
                const struct net_device_ops *ops;

                ops = np->dev->netdev_ops;
                if (ops->ndo_netpoll_cleanup)
                        ops->ndo_netpoll_cleanup(np->dev);

                rcu_assign_pointer(np->dev->npinfo, NULL);
                call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
        }
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);

static void netpoll_async_cleanup(struct work_struct *work)
{
        struct netpoll *np = container_of(work, struct netpoll, cleanup_work);

        rtnl_lock();
        __netpoll_cleanup(np);
        rtnl_unlock();
        kfree(np);
}

void __netpoll_free_async(struct netpoll *np)
{
        schedule_work(&np->cleanup_work);
}
EXPORT_SYMBOL_GPL(__netpoll_free_async);

void netpoll_cleanup(struct netpoll *np)
{
        rtnl_lock();
        if (!np->dev)
                goto out;
        __netpoll_cleanup(np);
        dev_put(np->dev);
        np->dev = NULL;
out:
        rtnl_unlock();
}
EXPORT_SYMBOL(netpoll_cleanup);

int netpoll_trap(void)
{
        return atomic_read(&trapped);
}
EXPORT_SYMBOL(netpoll_trap);

void netpoll_set_trap(int trap)
{
        if (trap)
                atomic_inc(&trapped);
        else
                atomic_dec(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);