]> Pileus Git - ~andy/linux/blob - net/core/neighbour.c
Merge remote-tracking branches 'asoc/topic/ad1836', 'asoc/topic/ad193x', 'asoc/topic...
[~andy/linux] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41
42 #define DEBUG
43 #define NEIGH_DEBUG 1
/*
 * Emit a debug message through pr_debug() when @level does not exceed the
 * compile-time NEIGH_DEBUG verbosity.
 *
 * @level is parenthesized so that an expression argument cannot change
 * the meaning of the comparison through operator precedence.
 */
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if ((level) <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)
49
50 #define PNEIGH_HASHMASK         0xF
51
52 static void neigh_timer_handler(unsigned long arg);
53 static void __neigh_notify(struct neighbour *n, int type, int flags);
54 static void neigh_update_notify(struct neighbour *neigh);
55 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
56
57 static struct neigh_table *neigh_tables;
58 #ifdef CONFIG_PROC_FS
59 static const struct file_operations neigh_stat_seq_fops;
60 #endif
61
62 /*
63    Neighbour hash table buckets are protected with rwlock tbl->lock.
64
65    - All the scans/updates to hash buckets MUST be made under this lock.
66    - NOTHING clever should be made under this lock: no callbacks
67      to protocol backends, no attempts to send something to network.
68      It will result in deadlocks, if backend/driver wants to use neighbour
69      cache.
70    - If the entry requires some non-trivial actions, increase
71      its reference count and release table lock.
72
73    Neighbour entries are protected:
74    - with reference count.
75    - with rwlock neigh->lock
76
77    Reference count prevents destruction.
78
79    neigh->lock mainly serializes ll address data and its validity state.
80    However, the same lock is used to protect another entry fields:
81     - timer
82     - resolution queue
83
84    Again, nothing clever shall be made under neigh->lock,
85    the most complicated procedure, which we allow is dev->hard_header.
86    It is supposed, that dev->hard_header is simplistic and does
87    not make callbacks to neighbour tables.
88
89    The last lock is neigh_tbl_lock. It is a pure SMP lock protecting the
90    list of neighbour tables. This list is used only in process context.
91  */
92
93 static DEFINE_RWLOCK(neigh_tbl_lock);
94
95 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
96 {
97         kfree_skb(skb);
98         return -ENETDOWN;
99 }
100
101 static void neigh_cleanup_and_release(struct neighbour *neigh)
102 {
103         if (neigh->parms->neigh_cleanup)
104                 neigh->parms->neigh_cleanup(neigh);
105
106         __neigh_notify(neigh, RTM_DELNEIGH, 0);
107         neigh_release(neigh);
108 }
109
/*
 * Pick a random value in the interval (1/2)*base ... (3/2)*base.
 * This corresponds to the default IPv6 settings and is deliberately
 * not overridable, because it is a reasonable choice.
 *
 * Returns 0 when @base is 0.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (net_random() % base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
121
122
123 static int neigh_forced_gc(struct neigh_table *tbl)
124 {
125         int shrunk = 0;
126         int i;
127         struct neigh_hash_table *nht;
128
129         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
130
131         write_lock_bh(&tbl->lock);
132         nht = rcu_dereference_protected(tbl->nht,
133                                         lockdep_is_held(&tbl->lock));
134         for (i = 0; i < (1 << nht->hash_shift); i++) {
135                 struct neighbour *n;
136                 struct neighbour __rcu **np;
137
138                 np = &nht->hash_buckets[i];
139                 while ((n = rcu_dereference_protected(*np,
140                                         lockdep_is_held(&tbl->lock))) != NULL) {
141                         /* Neighbour record may be discarded if:
142                          * - nobody refers to it.
143                          * - it is not permanent
144                          */
145                         write_lock(&n->lock);
146                         if (atomic_read(&n->refcnt) == 1 &&
147                             !(n->nud_state & NUD_PERMANENT)) {
148                                 rcu_assign_pointer(*np,
149                                         rcu_dereference_protected(n->next,
150                                                   lockdep_is_held(&tbl->lock)));
151                                 n->dead = 1;
152                                 shrunk  = 1;
153                                 write_unlock(&n->lock);
154                                 neigh_cleanup_and_release(n);
155                                 continue;
156                         }
157                         write_unlock(&n->lock);
158                         np = &n->next;
159                 }
160         }
161
162         tbl->last_flush = jiffies;
163
164         write_unlock_bh(&tbl->lock);
165
166         return shrunk;
167 }
168
169 static void neigh_add_timer(struct neighbour *n, unsigned long when)
170 {
171         neigh_hold(n);
172         if (unlikely(mod_timer(&n->timer, when))) {
173                 printk("NEIGH: BUG, double timer add, state is %x\n",
174                        n->nud_state);
175                 dump_stack();
176         }
177 }
178
179 static int neigh_del_timer(struct neighbour *n)
180 {
181         if ((n->nud_state & NUD_IN_TIMER) &&
182             del_timer(&n->timer)) {
183                 neigh_release(n);
184                 return 1;
185         }
186         return 0;
187 }
188
189 static void pneigh_queue_purge(struct sk_buff_head *list)
190 {
191         struct sk_buff *skb;
192
193         while ((skb = skb_dequeue(list)) != NULL) {
194                 dev_put(skb->dev);
195                 kfree_skb(skb);
196         }
197 }
198
199 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
200 {
201         int i;
202         struct neigh_hash_table *nht;
203
204         nht = rcu_dereference_protected(tbl->nht,
205                                         lockdep_is_held(&tbl->lock));
206
207         for (i = 0; i < (1 << nht->hash_shift); i++) {
208                 struct neighbour *n;
209                 struct neighbour __rcu **np = &nht->hash_buckets[i];
210
211                 while ((n = rcu_dereference_protected(*np,
212                                         lockdep_is_held(&tbl->lock))) != NULL) {
213                         if (dev && n->dev != dev) {
214                                 np = &n->next;
215                                 continue;
216                         }
217                         rcu_assign_pointer(*np,
218                                    rcu_dereference_protected(n->next,
219                                                 lockdep_is_held(&tbl->lock)));
220                         write_lock(&n->lock);
221                         neigh_del_timer(n);
222                         n->dead = 1;
223
224                         if (atomic_read(&n->refcnt) != 1) {
225                                 /* The most unpleasant situation.
226                                    We must destroy neighbour entry,
227                                    but someone still uses it.
228
229                                    The destroy will be delayed until
230                                    the last user releases us, but
231                                    we must kill timers etc. and move
232                                    it to safe state.
233                                  */
234                                 __skb_queue_purge(&n->arp_queue);
235                                 n->arp_queue_len_bytes = 0;
236                                 n->output = neigh_blackhole;
237                                 if (n->nud_state & NUD_VALID)
238                                         n->nud_state = NUD_NOARP;
239                                 else
240                                         n->nud_state = NUD_NONE;
241                                 neigh_dbg(2, "neigh %p is stray\n", n);
242                         }
243                         write_unlock(&n->lock);
244                         neigh_cleanup_and_release(n);
245                 }
246         }
247 }
248
249 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
250 {
251         write_lock_bh(&tbl->lock);
252         neigh_flush_dev(tbl, dev);
253         write_unlock_bh(&tbl->lock);
254 }
255 EXPORT_SYMBOL(neigh_changeaddr);
256
257 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
258 {
259         write_lock_bh(&tbl->lock);
260         neigh_flush_dev(tbl, dev);
261         pneigh_ifdown(tbl, dev);
262         write_unlock_bh(&tbl->lock);
263
264         del_timer_sync(&tbl->proxy_timer);
265         pneigh_queue_purge(&tbl->proxy_queue);
266         return 0;
267 }
268 EXPORT_SYMBOL(neigh_ifdown);
269
270 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
271 {
272         struct neighbour *n = NULL;
273         unsigned long now = jiffies;
274         int entries;
275
276         entries = atomic_inc_return(&tbl->entries) - 1;
277         if (entries >= tbl->gc_thresh3 ||
278             (entries >= tbl->gc_thresh2 &&
279              time_after(now, tbl->last_flush + 5 * HZ))) {
280                 if (!neigh_forced_gc(tbl) &&
281                     entries >= tbl->gc_thresh3)
282                         goto out_entries;
283         }
284
285         n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
286         if (!n)
287                 goto out_entries;
288
289         __skb_queue_head_init(&n->arp_queue);
290         rwlock_init(&n->lock);
291         seqlock_init(&n->ha_lock);
292         n->updated        = n->used = now;
293         n->nud_state      = NUD_NONE;
294         n->output         = neigh_blackhole;
295         seqlock_init(&n->hh.hh_lock);
296         n->parms          = neigh_parms_clone(&tbl->parms);
297         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
298
299         NEIGH_CACHE_STAT_INC(tbl, allocs);
300         n->tbl            = tbl;
301         atomic_set(&n->refcnt, 1);
302         n->dead           = 1;
303 out:
304         return n;
305
306 out_entries:
307         atomic_dec(&tbl->entries);
308         goto out;
309 }
310
311 static void neigh_get_hash_rnd(u32 *x)
312 {
313         get_random_bytes(x, sizeof(*x));
314         *x |= 1;
315 }
316
317 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
318 {
319         size_t size = (1 << shift) * sizeof(struct neighbour *);
320         struct neigh_hash_table *ret;
321         struct neighbour __rcu **buckets;
322         int i;
323
324         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
325         if (!ret)
326                 return NULL;
327         if (size <= PAGE_SIZE)
328                 buckets = kzalloc(size, GFP_ATOMIC);
329         else
330                 buckets = (struct neighbour __rcu **)
331                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
332                                            get_order(size));
333         if (!buckets) {
334                 kfree(ret);
335                 return NULL;
336         }
337         ret->hash_buckets = buckets;
338         ret->hash_shift = shift;
339         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
340                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
341         return ret;
342 }
343
344 static void neigh_hash_free_rcu(struct rcu_head *head)
345 {
346         struct neigh_hash_table *nht = container_of(head,
347                                                     struct neigh_hash_table,
348                                                     rcu);
349         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
350         struct neighbour __rcu **buckets = nht->hash_buckets;
351
352         if (size <= PAGE_SIZE)
353                 kfree(buckets);
354         else
355                 free_pages((unsigned long)buckets, get_order(size));
356         kfree(nht);
357 }
358
359 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
360                                                 unsigned long new_shift)
361 {
362         unsigned int i, hash;
363         struct neigh_hash_table *new_nht, *old_nht;
364
365         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
366
367         old_nht = rcu_dereference_protected(tbl->nht,
368                                             lockdep_is_held(&tbl->lock));
369         new_nht = neigh_hash_alloc(new_shift);
370         if (!new_nht)
371                 return old_nht;
372
373         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
374                 struct neighbour *n, *next;
375
376                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
377                                                    lockdep_is_held(&tbl->lock));
378                      n != NULL;
379                      n = next) {
380                         hash = tbl->hash(n->primary_key, n->dev,
381                                          new_nht->hash_rnd);
382
383                         hash >>= (32 - new_nht->hash_shift);
384                         next = rcu_dereference_protected(n->next,
385                                                 lockdep_is_held(&tbl->lock));
386
387                         rcu_assign_pointer(n->next,
388                                            rcu_dereference_protected(
389                                                 new_nht->hash_buckets[hash],
390                                                 lockdep_is_held(&tbl->lock)));
391                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
392                 }
393         }
394
395         rcu_assign_pointer(tbl->nht, new_nht);
396         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
397         return new_nht;
398 }
399
400 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
401                                struct net_device *dev)
402 {
403         struct neighbour *n;
404         int key_len = tbl->key_len;
405         u32 hash_val;
406         struct neigh_hash_table *nht;
407
408         NEIGH_CACHE_STAT_INC(tbl, lookups);
409
410         rcu_read_lock_bh();
411         nht = rcu_dereference_bh(tbl->nht);
412         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
413
414         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
415              n != NULL;
416              n = rcu_dereference_bh(n->next)) {
417                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
418                         if (!atomic_inc_not_zero(&n->refcnt))
419                                 n = NULL;
420                         NEIGH_CACHE_STAT_INC(tbl, hits);
421                         break;
422                 }
423         }
424
425         rcu_read_unlock_bh();
426         return n;
427 }
428 EXPORT_SYMBOL(neigh_lookup);
429
430 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
431                                      const void *pkey)
432 {
433         struct neighbour *n;
434         int key_len = tbl->key_len;
435         u32 hash_val;
436         struct neigh_hash_table *nht;
437
438         NEIGH_CACHE_STAT_INC(tbl, lookups);
439
440         rcu_read_lock_bh();
441         nht = rcu_dereference_bh(tbl->nht);
442         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
443
444         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
445              n != NULL;
446              n = rcu_dereference_bh(n->next)) {
447                 if (!memcmp(n->primary_key, pkey, key_len) &&
448                     net_eq(dev_net(n->dev), net)) {
449                         if (!atomic_inc_not_zero(&n->refcnt))
450                                 n = NULL;
451                         NEIGH_CACHE_STAT_INC(tbl, hits);
452                         break;
453                 }
454         }
455
456         rcu_read_unlock_bh();
457         return n;
458 }
459 EXPORT_SYMBOL(neigh_lookup_nodev);
460
461 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
462                                  struct net_device *dev, bool want_ref)
463 {
464         u32 hash_val;
465         int key_len = tbl->key_len;
466         int error;
467         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
468         struct neigh_hash_table *nht;
469
470         if (!n) {
471                 rc = ERR_PTR(-ENOBUFS);
472                 goto out;
473         }
474
475         memcpy(n->primary_key, pkey, key_len);
476         n->dev = dev;
477         dev_hold(dev);
478
479         /* Protocol specific setup. */
480         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
481                 rc = ERR_PTR(error);
482                 goto out_neigh_release;
483         }
484
485         if (dev->netdev_ops->ndo_neigh_construct) {
486                 error = dev->netdev_ops->ndo_neigh_construct(n);
487                 if (error < 0) {
488                         rc = ERR_PTR(error);
489                         goto out_neigh_release;
490                 }
491         }
492
493         /* Device specific setup. */
494         if (n->parms->neigh_setup &&
495             (error = n->parms->neigh_setup(n)) < 0) {
496                 rc = ERR_PTR(error);
497                 goto out_neigh_release;
498         }
499
500         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
501
502         write_lock_bh(&tbl->lock);
503         nht = rcu_dereference_protected(tbl->nht,
504                                         lockdep_is_held(&tbl->lock));
505
506         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
507                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
508
509         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
510
511         if (n->parms->dead) {
512                 rc = ERR_PTR(-EINVAL);
513                 goto out_tbl_unlock;
514         }
515
516         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
517                                             lockdep_is_held(&tbl->lock));
518              n1 != NULL;
519              n1 = rcu_dereference_protected(n1->next,
520                         lockdep_is_held(&tbl->lock))) {
521                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
522                         if (want_ref)
523                                 neigh_hold(n1);
524                         rc = n1;
525                         goto out_tbl_unlock;
526                 }
527         }
528
529         n->dead = 0;
530         if (want_ref)
531                 neigh_hold(n);
532         rcu_assign_pointer(n->next,
533                            rcu_dereference_protected(nht->hash_buckets[hash_val],
534                                                      lockdep_is_held(&tbl->lock)));
535         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
536         write_unlock_bh(&tbl->lock);
537         neigh_dbg(2, "neigh %p is created\n", n);
538         rc = n;
539 out:
540         return rc;
541 out_tbl_unlock:
542         write_unlock_bh(&tbl->lock);
543 out_neigh_release:
544         neigh_release(n);
545         goto out;
546 }
547 EXPORT_SYMBOL(__neigh_create);
548
549 static u32 pneigh_hash(const void *pkey, int key_len)
550 {
551         u32 hash_val = *(u32 *)(pkey + key_len - 4);
552         hash_val ^= (hash_val >> 16);
553         hash_val ^= hash_val >> 8;
554         hash_val ^= hash_val >> 4;
555         hash_val &= PNEIGH_HASHMASK;
556         return hash_val;
557 }
558
559 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
560                                               struct net *net,
561                                               const void *pkey,
562                                               int key_len,
563                                               struct net_device *dev)
564 {
565         while (n) {
566                 if (!memcmp(n->key, pkey, key_len) &&
567                     net_eq(pneigh_net(n), net) &&
568                     (n->dev == dev || !n->dev))
569                         return n;
570                 n = n->next;
571         }
572         return NULL;
573 }
574
575 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
576                 struct net *net, const void *pkey, struct net_device *dev)
577 {
578         int key_len = tbl->key_len;
579         u32 hash_val = pneigh_hash(pkey, key_len);
580
581         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
582                                  net, pkey, key_len, dev);
583 }
584 EXPORT_SYMBOL_GPL(__pneigh_lookup);
585
586 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
587                                     struct net *net, const void *pkey,
588                                     struct net_device *dev, int creat)
589 {
590         struct pneigh_entry *n;
591         int key_len = tbl->key_len;
592         u32 hash_val = pneigh_hash(pkey, key_len);
593
594         read_lock_bh(&tbl->lock);
595         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
596                               net, pkey, key_len, dev);
597         read_unlock_bh(&tbl->lock);
598
599         if (n || !creat)
600                 goto out;
601
602         ASSERT_RTNL();
603
604         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
605         if (!n)
606                 goto out;
607
608         write_pnet(&n->net, hold_net(net));
609         memcpy(n->key, pkey, key_len);
610         n->dev = dev;
611         if (dev)
612                 dev_hold(dev);
613
614         if (tbl->pconstructor && tbl->pconstructor(n)) {
615                 if (dev)
616                         dev_put(dev);
617                 release_net(net);
618                 kfree(n);
619                 n = NULL;
620                 goto out;
621         }
622
623         write_lock_bh(&tbl->lock);
624         n->next = tbl->phash_buckets[hash_val];
625         tbl->phash_buckets[hash_val] = n;
626         write_unlock_bh(&tbl->lock);
627 out:
628         return n;
629 }
630 EXPORT_SYMBOL(pneigh_lookup);
631
632
633 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
634                   struct net_device *dev)
635 {
636         struct pneigh_entry *n, **np;
637         int key_len = tbl->key_len;
638         u32 hash_val = pneigh_hash(pkey, key_len);
639
640         write_lock_bh(&tbl->lock);
641         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
642              np = &n->next) {
643                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
644                     net_eq(pneigh_net(n), net)) {
645                         *np = n->next;
646                         write_unlock_bh(&tbl->lock);
647                         if (tbl->pdestructor)
648                                 tbl->pdestructor(n);
649                         if (n->dev)
650                                 dev_put(n->dev);
651                         release_net(pneigh_net(n));
652                         kfree(n);
653                         return 0;
654                 }
655         }
656         write_unlock_bh(&tbl->lock);
657         return -ENOENT;
658 }
659
660 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
661 {
662         struct pneigh_entry *n, **np;
663         u32 h;
664
665         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
666                 np = &tbl->phash_buckets[h];
667                 while ((n = *np) != NULL) {
668                         if (!dev || n->dev == dev) {
669                                 *np = n->next;
670                                 if (tbl->pdestructor)
671                                         tbl->pdestructor(n);
672                                 if (n->dev)
673                                         dev_put(n->dev);
674                                 release_net(pneigh_net(n));
675                                 kfree(n);
676                                 continue;
677                         }
678                         np = &n->next;
679                 }
680         }
681         return -ENOENT;
682 }
683
684 static void neigh_parms_destroy(struct neigh_parms *parms);
685
686 static inline void neigh_parms_put(struct neigh_parms *parms)
687 {
688         if (atomic_dec_and_test(&parms->refcnt))
689                 neigh_parms_destroy(parms);
690 }
691
692 /*
693  *      neighbour must already be out of the table;
694  *
695  */
696 void neigh_destroy(struct neighbour *neigh)
697 {
698         struct net_device *dev = neigh->dev;
699
700         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
701
702         if (!neigh->dead) {
703                 pr_warn("Destroying alive neighbour %p\n", neigh);
704                 dump_stack();
705                 return;
706         }
707
708         if (neigh_del_timer(neigh))
709                 pr_warn("Impossible event\n");
710
711         write_lock_bh(&neigh->lock);
712         __skb_queue_purge(&neigh->arp_queue);
713         write_unlock_bh(&neigh->lock);
714         neigh->arp_queue_len_bytes = 0;
715
716         if (dev->netdev_ops->ndo_neigh_destroy)
717                 dev->netdev_ops->ndo_neigh_destroy(neigh);
718
719         dev_put(dev);
720         neigh_parms_put(neigh->parms);
721
722         neigh_dbg(2, "neigh %p is destroyed\n", neigh);
723
724         atomic_dec(&neigh->tbl->entries);
725         kfree_rcu(neigh, rcu);
726 }
727 EXPORT_SYMBOL(neigh_destroy);
728
729 /* Neighbour state is suspicious;
730    disable fast path.
731
732    Called with write_locked neigh.
733  */
734 static void neigh_suspect(struct neighbour *neigh)
735 {
736         neigh_dbg(2, "neigh %p is suspected\n", neigh);
737
738         neigh->output = neigh->ops->output;
739 }
740
741 /* Neighbour state is OK;
742    enable fast path.
743
744    Called with write_locked neigh.
745  */
746 static void neigh_connect(struct neighbour *neigh)
747 {
748         neigh_dbg(2, "neigh %p is connected\n", neigh);
749
750         neigh->output = neigh->ops->connected_output;
751 }
752
753 static void neigh_periodic_work(struct work_struct *work)
754 {
755         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
756         struct neighbour *n;
757         struct neighbour __rcu **np;
758         unsigned int i;
759         struct neigh_hash_table *nht;
760
761         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
762
763         write_lock_bh(&tbl->lock);
764         nht = rcu_dereference_protected(tbl->nht,
765                                         lockdep_is_held(&tbl->lock));
766
767         if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
768                 goto out;
769
770         /*
771          *      periodically recompute ReachableTime from random function
772          */
773
774         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
775                 struct neigh_parms *p;
776                 tbl->last_rand = jiffies;
777                 for (p = &tbl->parms; p; p = p->next)
778                         p->reachable_time =
779                                 neigh_rand_reach_time(p->base_reachable_time);
780         }
781
782         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
783                 np = &nht->hash_buckets[i];
784
785                 while ((n = rcu_dereference_protected(*np,
786                                 lockdep_is_held(&tbl->lock))) != NULL) {
787                         unsigned int state;
788
789                         write_lock(&n->lock);
790
791                         state = n->nud_state;
792                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
793                                 write_unlock(&n->lock);
794                                 goto next_elt;
795                         }
796
797                         if (time_before(n->used, n->confirmed))
798                                 n->used = n->confirmed;
799
800                         if (atomic_read(&n->refcnt) == 1 &&
801                             (state == NUD_FAILED ||
802                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
803                                 *np = n->next;
804                                 n->dead = 1;
805                                 write_unlock(&n->lock);
806                                 neigh_cleanup_and_release(n);
807                                 continue;
808                         }
809                         write_unlock(&n->lock);
810
811 next_elt:
812                         np = &n->next;
813                 }
814                 /*
815                  * It's fine to release lock here, even if hash table
816                  * grows while we are preempted.
817                  */
818                 write_unlock_bh(&tbl->lock);
819                 cond_resched();
820                 write_lock_bh(&tbl->lock);
821                 nht = rcu_dereference_protected(tbl->nht,
822                                                 lockdep_is_held(&tbl->lock));
823         }
824 out:
825         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
826          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
827          * base_reachable_time.
828          */
829         schedule_delayed_work(&tbl->gc_work,
830                               tbl->parms.base_reachable_time >> 1);
831         write_unlock_bh(&tbl->lock);
832 }
833
834 static __inline__ int neigh_max_probes(struct neighbour *n)
835 {
836         struct neigh_parms *p = n->parms;
837         return (n->nud_state & NUD_PROBE) ?
838                 p->ucast_probes :
839                 p->ucast_probes + p->app_probes + p->mcast_probes;
840 }
841
/* Called once resolution of @neigh has failed: bumps the res_failed
 * statistic, stamps ->updated and disposes of every skb still waiting on
 * arp_queue.
 *
 * As the sparse annotations below state, neigh->lock is released and
 * re-acquired inside; the function is entered and left with it write-held.
 */
842 static void neigh_invalidate(struct neighbour *neigh)
843         __releases(neigh->lock)
844         __acquires(neigh->lock)
845 {
846         struct sk_buff *skb;
847
848         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
849         neigh_dbg(2, "neigh %p is failed\n", neigh);
850         neigh->updated = jiffies;
851
852         /* It is very thin place. report_unreachable is very complicated
853            routine. Particularly, it can hit the same neighbour entry!
854
855            So that, we try to be accurate and avoid dead loop. --ANK
856          */
            /* nud_state is re-tested on every pass: the lock is dropped around
             * error_report(), so the state may no longer be NUD_FAILED.
             */
857         while (neigh->nud_state == NUD_FAILED &&
858                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
859                 write_unlock(&neigh->lock);
860                 neigh->ops->error_report(neigh, skb);
861                 write_lock(&neigh->lock);
862         }
            /* Anything left (loop ended early) is dropped without a report. */
863         __skb_queue_purge(&neigh->arp_queue);
864         neigh->arp_queue_len_bytes = 0;
865 }
866
/* Send one solicitation for @neigh.
 *
 * Entered with neigh->lock write-held and, per the __releases annotation,
 * returns with it dropped: the unlock happens before ops->solicit() runs.
 * solicit() is handed a private copy of the most recently queued skb (NULL
 * when the queue is empty; presumably also NULL when the atomic copy
 * fails); the copy is freed here afterwards and neigh->probes is bumped.
 */
867 static void neigh_probe(struct neighbour *neigh)
868         __releases(neigh->lock)
869 {
870         struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
871         /* keep skb alive even if arp_queue overflows */
872         if (skb)
873                 skb = skb_copy(skb, GFP_ATOMIC);
874         write_unlock(&neigh->lock);
875         neigh->ops->solicit(neigh, skb);
876         atomic_inc(&neigh->probes);
877         kfree_skb(skb);
878 }
879
880 /* Called when a timer expires for a neighbour entry.
     *
     * @arg carries the struct neighbour pointer.  Under neigh->lock the handler
     * advances the NUD state machine (REACHABLE -> DELAY or STALE,
     * DELAY -> REACHABLE or PROBE), fails the entry once the probe budget from
     * neigh_max_probes() is used up, re-arms the timer while the state is
     * still in NUD_IN_TIMER and sends a probe for INCOMPLETE/PROBE entries.
     * It ends with neigh_release(); presumably that drops the reference owned
     * by the timer that just fired -- confirm against neigh_add_timer().
     */
881
882 static void neigh_timer_handler(unsigned long arg)
883 {
884         unsigned long now, next;
885         struct neighbour *neigh = (struct neighbour *)arg;
886         unsigned int state;
887         int notify = 0;
888
889         write_lock(&neigh->lock);
890
891         state = neigh->nud_state;
892         now = jiffies;
893         next = now + HZ;
894
            /* The state no longer wants a timer: just unlock and release. */
895         if (!(state & NUD_IN_TIMER))
896                 goto out;
897
898         if (state & NUD_REACHABLE) {
899                 if (time_before_eq(now,
900                                    neigh->confirmed + neigh->parms->reachable_time)) {
901                         neigh_dbg(2, "neigh %p is still alive\n", neigh);
902                         next = neigh->confirmed + neigh->parms->reachable_time;
903                 } else if (time_before_eq(now,
904                                           neigh->used + neigh->parms->delay_probe_time)) {
905                         neigh_dbg(2, "neigh %p is delayed\n", neigh);
906                         neigh->nud_state = NUD_DELAY;
907                         neigh->updated = jiffies;
908                         neigh_suspect(neigh);
909                         next = now + neigh->parms->delay_probe_time;
910                 } else {
911                         neigh_dbg(2, "neigh %p is suspected\n", neigh);
912                         neigh->nud_state = NUD_STALE;
913                         neigh->updated = jiffies;
914                         neigh_suspect(neigh);
915                         notify = 1;
916                 }
917         } else if (state & NUD_DELAY) {
918                 if (time_before_eq(now,
919                                    neigh->confirmed + neigh->parms->delay_probe_time)) {
920                         neigh_dbg(2, "neigh %p is now reachable\n", neigh);
921                         neigh->nud_state = NUD_REACHABLE;
922                         neigh->updated = jiffies;
923                         neigh_connect(neigh);
924                         notify = 1;
925                         next = neigh->confirmed + neigh->parms->reachable_time;
926                 } else {
927                         neigh_dbg(2, "neigh %p is probed\n", neigh);
928                         neigh->nud_state = NUD_PROBE;
929                         neigh->updated = jiffies;
930                         atomic_set(&neigh->probes, 0);
931                         next = now + neigh->parms->retrans_time;
932                 }
933         } else {
934                 /* NUD_PROBE|NUD_INCOMPLETE */
935                 next = now + neigh->parms->retrans_time;
936         }
937
            /* Probe budget exhausted: fail the entry and flush its queue. */
938         if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
939             atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
940                 neigh->nud_state = NUD_FAILED;
941                 notify = 1;
942                 neigh_invalidate(neigh);
943         }
944
            /* Never re-arm closer than HZ/2 from now.  When mod_timer() returns
             * 0 (timer was not pending) an extra reference is taken for the
             * newly armed timer.
             */
945         if (neigh->nud_state & NUD_IN_TIMER) {
946                 if (time_before(next, jiffies + HZ/2))
947                         next = jiffies + HZ/2;
948                 if (!mod_timer(&neigh->timer, next))
949                         neigh_hold(neigh);
950         }
            /* neigh_probe() drops neigh->lock itself; every other path,
             * including the early "goto out", unlocks in the else branch.
             */
951         if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
952                 neigh_probe(neigh);
953         } else {
954 out:
955                 write_unlock(&neigh->lock);
956         }
957
            /* Notification is sent only after the lock has been released. */
958         if (notify)
959                 neigh_update_notify(neigh);
960
961         neigh_release(neigh);
962 }
963
/* Start or continue resolution of @neigh on behalf of an outgoing @skb
 * (@skb may be NULL; it is tested before being queued).
 *
 * Returns 0 when the entry is in a CONNECTED, DELAY or PROBE state, or was
 * STALE and has just been moved to DELAY; @skb is left untouched in that
 * case.  Returns 1 when @skb was taken over: either queued on arp_queue
 * while the entry is INCOMPLETE, or freed because neither multicast nor
 * application probes are configured and the entry was marked FAILED.
 */
964 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
965 {
966         int rc;
967         bool immediate_probe = false;
968
969         write_lock_bh(&neigh->lock);
970
971         rc = 0;
972         if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
973                 goto out_unlock_bh;
974
975         if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
976                 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
977                         unsigned long next, now = jiffies;
978
                            /* The probe counter starts at ucast_probes rather
                             * than 0; the first probe is sent right away from
                             * out_unlock_bh below.
                             */
979                         atomic_set(&neigh->probes, neigh->parms->ucast_probes);
980                         neigh->nud_state     = NUD_INCOMPLETE;
981                         neigh->updated = now;
982                         next = now + max(neigh->parms->retrans_time, HZ/2);
983                         neigh_add_timer(neigh, next);
984                         immediate_probe = true;
985                 } else {
                            /* No probes configured: fail at once, drop the skb. */
986                         neigh->nud_state = NUD_FAILED;
987                         neigh->updated = jiffies;
988                         write_unlock_bh(&neigh->lock);
989
990                         kfree_skb(skb);
991                         return 1;
992                 }
993         } else if (neigh->nud_state & NUD_STALE) {
994                 neigh_dbg(2, "neigh %p is delayed\n", neigh);
995                 neigh->nud_state = NUD_DELAY;
996                 neigh->updated = jiffies;
997                 neigh_add_timer(neigh,
998                                 jiffies + neigh->parms->delay_probe_time);
999         }
1000
1001         if (neigh->nud_state == NUD_INCOMPLETE) {
1002                 if (skb) {
                             /* Make room: discard the oldest queued skbs until
                              * the new one fits in queue_len_bytes or the queue
                              * is empty.
                              */
1003                         while (neigh->arp_queue_len_bytes + skb->truesize >
1004                                neigh->parms->queue_len_bytes) {
1005                                 struct sk_buff *buff;
1006
1007                                 buff = __skb_dequeue(&neigh->arp_queue);
1008                                 if (!buff)
1009                                         break;
1010                                 neigh->arp_queue_len_bytes -= buff->truesize;
1011                                 kfree_skb(buff);
1012                                 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1013                         }
1014                         skb_dst_force(skb);
1015                         __skb_queue_tail(&neigh->arp_queue, skb);
1016                         neigh->arp_queue_len_bytes += skb->truesize;
1017                 }
1018                 rc = 1;
1019         }
1020 out_unlock_bh:
             /* neigh_probe() releases neigh->lock with a plain write_unlock(),
              * so the _bh half of write_lock_bh() is undone separately below.
              */
1021         if (immediate_probe)
1022                 neigh_probe(neigh);
1023         else
1024                 write_unlock(&neigh->lock);
1025         local_bh_enable();
1026         return rc;
1027 }
1028 EXPORT_SYMBOL(__neigh_event_send);
1029
1030 static void neigh_update_hhs(struct neighbour *neigh)
1031 {
1032         struct hh_cache *hh;
1033         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1034                 = NULL;
1035
1036         if (neigh->dev->header_ops)
1037                 update = neigh->dev->header_ops->cache_update;
1038
1039         if (update) {
1040                 hh = &neigh->hh;
1041                 if (hh->hh_len) {
1042                         write_seqlock_bh(&hh->hh_lock);
1043                         update(hh, neigh->dev, neigh->ha);
1044                         write_sequnlock_bh(&hh->hh_lock);
1045                 }
1046         }
1047 }
1048
1049
1050
1051 /* Generic update routine.
1052    -- lladdr is the new lladdr, or NULL if none is supplied.
1053    -- new    is the new state.
1054    -- flags
1055         NEIGH_UPDATE_F_OVERRIDE allows overriding the existing lladdr
1056                                 if it is different.
1057         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect the existing "connected"
1058                                 lladdr instead of overriding it
1059                                 if it is different.
1060                                 It also allows retaining the current state
1061                                 if lladdr is unchanged.
1062         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1063
1064         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding the existing
1065                                 NTF_ROUTER flag.
1066         NEIGH_UPDATE_F_ISROUTER indicates whether the neighbour is known
1067                                 to be a router.
1068
1069    Caller MUST hold reference count on the entry.

        Returns 0 on success, -EPERM when a non-administrative update targets
        a NUD_NOARP or NUD_PERMANENT entry, and -EINVAL when no lladdr is
        supplied for a device that uses addresses and the entry holds no
        valid one.  Listeners are notified after neigh->lock is released.
1070  */
1071
1072 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1073                  u32 flags)
1074 {
1075         u8 old;
1076         int err;
1077         int notify = 0;
1078         struct net_device *dev;
1079         int update_isrouter = 0;
1080
1081         write_lock_bh(&neigh->lock);
1082
1083         dev    = neigh->dev;
1084         old    = neigh->nud_state;
1085         err    = -EPERM;
1086
1087         if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1088             (old & (NUD_NOARP | NUD_PERMANENT)))
1089                 goto out;
1090
             /* New state is not a valid one: stop the timer, switch state and,
              * when a resolution in progress is being failed, flush the queue.
              */
1091         if (!(new & NUD_VALID)) {
1092                 neigh_del_timer(neigh);
1093                 if (old & NUD_CONNECTED)
1094                         neigh_suspect(neigh);
1095                 neigh->nud_state = new;
1096                 err = 0;
1097                 notify = old & NUD_VALID;
1098                 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1099                     (new & NUD_FAILED)) {
1100                         neigh_invalidate(neigh);
1101                         notify = 1;
1102                 }
1103                 goto out;
1104         }
1105
             /* From here on "lladdr == neigh->ha" (pointer equality) is the
              * marker for "address unchanged".
              */
1106         /* Compare new lladdr with cached one */
1107         if (!dev->addr_len) {
1108                 /* First case: device needs no address. */
1109                 lladdr = neigh->ha;
1110         } else if (lladdr) {
1111                 /* The second case: if something is already cached
1112                    and a new address is proposed:
1113                    - compare new & old
1114                    - if they are different, check override flag
1115                  */
1116                 if ((old & NUD_VALID) &&
1117                     !memcmp(lladdr, neigh->ha, dev->addr_len))
1118                         lladdr = neigh->ha;
1119         } else {
1120                 /* No address is supplied; if we know something,
1121                    use it, otherwise discard the request.
1122                  */
1123                 err = -EINVAL;
1124                 if (!(old & NUD_VALID))
1125                         goto out;
1126                 lladdr = neigh->ha;
1127         }
1128
1129         if (new & NUD_CONNECTED)
1130                 neigh->confirmed = jiffies;
1131         neigh->updated = jiffies;
1132
1133         /* If entry was valid and address is not changed,
1134            do not change entry state, if new one is STALE.
1135          */
1136         err = 0;
1137         update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1138         if (old & NUD_VALID) {
1139                 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                             /* Address differs but may not be overridden: the
                              * router flag is left alone as well.
                              */
1140                         update_isrouter = 0;
1141                         if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1142                             (old & NUD_CONNECTED)) {
1143                                 lladdr = neigh->ha;
1144                                 new = NUD_STALE;
1145                         } else
1146                                 goto out;
1147                 } else {
1148                         if (lladdr == neigh->ha && new == NUD_STALE &&
1149                             ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1150                              (old & NUD_CONNECTED))
1151                             )
1152                                 new = old;
1153                 }
1154         }
1155
1156         if (new != old) {
1157                 neigh_del_timer(neigh);
1158                 if (new & NUD_IN_TIMER)
1159                         neigh_add_timer(neigh, (jiffies +
1160                                                 ((new & NUD_REACHABLE) ?
1161                                                  neigh->parms->reachable_time :
1162                                                  0)));
1163                 neigh->nud_state = new;
1164                 notify = 1;
1165         }
1166
1167         if (lladdr != neigh->ha) {
                     /* ha_lock is the seqlock the output paths read neigh->ha
                      * under.
                      */
1168                 write_seqlock(&neigh->ha_lock);
1169                 memcpy(&neigh->ha, lladdr, dev->addr_len);
1170                 write_sequnlock(&neigh->ha_lock);
1171                 neigh_update_hhs(neigh);
1172                 if (!(new & NUD_CONNECTED))
1173                         neigh->confirmed = jiffies -
1174                                       (neigh->parms->base_reachable_time << 1);
1175                 notify = 1;
1176         }
1177         if (new == old)
1178                 goto out;
1179         if (new & NUD_CONNECTED)
1180                 neigh_connect(neigh);
1181         else
1182                 neigh_suspect(neigh);
1183         if (!(old & NUD_VALID)) {
1184                 struct sk_buff *skb;
1185
1186                 /* Again: avoid dead loop if something went wrong */
1187
                     /* The entry just became valid: send out what was queued
                      * while it was unresolved.  neigh->lock is dropped
                      * around each output call.
                      */
1188                 while (neigh->nud_state & NUD_VALID &&
1189                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1190                         struct dst_entry *dst = skb_dst(skb);
1191                         struct neighbour *n2, *n1 = neigh;
1192                         write_unlock_bh(&neigh->lock);
1193
1194                         rcu_read_lock();
1195
1196                         /* Why not just use 'neigh' as-is?  The problem is that
1197                          * things such as shaper, eql, and sch_teql can end up
1198                          * using alternative, different, neigh objects to output
1199                          * the packet in the output path.  So what we need to do
1200                          * here is re-lookup the top-level neigh in the path so
1201                          * we can reinject the packet there.
1202                          */
1203                         n2 = NULL;
1204                         if (dst) {
1205                                 n2 = dst_neigh_lookup_skb(dst, skb);
1206                                 if (n2)
1207                                         n1 = n2;
1208                         }
1209                         n1->output(n1, skb);
1210                         if (n2)
1211                                 neigh_release(n2);
1212                         rcu_read_unlock();
1213
1214                         write_lock_bh(&neigh->lock);
1215                 }
1216                 __skb_queue_purge(&neigh->arp_queue);
1217                 neigh->arp_queue_len_bytes = 0;
1218         }
1219 out:
1220         if (update_isrouter) {
1221                 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1222                         (neigh->flags | NTF_ROUTER) :
1223                         (neigh->flags & ~NTF_ROUTER);
1224         }
1225         write_unlock_bh(&neigh->lock);
1226
1227         if (notify)
1228                 neigh_update_notify(neigh);
1229
1230         return err;
1231 }
1232 EXPORT_SYMBOL(neigh_update);
1233
1234 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1235                                  u8 *lladdr, void *saddr,
1236                                  struct net_device *dev)
1237 {
1238         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1239                                                  lladdr || !dev->addr_len);
1240         if (neigh)
1241                 neigh_update(neigh, lladdr, NUD_STALE,
1242                              NEIGH_UPDATE_F_OVERRIDE);
1243         return neigh;
1244 }
1245 EXPORT_SYMBOL(neigh_event_ns);
1246
1247 /* Fill in the cached hardware header (n->hh) through the device's
      * header_ops->cache() callback, unless hh_len shows it is already set.
      *
      * Takes n->lock for writing itself, so it must not be entered with
      * n->lock already held; the caller visible in this file,
      * neigh_resolve_output(), invokes it without the lock.
      */
1248 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1249 {
1250         struct net_device *dev = dst->dev;
1251         __be16 prot = dst->ops->protocol;
1252         struct hh_cache *hh = &n->hh;
1253
1254         write_lock_bh(&n->lock);
1255
1256         /* Only one thread can come in here and initialize the
1257          * hh_cache entry.
1258          */
1259         if (!hh->hh_len)
1260                 dev->header_ops->cache(n, hh, prot);
1261
1262         write_unlock_bh(&n->lock);
1263 }
1264
1265 /* This function can be used in contexts, where only old dev_queue_xmit
1266  * worked, f.e. if you want to override normal output path (eql, shaper),
1267  * but resolution is not made yet.
1268  */
1269
1270 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1271 {
1272         struct net_device *dev = skb->dev;
1273
1274         __skb_pull(skb, skb_network_offset(skb));
1275
1276         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1277                             skb->len) < 0 &&
1278             dev->header_ops->rebuild(skb))
1279                 return 0;
1280
1281         return dev_queue_xmit(skb);
1282 }
1283 EXPORT_SYMBOL(neigh_compat_output);
1284
1285 /* Slow and careful.
      *
      * Output path for entries that may still need resolving.  If
      * neigh_event_send() returns non-zero the skb is not transmitted here and
      * 0 is returned.  Otherwise the hardware header is built from neigh->ha
      * and the skb is handed to dev_queue_xmit(), whose result is returned.
      * Returns -EINVAL, after freeing the skb, when the skb has no dst or
      * the header cannot be built.
      */
1286
1287 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1288 {
1289         struct dst_entry *dst = skb_dst(skb);
1290         int rc = 0;
1291
1292         if (!dst)
1293                 goto discard;
1294
1295         if (!neigh_event_send(neigh, skb)) {
1296                 int err;
1297                 struct net_device *dev = neigh->dev;
1298                 unsigned int seq;
1299
                     /* Set up the cached header on first use; neigh_hh_init()
                      * re-checks hh_len under the lock.
                      */
1300                 if (dev->header_ops->cache && !neigh->hh.hh_len)
1301                         neigh_hh_init(neigh, dst);
1302
                     /* Build the header again if neigh->ha was rewritten
                      * concurrently (ha_lock sequence changed).
                      */
1303                 do {
1304                         __skb_pull(skb, skb_network_offset(skb));
1305                         seq = read_seqbegin(&neigh->ha_lock);
1306                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1307                                               neigh->ha, NULL, skb->len);
1308                 } while (read_seqretry(&neigh->ha_lock, seq));
1309
1310                 if (err >= 0)
1311                         rc = dev_queue_xmit(skb);
1312                 else
1313                         goto out_kfree_skb;
1314         }
1315 out:
1316         return rc;
1317 discard:
             /* Reached only with dst == NULL. */
1318         neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
1319 out_kfree_skb:
1320         rc = -EINVAL;
1321         kfree_skb(skb);
1322         goto out;
1323 }
1324 EXPORT_SYMBOL(neigh_resolve_output);
1325
1326 /* As fast as possible without hh cache */
1327
1328 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1329 {
1330         struct net_device *dev = neigh->dev;
1331         unsigned int seq;
1332         int err;
1333
1334         do {
1335                 __skb_pull(skb, skb_network_offset(skb));
1336                 seq = read_seqbegin(&neigh->ha_lock);
1337                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1338                                       neigh->ha, NULL, skb->len);
1339         } while (read_seqretry(&neigh->ha_lock, seq));
1340
1341         if (err >= 0)
1342                 err = dev_queue_xmit(skb);
1343         else {
1344                 err = -EINVAL;
1345                 kfree_skb(skb);
1346         }
1347         return err;
1348 }
1349 EXPORT_SYMBOL(neigh_connected_output);
1350
/* Output routine that builds no link-layer header here: the skb goes to
 * dev_queue_xmit() as it is and @neigh is not used.
 */
1351 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1352 {
1353         return dev_queue_xmit(skb);
1354 }
1355 EXPORT_SYMBOL(neigh_direct_output);
1356
/* proxy_timer callback; @arg is the struct neigh_table pointer.
 *
 * Walks tbl->proxy_queue under its lock.  Every skb whose sched_next time
 * has been reached is unlinked and either replayed through tbl->proxy_redo()
 * (callback set and device running) or freed; the device reference taken
 * in pneigh_enqueue() is dropped.  The timer is then re-armed for the
 * earliest remaining deadline, if any.
 */
1357 static void neigh_proxy_process(unsigned long arg)
1358 {
1359         struct neigh_table *tbl = (struct neigh_table *)arg;
1360         long sched_next = 0;
1361         unsigned long now = jiffies;
1362         struct sk_buff *skb, *n;
1363
1364         spin_lock(&tbl->proxy_queue.lock);
1365
1366         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
                     /* Signed difference: <= 0 means the deadline has passed. */
1367                 long tdif = NEIGH_CB(skb)->sched_next - now;
1368
1369                 if (tdif <= 0) {
1370                         struct net_device *dev = skb->dev;
1371
1372                         __skb_unlink(skb, &tbl->proxy_queue);
1373                         if (tbl->proxy_redo && netif_running(dev)) {
1374                                 rcu_read_lock();
1375                                 tbl->proxy_redo(skb);
1376                                 rcu_read_unlock();
1377                         } else {
1378                                 kfree_skb(skb);
1379                         }
1380
1381                         dev_put(dev);
1382                 } else if (!sched_next || tdif < sched_next)
                             /* Track the smallest positive delay still pending. */
1383                         sched_next = tdif;
1384         }
1385         del_timer(&tbl->proxy_timer);
1386         if (sched_next)
1387                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1388         spin_unlock(&tbl->proxy_queue.lock);
1389 }
1390
/* Queue @skb on tbl->proxy_queue for delayed handling by
 * neigh_proxy_process().
 *
 * The skb is given a deadline of now plus a random delay below
 * p->proxy_delay jiffies.  It is freed instead when the queue already holds
 * more than p->proxy_qlen entries.  A reference on skb->dev is taken for
 * the time the skb sits in the queue.
 *
 * NOTE(review): the modulo below divides by p->proxy_delay, so a value of 0
 * would fault; presumably callers only get here with a non-zero
 * proxy_delay -- confirm.
 */
1391 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1392                     struct sk_buff *skb)
1393 {
1394         unsigned long now = jiffies;
1395         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1396
             /* The length check is made before proxy_queue.lock is taken. */
1397         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1398                 kfree_skb(skb);
1399                 return;
1400         }
1401
1402         NEIGH_CB(skb)->sched_next = sched_next;
1403         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1404
1405         spin_lock(&tbl->proxy_queue.lock);
             /* If the timer was already pending for an earlier moment, keep
              * that earlier expiry when re-arming below.
              */
1406         if (del_timer(&tbl->proxy_timer)) {
1407                 if (time_before(tbl->proxy_timer.expires, sched_next))
1408                         sched_next = tbl->proxy_timer.expires;
1409         }
1410         skb_dst_drop(skb);
1411         dev_hold(skb->dev);
1412         __skb_queue_tail(&tbl->proxy_queue, skb);
1413         mod_timer(&tbl->proxy_timer, sched_next);
1414         spin_unlock(&tbl->proxy_queue.lock);
1415 }
1416 EXPORT_SYMBOL(pneigh_enqueue);
1417
1418 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1419                                                       struct net *net, int ifindex)
1420 {
1421         struct neigh_parms *p;
1422
1423         for (p = &tbl->parms; p; p = p->next) {
1424                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1425                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1426                         return p;
1427         }
1428
1429         return NULL;
1430 }
1431
1432 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1433                                       struct neigh_table *tbl)
1434 {
1435         struct neigh_parms *p;
1436         struct net *net = dev_net(dev);
1437         const struct net_device_ops *ops = dev->netdev_ops;
1438
1439         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1440         if (p) {
1441                 p->tbl            = tbl;
1442                 atomic_set(&p->refcnt, 1);
1443                 p->reachable_time =
1444                                 neigh_rand_reach_time(p->base_reachable_time);
1445                 dev_hold(dev);
1446                 p->dev = dev;
1447                 write_pnet(&p->net, hold_net(net));
1448                 p->sysctl_table = NULL;
1449
1450                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1451                         release_net(net);
1452                         dev_put(dev);
1453                         kfree(p);
1454                         return NULL;
1455                 }
1456
1457                 write_lock_bh(&tbl->lock);
1458                 p->next         = tbl->parms.next;
1459                 tbl->parms.next = p;
1460                 write_unlock_bh(&tbl->lock);
1461         }
1462         return p;
1463 }
1464 EXPORT_SYMBOL(neigh_parms_alloc);
1465
1466 static void neigh_rcu_free_parms(struct rcu_head *head)
1467 {
1468         struct neigh_parms *parms =
1469                 container_of(head, struct neigh_parms, rcu_head);
1470
1471         neigh_parms_put(parms);
1472 }
1473
/* Unlink @parms from @tbl's parameter list and schedule its release.
 *
 * NULL and the table's built-in default set (&tbl->parms) are ignored.
 * When found, the set is unlinked and marked dead under tbl->lock.  After
 * the lock is released, the device reference is dropped and the final
 * neigh_parms_put() is deferred through call_rcu().  If the set is not on
 * the list only a debug message is emitted.
 */
1474 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1475 {
1476         struct neigh_parms **p;
1477
1478         if (!parms || parms == &tbl->parms)
1479                 return;
1480         write_lock_bh(&tbl->lock);
             /* p points at the link that may reference parms, so unlinking is
              * a single pointer store.
              */
1481         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1482                 if (*p == parms) {
1483                         *p = parms->next;
1484                         parms->dead = 1;
1485                         write_unlock_bh(&tbl->lock);
1486                         if (parms->dev)
1487                                 dev_put(parms->dev);
1488                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1489                         return;
1490                 }
1491         }
1492         write_unlock_bh(&tbl->lock);
1493         neigh_dbg(1, "%s: not found\n", __func__);
1494 }
1495 EXPORT_SYMBOL(neigh_parms_release);
1496
/* Final teardown of a parameter set: drops its network-namespace reference
 * and frees the memory.
 */
1497 static void neigh_parms_destroy(struct neigh_parms *parms)
1498 {
1499         release_net(neigh_parms_net(parms));
1500         kfree(parms);
1501 }
1502
/* Lock class key passed to skb_queue_head_init_class() for every table's
 * proxy_queue.
 */
1503 static struct lock_class_key neigh_table_proxy_queue_class;
1504
/* Initialise the internals of @tbl: default parameters (bound to init_net,
 * refcount 1, randomised reachable_time), per-cpu statistics, the proc stat
 * entry, both hash tables, the lock, the periodic GC work and the proxy
 * timer and queue.  Allocation or proc-entry failures panic.  The table is
 * not linked into the global neigh_tables list here.
 */
1505 static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1506 {
1507         unsigned long now = jiffies;
1508         unsigned long phsize;
1509
1510         write_pnet(&tbl->parms.net, &init_net);
1511         atomic_set(&tbl->parms.refcnt, 1);
1512         tbl->parms.reachable_time =
1513                           neigh_rand_reach_time(tbl->parms.base_reachable_time);
1514
1515         tbl->stats = alloc_percpu(struct neigh_statistics);
1516         if (!tbl->stats)
1517                 panic("cannot create neighbour cache statistics");
1518
1519 #ifdef CONFIG_PROC_FS
1520         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1521                               &neigh_stat_seq_fops, tbl))
1522                 panic("cannot create neighbour proc dir entry");
1523 #endif
1524
1525         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1526
             /* The proxy hash has a fixed PNEIGH_HASHMASK + 1 buckets. */
1527         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1528         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1529
1530         if (!tbl->nht || !tbl->phash_buckets)
1531                 panic("cannot allocate neighbour cache hashes");
1532
             /* Derive the entry size from the key length unless the table
              * owner preset one; a preset size is only checked for alignment.
              */
1533         if (!tbl->entry_size)
1534                 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1535                                         tbl->key_len, NEIGH_PRIV_ALIGN);
1536         else
1537                 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1538
1539         rwlock_init(&tbl->lock);
1540         INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1541         schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1542         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1543         skb_queue_head_init_class(&tbl->proxy_queue,
1544                         &neigh_table_proxy_queue_class);
1545
1546         tbl->last_flush = now;
1547         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1548 }
1549
1550 void neigh_table_init(struct neigh_table *tbl)
1551 {
1552         struct neigh_table *tmp;
1553
1554         neigh_table_init_no_netlink(tbl);
1555         write_lock(&neigh_tbl_lock);
1556         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1557                 if (tmp->family == tbl->family)
1558                         break;
1559         }
1560         tbl->next       = neigh_tables;
1561         neigh_tables    = tbl;
1562         write_unlock(&neigh_tbl_lock);
1563
1564         if (unlikely(tmp)) {
1565                 pr_err("Registering multiple tables for family %d\n",
1566                        tbl->family);
1567                 dump_stack();
1568         }
1569 }
1570 EXPORT_SYMBOL(neigh_table_init);
1571
/* Tear down @tbl: stop the GC work and the proxy timer, purge the proxy
 * queue, flush all entries, unlink the table from the global list and
 * release its hash tables, proc entry and statistics.  Entries still
 * accounted in tbl->entries after the flush are reported as a leak.
 * Always returns 0.
 */
1572 int neigh_table_clear(struct neigh_table *tbl)
1573 {
1574         struct neigh_table **tp;
1575
1576         /* It is not clean... Fix it to unload IPv6 module safely */
1577         cancel_delayed_work_sync(&tbl->gc_work);
1578         del_timer_sync(&tbl->proxy_timer);
1579         pneigh_queue_purge(&tbl->proxy_queue);
1580         neigh_ifdown(tbl, NULL);
1581         if (atomic_read(&tbl->entries))
1582                 pr_crit("neighbour leakage\n");
1583         write_lock(&neigh_tbl_lock);
1584         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1585                 if (*tp == tbl) {
1586                         *tp = tbl->next;
1587                         break;
1588                 }
1589         }
1590         write_unlock(&neigh_tbl_lock);
1591
             /* The hash table is freed through an RCU callback rather than
              * immediately.
              */
1592         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1593                  neigh_hash_free_rcu);
1594         tbl->nht = NULL;
1595
1596         kfree(tbl->phash_buckets);
1597         tbl->phash_buckets = NULL;
1598
1599         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1600
1601         free_percpu(tbl->stats);
1602         tbl->stats = NULL;
1603
1604         return 0;
1605 }
1606 EXPORT_SYMBOL(neigh_table_clear);
1607
/* Netlink handler that removes a neighbour or proxy entry (asserts RTNL).
 *
 * Returns:
 *   -EINVAL        message shorter than struct ndmsg, NDA_DST missing or
 *                  shorter than the table's key, or no device given for a
 *                  non-proxy delete
 *   -ENODEV        ndm_ifindex names no device in this namespace
 *   -ENOENT        no such neighbour entry
 *   -EAFNOSUPPORT  no table registered for ndm_family
 *   otherwise the result of pneigh_delete() (NTF_PROXY) or of
 *   neigh_update(), which moves the entry to NUD_FAILED administratively.
 */
1608 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
1609 {
1610         struct net *net = sock_net(skb->sk);
1611         struct ndmsg *ndm;
1612         struct nlattr *dst_attr;
1613         struct neigh_table *tbl;
1614         struct net_device *dev = NULL;
1615         int err = -EINVAL;
1616
1617         ASSERT_RTNL();
1618         if (nlmsg_len(nlh) < sizeof(*ndm))
1619                 goto out;
1620
1621         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1622         if (dst_attr == NULL)
1623                 goto out;
1624
1625         ndm = nlmsg_data(nlh);
1626         if (ndm->ndm_ifindex) {
1627                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1628                 if (dev == NULL) {
1629                         err = -ENODEV;
1630                         goto out;
1631                 }
1632         }
1633
1634         read_lock(&neigh_tbl_lock);
1635         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1636                 struct neighbour *neigh;
1637
1638                 if (tbl->family != ndm->ndm_family)
1639                         continue;
                     /* Matching table found: the list lock is dropped here and
                      * every path below leaves through "out" without it.
                      */
1640                 read_unlock(&neigh_tbl_lock);
1641
1642                 if (nla_len(dst_attr) < tbl->key_len)
1643                         goto out;
1644
1645                 if (ndm->ndm_flags & NTF_PROXY) {
1646                         err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1647                         goto out;
1648                 }
1649
1650                 if (dev == NULL)
1651                         goto out;
1652
1653                 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1654                 if (neigh == NULL) {
1655                         err = -ENOENT;
1656                         goto out;
1657                 }
1658
1659                 err = neigh_update(neigh, NULL, NUD_FAILED,
1660                                    NEIGH_UPDATE_F_OVERRIDE |
1661                                    NEIGH_UPDATE_F_ADMIN);
1662                 neigh_release(neigh);
1663                 goto out;
1664         }
             /* Loop ran to completion: no table for this family. */
1665         read_unlock(&neigh_tbl_lock);
1666         err = -EAFNOSUPPORT;
1667
1668 out:
1669         return err;
1670 }
1671
1672 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
1673 {
1674         struct net *net = sock_net(skb->sk);
1675         struct ndmsg *ndm;
1676         struct nlattr *tb[NDA_MAX+1];
1677         struct neigh_table *tbl;
1678         struct net_device *dev = NULL;
1679         int err;
1680
1681         ASSERT_RTNL();
1682         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1683         if (err < 0)
1684                 goto out;
1685
1686         err = -EINVAL;
1687         if (tb[NDA_DST] == NULL)
1688                 goto out;
1689
1690         ndm = nlmsg_data(nlh);
1691         if (ndm->ndm_ifindex) {
1692                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1693                 if (dev == NULL) {
1694                         err = -ENODEV;
1695                         goto out;
1696                 }
1697
1698                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1699                         goto out;
1700         }
1701
1702         read_lock(&neigh_tbl_lock);
1703         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1704                 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1705                 struct neighbour *neigh;
1706                 void *dst, *lladdr;
1707
1708                 if (tbl->family != ndm->ndm_family)
1709                         continue;
1710                 read_unlock(&neigh_tbl_lock);
1711
1712                 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1713                         goto out;
1714                 dst = nla_data(tb[NDA_DST]);
1715                 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1716
1717                 if (ndm->ndm_flags & NTF_PROXY) {
1718                         struct pneigh_entry *pn;
1719
1720                         err = -ENOBUFS;
1721                         pn = pneigh_lookup(tbl, net, dst, dev, 1);
1722                         if (pn) {
1723                                 pn->flags = ndm->ndm_flags;
1724                                 err = 0;
1725                         }
1726                         goto out;
1727                 }
1728
1729                 if (dev == NULL)
1730                         goto out;
1731
1732                 neigh = neigh_lookup(tbl, dst, dev);
1733                 if (neigh == NULL) {
1734                         if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1735                                 err = -ENOENT;
1736                                 goto out;
1737                         }
1738
1739                         neigh = __neigh_lookup_errno(tbl, dst, dev);
1740                         if (IS_ERR(neigh)) {
1741                                 err = PTR_ERR(neigh);
1742                                 goto out;
1743                         }
1744                 } else {
1745                         if (nlh->nlmsg_flags & NLM_F_EXCL) {
1746                                 err = -EEXIST;
1747                                 neigh_release(neigh);
1748                                 goto out;
1749                         }
1750
1751                         if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1752                                 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1753                 }
1754
1755                 if (ndm->ndm_flags & NTF_USE) {
1756                         neigh_event_send(neigh, NULL);
1757                         err = 0;
1758                 } else
1759                         err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1760                 neigh_release(neigh);
1761                 goto out;
1762         }
1763
1764         read_unlock(&neigh_tbl_lock);
1765         err = -EAFNOSUPPORT;
1766 out:
1767         return err;
1768 }
1769
1770 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1771 {
1772         struct nlattr *nest;
1773
1774         nest = nla_nest_start(skb, NDTA_PARMS);
1775         if (nest == NULL)
1776                 return -ENOBUFS;
1777
1778         if ((parms->dev &&
1779              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1780             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1781             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1782             /* approximative value for deprecated QUEUE_LEN (in packets) */
1783             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1784                         parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1785             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1786             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1787             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1788             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1789             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1790             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1791                           parms->base_reachable_time) ||
1792             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1793             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1794                           parms->delay_probe_time) ||
1795             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1796             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1797             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1798             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1799                 goto nla_put_failure;
1800         return nla_nest_end(skb, nest);
1801
1802 nla_put_failure:
1803         nla_nest_cancel(skb, nest);
1804         return -EMSGSIZE;
1805 }
1806
1807 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1808                               u32 pid, u32 seq, int type, int flags)
1809 {
1810         struct nlmsghdr *nlh;
1811         struct ndtmsg *ndtmsg;
1812
1813         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1814         if (nlh == NULL)
1815                 return -EMSGSIZE;
1816
1817         ndtmsg = nlmsg_data(nlh);
1818
1819         read_lock_bh(&tbl->lock);
1820         ndtmsg->ndtm_family = tbl->family;
1821         ndtmsg->ndtm_pad1   = 0;
1822         ndtmsg->ndtm_pad2   = 0;
1823
1824         if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1825             nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1826             nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1827             nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1828             nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1829                 goto nla_put_failure;
1830         {
1831                 unsigned long now = jiffies;
1832                 unsigned int flush_delta = now - tbl->last_flush;
1833                 unsigned int rand_delta = now - tbl->last_rand;
1834                 struct neigh_hash_table *nht;
1835                 struct ndt_config ndc = {
1836                         .ndtc_key_len           = tbl->key_len,
1837                         .ndtc_entry_size        = tbl->entry_size,
1838                         .ndtc_entries           = atomic_read(&tbl->entries),
1839                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1840                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1841                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1842                 };
1843
1844                 rcu_read_lock_bh();
1845                 nht = rcu_dereference_bh(tbl->nht);
1846                 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1847                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1848                 rcu_read_unlock_bh();
1849
1850                 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1851                         goto nla_put_failure;
1852         }
1853
1854         {
1855                 int cpu;
1856                 struct ndt_stats ndst;
1857
1858                 memset(&ndst, 0, sizeof(ndst));
1859
1860                 for_each_possible_cpu(cpu) {
1861                         struct neigh_statistics *st;
1862
1863                         st = per_cpu_ptr(tbl->stats, cpu);
1864                         ndst.ndts_allocs                += st->allocs;
1865                         ndst.ndts_destroys              += st->destroys;
1866                         ndst.ndts_hash_grows            += st->hash_grows;
1867                         ndst.ndts_res_failed            += st->res_failed;
1868                         ndst.ndts_lookups               += st->lookups;
1869                         ndst.ndts_hits                  += st->hits;
1870                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1871                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1872                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1873                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1874                 }
1875
1876                 if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1877                         goto nla_put_failure;
1878         }
1879
1880         BUG_ON(tbl->parms.dev);
1881         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1882                 goto nla_put_failure;
1883
1884         read_unlock_bh(&tbl->lock);
1885         return nlmsg_end(skb, nlh);
1886
1887 nla_put_failure:
1888         read_unlock_bh(&tbl->lock);
1889         nlmsg_cancel(skb, nlh);
1890         return -EMSGSIZE;
1891 }
1892
1893 static int neightbl_fill_param_info(struct sk_buff *skb,
1894                                     struct neigh_table *tbl,
1895                                     struct neigh_parms *parms,
1896                                     u32 pid, u32 seq, int type,
1897                                     unsigned int flags)
1898 {
1899         struct ndtmsg *ndtmsg;
1900         struct nlmsghdr *nlh;
1901
1902         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1903         if (nlh == NULL)
1904                 return -EMSGSIZE;
1905
1906         ndtmsg = nlmsg_data(nlh);
1907
1908         read_lock_bh(&tbl->lock);
1909         ndtmsg->ndtm_family = tbl->family;
1910         ndtmsg->ndtm_pad1   = 0;
1911         ndtmsg->ndtm_pad2   = 0;
1912
1913         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1914             neightbl_fill_parms(skb, parms) < 0)
1915                 goto errout;
1916
1917         read_unlock_bh(&tbl->lock);
1918         return nlmsg_end(skb, nlh);
1919 errout:
1920         read_unlock_bh(&tbl->lock);
1921         nlmsg_cancel(skb, nlh);
1922         return -EMSGSIZE;
1923 }
1924
/*
 * Attribute validation policy for top-level NDTA_* attributes; passed to
 * nlmsg_parse() by neightbl_set().  NDTA_GC_INTERVAL is declared as a
 * 64-bit value and is read there with nla_get_msecs().
 */
1925 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1926         [NDTA_NAME]             = { .type = NLA_STRING },
1927         [NDTA_THRESH1]          = { .type = NLA_U32 },
1928         [NDTA_THRESH2]          = { .type = NLA_U32 },
1929         [NDTA_THRESH3]          = { .type = NLA_U32 },
1930         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1931         [NDTA_PARMS]            = { .type = NLA_NESTED },
1932 };
1933
1934 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1935         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1936         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1937         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1938         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1939         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1940         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1941         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1942         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1943         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1944         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1945         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1946         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1947         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1948 };
1949
1950 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1951 {
1952         struct net *net = sock_net(skb->sk);
1953         struct neigh_table *tbl;
1954         struct ndtmsg *ndtmsg;
1955         struct nlattr *tb[NDTA_MAX+1];
1956         int err;
1957
1958         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1959                           nl_neightbl_policy);
1960         if (err < 0)
1961                 goto errout;
1962
1963         if (tb[NDTA_NAME] == NULL) {
1964                 err = -EINVAL;
1965                 goto errout;
1966         }
1967
1968         ndtmsg = nlmsg_data(nlh);
1969         read_lock(&neigh_tbl_lock);
1970         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1971                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1972                         continue;
1973
1974                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1975                         break;
1976         }
1977
1978         if (tbl == NULL) {
1979                 err = -ENOENT;
1980                 goto errout_locked;
1981         }
1982
1983         /*
1984          * We acquire tbl->lock to be nice to the periodic timers and
1985          * make sure they always see a consistent set of values.
1986          */
1987         write_lock_bh(&tbl->lock);
1988
1989         if (tb[NDTA_PARMS]) {
1990                 struct nlattr *tbp[NDTPA_MAX+1];
1991                 struct neigh_parms *p;
1992                 int i, ifindex = 0;
1993
1994                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1995                                        nl_ntbl_parm_policy);
1996                 if (err < 0)
1997                         goto errout_tbl_lock;
1998
1999                 if (tbp[NDTPA_IFINDEX])
2000                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2001
2002                 p = lookup_neigh_parms(tbl, net, ifindex);
2003                 if (p == NULL) {
2004                         err = -ENOENT;
2005                         goto errout_tbl_lock;
2006                 }
2007
2008                 for (i = 1; i <= NDTPA_MAX; i++) {
2009                         if (tbp[i] == NULL)
2010                                 continue;
2011
2012                         switch (i) {
2013                         case NDTPA_QUEUE_LEN:
2014                                 p->queue_len_bytes = nla_get_u32(tbp[i]) *
2015                                                      SKB_TRUESIZE(ETH_FRAME_LEN);
2016                                 break;
2017                         case NDTPA_QUEUE_LENBYTES:
2018                                 p->queue_len_bytes = nla_get_u32(tbp[i]);
2019                                 break;
2020                         case NDTPA_PROXY_QLEN:
2021                                 p->proxy_qlen = nla_get_u32(tbp[i]);
2022                                 break;
2023                         case NDTPA_APP_PROBES:
2024                                 p->app_probes = nla_get_u32(tbp[i]);
2025                                 break;
2026                         case NDTPA_UCAST_PROBES:
2027                                 p->ucast_probes = nla_get_u32(tbp[i]);
2028                                 break;
2029                         case NDTPA_MCAST_PROBES:
2030                                 p->mcast_probes = nla_get_u32(tbp[i]);
2031                                 break;
2032                         case NDTPA_BASE_REACHABLE_TIME:
2033                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
2034                                 break;
2035                         case NDTPA_GC_STALETIME:
2036                                 p->gc_staletime = nla_get_msecs(tbp[i]);
2037                                 break;
2038                         case NDTPA_DELAY_PROBE_TIME:
2039                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
2040                                 break;
2041                         case NDTPA_RETRANS_TIME:
2042                                 p->retrans_time = nla_get_msecs(tbp[i]);
2043                                 break;
2044                         case NDTPA_ANYCAST_DELAY:
2045                                 p->anycast_delay = nla_get_msecs(tbp[i]);
2046                                 break;
2047                         case NDTPA_PROXY_DELAY:
2048                                 p->proxy_delay = nla_get_msecs(tbp[i]);
2049                                 break;
2050                         case NDTPA_LOCKTIME:
2051                                 p->locktime = nla_get_msecs(tbp[i]);
2052                                 break;
2053                         }
2054                 }
2055         }
2056
2057         err = -ENOENT;
2058         if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2059              tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2060             !net_eq(net, &init_net))
2061                 goto errout_tbl_lock;
2062
2063         if (tb[NDTA_THRESH1])
2064                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2065
2066         if (tb[NDTA_THRESH2])
2067                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2068
2069         if (tb[NDTA_THRESH3])
2070                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2071
2072         if (tb[NDTA_GC_INTERVAL])
2073                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2074
2075         err = 0;
2076
2077 errout_tbl_lock:
2078         write_unlock_bh(&tbl->lock);
2079 errout_locked:
2080         read_unlock(&neigh_tbl_lock);
2081 errout:
2082         return err;
2083 }
2084
2085 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2086 {
2087         struct net *net = sock_net(skb->sk);
2088         int family, tidx, nidx = 0;
2089         int tbl_skip = cb->args[0];
2090         int neigh_skip = cb->args[1];
2091         struct neigh_table *tbl;
2092
2093         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2094
2095         read_lock(&neigh_tbl_lock);
2096         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2097                 struct neigh_parms *p;
2098
2099                 if (tidx < tbl_skip || (family && tbl->family != family))
2100                         continue;
2101
2102                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2103                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2104                                        NLM_F_MULTI) <= 0)
2105                         break;
2106
2107                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2108                         if (!net_eq(neigh_parms_net(p), net))
2109                                 continue;
2110
2111                         if (nidx < neigh_skip)
2112                                 goto next;
2113
2114                         if (neightbl_fill_param_info(skb, tbl, p,
2115                                                      NETLINK_CB(cb->skb).portid,
2116                                                      cb->nlh->nlmsg_seq,
2117                                                      RTM_NEWNEIGHTBL,
2118                                                      NLM_F_MULTI) <= 0)
2119                                 goto out;
2120                 next:
2121                         nidx++;
2122                 }
2123
2124                 neigh_skip = 0;
2125         }
2126 out:
2127         read_unlock(&neigh_tbl_lock);
2128         cb->args[0] = tidx;
2129         cb->args[1] = nidx;
2130
2131         return skb->len;
2132 }
2133
2134 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2135                            u32 pid, u32 seq, int type, unsigned int flags)
2136 {
2137         unsigned long now = jiffies;
2138         struct nda_cacheinfo ci;
2139         struct nlmsghdr *nlh;
2140         struct ndmsg *ndm;
2141
2142         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2143         if (nlh == NULL)
2144                 return -EMSGSIZE;
2145
2146         ndm = nlmsg_data(nlh);
2147         ndm->ndm_family  = neigh->ops->family;
2148         ndm->ndm_pad1    = 0;
2149         ndm->ndm_pad2    = 0;
2150         ndm->ndm_flags   = neigh->flags;
2151         ndm->ndm_type    = neigh->type;
2152         ndm->ndm_ifindex = neigh->dev->ifindex;
2153
2154         if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2155                 goto nla_put_failure;
2156
2157         read_lock_bh(&neigh->lock);
2158         ndm->ndm_state   = neigh->nud_state;
2159         if (neigh->nud_state & NUD_VALID) {
2160                 char haddr[MAX_ADDR_LEN];
2161
2162                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2163                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2164                         read_unlock_bh(&neigh->lock);
2165                         goto nla_put_failure;
2166                 }
2167         }
2168
2169         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2170         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2171         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2172         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2173         read_unlock_bh(&neigh->lock);
2174
2175         if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2176             nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2177                 goto nla_put_failure;
2178
2179         return nlmsg_end(skb, nlh);
2180
2181 nla_put_failure:
2182         nlmsg_cancel(skb, nlh);
2183         return -EMSGSIZE;
2184 }
2185
2186 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2187                             u32 pid, u32 seq, int type, unsigned int flags,
2188                             struct neigh_table *tbl)
2189 {
2190         struct nlmsghdr *nlh;
2191         struct ndmsg *ndm;
2192
2193         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2194         if (nlh == NULL)
2195                 return -EMSGSIZE;
2196
2197         ndm = nlmsg_data(nlh);
2198         ndm->ndm_family  = tbl->family;
2199         ndm->ndm_pad1    = 0;
2200         ndm->ndm_pad2    = 0;
2201         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2202         ndm->ndm_type    = NDA_DST;
2203         ndm->ndm_ifindex = pn->dev->ifindex;
2204         ndm->ndm_state   = NUD_NONE;
2205
2206         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2207                 goto nla_put_failure;
2208
2209         return nlmsg_end(skb, nlh);
2210
2211 nla_put_failure:
2212         nlmsg_cancel(skb, nlh);
2213         return -EMSGSIZE;
2214 }
2215
/*
 * Announce a change to @neigh: first through the netevent notifier chain
 * (NETEVENT_NEIGH_UPDATE), then through __neigh_notify() as an
 * RTM_NEWNEIGH message with no extra flags.
 */
2216 static void neigh_update_notify(struct neighbour *neigh)
2217 {
2218         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2219         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2220 }
2221
2222 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2223                             struct netlink_callback *cb)
2224 {
2225         struct net *net = sock_net(skb->sk);
2226         struct neighbour *n;
2227         int rc, h, s_h = cb->args[1];
2228         int idx, s_idx = idx = cb->args[2];
2229         struct neigh_hash_table *nht;
2230
2231         rcu_read_lock_bh();
2232         nht = rcu_dereference_bh(tbl->nht);
2233
2234         for (h = s_h; h < (1 << nht->hash_shift); h++) {
2235                 if (h > s_h)
2236                         s_idx = 0;
2237                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2238                      n != NULL;
2239                      n = rcu_dereference_bh(n->next)) {
2240                         if (!net_eq(dev_net(n->dev), net))
2241                                 continue;
2242                         if (idx < s_idx)
2243                                 goto next;
2244                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2245                                             cb->nlh->nlmsg_seq,
2246                                             RTM_NEWNEIGH,
2247                                             NLM_F_MULTI) <= 0) {
2248                                 rc = -1;
2249                                 goto out;
2250                         }
2251 next:
2252                         idx++;
2253                 }
2254         }
2255         rc = skb->len;
2256 out:
2257         rcu_read_unlock_bh();
2258         cb->args[1] = h;
2259         cb->args[2] = idx;
2260         return rc;
2261 }
2262
2263 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2264                              struct netlink_callback *cb)
2265 {
2266         struct pneigh_entry *n;
2267         struct net *net = sock_net(skb->sk);
2268         int rc, h, s_h = cb->args[3];
2269         int idx, s_idx = idx = cb->args[4];
2270
2271         read_lock_bh(&tbl->lock);
2272
2273         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2274                 if (h > s_h)
2275                         s_idx = 0;
2276                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2277                         if (dev_net(n->dev) != net)
2278                                 continue;
2279                         if (idx < s_idx)
2280                                 goto next;
2281                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2282                                             cb->nlh->nlmsg_seq,
2283                                             RTM_NEWNEIGH,
2284                                             NLM_F_MULTI, tbl) <= 0) {
2285                                 read_unlock_bh(&tbl->lock);
2286                                 rc = -1;
2287                                 goto out;
2288                         }
2289                 next:
2290                         idx++;
2291                 }
2292         }
2293
2294         read_unlock_bh(&tbl->lock);
2295         rc = skb->len;
2296 out:
2297         cb->args[3] = h;
2298         cb->args[4] = idx;
2299         return rc;
2300
2301 }
2302
2303 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2304 {
2305         struct neigh_table *tbl;
2306         int t, family, s_t;
2307         int proxy = 0;
2308         int err;
2309
2310         read_lock(&neigh_tbl_lock);
2311         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2312
2313         /* check for full ndmsg structure presence, family member is
2314          * the same for both structures
2315          */
2316         if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2317             ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2318                 proxy = 1;
2319
2320         s_t = cb->args[0];
2321
2322         for (tbl = neigh_tables, t = 0; tbl;
2323              tbl = tbl->next, t++) {
2324                 if (t < s_t || (family && tbl->family != family))
2325                         continue;
2326                 if (t > s_t)
2327                         memset(&cb->args[1], 0, sizeof(cb->args) -
2328                                                 sizeof(cb->args[0]));
2329                 if (proxy)
2330                         err = pneigh_dump_table(tbl, skb, cb);
2331                 else
2332                         err = neigh_dump_table(tbl, skb, cb);
2333                 if (err < 0)
2334                         break;
2335         }
2336         read_unlock(&neigh_tbl_lock);
2337
2338         cb->args[0] = t;
2339         return skb->len;
2340 }
2341
2342 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2343 {
2344         int chain;
2345         struct neigh_hash_table *nht;
2346
2347         rcu_read_lock_bh();
2348         nht = rcu_dereference_bh(tbl->nht);
2349
2350         read_lock(&tbl->lock); /* avoid resizes */
2351         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2352                 struct neighbour *n;
2353
2354                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2355                      n != NULL;
2356                      n = rcu_dereference_bh(n->next))
2357                         cb(n, cookie);
2358         }
2359         read_unlock(&tbl->lock);
2360         rcu_read_unlock_bh();
2361 }
2362 EXPORT_SYMBOL(neigh_for_each);
2363
2364 /* The tbl->lock must be held as a writer and BH disabled. */
2365 void __neigh_for_each_release(struct neigh_table *tbl,
2366                               int (*cb)(struct neighbour *))
2367 {
2368         int chain;
2369         struct neigh_hash_table *nht;
2370
2371         nht = rcu_dereference_protected(tbl->nht,
2372                                         lockdep_is_held(&tbl->lock));
2373         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2374                 struct neighbour *n;
2375                 struct neighbour __rcu **np;
2376
2377                 np = &nht->hash_buckets[chain];
2378                 while ((n = rcu_dereference_protected(*np,
2379                                         lockdep_is_held(&tbl->lock))) != NULL) {
2380                         int release;
2381
2382                         write_lock(&n->lock);
2383                         release = cb(n);
2384                         if (release) {
2385                                 rcu_assign_pointer(*np,
2386                                         rcu_dereference_protected(n->next,
2387                                                 lockdep_is_held(&tbl->lock)));
2388                                 n->dead = 1;
2389                         } else
2390                                 np = &n->next;
2391                         write_unlock(&n->lock);
2392                         if (release)
2393                                 neigh_cleanup_and_release(n);
2394                 }
2395         }
2396 }
2397 EXPORT_SYMBOL(__neigh_for_each_release);
2398
2399 #ifdef CONFIG_PROC_FS
2400
2401 static struct neighbour *neigh_get_first(struct seq_file *seq)
2402 {
2403         struct neigh_seq_state *state = seq->private;
2404         struct net *net = seq_file_net(seq);
2405         struct neigh_hash_table *nht = state->nht;
2406         struct neighbour *n = NULL;
2407         int bucket = state->bucket;
2408
2409         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2410         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2411                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2412
2413                 while (n) {
2414                         if (!net_eq(dev_net(n->dev), net))
2415                                 goto next;
2416                         if (state->neigh_sub_iter) {
2417                                 loff_t fakep = 0;
2418                                 void *v;
2419
2420                                 v = state->neigh_sub_iter(state, n, &fakep);
2421                                 if (!v)
2422                                         goto next;
2423                         }
2424                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2425                                 break;
2426                         if (n->nud_state & ~NUD_NOARP)
2427                                 break;
2428 next:
2429                         n = rcu_dereference_bh(n->next);
2430                 }
2431
2432                 if (n)
2433                         break;
2434         }
2435         state->bucket = bucket;
2436
2437         return n;
2438 }
2439
2440 static struct neighbour *neigh_get_next(struct seq_file *seq,
2441                                         struct neighbour *n,
2442                                         loff_t *pos)
2443 {
2444         struct neigh_seq_state *state = seq->private;
2445         struct net *net = seq_file_net(seq);
2446         struct neigh_hash_table *nht = state->nht;
2447
2448         if (state->neigh_sub_iter) {
2449                 void *v = state->neigh_sub_iter(state, n, pos);
2450                 if (v)
2451                         return n;
2452         }
2453         n = rcu_dereference_bh(n->next);
2454
2455         while (1) {
2456                 while (n) {
2457                         if (!net_eq(dev_net(n->dev), net))
2458                                 goto next;
2459                         if (state->neigh_sub_iter) {
2460                                 void *v = state->neigh_sub_iter(state, n, pos);
2461                                 if (v)
2462                                         return n;
2463                                 goto next;
2464                         }
2465                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2466                                 break;
2467
2468                         if (n->nud_state & ~NUD_NOARP)
2469                                 break;
2470 next:
2471                         n = rcu_dereference_bh(n->next);
2472                 }
2473
2474                 if (n)
2475                         break;
2476
2477                 if (++state->bucket >= (1 << nht->hash_shift))
2478                         break;
2479
2480                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2481         }
2482
2483         if (n && pos)
2484                 --(*pos);
2485         return n;
2486 }
2487
2488 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2489 {
2490         struct neighbour *n = neigh_get_first(seq);
2491
2492         if (n) {
2493                 --(*pos);
2494                 while (*pos) {
2495                         n = neigh_get_next(seq, n, pos);
2496                         if (!n)
2497                                 break;
2498                 }
2499         }
2500         return *pos ? NULL : n;
2501 }
2502
2503 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2504 {
2505         struct neigh_seq_state *state = seq->private;
2506         struct net *net = seq_file_net(seq);
2507         struct neigh_table *tbl = state->tbl;
2508         struct pneigh_entry *pn = NULL;
2509         int bucket = state->bucket;
2510
2511         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2512         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2513                 pn = tbl->phash_buckets[bucket];
2514                 while (pn && !net_eq(pneigh_net(pn), net))
2515                         pn = pn->next;
2516                 if (pn)
2517                         break;
2518         }
2519         state->bucket = bucket;
2520
2521         return pn;
2522 }
2523
2524 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2525                                             struct pneigh_entry *pn,
2526                                             loff_t *pos)
2527 {
2528         struct neigh_seq_state *state = seq->private;
2529         struct net *net = seq_file_net(seq);
2530         struct neigh_table *tbl = state->tbl;
2531
2532         do {
2533                 pn = pn->next;
2534         } while (pn && !net_eq(pneigh_net(pn), net));
2535
2536         while (!pn) {
2537                 if (++state->bucket > PNEIGH_HASHMASK)
2538                         break;
2539                 pn = tbl->phash_buckets[state->bucket];
2540                 while (pn && !net_eq(pneigh_net(pn), net))
2541                         pn = pn->next;
2542                 if (pn)
2543                         break;
2544         }
2545
2546         if (pn && pos)
2547                 --(*pos);
2548
2549         return pn;
2550 }
2551
2552 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2553 {
2554         struct pneigh_entry *pn = pneigh_get_first(seq);
2555
2556         if (pn) {
2557                 --(*pos);
2558                 while (*pos) {
2559                         pn = pneigh_get_next(seq, pn, pos);
2560                         if (!pn)
2561                                 break;
2562                 }
2563         }
2564         return *pos ? NULL : pn;
2565 }
2566
2567 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2568 {
2569         struct neigh_seq_state *state = seq->private;
2570         void *rc;
2571         loff_t idxpos = *pos;
2572
2573         rc = neigh_get_idx(seq, &idxpos);
2574         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2575                 rc = pneigh_get_idx(seq, &idxpos);
2576
2577         return rc;
2578 }
2579
2580 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2581         __acquires(rcu_bh)
2582 {
2583         struct neigh_seq_state *state = seq->private;
2584
2585         state->tbl = tbl;
2586         state->bucket = 0;
2587         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2588
2589         rcu_read_lock_bh();
2590         state->nht = rcu_dereference_bh(tbl->nht);
2591
2592         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2593 }
2594 EXPORT_SYMBOL(neigh_seq_start);
2595
2596 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2597 {
2598         struct neigh_seq_state *state;
2599         void *rc;
2600
2601         if (v == SEQ_START_TOKEN) {
2602                 rc = neigh_get_first(seq);
2603                 goto out;
2604         }
2605
2606         state = seq->private;
2607         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2608                 rc = neigh_get_next(seq, v, NULL);
2609                 if (rc)
2610                         goto out;
2611                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2612                         rc = pneigh_get_first(seq);
2613         } else {
2614                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2615                 rc = pneigh_get_next(seq, v, NULL);
2616         }
2617 out:
2618         ++(*pos);
2619         return rc;
2620 }
2621 EXPORT_SYMBOL(neigh_seq_next);
2622
2623 void neigh_seq_stop(struct seq_file *seq, void *v)
2624         __releases(rcu_bh)
2625 {
2626         rcu_read_unlock_bh();
2627 }
2628 EXPORT_SYMBOL(neigh_seq_stop);
2629
2630 /* statistics via seq_file */
2631
2632 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2633 {
2634         struct neigh_table *tbl = seq->private;
2635         int cpu;
2636
2637         if (*pos == 0)
2638                 return SEQ_START_TOKEN;
2639
2640         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2641                 if (!cpu_possible(cpu))
2642                         continue;
2643                 *pos = cpu+1;
2644                 return per_cpu_ptr(tbl->stats, cpu);
2645         }
2646         return NULL;
2647 }
2648
2649 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2650 {
2651         struct neigh_table *tbl = seq->private;
2652         int cpu;
2653
2654         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2655                 if (!cpu_possible(cpu))
2656                         continue;
2657                 *pos = cpu+1;
2658                 return per_cpu_ptr(tbl->stats, cpu);
2659         }
2660         return NULL;
2661 }
2662
/* ->stop for the statistics file: nothing was acquired, nothing to release. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2667
2668 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2669 {
2670         struct neigh_table *tbl = seq->private;
2671         struct neigh_statistics *st = v;
2672
2673         if (v == SEQ_START_TOKEN) {
2674                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2675                 return 0;
2676         }
2677
2678         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2679                         "%08lx %08lx  %08lx %08lx %08lx\n",
2680                    atomic_read(&tbl->entries),
2681
2682                    st->allocs,
2683                    st->destroys,
2684                    st->hash_grows,
2685
2686                    st->lookups,
2687                    st->hits,
2688
2689                    st->res_failed,
2690
2691                    st->rcv_probes_mcast,
2692                    st->rcv_probes_ucast,
2693
2694                    st->periodic_gc_runs,
2695                    st->forced_gc_runs,
2696                    st->unres_discards
2697                    );
2698
2699         return 0;
2700 }
2701
2702 static const struct seq_operations neigh_stat_seq_ops = {
2703         .start  = neigh_stat_seq_start,
2704         .next   = neigh_stat_seq_next,
2705         .stop   = neigh_stat_seq_stop,
2706         .show   = neigh_stat_seq_show,
2707 };
2708
2709 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2710 {
2711         int ret = seq_open(file, &neigh_stat_seq_ops);
2712
2713         if (!ret) {
2714                 struct seq_file *sf = file->private_data;
2715                 sf->private = PDE_DATA(inode);
2716         }
2717         return ret;
2718 };
2719
2720 static const struct file_operations neigh_stat_seq_fops = {
2721         .owner   = THIS_MODULE,
2722         .open    = neigh_stat_seq_open,
2723         .read    = seq_read,
2724         .llseek  = seq_lseek,
2725         .release = seq_release,
2726 };
2727
2728 #endif /* CONFIG_PROC_FS */
2729
2730 static inline size_t neigh_nlmsg_size(void)
2731 {
2732         return NLMSG_ALIGN(sizeof(struct ndmsg))
2733                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2734                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2735                + nla_total_size(sizeof(struct nda_cacheinfo))
2736                + nla_total_size(4); /* NDA_PROBES */
2737 }
2738
2739 static void __neigh_notify(struct neighbour *n, int type, int flags)
2740 {
2741         struct net *net = dev_net(n->dev);
2742         struct sk_buff *skb;
2743         int err = -ENOBUFS;
2744
2745         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2746         if (skb == NULL)
2747                 goto errout;
2748
2749         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2750         if (err < 0) {
2751                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2752                 WARN_ON(err == -EMSGSIZE);
2753                 kfree_skb(skb);
2754                 goto errout;
2755         }
2756         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2757         return;
2758 errout:
2759         if (err < 0)
2760                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2761 }
2762
2763 void neigh_app_ns(struct neighbour *n)
2764 {
2765         __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2766 }
2767 EXPORT_SYMBOL(neigh_app_ns);
2768
2769 #ifdef CONFIG_SYSCTL
2770 static int zero;
2771 static int int_max = INT_MAX;
2772 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2773
2774 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2775                            void __user *buffer, size_t *lenp, loff_t *ppos)
2776 {
2777         int size, ret;
2778         struct ctl_table tmp = *ctl;
2779
2780         tmp.extra1 = &zero;
2781         tmp.extra2 = &unres_qlen_max;
2782         tmp.data = &size;
2783
2784         size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2785         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2786
2787         if (write && !ret)
2788                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2789         return ret;
2790 }
2791
/*
 * Indices into neigh_sysctl_table.neigh_vars[].
 *
 * The order matters: neigh_sysctl_register() cuts per-device tables off at
 * NEIGH_VAR_GC_INTERVAL, so the entries from there on appear only in the
 * "default" directory.
 */
enum {
	NEIGH_VAR_MCAST_PROBE = 0,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,

	/* Only present in the "default" directory. */
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,

	NEIGH_VAR_MAX	/* number of entries, not an entry itself */
};
2814
2815 static struct neigh_sysctl_table {
2816         struct ctl_table_header *sysctl_header;
2817         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2818 } neigh_sysctl_template __read_mostly = {
2819         .neigh_vars = {
2820                 [NEIGH_VAR_MCAST_PROBE] = {
2821                         .procname       = "mcast_solicit",
2822                         .maxlen         = sizeof(int),
2823                         .mode           = 0644,
2824                         .extra1         = &zero,
2825                         .extra2         = &int_max,
2826                         .proc_handler   = proc_dointvec_minmax,
2827                 },
2828                 [NEIGH_VAR_UCAST_PROBE] = {
2829                         .procname       = "ucast_solicit",
2830                         .maxlen         = sizeof(int),
2831                         .mode           = 0644,
2832                         .extra1         = &zero,
2833                         .extra2         = &int_max,
2834                         .proc_handler   = proc_dointvec_minmax,
2835                 },
2836                 [NEIGH_VAR_APP_PROBE] = {
2837                         .procname       = "app_solicit",
2838                         .maxlen         = sizeof(int),
2839                         .mode           = 0644,
2840                         .extra1         = &zero,
2841                         .extra2         = &int_max,
2842                         .proc_handler   = proc_dointvec_minmax,
2843                 },
2844                 [NEIGH_VAR_RETRANS_TIME] = {
2845                         .procname       = "retrans_time",
2846                         .maxlen         = sizeof(int),
2847                         .mode           = 0644,
2848                         .proc_handler   = proc_dointvec_userhz_jiffies,
2849                 },
2850                 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2851                         .procname       = "base_reachable_time",
2852                         .maxlen         = sizeof(int),
2853                         .mode           = 0644,
2854                         .proc_handler   = proc_dointvec_jiffies,
2855                 },
2856                 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2857                         .procname       = "delay_first_probe_time",
2858                         .maxlen         = sizeof(int),
2859                         .mode           = 0644,
2860                         .proc_handler   = proc_dointvec_jiffies,
2861                 },
2862                 [NEIGH_VAR_GC_STALETIME] = {
2863                         .procname       = "gc_stale_time",
2864                         .maxlen         = sizeof(int),
2865                         .mode           = 0644,
2866                         .proc_handler   = proc_dointvec_jiffies,
2867                 },
2868                 [NEIGH_VAR_QUEUE_LEN] = {
2869                         .procname       = "unres_qlen",
2870                         .maxlen         = sizeof(int),
2871                         .mode           = 0644,
2872                         .proc_handler   = proc_unres_qlen,
2873                 },
2874                 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2875                         .procname       = "unres_qlen_bytes",
2876                         .maxlen         = sizeof(int),
2877                         .mode           = 0644,
2878                         .extra1         = &zero,
2879                         .proc_handler   = proc_dointvec_minmax,
2880                 },
2881                 [NEIGH_VAR_PROXY_QLEN] = {
2882                         .procname       = "proxy_qlen",
2883                         .maxlen         = sizeof(int),
2884                         .mode           = 0644,
2885                         .extra1         = &zero,
2886                         .extra2         = &int_max,
2887                         .proc_handler   = proc_dointvec_minmax,
2888                 },
2889                 [NEIGH_VAR_ANYCAST_DELAY] = {
2890                         .procname       = "anycast_delay",
2891                         .maxlen         = sizeof(int),
2892                         .mode           = 0644,
2893                         .proc_handler   = proc_dointvec_userhz_jiffies,
2894                 },
2895                 [NEIGH_VAR_PROXY_DELAY] = {
2896                         .procname       = "proxy_delay",
2897                         .maxlen         = sizeof(int),
2898                         .mode           = 0644,
2899                         .proc_handler   = proc_dointvec_userhz_jiffies,
2900                 },
2901                 [NEIGH_VAR_LOCKTIME] = {
2902                         .procname       = "locktime",
2903                         .maxlen         = sizeof(int),
2904                         .mode           = 0644,
2905                         .proc_handler   = proc_dointvec_userhz_jiffies,
2906                 },
2907                 [NEIGH_VAR_RETRANS_TIME_MS] = {
2908                         .procname       = "retrans_time_ms",
2909                         .maxlen         = sizeof(int),
2910                         .mode           = 0644,
2911                         .proc_handler   = proc_dointvec_ms_jiffies,
2912                 },
2913                 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2914                         .procname       = "base_reachable_time_ms",
2915                         .maxlen         = sizeof(int),
2916                         .mode           = 0644,
2917                         .proc_handler   = proc_dointvec_ms_jiffies,
2918                 },
2919                 [NEIGH_VAR_GC_INTERVAL] = {
2920                         .procname       = "gc_interval",
2921                         .maxlen         = sizeof(int),
2922                         .mode           = 0644,
2923                         .proc_handler   = proc_dointvec_jiffies,
2924                 },
2925                 [NEIGH_VAR_GC_THRESH1] = {
2926                         .procname       = "gc_thresh1",
2927                         .maxlen         = sizeof(int),
2928                         .mode           = 0644,
2929                         .extra1         = &zero,
2930                         .extra2         = &int_max,
2931                         .proc_handler   = proc_dointvec_minmax,
2932                 },
2933                 [NEIGH_VAR_GC_THRESH2] = {
2934                         .procname       = "gc_thresh2",
2935                         .maxlen         = sizeof(int),
2936                         .mode           = 0644,
2937                         .extra1         = &zero,
2938                         .extra2         = &int_max,
2939                         .proc_handler   = proc_dointvec_minmax,
2940                 },
2941                 [NEIGH_VAR_GC_THRESH3] = {
2942                         .procname       = "gc_thresh3",
2943                         .maxlen         = sizeof(int),
2944                         .mode           = 0644,
2945                         .extra1         = &zero,
2946                         .extra2         = &int_max,
2947                         .proc_handler   = proc_dointvec_minmax,
2948                 },
2949                 {},
2950         },
2951 };
2952
2953 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2954                           char *p_name, proc_handler *handler)
2955 {
2956         struct neigh_sysctl_table *t;
2957         const char *dev_name_source = NULL;
2958         char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
2959
2960         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2961         if (!t)
2962                 goto err;
2963
2964         t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2965         t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2966         t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2967         t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2968         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2969         t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2970         t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2971         t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2972         t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2973         t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2974         t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2975         t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2976         t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2977         t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2978         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2979
2980         if (dev) {
2981                 dev_name_source = dev->name;
2982                 /* Terminate the table early */
2983                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2984                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2985         } else {
2986                 dev_name_source = "default";
2987                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2988                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2989                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2990                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2991         }
2992
2993
2994         if (handler) {
2995                 /* RetransTime */
2996                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2997                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2998                 /* ReachableTime */
2999                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3000                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
3001                 /* RetransTime (in milliseconds)*/
3002                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3003                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
3004                 /* ReachableTime (in milliseconds) */
3005                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3006                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
3007         }
3008
3009         /* Don't export sysctls to unprivileged users */
3010         if (neigh_parms_net(p)->user_ns != &init_user_ns)
3011                 t->neigh_vars[0].procname = NULL;
3012
3013         snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3014                 p_name, dev_name_source);
3015         t->sysctl_header =
3016                 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3017         if (!t->sysctl_header)
3018                 goto free;
3019
3020         p->sysctl_table = t;
3021         return 0;
3022
3023 free:
3024         kfree(t);
3025 err:
3026         return -ENOBUFS;
3027 }
3028 EXPORT_SYMBOL(neigh_sysctl_register);
3029
3030 void neigh_sysctl_unregister(struct neigh_parms *p)
3031 {
3032         if (p->sysctl_table) {
3033                 struct neigh_sysctl_table *t = p->sysctl_table;
3034                 p->sysctl_table = NULL;
3035                 unregister_net_sysctl_table(t->sysctl_header);
3036                 kfree(t);
3037         }
3038 }
3039 EXPORT_SYMBOL(neigh_sysctl_unregister);
3040
3041 #endif  /* CONFIG_SYSCTL */
3042
3043 static int __init neigh_init(void)
3044 {
3045         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3046         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3047         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3048
3049         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3050                       NULL);
3051         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3052
3053         return 0;
3054 }
3055
3056 subsys_initcall(neigh_init);
3057