]> Pileus Git - ~andy/linux/blob - net/core/neighbour.c
Merge branch 'smp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[~andy/linux] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41
42 #define NEIGH_DEBUG 1
43
44 #define NEIGH_PRINTK(x...) printk(x)
45 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
46 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
47 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
48
49 #if NEIGH_DEBUG >= 1
50 #undef NEIGH_PRINTK1
51 #define NEIGH_PRINTK1 NEIGH_PRINTK
52 #endif
53 #if NEIGH_DEBUG >= 2
54 #undef NEIGH_PRINTK2
55 #define NEIGH_PRINTK2 NEIGH_PRINTK
56 #endif
57
58 #define PNEIGH_HASHMASK         0xF
59
60 static void neigh_timer_handler(unsigned long arg);
61 static void __neigh_notify(struct neighbour *n, int type, int flags);
62 static void neigh_update_notify(struct neighbour *neigh);
63 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
64
65 static struct neigh_table *neigh_tables;
66 #ifdef CONFIG_PROC_FS
67 static const struct file_operations neigh_stat_seq_fops;
68 #endif
69
70 /*
71    Neighbour hash table buckets are protected with rwlock tbl->lock.
72
73    - All the scans/updates to hash buckets MUST be made under this lock.
74    - NOTHING clever should be made under this lock: no callbacks
75      to protocol backends, no attempts to send something to network.
76      It will result in deadlocks, if backend/driver wants to use neighbour
77      cache.
78    - If the entry requires some non-trivial actions, increase
79      its reference count and release table lock.
80
81    Neighbour entries are protected:
82    - with reference count.
83    - with rwlock neigh->lock
84
85    Reference count prevents destruction.
86
87    neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
89     - timer
90     - resolution queue
91
92    Again, nothing clever shall be made under neigh->lock,
93    the most complicated procedure, which we allow is dev->hard_header.
94    It is supposed, that dev->hard_header is simplistic and does
95    not make callbacks to neighbour tables.
96
   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
100
101 static DEFINE_RWLOCK(neigh_tbl_lock);
102
/* Drop @skb and report the link as down.  Installed as neigh->output for
 * entries that cannot (or must not) transmit, so queued traffic is
 * discarded instead of being sent.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
108
/* Common exit path for an entry being removed from the table: run the
 * protocol's optional per-entry cleanup hook, announce the deletion via
 * netlink (RTM_DELNEIGH), then drop the table's reference.  The final
 * neigh_release() leads to neigh_destroy() once all users are gone.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
117
118 /*
119  * It is random distribution in the interval (1/2)*base...(3/2)*base.
120  * It corresponds to default IPv6 settings and is not overridable,
121  * because it is really reasonable choice.
122  */
123
unsigned long neigh_rand_reach_time(unsigned long base)
{
	unsigned long half;

	/* A base of zero disables randomization entirely. */
	if (!base)
		return 0;

	half = base >> 1;
	return half + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
129
130
/* Synchronous shrink of the table, invoked from neigh_alloc() when the
 * entry count crosses gc_thresh2/gc_thresh3.  Walks every hash bucket
 * under the table write lock and frees all entries that are unreferenced
 * (refcnt == 1, i.e. only the table's own reference) and not
 * NUD_PERMANENT.  Returns 1 if at least one entry was reclaimed,
 * 0 otherwise.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink with rcu_assign_pointer() so RCU
				 * readers traversing concurrently still see
				 * a consistent chain.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
176
177 static void neigh_add_timer(struct neighbour *n, unsigned long when)
178 {
179         neigh_hold(n);
180         if (unlikely(mod_timer(&n->timer, when))) {
181                 printk("NEIGH: BUG, double timer add, state is %x\n",
182                        n->nud_state);
183                 dump_stack();
184         }
185 }
186
187 static int neigh_del_timer(struct neighbour *n)
188 {
189         if ((n->nud_state & NUD_IN_TIMER) &&
190             del_timer(&n->timer)) {
191                 neigh_release(n);
192                 return 1;
193         }
194         return 0;
195 }
196
197 static void pneigh_queue_purge(struct sk_buff_head *list)
198 {
199         struct sk_buff *skb;
200
201         while ((skb = skb_dequeue(list)) != NULL) {
202                 dev_put(skb->dev);
203                 kfree_skb(skb);
204         }
205 }
206
/* Unlink and release every entry whose device matches @dev (every entry
 * when @dev is NULL).  Caller must hold tbl->lock (write side); see
 * neigh_changeaddr() and neigh_ifdown().
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				/* Blackhole further output and demote the
				 * NUD state so nothing new is transmitted
				 * through this stray entry.
				 */
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
256
/* Flush all cached entries for @dev (e.g. after its address changed).
 * Just a locked wrapper around neigh_flush_dev().
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
264
/* Tear down all neighbour state for a device going down: flush its cache
 * entries and proxy entries under the table lock, then — outside the lock,
 * since del_timer_sync() may sleep-wait for the handler — stop the proxy
 * timer and drop all queued proxy skbs.  Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
277
/* Allocate and minimally initialise a neighbour entry for @tbl/@dev.
 * May trigger a synchronous forced GC when the table is above its
 * thresholds; returns NULL if the table stays full (gc_thresh3) or the
 * allocation fails.  The new entry starts with refcnt 1 and dead = 1 —
 * it is only marked live once linked into the hash by neigh_create().
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	/* entries is the count *before* this allocation. */
	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	if (tbl->entry_size)
		n = kzalloc(tbl->entry_size, GFP_ATOMIC);
	else {
		/* No fixed entry size: the key and the device's private
		 * area are carried inline after the struct.
		 */
		int sz = sizeof(*n) + tbl->key_len;

		sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
		sz += dev->neigh_priv_len;
		n = kzalloc(sz, GFP_ATOMIC);
	}
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated        = n->used = now;
	n->nud_state      = NUD_NONE;
	n->output         = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms          = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl            = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead           = 1;
out:
	return n;

out_entries:
	/* Undo the optimistic atomic_inc_return() above. */
	atomic_dec(&tbl->entries);
	goto out;
}
326
327 static void neigh_get_hash_rnd(u32 *x)
328 {
329         get_random_bytes(x, sizeof(*x));
330         *x |= 1;
331 }
332
333 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
334 {
335         size_t size = (1 << shift) * sizeof(struct neighbour *);
336         struct neigh_hash_table *ret;
337         struct neighbour __rcu **buckets;
338         int i;
339
340         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
341         if (!ret)
342                 return NULL;
343         if (size <= PAGE_SIZE)
344                 buckets = kzalloc(size, GFP_ATOMIC);
345         else
346                 buckets = (struct neighbour __rcu **)
347                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
348                                            get_order(size));
349         if (!buckets) {
350                 kfree(ret);
351                 return NULL;
352         }
353         ret->hash_buckets = buckets;
354         ret->hash_shift = shift;
355         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
356                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
357         return ret;
358 }
359
360 static void neigh_hash_free_rcu(struct rcu_head *head)
361 {
362         struct neigh_hash_table *nht = container_of(head,
363                                                     struct neigh_hash_table,
364                                                     rcu);
365         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
366         struct neighbour __rcu **buckets = nht->hash_buckets;
367
368         if (size <= PAGE_SIZE)
369                 kfree(buckets);
370         else
371                 free_pages((unsigned long)buckets, get_order(size));
372         kfree(nht);
373 }
374
/* Replace tbl->nht with a table of 2^new_shift buckets, rehashing every
 * entry.  Caller must hold tbl->lock (write side).  The old table is
 * published out and freed only after a grace period (call_rcu), so RCU
 * readers still traversing it remain safe.  On allocation failure the
 * old table is returned unchanged.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			/* The hash function fills the high bits; keep only
			 * hash_shift of them as the bucket index.
			 */
			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			/* Push onto the head of the new bucket, publishing
			 * n->next before the bucket pointer itself.
			 */
			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
415
/* Look up the entry keyed by (@pkey, @dev) under RCU-bh protection.
 * On a hit a reference is taken with atomic_inc_not_zero(), so racing
 * with the final release yields NULL rather than a dying entry.
 * Returns the referenced entry, or NULL on miss.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
445
/* Like neigh_lookup(), but matches by key and network namespace only,
 * ignoring the device (the hash is computed with dev == NULL).  Returns
 * a referenced entry or NULL; the same inc-not-zero race handling as
 * neigh_lookup() applies.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
476
/* Create the entry keyed by (@pkey, @dev).  Runs the table constructor,
 * the device's ndo_neigh_construct and the parms' neigh_setup hooks
 * before inserting under tbl->lock; the hash table is grown first when
 * it is getting full.  If another CPU inserted the same key in the
 * meantime, that existing entry is returned and the new one released.
 * Returns the entry with a reference held, or an ERR_PTR() on failure.
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate confirmation so the entry looks long-unconfirmed. */
	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/* parms marked dead — presumably mid-teardown; refuse to insert.
	 * NOTE(review): where parms->dead is set is outside this chunk.
	 */
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Re-check under the lock for a concurrent insert of the same key. */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(neigh_create);
562
563 static u32 pneigh_hash(const void *pkey, int key_len)
564 {
565         u32 hash_val = *(u32 *)(pkey + key_len - 4);
566         hash_val ^= (hash_val >> 16);
567         hash_val ^= hash_val >> 8;
568         hash_val ^= hash_val >> 4;
569         hash_val &= PNEIGH_HASHMASK;
570         return hash_val;
571 }
572
573 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
574                                               struct net *net,
575                                               const void *pkey,
576                                               int key_len,
577                                               struct net_device *dev)
578 {
579         while (n) {
580                 if (!memcmp(n->key, pkey, key_len) &&
581                     net_eq(pneigh_net(n), net) &&
582                     (n->dev == dev || !n->dev))
583                         return n;
584                 n = n->next;
585         }
586         return NULL;
587 }
588
589 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
590                 struct net *net, const void *pkey, struct net_device *dev)
591 {
592         int key_len = tbl->key_len;
593         u32 hash_val = pneigh_hash(pkey, key_len);
594
595         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
596                                  net, pkey, key_len, dev);
597 }
598 EXPORT_SYMBOL_GPL(__pneigh_lookup);
599
/* Find a proxy entry for (@net, @pkey, @dev).  When no entry exists and
 * @creat is nonzero, allocate and insert one — this path requires RTNL
 * (asserted below) and may sleep (GFP_KERNEL).  Returns the entry, or
 * NULL on miss / allocation failure / constructor failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	/* RTNL serializes creators, so no duplicate can be inserted
	 * between the unlocked lookup above and the insert below.
	 */
	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor refused the entry: undo the refs and bail. */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
645
646
/* Remove and free the proxy entry exactly matching (@net, @pkey, @dev).
 * The entry is unlinked under tbl->lock but destroyed after dropping it,
 * so the pdestructor hook does not run with the lock held.  Returns 0 on
 * success, -ENOENT if no such entry exists.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
673
/* Remove all proxy entries for @dev (every entry when @dev is NULL).
 * Unlike pneigh_delete(), the caller (neigh_ifdown()) already holds
 * tbl->lock, so the destructor hooks run with it held here.  Always
 * returns -ENOENT.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
697
static void neigh_parms_destroy(struct neigh_parms *parms);

/* Drop a reference on @parms and destroy it when the last one is gone. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
705
/*
 *	Final teardown of a neighbour entry, reached from neigh_release()
 *	when the refcount drops to zero.  The entry must already be out of
 *	the table (dead != 0); otherwise we only warn and deliberately
 *	leak it rather than corrupt the hash chain.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* A pending timer here would mean a reference still existed. */
	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	/* The struct itself may still be reachable by RCU readers. */
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
740
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	/* Route output through the generic ops->output path rather than
	 * connected_output — see neigh_connect() for the counterpart.
	 */
	neigh->output = neigh->ops->output;
}
752
/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	/* Switch to the fast connected_output path; undone by
	 * neigh_suspect() when the entry becomes questionable.
	 */
	neigh->output = neigh->ops->connected_output;
}
764
/* Periodic garbage collector (delayed work).  Walks the whole hash table
 * under tbl->lock, dropping unreferenced entries that are NUD_FAILED or
 * unused longer than gc_staletime, and refreshing each parms'
 * reachable_time roughly every 300 seconds.  The lock is released
 * between buckets (with cond_resched()) so the walk does not hog the
 * CPU; tbl->nht is re-read afterwards in case the table was resized.
 */
static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = jiffies;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			/* Permanent entries and entries with a running
			 * state-machine timer are never reclaimed here.
			 */
			state = n->nud_state;
			if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed))
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     time_after(jiffies, n->used + n->parms->gc_staletime))) {
				*np = n->next;
				n->dead = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	schedule_delayed_work(&tbl->gc_work,
			      tbl->parms.base_reachable_time >> 1);
	write_unlock_bh(&tbl->lock);
}
841
842 static __inline__ int neigh_max_probes(struct neighbour *n)
843 {
844         struct neigh_parms *p = n->parms;
845         return (n->nud_state & NUD_PROBE) ?
846                 p->ucast_probes :
847                 p->ucast_probes + p->app_probes + p->mcast_probes;
848 }
849
/* Handle an entry entering NUD_FAILED: report every queued skb as
 * unreachable and flush arp_queue.  Called with neigh->lock write-held;
 * the lock is dropped around error_report() since that callback may
 * touch neighbour state itself.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
874
/* Send one solicitation (e.g. an ARP request) for @neigh.
 *
 * A private copy of the head-of-queue skb is passed to ops->solicit()
 * so the original survives even if arp_queue is purged concurrently.
 * Called with neigh->lock write-held; the lock is released before
 * soliciting and NOT re-taken (see the __releases annotation).
 */
static void neigh_probe(struct neighbour *neigh)
        __releases(neigh->lock)
{
        struct sk_buff *skb = skb_peek(&neigh->arp_queue);
        /* keep skb alive even if arp_queue overflows */
        if (skb)
                skb = skb_copy(skb, GFP_ATOMIC);
        write_unlock(&neigh->lock);
        neigh->ops->solicit(neigh, skb);
        atomic_inc(&neigh->probes);
        kfree_skb(skb);
}
887
/* Timer callback driving the NUD state machine for one neighbour entry.
 *
 * Runs with a reference held on @neigh (taken when the timer was armed);
 * that reference is dropped on exit.  Depending on elapsed time the
 * entry moves REACHABLE -> DELAY -> PROBE or STALE, and a probing entry
 * that has exhausted neigh_max_probes() is declared failed.
 */

static void neigh_timer_handler(unsigned long arg)
{
        unsigned long now, next;
        struct neighbour *neigh = (struct neighbour *)arg;
        unsigned int state;
        int notify = 0;

        write_lock(&neigh->lock);

        state = neigh->nud_state;
        now = jiffies;
        next = now + HZ;

        /* Entry left all timed states before we ran; nothing to do. */
        if (!(state & NUD_IN_TIMER))
                goto out;

        if (state & NUD_REACHABLE) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->reachable_time)) {
                        NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else if (time_before_eq(now,
                                          neigh->used + neigh->parms->delay_probe_time)) {
                        /* Confirmation expired but the entry was used
                         * recently: give it a grace period in DELAY. */
                        NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                        neigh->nud_state = NUD_DELAY;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        next = now + neigh->parms->delay_probe_time;
                } else {
                        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
                        neigh->nud_state = NUD_STALE;
                        neigh->updated = jiffies;
                        neigh_suspect(neigh);
                        notify = 1;
                }
        } else if (state & NUD_DELAY) {
                if (time_before_eq(now,
                                   neigh->confirmed + neigh->parms->delay_probe_time)) {
                        /* Reachability was confirmed while we waited. */
                        NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
                        neigh->nud_state = NUD_REACHABLE;
                        neigh->updated = jiffies;
                        neigh_connect(neigh);
                        notify = 1;
                        next = neigh->confirmed + neigh->parms->reachable_time;
                } else {
                        NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
                        neigh->nud_state = NUD_PROBE;
                        neigh->updated = jiffies;
                        atomic_set(&neigh->probes, 0);
                        next = now + neigh->parms->retrans_time;
                }
        } else {
                /* NUD_PROBE|NUD_INCOMPLETE */
                next = now + neigh->parms->retrans_time;
        }

        /* Too many unanswered probes: fail the entry and flush its queue. */
        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
                neigh->nud_state = NUD_FAILED;
                notify = 1;
                neigh_invalidate(neigh);
        }

        if (neigh->nud_state & NUD_IN_TIMER) {
                /* Never re-arm closer than HZ/2 to bound timer load. */
                if (time_before(next, jiffies + HZ/2))
                        next = jiffies + HZ/2;
                if (!mod_timer(&neigh->timer, next))
                        neigh_hold(neigh);
        }
        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                /* neigh_probe() drops neigh->lock for us. */
                neigh_probe(neigh);
        } else {
out:
                write_unlock(&neigh->lock);
        }

        if (notify)
                neigh_update_notify(neigh);

        neigh_release(neigh);
}
971
/* Kick resolution for @neigh if it is not already usable.
 *
 * Returns 0 when the entry is valid enough to transmit immediately, and
 * 1 when @skb was queued (or dropped) pending resolution.  A fresh entry
 * is moved to NUD_INCOMPLETE and an immediate first probe is sent; a
 * STALE entry is moved to NUD_DELAY.  When no mcast/app probes are
 * configured the entry is failed straight away and @skb freed.
 *
 * Takes neigh->lock with BHs disabled; note the exit path pairs the
 * write_lock_bh() with write_unlock() + local_bh_enable() because
 * neigh_probe() drops the lock (but not the BH disable) itself.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
        int rc;
        bool immediate_probe = false;

        write_lock_bh(&neigh->lock);

        rc = 0;
        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
                goto out_unlock_bh;

        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
                        unsigned long next, now = jiffies;

                        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
                        neigh->nud_state     = NUD_INCOMPLETE;
                        neigh->updated = now;
                        next = now + max(neigh->parms->retrans_time, HZ/2);
                        neigh_add_timer(neigh, next);
                        immediate_probe = true;
                } else {
                        /* No way to resolve this address: fail now. */
                        neigh->nud_state = NUD_FAILED;
                        neigh->updated = jiffies;
                        write_unlock_bh(&neigh->lock);

                        kfree_skb(skb);
                        return 1;
                }
        } else if (neigh->nud_state & NUD_STALE) {
                NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
                neigh->nud_state = NUD_DELAY;
                neigh->updated = jiffies;
                neigh_add_timer(neigh,
                                jiffies + neigh->parms->delay_probe_time);
        }

        if (neigh->nud_state == NUD_INCOMPLETE) {
                if (skb) {
                        /* Enforce the per-neighbour byte quota by dropping
                         * the oldest queued packets first. */
                        while (neigh->arp_queue_len_bytes + skb->truesize >
                               neigh->parms->queue_len_bytes) {
                                struct sk_buff *buff;

                                buff = __skb_dequeue(&neigh->arp_queue);
                                if (!buff)
                                        break;
                                neigh->arp_queue_len_bytes -= buff->truesize;
                                kfree_skb(buff);
                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
                        }
                        skb_dst_force(skb);
                        __skb_queue_tail(&neigh->arp_queue, skb);
                        neigh->arp_queue_len_bytes += skb->truesize;
                }
                rc = 1;
        }
out_unlock_bh:
        if (immediate_probe)
                neigh_probe(neigh);     /* drops neigh->lock */
        else
                write_unlock(&neigh->lock);
        local_bh_enable();
        return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1037
1038 static void neigh_update_hhs(struct neighbour *neigh)
1039 {
1040         struct hh_cache *hh;
1041         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1042                 = NULL;
1043
1044         if (neigh->dev->header_ops)
1045                 update = neigh->dev->header_ops->cache_update;
1046
1047         if (update) {
1048                 hh = &neigh->hh;
1049                 if (hh->hh_len) {
1050                         write_seqlock_bh(&hh->hh_lock);
1051                         update(hh, neigh->dev, neigh->ha);
1052                         write_sequnlock_bh(&hh->hh_lock);
1053                 }
1054         }
1055 }
1056
1057
1058
1059 /* Generic update routine.
1060    -- lladdr is new lladdr or NULL, if it is not supplied.
1061    -- new    is new state.
1062    -- flags
1063         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1064                                 if it is different.
1065         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1066                                 lladdr instead of overriding it
1067                                 if it is different.
1068                                 It also allows to retain current state
1069                                 if lladdr is unchanged.
1070         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1071
1072         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1073                                 NTF_ROUTER flag.
1074         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1075                                 a router.
1076
1077    Caller MUST hold reference count on the entry.
1078  */
1079
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
                 u32 flags)
{
        u8 old;
        int err;
        int notify = 0;
        struct net_device *dev;
        int update_isrouter = 0;

        write_lock_bh(&neigh->lock);

        dev    = neigh->dev;
        old    = neigh->nud_state;
        err    = -EPERM;

        /* Only administrative updates may touch NOARP/PERMANENT entries. */
        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
            (old & (NUD_NOARP | NUD_PERMANENT)))
                goto out;

        /* Transition to an invalid state: stop the timer, demote a
         * connected entry, and flush the queue of an entry that just
         * failed resolution.
         */
        if (!(new & NUD_VALID)) {
                neigh_del_timer(neigh);
                if (old & NUD_CONNECTED)
                        neigh_suspect(neigh);
                neigh->nud_state = new;
                err = 0;
                notify = old & NUD_VALID;
                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
                    (new & NUD_FAILED)) {
                        neigh_invalidate(neigh);
                        notify = 1;
                }
                goto out;
        }

        /* Compare new lladdr with cached one */
        if (!dev->addr_len) {
                /* First case: device needs no address. */
                lladdr = neigh->ha;
        } else if (lladdr) {
                /* The second case: if something is already cached
                   and a new address is proposed:
                   - compare new & old
                   - if they are different, check override flag
                 */
                if ((old & NUD_VALID) &&
                    !memcmp(lladdr, neigh->ha, dev->addr_len))
                        lladdr = neigh->ha;
        } else {
                /* No address is supplied; if we know something,
                   use it, otherwise discard the request.
                 */
                err = -EINVAL;
                if (!(old & NUD_VALID))
                        goto out;
                lladdr = neigh->ha;
        }

        if (new & NUD_CONNECTED)
                neigh->confirmed = jiffies;
        neigh->updated = jiffies;

        /* If entry was valid and address is not changed,
           do not change entry state, if new one is STALE.
         */
        err = 0;
        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
        if (old & NUD_VALID) {
                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                        update_isrouter = 0;
                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
                            (old & NUD_CONNECTED)) {
                                /* Keep the cached address but mark the
                                 * entry suspect instead of overriding. */
                                lladdr = neigh->ha;
                                new = NUD_STALE;
                        } else
                                goto out;
                } else {
                        if (lladdr == neigh->ha && new == NUD_STALE &&
                            ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
                             (old & NUD_CONNECTED))
                            )
                                new = old;
                }
        }

        /* Re-arm the state timer when the NUD state actually changes. */
        if (new != old) {
                neigh_del_timer(neigh);
                if (new & NUD_IN_TIMER)
                        neigh_add_timer(neigh, (jiffies +
                                                ((new & NUD_REACHABLE) ?
                                                 neigh->parms->reachable_time :
                                                 0)));
                neigh->nud_state = new;
        }

        /* Install the new link-layer address (under the ha seqlock so
         * lockless readers never see a torn copy) and refresh cached
         * hardware headers.
         */
        if (lladdr != neigh->ha) {
                write_seqlock(&neigh->ha_lock);
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                write_sequnlock(&neigh->ha_lock);
                neigh_update_hhs(neigh);
                if (!(new & NUD_CONNECTED))
                        neigh->confirmed = jiffies -
                                      (neigh->parms->base_reachable_time << 1);
                notify = 1;
        }
        if (new == old)
                goto out;
        if (new & NUD_CONNECTED)
                neigh_connect(neigh);
        else
                neigh_suspect(neigh);
        /* Entry just became valid: transmit packets queued while it was
         * being resolved.  The lock is dropped around each transmit.
         */
        if (!(old & NUD_VALID)) {
                struct sk_buff *skb;

                /* Again: avoid dead loop if something went wrong */

                while (neigh->nud_state & NUD_VALID &&
                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
                        struct dst_entry *dst = skb_dst(skb);
                        struct neighbour *n2, *n1 = neigh;
                        write_unlock_bh(&neigh->lock);

                        rcu_read_lock();
                        /* On shaper/eql skb->dst->neighbour != neigh :( */
                        if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
                                n1 = n2;
                        n1->output(n1, skb);
                        rcu_read_unlock();

                        write_lock_bh(&neigh->lock);
                }
                skb_queue_purge(&neigh->arp_queue);
                neigh->arp_queue_len_bytes = 0;
        }
out:
        if (update_isrouter) {
                neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
                        (neigh->flags | NTF_ROUTER) :
                        (neigh->flags & ~NTF_ROUTER);
        }
        write_unlock_bh(&neigh->lock);

        if (notify)
                neigh_update_notify(neigh);

        return err;
}
EXPORT_SYMBOL(neigh_update);
1227
/* Record the sender of a received neighbour solicitation.
 *
 * Looks up the entry for @saddr on @dev, creating it when @lladdr was
 * supplied (or the device needs no address), and marks it NUD_STALE
 * with the advertised link-layer address.  Returns the entry with a
 * reference held, or NULL on lookup/allocation failure.
 */
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
                                 u8 *lladdr, void *saddr,
                                 struct net_device *dev)
{
        struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
                                                 lladdr || !dev->addr_len);
        if (neigh)
                neigh_update(neigh, lladdr, NUD_STALE,
                             NEIGH_UPDATE_F_OVERRIDE);
        return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
1240
1241 /* called with read_lock_bh(&n->lock); */
1242 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1243 {
1244         struct net_device *dev = dst->dev;
1245         __be16 prot = dst->ops->protocol;
1246         struct hh_cache *hh = &n->hh;
1247
1248         write_lock_bh(&n->lock);
1249
1250         /* Only one thread can come in here and initialize the
1251          * hh_cache entry.
1252          */
1253         if (!hh->hh_len)
1254                 dev->header_ops->cache(n, hh, prot);
1255
1256         write_unlock_bh(&n->lock);
1257 }
1258
1259 /* This function can be used in contexts, where only old dev_queue_xmit
1260  * worked, f.e. if you want to override normal output path (eql, shaper),
1261  * but resolution is not made yet.
1262  */
1263
1264 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1265 {
1266         struct net_device *dev = skb->dev;
1267
1268         __skb_pull(skb, skb_network_offset(skb));
1269
1270         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1271                             skb->len) < 0 &&
1272             dev->header_ops->rebuild(skb))
1273                 return 0;
1274
1275         return dev_queue_xmit(skb);
1276 }
1277 EXPORT_SYMBOL(neigh_compat_output);
1278
/* Slow and careful output path: resolve the neighbour first if needed.
 *
 * While the entry is unresolved, neigh_event_send() queues the skb and
 * we return 0.  Once valid, the hardware header is built under the ha
 * seqlock (initializing the hh cache on first use) and the skb is
 * transmitted.  Returns the transmit status, or -EINVAL when the skb
 * had to be dropped.
 */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        int rc = 0;

        if (!dst)
                goto discard;

        __skb_pull(skb, skb_network_offset(skb));

        if (!neigh_event_send(neigh, skb)) {
                int err;
                struct net_device *dev = neigh->dev;
                unsigned int seq;

                if (dev->header_ops->cache && !neigh->hh.hh_len)
                        neigh_hh_init(neigh, dst);

                /* Retry under the seqlock so a concurrent lladdr change
                 * cannot hand us a torn hardware address. */
                do {
                        seq = read_seqbegin(&neigh->ha_lock);
                        err = dev_hard_header(skb, dev, ntohs(skb->protocol),
                                              neigh->ha, NULL, skb->len);
                } while (read_seqretry(&neigh->ha_lock, seq));

                if (err >= 0)
                        rc = dev_queue_xmit(skb);
                else
                        goto out_kfree_skb;
        }
out:
        return rc;
discard:
        NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
                      dst, neigh);
out_kfree_skb:
        rc = -EINVAL;
        kfree_skb(skb);
        goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1321
1322 /* As fast as possible without hh cache */
1323
1324 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1325 {
1326         struct net_device *dev = neigh->dev;
1327         unsigned int seq;
1328         int err;
1329
1330         __skb_pull(skb, skb_network_offset(skb));
1331
1332         do {
1333                 seq = read_seqbegin(&neigh->ha_lock);
1334                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1335                                       neigh->ha, NULL, skb->len);
1336         } while (read_seqretry(&neigh->ha_lock, seq));
1337
1338         if (err >= 0)
1339                 err = dev_queue_xmit(skb);
1340         else {
1341                 err = -EINVAL;
1342                 kfree_skb(skb);
1343         }
1344         return err;
1345 }
1346 EXPORT_SYMBOL(neigh_connected_output);
1347
/* Output path for neighbours that need no resolution and no hardware
 * header: hand the skb straight to the device queue.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
        return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1353
/* Timer callback servicing the delayed proxy-ARP/ND queue.
 *
 * Re-emits (via tbl->proxy_redo) every queued skb whose scheduled time
 * has arrived, drops entries whose device is no longer running, and
 * re-arms the proxy timer for the earliest remaining entry.
 */
static void neigh_proxy_process(unsigned long arg)
{
        struct neigh_table *tbl = (struct neigh_table *)arg;
        long sched_next = 0;
        unsigned long now = jiffies;
        struct sk_buff *skb, *n;

        spin_lock(&tbl->proxy_queue.lock);

        skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
                /* <= 0 means this entry's scheduled time has passed. */
                long tdif = NEIGH_CB(skb)->sched_next - now;

                if (tdif <= 0) {
                        struct net_device *dev = skb->dev;

                        __skb_unlink(skb, &tbl->proxy_queue);
                        if (tbl->proxy_redo && netif_running(dev)) {
                                rcu_read_lock();
                                tbl->proxy_redo(skb);
                                rcu_read_unlock();
                        } else {
                                kfree_skb(skb);
                        }

                        /* Drop the reference taken in pneigh_enqueue(). */
                        dev_put(dev);
                } else if (!sched_next || tdif < sched_next)
                        sched_next = tdif;
        }
        del_timer(&tbl->proxy_timer);
        if (sched_next)
                mod_timer(&tbl->proxy_timer, jiffies + sched_next);
        spin_unlock(&tbl->proxy_queue.lock);
}
1387
1388 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1389                     struct sk_buff *skb)
1390 {
1391         unsigned long now = jiffies;
1392         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1393
1394         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1395                 kfree_skb(skb);
1396                 return;
1397         }
1398
1399         NEIGH_CB(skb)->sched_next = sched_next;
1400         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1401
1402         spin_lock(&tbl->proxy_queue.lock);
1403         if (del_timer(&tbl->proxy_timer)) {
1404                 if (time_before(tbl->proxy_timer.expires, sched_next))
1405                         sched_next = tbl->proxy_timer.expires;
1406         }
1407         skb_dst_drop(skb);
1408         dev_hold(skb->dev);
1409         __skb_queue_tail(&tbl->proxy_queue, skb);
1410         mod_timer(&tbl->proxy_timer, sched_next);
1411         spin_unlock(&tbl->proxy_queue.lock);
1412 }
1413 EXPORT_SYMBOL(pneigh_enqueue);
1414
1415 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1416                                                       struct net *net, int ifindex)
1417 {
1418         struct neigh_parms *p;
1419
1420         for (p = &tbl->parms; p; p = p->next) {
1421                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1422                     (!p->dev && !ifindex))
1423                         return p;
1424         }
1425
1426         return NULL;
1427 }
1428
/* Allocate per-device neighbour parameters for @dev, cloned from the
 * table's default parms.
 *
 * The new parms take a reference on the device and its netns, give the
 * driver a chance to adjust them via ndo_neigh_setup(), and are linked
 * into tbl->parms under the table lock.  Returns the new parms (refcnt
 * 1) or NULL on failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
                                      struct neigh_table *tbl)
{
        struct neigh_parms *p, *ref;
        struct net *net = dev_net(dev);
        const struct net_device_ops *ops = dev->netdev_ops;

        /* Clone from the table-wide defaults (ifindex 0). */
        ref = lookup_neigh_parms(tbl, net, 0);
        if (!ref)
                return NULL;

        p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
        if (p) {
                p->tbl            = tbl;
                atomic_set(&p->refcnt, 1);
                p->reachable_time =
                                neigh_rand_reach_time(p->base_reachable_time);

                /* Let the driver veto or tune the new parms. */
                if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
                        kfree(p);
                        return NULL;
                }

                dev_hold(dev);
                p->dev = dev;
                write_pnet(&p->net, hold_net(net));
                p->sysctl_table = NULL;
                write_lock_bh(&tbl->lock);
                p->next         = tbl->parms.next;
                tbl->parms.next = p;
                write_unlock_bh(&tbl->lock);
        }
        return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1464
/* RCU callback: drop the list's reference on a detached neigh_parms
 * once all pre-existing readers are done with it.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
        struct neigh_parms *parms =
                container_of(head, struct neigh_parms, rcu_head);

        neigh_parms_put(parms);
}
1472
/* Unlink @parms from @tbl's list and schedule its destruction.
 *
 * The table's built-in default parms (&tbl->parms) are never released.
 * The reference held by the list is dropped via RCU
 * (neigh_rcu_free_parms), since lockless readers may still hold
 * pointers to the entry.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
        struct neigh_parms **p;

        if (!parms || parms == &tbl->parms)
                return;
        write_lock_bh(&tbl->lock);
        for (p = &tbl->parms.next; *p; p = &(*p)->next) {
                if (*p == parms) {
                        *p = parms->next;
                        parms->dead = 1;
                        write_unlock_bh(&tbl->lock);
                        if (parms->dev)
                                dev_put(parms->dev);
                        call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
                        return;
                }
        }
        write_unlock_bh(&tbl->lock);
        NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
EXPORT_SYMBOL(neigh_parms_release);
1495
/* Final teardown once the last reference is gone: release the netns
 * reference and free the structure.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        release_net(neigh_parms_net(parms));
        kfree(parms);
}
1501
1502 static struct lock_class_key neigh_table_proxy_queue_class;
1503
/* Core initialisation of a neighbour table: default parms, per-CPU
 * statistics, hash tables, the periodic GC work and the proxy queue.
 * Panics on allocation failure, as tables are created at boot / module
 * init where there is no sane recovery.
 */
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
        unsigned long now = jiffies;
        unsigned long phsize;

        write_pnet(&tbl->parms.net, &init_net);
        atomic_set(&tbl->parms.refcnt, 1);
        tbl->parms.reachable_time =
                          neigh_rand_reach_time(tbl->parms.base_reachable_time);

        tbl->stats = alloc_percpu(struct neigh_statistics);
        if (!tbl->stats)
                panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
        if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
                              &neigh_stat_seq_fops, tbl))
                panic("cannot create neighbour proc dir entry");
#endif

        /* Start with 2^3 = 8 hash buckets; grown on demand later. */
        RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

        phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
        tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

        if (!tbl->nht || !tbl->phash_buckets)
                panic("cannot allocate neighbour cache hashes");

        rwlock_init(&tbl->lock);
        INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
        schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
        setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
        skb_queue_head_init_class(&tbl->proxy_queue,
                        &neigh_table_proxy_queue_class);

        tbl->last_flush = now;
        tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}
1542
/* Fully initialise @tbl and register it on the global neigh_tables
 * list.  A duplicate registration for the same address family is
 * detected and loudly reported (the table is still linked, matching
 * historic behaviour).
 */
void neigh_table_init(struct neigh_table *tbl)
{
        struct neigh_table *tmp;

        neigh_table_init_no_netlink(tbl);
        write_lock(&neigh_tbl_lock);
        /* Scan for an existing table of the same family; tmp is non-NULL
         * after the loop iff a duplicate was found. */
        for (tmp = neigh_tables; tmp; tmp = tmp->next) {
                if (tmp->family == tbl->family)
                        break;
        }
        tbl->next       = neigh_tables;
        neigh_tables    = tbl;
        write_unlock(&neigh_tbl_lock);

        if (unlikely(tmp)) {
                pr_err("Registering multiple tables for family %d\n",
                       tbl->family);
                dump_stack();
        }
}
EXPORT_SYMBOL(neigh_table_init);
1564
/* Tear down @tbl: stop the GC work and proxy timer, flush all entries,
 * unlink the table from neigh_tables, and free its hash tables and
 * per-CPU statistics.  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
        struct neigh_table **tp;

        /* It is not clean... Fix it to unload IPv6 module safely */
        cancel_delayed_work_sync(&tbl->gc_work);
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
        neigh_ifdown(tbl, NULL);
        if (atomic_read(&tbl->entries))
                pr_crit("neighbour leakage\n");
        write_lock(&neigh_tbl_lock);
        for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
                if (*tp == tbl) {
                        *tp = tbl->next;
                        break;
                }
        }
        write_unlock(&neigh_tbl_lock);

        /* Hash table may still be visible to RCU readers; free it after
         * a grace period. */
        call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
                 neigh_hash_free_rcu);
        tbl->nht = NULL;

        kfree(tbl->phash_buckets);
        tbl->phash_buckets = NULL;

        remove_proc_entry(tbl->id, init_net.proc_net_stat);

        free_percpu(tbl->stats);
        tbl->stats = NULL;

        return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1600
/* RTM_DELNEIGH netlink handler: remove a neighbour or proxy entry.
 *
 * Finds the table matching ndm_family, then either deletes the proxy
 * entry (NTF_PROXY) or marks the neighbour NUD_FAILED via an
 * administrative neigh_update().  Runs under RTNL.  Returns 0 or a
 * negative errno.
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
        struct net *net = sock_net(skb->sk);
        struct ndmsg *ndm;
        struct nlattr *dst_attr;
        struct neigh_table *tbl;
        struct net_device *dev = NULL;
        int err = -EINVAL;

        ASSERT_RTNL();
        if (nlmsg_len(nlh) < sizeof(*ndm))
                goto out;

        dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
        if (dst_attr == NULL)
                goto out;

        ndm = nlmsg_data(nlh);
        if (ndm->ndm_ifindex) {
                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto out;
                }
        }

        read_lock(&neigh_tbl_lock);
        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
                struct neighbour *neigh;

                if (tbl->family != ndm->ndm_family)
                        continue;
                /* Found the table; release the list lock before calling
                 * into operations that may take other locks. */
                read_unlock(&neigh_tbl_lock);

                if (nla_len(dst_attr) < tbl->key_len)
                        goto out;

                if (ndm->ndm_flags & NTF_PROXY) {
                        err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
                        goto out;
                }

                if (dev == NULL)
                        goto out;

                neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
                if (neigh == NULL) {
                        err = -ENOENT;
                        goto out;
                }

                err = neigh_update(neigh, NULL, NUD_FAILED,
                                   NEIGH_UPDATE_F_OVERRIDE |
                                   NEIGH_UPDATE_F_ADMIN);
                neigh_release(neigh);
                goto out;
        }
        read_unlock(&neigh_tbl_lock);
        err = -EAFNOSUPPORT;

out:
        return err;
}
1664
/* RTM_NEWNEIGH handler: create or update a neighbour (or proxy) entry
 * from a netlink request.  The rtnetlink core calls this with the RTNL
 * lock held (see ASSERT_RTNL below).
 *
 * Returns 0 on success or a negative errno: -EINVAL for a malformed
 * request, -ENODEV for an unknown ifindex, -ENOENT when the entry does
 * not exist and NLM_F_CREATE was not given, -EEXIST for NLM_F_EXCL on
 * an existing entry, -EAFNOSUPPORT when no table matches the family.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	/* A destination address is mandatory. */
	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* A supplied link-layer address must be at least as long
		 * as this device's address (err is still -EINVAL here). */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Table found: drop the list lock before doing the real
		 * work.  Every path from here on exits via "goto out". */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			/* Proxy entries live in a separate hash; create on
			 * demand (last argument == 1).  Note dev may be
			 * NULL here, i.e. device-less proxy entries exist. */
			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		/* Regular entries are always bound to a device. */
		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* Without NLM_F_REPLACE, do not override an
			 * existing link-layer address. */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE: only kick the state machine as if the
			 * entry had just been used for output. */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1762
1763 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1764 {
1765         struct nlattr *nest;
1766
1767         nest = nla_nest_start(skb, NDTA_PARMS);
1768         if (nest == NULL)
1769                 return -ENOBUFS;
1770
1771         if ((parms->dev &&
1772              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1773             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1774             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1775             /* approximative value for deprecated QUEUE_LEN (in packets) */
1776             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1777                         DIV_ROUND_UP(parms->queue_len_bytes,
1778                                      SKB_TRUESIZE(ETH_FRAME_LEN))) ||
1779             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1780             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1781             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1782             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1783             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1784             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1785                           parms->base_reachable_time) ||
1786             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1787             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1788                           parms->delay_probe_time) ||
1789             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1790             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1791             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1792             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1793                 goto nla_put_failure;
1794         return nla_nest_end(skb, nest);
1795
1796 nla_put_failure:
1797         nla_nest_cancel(skb, nest);
1798         return -EMSGSIZE;
1799 }
1800
/* Build one RTM_NEWNEIGHTBL message describing a whole neighbour table:
 * GC thresholds/interval, an NDTA_CONFIG snapshot, the per-CPU stats
 * summed into NDTA_STATS, and the table's default parameter set.
 *
 * Returns the message length on success, -EMSGSIZE when @skb is full.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* Hold the table lock so the values dumped are consistent. */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* Hash parameters live in the RCU-managed hash table. */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* Sum every CPU's counters into one ndt_stats blob. */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	/* The table's default parms must not be bound to a device;
	 * per-device sets are dumped via neightbl_fill_param_info(). */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1886
1887 static int neightbl_fill_param_info(struct sk_buff *skb,
1888                                     struct neigh_table *tbl,
1889                                     struct neigh_parms *parms,
1890                                     u32 pid, u32 seq, int type,
1891                                     unsigned int flags)
1892 {
1893         struct ndtmsg *ndtmsg;
1894         struct nlmsghdr *nlh;
1895
1896         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1897         if (nlh == NULL)
1898                 return -EMSGSIZE;
1899
1900         ndtmsg = nlmsg_data(nlh);
1901
1902         read_lock_bh(&tbl->lock);
1903         ndtmsg->ndtm_family = tbl->family;
1904         ndtmsg->ndtm_pad1   = 0;
1905         ndtmsg->ndtm_pad2   = 0;
1906
1907         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1908             neightbl_fill_parms(skb, parms) < 0)
1909                 goto errout;
1910
1911         read_unlock_bh(&tbl->lock);
1912         return nlmsg_end(skb, nlh);
1913 errout:
1914         read_unlock_bh(&tbl->lock);
1915         nlmsg_cancel(skb, nlh);
1916         return -EMSGSIZE;
1917 }
1918
/* Top-level netlink attribute policy for RTM_SETNEIGHTBL requests. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1927
/* Policy for the attributes nested inside NDTA_PARMS.
 * Times are u64 millisecond values (see nla_get_msecs() users below). */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1943
/* RTM_SETNEIGHTBL handler: update a neighbour table's GC thresholds,
 * GC interval and/or one of its parameter sets.  The table is selected
 * by NDTA_NAME (optionally narrowed by family); the parameter set by
 * NDTPA_IFINDEX inside the nested NDTA_PARMS (0 selects the default).
 *
 * Returns 0 on success, -EINVAL when NDTA_NAME is missing, -ENOENT
 * when the table or parameter set is not found, or a parse error.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	/* Find the table by name; neigh_tbl_lock stays held until the
	 * update is finished (released at errout_locked). */
	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every parameter attribute that was supplied. */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Deprecated packet count: convert to an
				 * approximate byte limit. */
				p->queue_len_bytes = nla_get_u32(tbp[i]) *
						     SKB_TRUESIZE(ETH_FRAME_LEN);
				break;
			case NDTPA_QUEUE_LENBYTES:
				p->queue_len_bytes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2072
/* RTM_GETNEIGHTBL dump handler: emit every table matching the requested
 * family, each followed by its per-device parameter sets.  cb->args[0]
 * holds the table index and cb->args[1] the parameter-set index so a
 * multi-part dump can be resumed where it stopped.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* tbl->parms (the default set) was dumped above; walk
		 * only the chained per-device parameter sets here. */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		/* Skipping applies only to the table we resumed in. */
		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2121
/* Build one RTM_NEWNEIGH message describing @neigh.
 * Returns the message length on success, -EMSGSIZE when @skb is full.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	/* State, hardware address and timestamps are read under the
	 * entry's lock so the snapshot is internally consistent. */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			/* Drop the lock before the shared failure path. */
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	/* Do not count the reference the dumper itself is holding. */
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2173
2174 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2175                             u32 pid, u32 seq, int type, unsigned int flags,
2176                             struct neigh_table *tbl)
2177 {
2178         struct nlmsghdr *nlh;
2179         struct ndmsg *ndm;
2180
2181         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2182         if (nlh == NULL)
2183                 return -EMSGSIZE;
2184
2185         ndm = nlmsg_data(nlh);
2186         ndm->ndm_family  = tbl->family;
2187         ndm->ndm_pad1    = 0;
2188         ndm->ndm_pad2    = 0;
2189         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2190         ndm->ndm_type    = NDA_DST;
2191         ndm->ndm_ifindex = pn->dev->ifindex;
2192         ndm->ndm_state   = NUD_NONE;
2193
2194         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2195                 goto nla_put_failure;
2196
2197         return nlmsg_end(skb, nlh);
2198
2199 nla_put_failure:
2200         nlmsg_cancel(skb, nlh);
2201         return -EMSGSIZE;
2202 }
2203
/* Propagate a neighbour change: first to in-kernel netevent listeners,
 * then to user space as an RTM_NEWNEIGH netlink notification. */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2209
/* Dump the regular (non-proxy) neighbour entries of @tbl into @skb.
 * cb->args[1]/[2] store the hash bucket and per-bucket index reached,
 * so an interrupted dump can resume.  Returns skb->len, or -1 when a
 * message no longer fits.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	/* Walk the hash chains under RCU-BH protection. */
	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;	/* only skip inside the resume bucket */
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2250
2251 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2252                              struct netlink_callback *cb)
2253 {
2254         struct pneigh_entry *n;
2255         struct net *net = sock_net(skb->sk);
2256         int rc, h, s_h = cb->args[3];
2257         int idx, s_idx = idx = cb->args[4];
2258
2259         read_lock_bh(&tbl->lock);
2260
2261         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2262                 if (h > s_h)
2263                         s_idx = 0;
2264                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2265                         if (dev_net(n->dev) != net)
2266                                 continue;
2267                         if (idx < s_idx)
2268                                 goto next;
2269                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2270                                             cb->nlh->nlmsg_seq,
2271                                             RTM_NEWNEIGH,
2272                                             NLM_F_MULTI, tbl) <= 0) {
2273                                 read_unlock_bh(&tbl->lock);
2274                                 rc = -1;
2275                                 goto out;
2276                         }
2277                 next:
2278                         idx++;
2279                 }
2280         }
2281
2282         read_unlock_bh(&tbl->lock);
2283         rc = skb->len;
2284 out:
2285         cb->args[3] = h;
2286         cb->args[4] = idx;
2287         return rc;
2288
2289 }
2290
/* Top-level RTM_GETNEIGH dump handler.  Walks every neighbour table of
 * the requested family and dumps either the proxy entries (when the
 * request carries a full ndmsg with ndm_flags == NTF_PROXY) or the
 * regular cache.  cb->args[0] tracks the table index; args[1..4] are
 * owned by the per-table dump helpers and reset on each new table.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl;
	     tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* Moving past the resume table: clear the helpers'
		 * saved bucket/index state (args[1..4]). */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2329
2330 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2331 {
2332         int chain;
2333         struct neigh_hash_table *nht;
2334
2335         rcu_read_lock_bh();
2336         nht = rcu_dereference_bh(tbl->nht);
2337
2338         read_lock(&tbl->lock); /* avoid resizes */
2339         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2340                 struct neighbour *n;
2341
2342                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2343                      n != NULL;
2344                      n = rcu_dereference_bh(n->next))
2345                         cb(n, cookie);
2346         }
2347         read_unlock(&tbl->lock);
2348         rcu_read_unlock_bh();
2349 }
2350 EXPORT_SYMBOL(neigh_for_each);
2351
/* Invoke @cb on every entry of @tbl; entries for which @cb returns
 * non-zero are unlinked from their hash chain, marked dead and
 * released.  The tbl->lock must be held as a writer and BH disabled.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		/* np always points at the link that leads to n, so the
		 * entry can be unlinked without a "prev" pointer. */
		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* Unlink; concurrent RCU readers still see
				 * a valid next pointer through n. */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			/* Release outside n->lock. */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2386
2387 #ifdef CONFIG_PROC_FS
2388
/* seq_file helper: return the first neighbour entry visible to this
 * seq file (same namespace, passing the optional sub-iterator and
 * NEIGH_SEQ_SKIP_NOARP filters), scanning from hash bucket 0.  The
 * rcu_dereference_bh() calls rely on the caller holding the RCU-BH
 * read lock for the duration of the walk.
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			/* Skip entries from other network namespaces. */
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				/* Protocol sub-iterator may veto entries. */
				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			/* SKIP_NOARP: accept only entries with some state
			 * beyond pure NUD_NOARP. */
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2427
/* seq_file helper: advance from @n to the next visible neighbour,
 * crossing hash buckets as needed.  When an entry is returned and
 * @pos is non-NULL, *pos is decremented (the caller counts down to
 * the requested seek offset).
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	/* Give the protocol sub-iterator a chance to step within the
	 * current entry before moving along the hash chain. */
	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			/* Skip entries from other network namespaces. */
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			/* SKIP_NOARP: accept only entries with some state
			 * beyond pure NUD_NOARP. */
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		/* Chain exhausted: move on to the next bucket. */
		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2475
2476 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2477 {
2478         struct neighbour *n = neigh_get_first(seq);
2479
2480         if (n) {
2481                 --(*pos);
2482                 while (*pos) {
2483                         n = neigh_get_next(seq, n, pos);
2484                         if (!n)
2485                                 break;
2486                 }
2487         }
2488         return *pos ? NULL : n;
2489 }
2490
2491 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2492 {
2493         struct neigh_seq_state *state = seq->private;
2494         struct net *net = seq_file_net(seq);
2495         struct neigh_table *tbl = state->tbl;
2496         struct pneigh_entry *pn = NULL;
2497         int bucket = state->bucket;
2498
2499         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2500         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2501                 pn = tbl->phash_buckets[bucket];
2502                 while (pn && !net_eq(pneigh_net(pn), net))
2503                         pn = pn->next;
2504                 if (pn)
2505                         break;
2506         }
2507         state->bucket = bucket;
2508
2509         return pn;
2510 }
2511
2512 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2513                                             struct pneigh_entry *pn,
2514                                             loff_t *pos)
2515 {
2516         struct neigh_seq_state *state = seq->private;
2517         struct net *net = seq_file_net(seq);
2518         struct neigh_table *tbl = state->tbl;
2519
2520         do {
2521                 pn = pn->next;
2522         } while (pn && !net_eq(pneigh_net(pn), net));
2523
2524         while (!pn) {
2525                 if (++state->bucket > PNEIGH_HASHMASK)
2526                         break;
2527                 pn = tbl->phash_buckets[state->bucket];
2528                 while (pn && !net_eq(pneigh_net(pn), net))
2529                         pn = pn->next;
2530                 if (pn)
2531                         break;
2532         }
2533
2534         if (pn && pos)
2535                 --(*pos);
2536
2537         return pn;
2538 }
2539
2540 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2541 {
2542         struct pneigh_entry *pn = pneigh_get_first(seq);
2543
2544         if (pn) {
2545                 --(*pos);
2546                 while (*pos) {
2547                         pn = pneigh_get_next(seq, pn, pos);
2548                         if (!pn)
2549                                 break;
2550                 }
2551         }
2552         return *pos ? NULL : pn;
2553 }
2554
2555 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2556 {
2557         struct neigh_seq_state *state = seq->private;
2558         void *rc;
2559         loff_t idxpos = *pos;
2560
2561         rc = neigh_get_idx(seq, &idxpos);
2562         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2563                 rc = pneigh_get_idx(seq, &idxpos);
2564
2565         return rc;
2566 }
2567
2568 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2569         __acquires(rcu_bh)
2570 {
2571         struct neigh_seq_state *state = seq->private;
2572
2573         state->tbl = tbl;
2574         state->bucket = 0;
2575         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2576
2577         rcu_read_lock_bh();
2578         state->nht = rcu_dereference_bh(tbl->nht);
2579
2580         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2581 }
2582 EXPORT_SYMBOL(neigh_seq_start);
2583
2584 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2585 {
2586         struct neigh_seq_state *state;
2587         void *rc;
2588
2589         if (v == SEQ_START_TOKEN) {
2590                 rc = neigh_get_first(seq);
2591                 goto out;
2592         }
2593
2594         state = seq->private;
2595         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2596                 rc = neigh_get_next(seq, v, NULL);
2597                 if (rc)
2598                         goto out;
2599                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2600                         rc = pneigh_get_first(seq);
2601         } else {
2602                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2603                 rc = pneigh_get_next(seq, v, NULL);
2604         }
2605 out:
2606         ++(*pos);
2607         return rc;
2608 }
2609 EXPORT_SYMBOL(neigh_seq_next);
2610
/* seq_file ->stop: release the RCU-bh read side taken in
 * neigh_seq_start().
 */
void neigh_seq_stop(struct seq_file *seq, void *v)
        __releases(rcu_bh)
{
        rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2617
2618 /* statistics via seq_file */
2619
2620 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2621 {
2622         struct neigh_table *tbl = seq->private;
2623         int cpu;
2624
2625         if (*pos == 0)
2626                 return SEQ_START_TOKEN;
2627
2628         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2629                 if (!cpu_possible(cpu))
2630                         continue;
2631                 *pos = cpu+1;
2632                 return per_cpu_ptr(tbl->stats, cpu);
2633         }
2634         return NULL;
2635 }
2636
2637 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2638 {
2639         struct neigh_table *tbl = seq->private;
2640         int cpu;
2641
2642         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2643                 if (!cpu_possible(cpu))
2644                         continue;
2645                 *pos = cpu+1;
2646                 return per_cpu_ptr(tbl->stats, cpu);
2647         }
2648         return NULL;
2649 }
2650
/* ->stop for the stats file: nothing was locked in ->start, so there is
 * nothing to release.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2655
/* Emit one record of /proc/net/stat/<table>: the column header for the
 * start token, otherwise one line of per-CPU counters (v is the
 * neigh_statistics of the CPU selected by ->start/->next).
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
        struct neigh_table *tbl = seq->private;
        struct neigh_statistics *st = v;

        if (v == SEQ_START_TOKEN) {
                seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
                return 0;
        }

        /* tbl->entries is table-global; everything else is per-CPU. */
        seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
                        "%08lx %08lx  %08lx %08lx %08lx\n",
                   atomic_read(&tbl->entries),

                   st->allocs,
                   st->destroys,
                   st->hash_grows,

                   st->lookups,
                   st->hits,

                   st->res_failed,

                   st->rcv_probes_mcast,
                   st->rcv_probes_ucast,

                   st->periodic_gc_runs,
                   st->forced_gc_runs,
                   st->unres_discards
                   );

        return 0;
}
2689
/* seq_file iterator for the per-table statistics file. */
static const struct seq_operations neigh_stat_seq_ops = {
        .start  = neigh_stat_seq_start,
        .next   = neigh_stat_seq_next,
        .stop   = neigh_stat_seq_stop,
        .show   = neigh_stat_seq_show,
};
2696
2697 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2698 {
2699         int ret = seq_open(file, &neigh_stat_seq_ops);
2700
2701         if (!ret) {
2702                 struct seq_file *sf = file->private_data;
2703                 sf->private = PDE(inode)->data;
2704         }
2705         return ret;
2706 };
2707
/* file_operations for /proc/net/stat/<table>; reading is delegated to
 * the generic seq_file helpers.
 */
static const struct file_operations neigh_stat_seq_fops = {
        .owner   = THIS_MODULE,
        .open    = neigh_stat_seq_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,
};
2715
2716 #endif /* CONFIG_PROC_FS */
2717
2718 static inline size_t neigh_nlmsg_size(void)
2719 {
2720         return NLMSG_ALIGN(sizeof(struct ndmsg))
2721                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2722                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2723                + nla_total_size(sizeof(struct nda_cacheinfo))
2724                + nla_total_size(4); /* NDA_PROBES */
2725 }
2726
/* Build and multicast a netlink message of @type/@flags describing
 * neighbour @n to the RTNLGRP_NEIGH group of its namespace.  On
 * allocation or fill failure the error is recorded for listeners via
 * rtnl_set_sk_err() rather than dropped silently.  Uses GFP_ATOMIC since
 * callers may be in atomic context.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
        struct net *net = dev_net(n->dev);
        struct sk_buff *skb;
        int err = -ENOBUFS;

        skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
        if (skb == NULL)
                goto errout;

        err = neigh_fill_info(skb, n, 0, 0, type, flags);
        if (err < 0) {
                /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
                WARN_ON(err == -EMSGSIZE);
                kfree_skb(skb);
                goto errout;
        }
        rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
        return;
errout:
        if (err < 0)
                rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2750
#ifdef CONFIG_ARPD
/* Ask user-space resolvers to handle neighbour @n by multicasting an
 * RTM_GETNEIGH request to RTNLGRP_NEIGH listeners.
 */
void neigh_app_ns(struct neighbour *n)
{
        __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
2758
2759 #ifdef CONFIG_SYSCTL
2760
2761 static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2762                            size_t *lenp, loff_t *ppos)
2763 {
2764         int size, ret;
2765         ctl_table tmp = *ctl;
2766
2767         tmp.data = &size;
2768         size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
2769         ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2770         if (write && !ret)
2771                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2772         return ret;
2773 }
2774
/* Indices into neigh_sysctl_template.neigh_vars[]; the order must match
 * the designated initializers below.  NEIGH_VAR_MAX counts the real
 * entries; the template allocates one extra slot for the terminating
 * empty ctl_table.
 */
enum {
        NEIGH_VAR_MCAST_PROBE,
        NEIGH_VAR_UCAST_PROBE,
        NEIGH_VAR_APP_PROBE,
        NEIGH_VAR_RETRANS_TIME,
        NEIGH_VAR_BASE_REACHABLE_TIME,
        NEIGH_VAR_DELAY_PROBE_TIME,
        NEIGH_VAR_GC_STALETIME,
        NEIGH_VAR_QUEUE_LEN,
        NEIGH_VAR_QUEUE_LEN_BYTES,
        NEIGH_VAR_PROXY_QLEN,
        NEIGH_VAR_ANYCAST_DELAY,
        NEIGH_VAR_PROXY_DELAY,
        NEIGH_VAR_LOCKTIME,
        NEIGH_VAR_RETRANS_TIME_MS,
        NEIGH_VAR_BASE_REACHABLE_TIME_MS,
        /* The gc_* entries below exist only in the "default" directory. */
        NEIGH_VAR_GC_INTERVAL,
        NEIGH_VAR_GC_THRESH1,
        NEIGH_VAR_GC_THRESH2,
        NEIGH_VAR_GC_THRESH3,
        NEIGH_VAR_MAX
};
2797
/* Template for the per-device (and per-protocol "default") neighbour
 * sysctl directory.  neigh_sysctl_register() kmemdup()s this table,
 * points each entry's .data at the matching neigh_parms field, and then
 * registers the copy — which is why no .data members are set here.  The
 * final empty entry terminates the table for register_net_sysctl().
 */
static struct neigh_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
        .neigh_vars = {
                [NEIGH_VAR_MCAST_PROBE] = {
                        .procname       = "mcast_solicit",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                [NEIGH_VAR_UCAST_PROBE] = {
                        .procname       = "ucast_solicit",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                [NEIGH_VAR_APP_PROBE] = {
                        .procname       = "app_solicit",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                /* Times stored in jiffies but exposed in USER_HZ ticks. */
                [NEIGH_VAR_RETRANS_TIME] = {
                        .procname       = "retrans_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_userhz_jiffies,
                },
                [NEIGH_VAR_BASE_REACHABLE_TIME] = {
                        .procname       = "base_reachable_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                [NEIGH_VAR_DELAY_PROBE_TIME] = {
                        .procname       = "delay_first_probe_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                [NEIGH_VAR_GC_STALETIME] = {
                        .procname       = "gc_stale_time",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                /* Legacy packet-count view of unres_qlen_bytes. */
                [NEIGH_VAR_QUEUE_LEN] = {
                        .procname       = "unres_qlen",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_unres_qlen,
                },
                [NEIGH_VAR_QUEUE_LEN_BYTES] = {
                        .procname       = "unres_qlen_bytes",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                [NEIGH_VAR_PROXY_QLEN] = {
                        .procname       = "proxy_qlen",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                [NEIGH_VAR_ANYCAST_DELAY] = {
                        .procname       = "anycast_delay",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_userhz_jiffies,
                },
                [NEIGH_VAR_PROXY_DELAY] = {
                        .procname       = "proxy_delay",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_userhz_jiffies,
                },
                [NEIGH_VAR_LOCKTIME] = {
                        .procname       = "locktime",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_userhz_jiffies,
                },
                /* Millisecond views of the same two jiffies fields. */
                [NEIGH_VAR_RETRANS_TIME_MS] = {
                        .procname       = "retrans_time_ms",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_ms_jiffies,
                },
                [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
                        .procname       = "base_reachable_time_ms",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_ms_jiffies,
                },
                /* Table-wide GC knobs; zeroed out for per-device dirs. */
                [NEIGH_VAR_GC_INTERVAL] = {
                        .procname       = "gc_interval",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec_jiffies,
                },
                [NEIGH_VAR_GC_THRESH1] = {
                        .procname       = "gc_thresh1",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                [NEIGH_VAR_GC_THRESH2] = {
                        .procname       = "gc_thresh2",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                [NEIGH_VAR_GC_THRESH3] = {
                        .procname       = "gc_thresh3",
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = proc_dointvec,
                },
                {},
        },
};
2920
/* Create the "net/<p_name>/neigh/<dev|default>" sysctl subtree for the
 * neigh_parms @p.
 *
 * @dev:     owning device, or NULL for the protocol-wide default entry
 *           (which additionally exposes the gc_* table knobs).
 * @p_name:  protocol directory name, e.g. "ipv4".
 * @handler: optional proc handler override for the retrans/reachable
 *           time entries (both plain and _ms variants).
 *
 * Returns 0 on success, -ENOBUFS on allocation or registration failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
                          char *p_name, proc_handler *handler)
{
        struct neigh_sysctl_table *t;
        const char *dev_name_source = NULL;
        char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];

        /* Private copy of the template so .data can point at this @p. */
        t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
        if (!t)
                goto err;

        t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
        t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
        t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
        t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
        t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
        t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
        t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
        /* Both unres_qlen views are backed by the same byte counter. */
        t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
        t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
        t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
        t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
        t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
        t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
        t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
        t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;

        if (dev) {
                dev_name_source = dev->name;
                /* Terminate the table early */
                memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
                       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
        } else {
                dev_name_source = "default";
                /* NOTE(review): this assumes the gc_interval/gc_thresh*
                 * ints are laid out immediately after *p in the owning
                 * structure — verify against the neigh_table layout.
                 */
                t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
                t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
                t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
                t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
        }


        if (handler) {
                /* RetransTime */
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
                /* ReachableTime */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
                /* RetransTime (in milliseconds)*/
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
                /* ReachableTime (in milliseconds) */
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
        }

        snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
                p_name, dev_name_source);
        t->sysctl_header =
                register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
        if (!t->sysctl_header)
                goto free;

        p->sysctl_table = t;
        return 0;

free:
        kfree(t);
err:
        return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
2993
2994 void neigh_sysctl_unregister(struct neigh_parms *p)
2995 {
2996         if (p->sysctl_table) {
2997                 struct neigh_sysctl_table *t = p->sysctl_table;
2998                 p->sysctl_table = NULL;
2999                 unregister_net_sysctl_table(t->sysctl_header);
3000                 kfree(t);
3001         }
3002 }
3003 EXPORT_SYMBOL(neigh_sysctl_unregister);
3004
3005 #endif  /* CONFIG_SYSCTL */
3006
/* Register the PF_UNSPEC rtnetlink handlers for neighbour entries
 * (add/delete/dump) and neighbour tables (dump/set).  Runs at
 * subsys_initcall time.
 */
static int __init neigh_init(void)
{
        rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
        rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
        rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

        rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
                      NULL);
        rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

        return 0;
}

subsys_initcall(neigh_init);
3021