]> Pileus Git - ~andy/linux/blob - net/core/neighbour.c
neigh: only allow init_net to change the default neigh_parms
[~andy/linux] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41
#define DEBUG
#define NEIGH_DEBUG 1

/*
 * neigh_dbg - emit a pr_debug() message when @level does not exceed
 * NEIGH_DEBUG.
 *
 * @level is wrapped in parentheses so that an expression argument
 * (e.g. "a | b") is compared as a whole instead of being split up by
 * operator precedence.
 */
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if ((level) <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)
49
50 #define PNEIGH_HASHMASK         0xF
51
52 static void neigh_timer_handler(unsigned long arg);
53 static void __neigh_notify(struct neighbour *n, int type, int flags);
54 static void neigh_update_notify(struct neighbour *neigh);
55 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
56
57 static struct neigh_table *neigh_tables;
58 #ifdef CONFIG_PROC_FS
59 static const struct file_operations neigh_stat_seq_fops;
60 #endif
61
62 /*
63    Neighbour hash table buckets are protected with rwlock tbl->lock.
64
65    - All the scans/updates to hash buckets MUST be made under this lock.
66    - NOTHING clever should be made under this lock: no callbacks
67      to protocol backends, no attempts to send something to network.
68      It will result in deadlocks, if backend/driver wants to use neighbour
69      cache.
70    - If the entry requires some non-trivial actions, increase
71      its reference count and release table lock.
72
73    Neighbour entries are protected:
74    - with reference count.
75    - with rwlock neigh->lock
76
77    Reference count prevents destruction.
78
79    neigh->lock mainly serializes ll address data and its validity state.
80    However, the same lock is used to protect another entry fields:
81     - timer
82     - resolution queue
83
84    Again, nothing clever shall be made under neigh->lock,
85    the most complicated procedure, which we allow is dev->hard_header.
86    It is supposed, that dev->hard_header is simplistic and does
87    not make callbacks to neighbour tables.
88
89    The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
90    list of neighbour tables. This list is used only in process context,
91  */
92
93 static DEFINE_RWLOCK(neigh_tbl_lock);
94
95 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
96 {
97         kfree_skb(skb);
98         return -ENETDOWN;
99 }
100
101 static void neigh_cleanup_and_release(struct neighbour *neigh)
102 {
103         if (neigh->parms->neigh_cleanup)
104                 neigh->parms->neigh_cleanup(neigh);
105
106         __neigh_notify(neigh, RTM_DELNEIGH, 0);
107         neigh_release(neigh);
108 }
109
/*
 * Pick a random value in the interval (1/2)*base...(3/2)*base.
 * This matches the default IPv6 behaviour and is deliberately not
 * overridable.  A @base of zero yields zero.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (net_random() % base) + (base >> 1);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
121
122
123 static int neigh_forced_gc(struct neigh_table *tbl)
124 {
125         int shrunk = 0;
126         int i;
127         struct neigh_hash_table *nht;
128
129         NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
130
131         write_lock_bh(&tbl->lock);
132         nht = rcu_dereference_protected(tbl->nht,
133                                         lockdep_is_held(&tbl->lock));
134         for (i = 0; i < (1 << nht->hash_shift); i++) {
135                 struct neighbour *n;
136                 struct neighbour __rcu **np;
137
138                 np = &nht->hash_buckets[i];
139                 while ((n = rcu_dereference_protected(*np,
140                                         lockdep_is_held(&tbl->lock))) != NULL) {
141                         /* Neighbour record may be discarded if:
142                          * - nobody refers to it.
143                          * - it is not permanent
144                          */
145                         write_lock(&n->lock);
146                         if (atomic_read(&n->refcnt) == 1 &&
147                             !(n->nud_state & NUD_PERMANENT)) {
148                                 rcu_assign_pointer(*np,
149                                         rcu_dereference_protected(n->next,
150                                                   lockdep_is_held(&tbl->lock)));
151                                 n->dead = 1;
152                                 shrunk  = 1;
153                                 write_unlock(&n->lock);
154                                 neigh_cleanup_and_release(n);
155                                 continue;
156                         }
157                         write_unlock(&n->lock);
158                         np = &n->next;
159                 }
160         }
161
162         tbl->last_flush = jiffies;
163
164         write_unlock_bh(&tbl->lock);
165
166         return shrunk;
167 }
168
169 static void neigh_add_timer(struct neighbour *n, unsigned long when)
170 {
171         neigh_hold(n);
172         if (unlikely(mod_timer(&n->timer, when))) {
173                 printk("NEIGH: BUG, double timer add, state is %x\n",
174                        n->nud_state);
175                 dump_stack();
176         }
177 }
178
179 static int neigh_del_timer(struct neighbour *n)
180 {
181         if ((n->nud_state & NUD_IN_TIMER) &&
182             del_timer(&n->timer)) {
183                 neigh_release(n);
184                 return 1;
185         }
186         return 0;
187 }
188
189 static void pneigh_queue_purge(struct sk_buff_head *list)
190 {
191         struct sk_buff *skb;
192
193         while ((skb = skb_dequeue(list)) != NULL) {
194                 dev_put(skb->dev);
195                 kfree_skb(skb);
196         }
197 }
198
199 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
200 {
201         int i;
202         struct neigh_hash_table *nht;
203
204         nht = rcu_dereference_protected(tbl->nht,
205                                         lockdep_is_held(&tbl->lock));
206
207         for (i = 0; i < (1 << nht->hash_shift); i++) {
208                 struct neighbour *n;
209                 struct neighbour __rcu **np = &nht->hash_buckets[i];
210
211                 while ((n = rcu_dereference_protected(*np,
212                                         lockdep_is_held(&tbl->lock))) != NULL) {
213                         if (dev && n->dev != dev) {
214                                 np = &n->next;
215                                 continue;
216                         }
217                         rcu_assign_pointer(*np,
218                                    rcu_dereference_protected(n->next,
219                                                 lockdep_is_held(&tbl->lock)));
220                         write_lock(&n->lock);
221                         neigh_del_timer(n);
222                         n->dead = 1;
223
224                         if (atomic_read(&n->refcnt) != 1) {
225                                 /* The most unpleasant situation.
226                                    We must destroy neighbour entry,
227                                    but someone still uses it.
228
229                                    The destroy will be delayed until
230                                    the last user releases us, but
231                                    we must kill timers etc. and move
232                                    it to safe state.
233                                  */
234                                 skb_queue_purge(&n->arp_queue);
235                                 n->arp_queue_len_bytes = 0;
236                                 n->output = neigh_blackhole;
237                                 if (n->nud_state & NUD_VALID)
238                                         n->nud_state = NUD_NOARP;
239                                 else
240                                         n->nud_state = NUD_NONE;
241                                 neigh_dbg(2, "neigh %p is stray\n", n);
242                         }
243                         write_unlock(&n->lock);
244                         neigh_cleanup_and_release(n);
245                 }
246         }
247 }
248
249 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
250 {
251         write_lock_bh(&tbl->lock);
252         neigh_flush_dev(tbl, dev);
253         write_unlock_bh(&tbl->lock);
254 }
255 EXPORT_SYMBOL(neigh_changeaddr);
256
257 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
258 {
259         write_lock_bh(&tbl->lock);
260         neigh_flush_dev(tbl, dev);
261         pneigh_ifdown(tbl, dev);
262         write_unlock_bh(&tbl->lock);
263
264         del_timer_sync(&tbl->proxy_timer);
265         pneigh_queue_purge(&tbl->proxy_queue);
266         return 0;
267 }
268 EXPORT_SYMBOL(neigh_ifdown);
269
270 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
271 {
272         struct neighbour *n = NULL;
273         unsigned long now = jiffies;
274         int entries;
275
276         entries = atomic_inc_return(&tbl->entries) - 1;
277         if (entries >= tbl->gc_thresh3 ||
278             (entries >= tbl->gc_thresh2 &&
279              time_after(now, tbl->last_flush + 5 * HZ))) {
280                 if (!neigh_forced_gc(tbl) &&
281                     entries >= tbl->gc_thresh3)
282                         goto out_entries;
283         }
284
285         n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
286         if (!n)
287                 goto out_entries;
288
289         skb_queue_head_init(&n->arp_queue);
290         rwlock_init(&n->lock);
291         seqlock_init(&n->ha_lock);
292         n->updated        = n->used = now;
293         n->nud_state      = NUD_NONE;
294         n->output         = neigh_blackhole;
295         seqlock_init(&n->hh.hh_lock);
296         n->parms          = neigh_parms_clone(&tbl->parms);
297         setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
298
299         NEIGH_CACHE_STAT_INC(tbl, allocs);
300         n->tbl            = tbl;
301         atomic_set(&n->refcnt, 1);
302         n->dead           = 1;
303 out:
304         return n;
305
306 out_entries:
307         atomic_dec(&tbl->entries);
308         goto out;
309 }
310
311 static void neigh_get_hash_rnd(u32 *x)
312 {
313         get_random_bytes(x, sizeof(*x));
314         *x |= 1;
315 }
316
317 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
318 {
319         size_t size = (1 << shift) * sizeof(struct neighbour *);
320         struct neigh_hash_table *ret;
321         struct neighbour __rcu **buckets;
322         int i;
323
324         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
325         if (!ret)
326                 return NULL;
327         if (size <= PAGE_SIZE)
328                 buckets = kzalloc(size, GFP_ATOMIC);
329         else
330                 buckets = (struct neighbour __rcu **)
331                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
332                                            get_order(size));
333         if (!buckets) {
334                 kfree(ret);
335                 return NULL;
336         }
337         ret->hash_buckets = buckets;
338         ret->hash_shift = shift;
339         for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
340                 neigh_get_hash_rnd(&ret->hash_rnd[i]);
341         return ret;
342 }
343
344 static void neigh_hash_free_rcu(struct rcu_head *head)
345 {
346         struct neigh_hash_table *nht = container_of(head,
347                                                     struct neigh_hash_table,
348                                                     rcu);
349         size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
350         struct neighbour __rcu **buckets = nht->hash_buckets;
351
352         if (size <= PAGE_SIZE)
353                 kfree(buckets);
354         else
355                 free_pages((unsigned long)buckets, get_order(size));
356         kfree(nht);
357 }
358
359 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
360                                                 unsigned long new_shift)
361 {
362         unsigned int i, hash;
363         struct neigh_hash_table *new_nht, *old_nht;
364
365         NEIGH_CACHE_STAT_INC(tbl, hash_grows);
366
367         old_nht = rcu_dereference_protected(tbl->nht,
368                                             lockdep_is_held(&tbl->lock));
369         new_nht = neigh_hash_alloc(new_shift);
370         if (!new_nht)
371                 return old_nht;
372
373         for (i = 0; i < (1 << old_nht->hash_shift); i++) {
374                 struct neighbour *n, *next;
375
376                 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
377                                                    lockdep_is_held(&tbl->lock));
378                      n != NULL;
379                      n = next) {
380                         hash = tbl->hash(n->primary_key, n->dev,
381                                          new_nht->hash_rnd);
382
383                         hash >>= (32 - new_nht->hash_shift);
384                         next = rcu_dereference_protected(n->next,
385                                                 lockdep_is_held(&tbl->lock));
386
387                         rcu_assign_pointer(n->next,
388                                            rcu_dereference_protected(
389                                                 new_nht->hash_buckets[hash],
390                                                 lockdep_is_held(&tbl->lock)));
391                         rcu_assign_pointer(new_nht->hash_buckets[hash], n);
392                 }
393         }
394
395         rcu_assign_pointer(tbl->nht, new_nht);
396         call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
397         return new_nht;
398 }
399
400 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
401                                struct net_device *dev)
402 {
403         struct neighbour *n;
404         int key_len = tbl->key_len;
405         u32 hash_val;
406         struct neigh_hash_table *nht;
407
408         NEIGH_CACHE_STAT_INC(tbl, lookups);
409
410         rcu_read_lock_bh();
411         nht = rcu_dereference_bh(tbl->nht);
412         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
413
414         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
415              n != NULL;
416              n = rcu_dereference_bh(n->next)) {
417                 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
418                         if (!atomic_inc_not_zero(&n->refcnt))
419                                 n = NULL;
420                         NEIGH_CACHE_STAT_INC(tbl, hits);
421                         break;
422                 }
423         }
424
425         rcu_read_unlock_bh();
426         return n;
427 }
428 EXPORT_SYMBOL(neigh_lookup);
429
430 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
431                                      const void *pkey)
432 {
433         struct neighbour *n;
434         int key_len = tbl->key_len;
435         u32 hash_val;
436         struct neigh_hash_table *nht;
437
438         NEIGH_CACHE_STAT_INC(tbl, lookups);
439
440         rcu_read_lock_bh();
441         nht = rcu_dereference_bh(tbl->nht);
442         hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
443
444         for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
445              n != NULL;
446              n = rcu_dereference_bh(n->next)) {
447                 if (!memcmp(n->primary_key, pkey, key_len) &&
448                     net_eq(dev_net(n->dev), net)) {
449                         if (!atomic_inc_not_zero(&n->refcnt))
450                                 n = NULL;
451                         NEIGH_CACHE_STAT_INC(tbl, hits);
452                         break;
453                 }
454         }
455
456         rcu_read_unlock_bh();
457         return n;
458 }
459 EXPORT_SYMBOL(neigh_lookup_nodev);
460
461 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
462                                  struct net_device *dev, bool want_ref)
463 {
464         u32 hash_val;
465         int key_len = tbl->key_len;
466         int error;
467         struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
468         struct neigh_hash_table *nht;
469
470         if (!n) {
471                 rc = ERR_PTR(-ENOBUFS);
472                 goto out;
473         }
474
475         memcpy(n->primary_key, pkey, key_len);
476         n->dev = dev;
477         dev_hold(dev);
478
479         /* Protocol specific setup. */
480         if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
481                 rc = ERR_PTR(error);
482                 goto out_neigh_release;
483         }
484
485         if (dev->netdev_ops->ndo_neigh_construct) {
486                 error = dev->netdev_ops->ndo_neigh_construct(n);
487                 if (error < 0) {
488                         rc = ERR_PTR(error);
489                         goto out_neigh_release;
490                 }
491         }
492
493         /* Device specific setup. */
494         if (n->parms->neigh_setup &&
495             (error = n->parms->neigh_setup(n)) < 0) {
496                 rc = ERR_PTR(error);
497                 goto out_neigh_release;
498         }
499
500         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
501
502         write_lock_bh(&tbl->lock);
503         nht = rcu_dereference_protected(tbl->nht,
504                                         lockdep_is_held(&tbl->lock));
505
506         if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
507                 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
508
509         hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
510
511         if (n->parms->dead) {
512                 rc = ERR_PTR(-EINVAL);
513                 goto out_tbl_unlock;
514         }
515
516         for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
517                                             lockdep_is_held(&tbl->lock));
518              n1 != NULL;
519              n1 = rcu_dereference_protected(n1->next,
520                         lockdep_is_held(&tbl->lock))) {
521                 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
522                         if (want_ref)
523                                 neigh_hold(n1);
524                         rc = n1;
525                         goto out_tbl_unlock;
526                 }
527         }
528
529         n->dead = 0;
530         if (want_ref)
531                 neigh_hold(n);
532         rcu_assign_pointer(n->next,
533                            rcu_dereference_protected(nht->hash_buckets[hash_val],
534                                                      lockdep_is_held(&tbl->lock)));
535         rcu_assign_pointer(nht->hash_buckets[hash_val], n);
536         write_unlock_bh(&tbl->lock);
537         neigh_dbg(2, "neigh %p is created\n", n);
538         rc = n;
539 out:
540         return rc;
541 out_tbl_unlock:
542         write_unlock_bh(&tbl->lock);
543 out_neigh_release:
544         neigh_release(n);
545         goto out;
546 }
547 EXPORT_SYMBOL(__neigh_create);
548
549 static u32 pneigh_hash(const void *pkey, int key_len)
550 {
551         u32 hash_val = *(u32 *)(pkey + key_len - 4);
552         hash_val ^= (hash_val >> 16);
553         hash_val ^= hash_val >> 8;
554         hash_val ^= hash_val >> 4;
555         hash_val &= PNEIGH_HASHMASK;
556         return hash_val;
557 }
558
559 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
560                                               struct net *net,
561                                               const void *pkey,
562                                               int key_len,
563                                               struct net_device *dev)
564 {
565         while (n) {
566                 if (!memcmp(n->key, pkey, key_len) &&
567                     net_eq(pneigh_net(n), net) &&
568                     (n->dev == dev || !n->dev))
569                         return n;
570                 n = n->next;
571         }
572         return NULL;
573 }
574
575 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
576                 struct net *net, const void *pkey, struct net_device *dev)
577 {
578         int key_len = tbl->key_len;
579         u32 hash_val = pneigh_hash(pkey, key_len);
580
581         return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
582                                  net, pkey, key_len, dev);
583 }
584 EXPORT_SYMBOL_GPL(__pneigh_lookup);
585
586 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
587                                     struct net *net, const void *pkey,
588                                     struct net_device *dev, int creat)
589 {
590         struct pneigh_entry *n;
591         int key_len = tbl->key_len;
592         u32 hash_val = pneigh_hash(pkey, key_len);
593
594         read_lock_bh(&tbl->lock);
595         n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
596                               net, pkey, key_len, dev);
597         read_unlock_bh(&tbl->lock);
598
599         if (n || !creat)
600                 goto out;
601
602         ASSERT_RTNL();
603
604         n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
605         if (!n)
606                 goto out;
607
608         write_pnet(&n->net, hold_net(net));
609         memcpy(n->key, pkey, key_len);
610         n->dev = dev;
611         if (dev)
612                 dev_hold(dev);
613
614         if (tbl->pconstructor && tbl->pconstructor(n)) {
615                 if (dev)
616                         dev_put(dev);
617                 release_net(net);
618                 kfree(n);
619                 n = NULL;
620                 goto out;
621         }
622
623         write_lock_bh(&tbl->lock);
624         n->next = tbl->phash_buckets[hash_val];
625         tbl->phash_buckets[hash_val] = n;
626         write_unlock_bh(&tbl->lock);
627 out:
628         return n;
629 }
630 EXPORT_SYMBOL(pneigh_lookup);
631
632
633 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
634                   struct net_device *dev)
635 {
636         struct pneigh_entry *n, **np;
637         int key_len = tbl->key_len;
638         u32 hash_val = pneigh_hash(pkey, key_len);
639
640         write_lock_bh(&tbl->lock);
641         for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
642              np = &n->next) {
643                 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
644                     net_eq(pneigh_net(n), net)) {
645                         *np = n->next;
646                         write_unlock_bh(&tbl->lock);
647                         if (tbl->pdestructor)
648                                 tbl->pdestructor(n);
649                         if (n->dev)
650                                 dev_put(n->dev);
651                         release_net(pneigh_net(n));
652                         kfree(n);
653                         return 0;
654                 }
655         }
656         write_unlock_bh(&tbl->lock);
657         return -ENOENT;
658 }
659
660 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
661 {
662         struct pneigh_entry *n, **np;
663         u32 h;
664
665         for (h = 0; h <= PNEIGH_HASHMASK; h++) {
666                 np = &tbl->phash_buckets[h];
667                 while ((n = *np) != NULL) {
668                         if (!dev || n->dev == dev) {
669                                 *np = n->next;
670                                 if (tbl->pdestructor)
671                                         tbl->pdestructor(n);
672                                 if (n->dev)
673                                         dev_put(n->dev);
674                                 release_net(pneigh_net(n));
675                                 kfree(n);
676                                 continue;
677                         }
678                         np = &n->next;
679                 }
680         }
681         return -ENOENT;
682 }
683
684 static void neigh_parms_destroy(struct neigh_parms *parms);
685
686 static inline void neigh_parms_put(struct neigh_parms *parms)
687 {
688         if (atomic_dec_and_test(&parms->refcnt))
689                 neigh_parms_destroy(parms);
690 }
691
692 /*
693  *      neighbour must already be out of the table;
694  *
695  */
696 void neigh_destroy(struct neighbour *neigh)
697 {
698         struct net_device *dev = neigh->dev;
699
700         NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
701
702         if (!neigh->dead) {
703                 pr_warn("Destroying alive neighbour %p\n", neigh);
704                 dump_stack();
705                 return;
706         }
707
708         if (neigh_del_timer(neigh))
709                 pr_warn("Impossible event\n");
710
711         skb_queue_purge(&neigh->arp_queue);
712         neigh->arp_queue_len_bytes = 0;
713
714         if (dev->netdev_ops->ndo_neigh_destroy)
715                 dev->netdev_ops->ndo_neigh_destroy(neigh);
716
717         dev_put(dev);
718         neigh_parms_put(neigh->parms);
719
720         neigh_dbg(2, "neigh %p is destroyed\n", neigh);
721
722         atomic_dec(&neigh->tbl->entries);
723         kfree_rcu(neigh, rcu);
724 }
725 EXPORT_SYMBOL(neigh_destroy);
726
727 /* Neighbour state is suspicious;
728    disable fast path.
729
730    Called with write_locked neigh.
731  */
732 static void neigh_suspect(struct neighbour *neigh)
733 {
734         neigh_dbg(2, "neigh %p is suspected\n", neigh);
735
736         neigh->output = neigh->ops->output;
737 }
738
739 /* Neighbour state is OK;
740    enable fast path.
741
742    Called with write_locked neigh.
743  */
744 static void neigh_connect(struct neighbour *neigh)
745 {
746         neigh_dbg(2, "neigh %p is connected\n", neigh);
747
748         neigh->output = neigh->ops->connected_output;
749 }
750
751 static void neigh_periodic_work(struct work_struct *work)
752 {
753         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
754         struct neighbour *n;
755         struct neighbour __rcu **np;
756         unsigned int i;
757         struct neigh_hash_table *nht;
758
759         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
760
761         write_lock_bh(&tbl->lock);
762         nht = rcu_dereference_protected(tbl->nht,
763                                         lockdep_is_held(&tbl->lock));
764
765         if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
766                 goto out;
767
768         /*
769          *      periodically recompute ReachableTime from random function
770          */
771
772         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
773                 struct neigh_parms *p;
774                 tbl->last_rand = jiffies;
775                 for (p = &tbl->parms; p; p = p->next)
776                         p->reachable_time =
777                                 neigh_rand_reach_time(p->base_reachable_time);
778         }
779
780         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
781                 np = &nht->hash_buckets[i];
782
783                 while ((n = rcu_dereference_protected(*np,
784                                 lockdep_is_held(&tbl->lock))) != NULL) {
785                         unsigned int state;
786
787                         write_lock(&n->lock);
788
789                         state = n->nud_state;
790                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
791                                 write_unlock(&n->lock);
792                                 goto next_elt;
793                         }
794
795                         if (time_before(n->used, n->confirmed))
796                                 n->used = n->confirmed;
797
798                         if (atomic_read(&n->refcnt) == 1 &&
799                             (state == NUD_FAILED ||
800                              time_after(jiffies, n->used + n->parms->gc_staletime))) {
801                                 *np = n->next;
802                                 n->dead = 1;
803                                 write_unlock(&n->lock);
804                                 neigh_cleanup_and_release(n);
805                                 continue;
806                         }
807                         write_unlock(&n->lock);
808
809 next_elt:
810                         np = &n->next;
811                 }
812                 /*
813                  * It's fine to release lock here, even if hash table
814                  * grows while we are preempted.
815                  */
816                 write_unlock_bh(&tbl->lock);
817                 cond_resched();
818                 write_lock_bh(&tbl->lock);
819                 nht = rcu_dereference_protected(tbl->nht,
820                                                 lockdep_is_held(&tbl->lock));
821         }
822 out:
823         /* Cycle through all hash buckets every base_reachable_time/2 ticks.
824          * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
825          * base_reachable_time.
826          */
827         schedule_delayed_work(&tbl->gc_work,
828                               tbl->parms.base_reachable_time >> 1);
829         write_unlock_bh(&tbl->lock);
830 }
831
832 static __inline__ int neigh_max_probes(struct neighbour *n)
833 {
834         struct neigh_parms *p = n->parms;
835         return (n->nud_state & NUD_PROBE) ?
836                 p->ucast_probes :
837                 p->ucast_probes + p->app_probes + p->mcast_probes;
838 }
839
840 static void neigh_invalidate(struct neighbour *neigh)
841         __releases(neigh->lock)
842         __acquires(neigh->lock)
843 {
844         struct sk_buff *skb;
845
846         NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
847         neigh_dbg(2, "neigh %p is failed\n", neigh);
848         neigh->updated = jiffies;
849
850         /* It is very thin place. report_unreachable is very complicated
851            routine. Particularly, it can hit the same neighbour entry!
852
853            So that, we try to be accurate and avoid dead loop. --ANK
854          */
855         while (neigh->nud_state == NUD_FAILED &&
856                (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
857                 write_unlock(&neigh->lock);
858                 neigh->ops->error_report(neigh, skb);
859                 write_lock(&neigh->lock);
860         }
861         skb_queue_purge(&neigh->arp_queue);
862         neigh->arp_queue_len_bytes = 0;
863 }
864
865 static void neigh_probe(struct neighbour *neigh)
866         __releases(neigh->lock)
867 {
868         struct sk_buff *skb = skb_peek(&neigh->arp_queue);
869         /* keep skb alive even if arp_queue overflows */
870         if (skb)
871                 skb = skb_copy(skb, GFP_ATOMIC);
872         write_unlock(&neigh->lock);
873         neigh->ops->solicit(neigh, skb);
874         atomic_inc(&neigh->probes);
875         kfree_skb(skb);
876 }
877
878 /* Called when a timer expires for a neighbour entry. */
879
880 static void neigh_timer_handler(unsigned long arg)
881 {
882         unsigned long now, next;
883         struct neighbour *neigh = (struct neighbour *)arg;
884         unsigned int state;
885         int notify = 0;
886
887         write_lock(&neigh->lock);
888
889         state = neigh->nud_state;
890         now = jiffies;
891         next = now + HZ;
892
893         if (!(state & NUD_IN_TIMER))
894                 goto out;
895
896         if (state & NUD_REACHABLE) {
897                 if (time_before_eq(now,
898                                    neigh->confirmed + neigh->parms->reachable_time)) {
899                         neigh_dbg(2, "neigh %p is still alive\n", neigh);
900                         next = neigh->confirmed + neigh->parms->reachable_time;
901                 } else if (time_before_eq(now,
902                                           neigh->used + neigh->parms->delay_probe_time)) {
903                         neigh_dbg(2, "neigh %p is delayed\n", neigh);
904                         neigh->nud_state = NUD_DELAY;
905                         neigh->updated = jiffies;
906                         neigh_suspect(neigh);
907                         next = now + neigh->parms->delay_probe_time;
908                 } else {
909                         neigh_dbg(2, "neigh %p is suspected\n", neigh);
910                         neigh->nud_state = NUD_STALE;
911                         neigh->updated = jiffies;
912                         neigh_suspect(neigh);
913                         notify = 1;
914                 }
915         } else if (state & NUD_DELAY) {
916                 if (time_before_eq(now,
917                                    neigh->confirmed + neigh->parms->delay_probe_time)) {
918                         neigh_dbg(2, "neigh %p is now reachable\n", neigh);
919                         neigh->nud_state = NUD_REACHABLE;
920                         neigh->updated = jiffies;
921                         neigh_connect(neigh);
922                         notify = 1;
923                         next = neigh->confirmed + neigh->parms->reachable_time;
924                 } else {
925                         neigh_dbg(2, "neigh %p is probed\n", neigh);
926                         neigh->nud_state = NUD_PROBE;
927                         neigh->updated = jiffies;
928                         atomic_set(&neigh->probes, 0);
929                         next = now + neigh->parms->retrans_time;
930                 }
931         } else {
932                 /* NUD_PROBE|NUD_INCOMPLETE */
933                 next = now + neigh->parms->retrans_time;
934         }
935
936         if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
937             atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
938                 neigh->nud_state = NUD_FAILED;
939                 notify = 1;
940                 neigh_invalidate(neigh);
941         }
942
943         if (neigh->nud_state & NUD_IN_TIMER) {
944                 if (time_before(next, jiffies + HZ/2))
945                         next = jiffies + HZ/2;
946                 if (!mod_timer(&neigh->timer, next))
947                         neigh_hold(neigh);
948         }
949         if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
950                 neigh_probe(neigh);
951         } else {
952 out:
953                 write_unlock(&neigh->lock);
954         }
955
956         if (notify)
957                 neigh_update_notify(neigh);
958
959         neigh_release(neigh);
960 }
961
962 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
963 {
964         int rc;
965         bool immediate_probe = false;
966
967         write_lock_bh(&neigh->lock);
968
969         rc = 0;
970         if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
971                 goto out_unlock_bh;
972
973         if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
974                 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
975                         unsigned long next, now = jiffies;
976
977                         atomic_set(&neigh->probes, neigh->parms->ucast_probes);
978                         neigh->nud_state     = NUD_INCOMPLETE;
979                         neigh->updated = now;
980                         next = now + max(neigh->parms->retrans_time, HZ/2);
981                         neigh_add_timer(neigh, next);
982                         immediate_probe = true;
983                 } else {
984                         neigh->nud_state = NUD_FAILED;
985                         neigh->updated = jiffies;
986                         write_unlock_bh(&neigh->lock);
987
988                         kfree_skb(skb);
989                         return 1;
990                 }
991         } else if (neigh->nud_state & NUD_STALE) {
992                 neigh_dbg(2, "neigh %p is delayed\n", neigh);
993                 neigh->nud_state = NUD_DELAY;
994                 neigh->updated = jiffies;
995                 neigh_add_timer(neigh,
996                                 jiffies + neigh->parms->delay_probe_time);
997         }
998
999         if (neigh->nud_state == NUD_INCOMPLETE) {
1000                 if (skb) {
1001                         while (neigh->arp_queue_len_bytes + skb->truesize >
1002                                neigh->parms->queue_len_bytes) {
1003                                 struct sk_buff *buff;
1004
1005                                 buff = __skb_dequeue(&neigh->arp_queue);
1006                                 if (!buff)
1007                                         break;
1008                                 neigh->arp_queue_len_bytes -= buff->truesize;
1009                                 kfree_skb(buff);
1010                                 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1011                         }
1012                         skb_dst_force(skb);
1013                         __skb_queue_tail(&neigh->arp_queue, skb);
1014                         neigh->arp_queue_len_bytes += skb->truesize;
1015                 }
1016                 rc = 1;
1017         }
1018 out_unlock_bh:
1019         if (immediate_probe)
1020                 neigh_probe(neigh);
1021         else
1022                 write_unlock(&neigh->lock);
1023         local_bh_enable();
1024         return rc;
1025 }
1026 EXPORT_SYMBOL(__neigh_event_send);
1027
1028 static void neigh_update_hhs(struct neighbour *neigh)
1029 {
1030         struct hh_cache *hh;
1031         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1032                 = NULL;
1033
1034         if (neigh->dev->header_ops)
1035                 update = neigh->dev->header_ops->cache_update;
1036
1037         if (update) {
1038                 hh = &neigh->hh;
1039                 if (hh->hh_len) {
1040                         write_seqlock_bh(&hh->hh_lock);
1041                         update(hh, neigh->dev, neigh->ha);
1042                         write_sequnlock_bh(&hh->hh_lock);
1043                 }
1044         }
1045 }
1046
1047
1048
1049 /* Generic update routine.
1050    -- lladdr is new lladdr or NULL, if it is not supplied.
1051    -- new    is new state.
1052    -- flags
1053         NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1054                                 if it is different.
1055         NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1056                                 lladdr instead of overriding it
1057                                 if it is different.
1058                                 It also allows to retain current state
1059                                 if lladdr is unchanged.
1060         NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1061
1062         NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1063                                 NTF_ROUTER flag.
1064         NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1065                                 a router.
1066
1067    Caller MUST hold reference count on the entry.
1068  */
1069
1070 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1071                  u32 flags)
1072 {
1073         u8 old;
1074         int err;
1075         int notify = 0;
1076         struct net_device *dev;
1077         int update_isrouter = 0;
1078
1079         write_lock_bh(&neigh->lock);
1080
1081         dev    = neigh->dev;
1082         old    = neigh->nud_state;
1083         err    = -EPERM;
1084
1085         if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1086             (old & (NUD_NOARP | NUD_PERMANENT)))
1087                 goto out;
1088
1089         if (!(new & NUD_VALID)) {
1090                 neigh_del_timer(neigh);
1091                 if (old & NUD_CONNECTED)
1092                         neigh_suspect(neigh);
1093                 neigh->nud_state = new;
1094                 err = 0;
1095                 notify = old & NUD_VALID;
1096                 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1097                     (new & NUD_FAILED)) {
1098                         neigh_invalidate(neigh);
1099                         notify = 1;
1100                 }
1101                 goto out;
1102         }
1103
1104         /* Compare new lladdr with cached one */
1105         if (!dev->addr_len) {
1106                 /* First case: device needs no address. */
1107                 lladdr = neigh->ha;
1108         } else if (lladdr) {
1109                 /* The second case: if something is already cached
1110                    and a new address is proposed:
1111                    - compare new & old
1112                    - if they are different, check override flag
1113                  */
1114                 if ((old & NUD_VALID) &&
1115                     !memcmp(lladdr, neigh->ha, dev->addr_len))
1116                         lladdr = neigh->ha;
1117         } else {
1118                 /* No address is supplied; if we know something,
1119                    use it, otherwise discard the request.
1120                  */
1121                 err = -EINVAL;
1122                 if (!(old & NUD_VALID))
1123                         goto out;
1124                 lladdr = neigh->ha;
1125         }
1126
1127         if (new & NUD_CONNECTED)
1128                 neigh->confirmed = jiffies;
1129         neigh->updated = jiffies;
1130
1131         /* If entry was valid and address is not changed,
1132            do not change entry state, if new one is STALE.
1133          */
1134         err = 0;
1135         update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1136         if (old & NUD_VALID) {
1137                 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1138                         update_isrouter = 0;
1139                         if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1140                             (old & NUD_CONNECTED)) {
1141                                 lladdr = neigh->ha;
1142                                 new = NUD_STALE;
1143                         } else
1144                                 goto out;
1145                 } else {
1146                         if (lladdr == neigh->ha && new == NUD_STALE &&
1147                             ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1148                              (old & NUD_CONNECTED))
1149                             )
1150                                 new = old;
1151                 }
1152         }
1153
1154         if (new != old) {
1155                 neigh_del_timer(neigh);
1156                 if (new & NUD_IN_TIMER)
1157                         neigh_add_timer(neigh, (jiffies +
1158                                                 ((new & NUD_REACHABLE) ?
1159                                                  neigh->parms->reachable_time :
1160                                                  0)));
1161                 neigh->nud_state = new;
1162         }
1163
1164         if (lladdr != neigh->ha) {
1165                 write_seqlock(&neigh->ha_lock);
1166                 memcpy(&neigh->ha, lladdr, dev->addr_len);
1167                 write_sequnlock(&neigh->ha_lock);
1168                 neigh_update_hhs(neigh);
1169                 if (!(new & NUD_CONNECTED))
1170                         neigh->confirmed = jiffies -
1171                                       (neigh->parms->base_reachable_time << 1);
1172                 notify = 1;
1173         }
1174         if (new == old)
1175                 goto out;
1176         if (new & NUD_CONNECTED)
1177                 neigh_connect(neigh);
1178         else
1179                 neigh_suspect(neigh);
1180         if (!(old & NUD_VALID)) {
1181                 struct sk_buff *skb;
1182
1183                 /* Again: avoid dead loop if something went wrong */
1184
1185                 while (neigh->nud_state & NUD_VALID &&
1186                        (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1187                         struct dst_entry *dst = skb_dst(skb);
1188                         struct neighbour *n2, *n1 = neigh;
1189                         write_unlock_bh(&neigh->lock);
1190
1191                         rcu_read_lock();
1192
1193                         /* Why not just use 'neigh' as-is?  The problem is that
1194                          * things such as shaper, eql, and sch_teql can end up
1195                          * using alternative, different, neigh objects to output
1196                          * the packet in the output path.  So what we need to do
1197                          * here is re-lookup the top-level neigh in the path so
1198                          * we can reinject the packet there.
1199                          */
1200                         n2 = NULL;
1201                         if (dst) {
1202                                 n2 = dst_neigh_lookup_skb(dst, skb);
1203                                 if (n2)
1204                                         n1 = n2;
1205                         }
1206                         n1->output(n1, skb);
1207                         if (n2)
1208                                 neigh_release(n2);
1209                         rcu_read_unlock();
1210
1211                         write_lock_bh(&neigh->lock);
1212                 }
1213                 skb_queue_purge(&neigh->arp_queue);
1214                 neigh->arp_queue_len_bytes = 0;
1215         }
1216 out:
1217         if (update_isrouter) {
1218                 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1219                         (neigh->flags | NTF_ROUTER) :
1220                         (neigh->flags & ~NTF_ROUTER);
1221         }
1222         write_unlock_bh(&neigh->lock);
1223
1224         if (notify)
1225                 neigh_update_notify(neigh);
1226
1227         return err;
1228 }
1229 EXPORT_SYMBOL(neigh_update);
1230
1231 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1232                                  u8 *lladdr, void *saddr,
1233                                  struct net_device *dev)
1234 {
1235         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1236                                                  lladdr || !dev->addr_len);
1237         if (neigh)
1238                 neigh_update(neigh, lladdr, NUD_STALE,
1239                              NEIGH_UPDATE_F_OVERRIDE);
1240         return neigh;
1241 }
1242 EXPORT_SYMBOL(neigh_event_ns);
1243
1244 /* called with read_lock_bh(&n->lock); */
1245 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1246 {
1247         struct net_device *dev = dst->dev;
1248         __be16 prot = dst->ops->protocol;
1249         struct hh_cache *hh = &n->hh;
1250
1251         write_lock_bh(&n->lock);
1252
1253         /* Only one thread can come in here and initialize the
1254          * hh_cache entry.
1255          */
1256         if (!hh->hh_len)
1257                 dev->header_ops->cache(n, hh, prot);
1258
1259         write_unlock_bh(&n->lock);
1260 }
1261
1262 /* This function can be used in contexts, where only old dev_queue_xmit
1263  * worked, f.e. if you want to override normal output path (eql, shaper),
1264  * but resolution is not made yet.
1265  */
1266
1267 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1268 {
1269         struct net_device *dev = skb->dev;
1270
1271         __skb_pull(skb, skb_network_offset(skb));
1272
1273         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1274                             skb->len) < 0 &&
1275             dev->header_ops->rebuild(skb))
1276                 return 0;
1277
1278         return dev_queue_xmit(skb);
1279 }
1280 EXPORT_SYMBOL(neigh_compat_output);
1281
1282 /* Slow and careful. */
1283
1284 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1285 {
1286         struct dst_entry *dst = skb_dst(skb);
1287         int rc = 0;
1288
1289         if (!dst)
1290                 goto discard;
1291
1292         if (!neigh_event_send(neigh, skb)) {
1293                 int err;
1294                 struct net_device *dev = neigh->dev;
1295                 unsigned int seq;
1296
1297                 if (dev->header_ops->cache && !neigh->hh.hh_len)
1298                         neigh_hh_init(neigh, dst);
1299
1300                 do {
1301                         __skb_pull(skb, skb_network_offset(skb));
1302                         seq = read_seqbegin(&neigh->ha_lock);
1303                         err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1304                                               neigh->ha, NULL, skb->len);
1305                 } while (read_seqretry(&neigh->ha_lock, seq));
1306
1307                 if (err >= 0)
1308                         rc = dev_queue_xmit(skb);
1309                 else
1310                         goto out_kfree_skb;
1311         }
1312 out:
1313         return rc;
1314 discard:
1315         neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
1316 out_kfree_skb:
1317         rc = -EINVAL;
1318         kfree_skb(skb);
1319         goto out;
1320 }
1321 EXPORT_SYMBOL(neigh_resolve_output);
1322
1323 /* As fast as possible without hh cache */
1324
1325 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1326 {
1327         struct net_device *dev = neigh->dev;
1328         unsigned int seq;
1329         int err;
1330
1331         do {
1332                 __skb_pull(skb, skb_network_offset(skb));
1333                 seq = read_seqbegin(&neigh->ha_lock);
1334                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1335                                       neigh->ha, NULL, skb->len);
1336         } while (read_seqretry(&neigh->ha_lock, seq));
1337
1338         if (err >= 0)
1339                 err = dev_queue_xmit(skb);
1340         else {
1341                 err = -EINVAL;
1342                 kfree_skb(skb);
1343         }
1344         return err;
1345 }
1346 EXPORT_SYMBOL(neigh_connected_output);
1347
/* Output method that does no link-layer header handling: @neigh is not
 * used and @skb is handed straight to dev_queue_xmit(), whose result is
 * returned.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;

	rc = dev_queue_xmit(skb);
	return rc;
}
EXPORT_SYMBOL(neigh_direct_output);
1353
1354 static void neigh_proxy_process(unsigned long arg)
1355 {
1356         struct neigh_table *tbl = (struct neigh_table *)arg;
1357         long sched_next = 0;
1358         unsigned long now = jiffies;
1359         struct sk_buff *skb, *n;
1360
1361         spin_lock(&tbl->proxy_queue.lock);
1362
1363         skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1364                 long tdif = NEIGH_CB(skb)->sched_next - now;
1365
1366                 if (tdif <= 0) {
1367                         struct net_device *dev = skb->dev;
1368
1369                         __skb_unlink(skb, &tbl->proxy_queue);
1370                         if (tbl->proxy_redo && netif_running(dev)) {
1371                                 rcu_read_lock();
1372                                 tbl->proxy_redo(skb);
1373                                 rcu_read_unlock();
1374                         } else {
1375                                 kfree_skb(skb);
1376                         }
1377
1378                         dev_put(dev);
1379                 } else if (!sched_next || tdif < sched_next)
1380                         sched_next = tdif;
1381         }
1382         del_timer(&tbl->proxy_timer);
1383         if (sched_next)
1384                 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1385         spin_unlock(&tbl->proxy_queue.lock);
1386 }
1387
1388 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1389                     struct sk_buff *skb)
1390 {
1391         unsigned long now = jiffies;
1392         unsigned long sched_next = now + (net_random() % p->proxy_delay);
1393
1394         if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1395                 kfree_skb(skb);
1396                 return;
1397         }
1398
1399         NEIGH_CB(skb)->sched_next = sched_next;
1400         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1401
1402         spin_lock(&tbl->proxy_queue.lock);
1403         if (del_timer(&tbl->proxy_timer)) {
1404                 if (time_before(tbl->proxy_timer.expires, sched_next))
1405                         sched_next = tbl->proxy_timer.expires;
1406         }
1407         skb_dst_drop(skb);
1408         dev_hold(skb->dev);
1409         __skb_queue_tail(&tbl->proxy_queue, skb);
1410         mod_timer(&tbl->proxy_timer, sched_next);
1411         spin_unlock(&tbl->proxy_queue.lock);
1412 }
1413 EXPORT_SYMBOL(pneigh_enqueue);
1414
1415 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1416                                                       struct net *net, int ifindex)
1417 {
1418         struct neigh_parms *p;
1419
1420         for (p = &tbl->parms; p; p = p->next) {
1421                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1422                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1423                         return p;
1424         }
1425
1426         return NULL;
1427 }
1428
1429 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1430                                       struct neigh_table *tbl)
1431 {
1432         struct neigh_parms *p;
1433         struct net *net = dev_net(dev);
1434         const struct net_device_ops *ops = dev->netdev_ops;
1435
1436         p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1437         if (p) {
1438                 p->tbl            = tbl;
1439                 atomic_set(&p->refcnt, 1);
1440                 p->reachable_time =
1441                                 neigh_rand_reach_time(p->base_reachable_time);
1442
1443                 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1444                         kfree(p);
1445                         return NULL;
1446                 }
1447
1448                 dev_hold(dev);
1449                 p->dev = dev;
1450                 write_pnet(&p->net, hold_net(net));
1451                 p->sysctl_table = NULL;
1452                 write_lock_bh(&tbl->lock);
1453                 p->next         = tbl->parms.next;
1454                 tbl->parms.next = p;
1455                 write_unlock_bh(&tbl->lock);
1456         }
1457         return p;
1458 }
1459 EXPORT_SYMBOL(neigh_parms_alloc);
1460
1461 static void neigh_rcu_free_parms(struct rcu_head *head)
1462 {
1463         struct neigh_parms *parms =
1464                 container_of(head, struct neigh_parms, rcu_head);
1465
1466         neigh_parms_put(parms);
1467 }
1468
1469 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1470 {
1471         struct neigh_parms **p;
1472
1473         if (!parms || parms == &tbl->parms)
1474                 return;
1475         write_lock_bh(&tbl->lock);
1476         for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1477                 if (*p == parms) {
1478                         *p = parms->next;
1479                         parms->dead = 1;
1480                         write_unlock_bh(&tbl->lock);
1481                         if (parms->dev)
1482                                 dev_put(parms->dev);
1483                         call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1484                         return;
1485                 }
1486         }
1487         write_unlock_bh(&tbl->lock);
1488         neigh_dbg(1, "%s: not found\n", __func__);
1489 }
1490 EXPORT_SYMBOL(neigh_parms_release);
1491
/* Final teardown of @parms: release the network namespace reference they
 * hold, then free the memory.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	struct net *net = neigh_parms_net(parms);

	release_net(net);
	kfree(parms);
}
1497
1498 static struct lock_class_key neigh_table_proxy_queue_class;
1499
1500 static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1501 {
1502         unsigned long now = jiffies;
1503         unsigned long phsize;
1504
1505         write_pnet(&tbl->parms.net, &init_net);
1506         atomic_set(&tbl->parms.refcnt, 1);
1507         tbl->parms.reachable_time =
1508                           neigh_rand_reach_time(tbl->parms.base_reachable_time);
1509
1510         tbl->stats = alloc_percpu(struct neigh_statistics);
1511         if (!tbl->stats)
1512                 panic("cannot create neighbour cache statistics");
1513
1514 #ifdef CONFIG_PROC_FS
1515         if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1516                               &neigh_stat_seq_fops, tbl))
1517                 panic("cannot create neighbour proc dir entry");
1518 #endif
1519
1520         RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1521
1522         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1523         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1524
1525         if (!tbl->nht || !tbl->phash_buckets)
1526                 panic("cannot allocate neighbour cache hashes");
1527
1528         if (!tbl->entry_size)
1529                 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1530                                         tbl->key_len, NEIGH_PRIV_ALIGN);
1531         else
1532                 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1533
1534         rwlock_init(&tbl->lock);
1535         INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1536         schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1537         setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1538         skb_queue_head_init_class(&tbl->proxy_queue,
1539                         &neigh_table_proxy_queue_class);
1540
1541         tbl->last_flush = now;
1542         tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1543 }
1544
1545 void neigh_table_init(struct neigh_table *tbl)
1546 {
1547         struct neigh_table *tmp;
1548
1549         neigh_table_init_no_netlink(tbl);
1550         write_lock(&neigh_tbl_lock);
1551         for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1552                 if (tmp->family == tbl->family)
1553                         break;
1554         }
1555         tbl->next       = neigh_tables;
1556         neigh_tables    = tbl;
1557         write_unlock(&neigh_tbl_lock);
1558
1559         if (unlikely(tmp)) {
1560                 pr_err("Registering multiple tables for family %d\n",
1561                        tbl->family);
1562                 dump_stack();
1563         }
1564 }
1565 EXPORT_SYMBOL(neigh_table_init);
1566
1567 int neigh_table_clear(struct neigh_table *tbl)
1568 {
1569         struct neigh_table **tp;
1570
1571         /* It is not clean... Fix it to unload IPv6 module safely */
1572         cancel_delayed_work_sync(&tbl->gc_work);
1573         del_timer_sync(&tbl->proxy_timer);
1574         pneigh_queue_purge(&tbl->proxy_queue);
1575         neigh_ifdown(tbl, NULL);
1576         if (atomic_read(&tbl->entries))
1577                 pr_crit("neighbour leakage\n");
1578         write_lock(&neigh_tbl_lock);
1579         for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1580                 if (*tp == tbl) {
1581                         *tp = tbl->next;
1582                         break;
1583                 }
1584         }
1585         write_unlock(&neigh_tbl_lock);
1586
1587         call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1588                  neigh_hash_free_rcu);
1589         tbl->nht = NULL;
1590
1591         kfree(tbl->phash_buckets);
1592         tbl->phash_buckets = NULL;
1593
1594         remove_proc_entry(tbl->id, init_net.proc_net_stat);
1595
1596         free_percpu(tbl->stats);
1597         tbl->stats = NULL;
1598
1599         return 0;
1600 }
1601 EXPORT_SYMBOL(neigh_table_clear);
1602
1603 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
1604 {
1605         struct net *net = sock_net(skb->sk);
1606         struct ndmsg *ndm;
1607         struct nlattr *dst_attr;
1608         struct neigh_table *tbl;
1609         struct net_device *dev = NULL;
1610         int err = -EINVAL;
1611
1612         ASSERT_RTNL();
1613         if (nlmsg_len(nlh) < sizeof(*ndm))
1614                 goto out;
1615
1616         dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1617         if (dst_attr == NULL)
1618                 goto out;
1619
1620         ndm = nlmsg_data(nlh);
1621         if (ndm->ndm_ifindex) {
1622                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1623                 if (dev == NULL) {
1624                         err = -ENODEV;
1625                         goto out;
1626                 }
1627         }
1628
1629         read_lock(&neigh_tbl_lock);
1630         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1631                 struct neighbour *neigh;
1632
1633                 if (tbl->family != ndm->ndm_family)
1634                         continue;
1635                 read_unlock(&neigh_tbl_lock);
1636
1637                 if (nla_len(dst_attr) < tbl->key_len)
1638                         goto out;
1639
1640                 if (ndm->ndm_flags & NTF_PROXY) {
1641                         err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1642                         goto out;
1643                 }
1644
1645                 if (dev == NULL)
1646                         goto out;
1647
1648                 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1649                 if (neigh == NULL) {
1650                         err = -ENOENT;
1651                         goto out;
1652                 }
1653
1654                 err = neigh_update(neigh, NULL, NUD_FAILED,
1655                                    NEIGH_UPDATE_F_OVERRIDE |
1656                                    NEIGH_UPDATE_F_ADMIN);
1657                 neigh_release(neigh);
1658                 goto out;
1659         }
1660         read_unlock(&neigh_tbl_lock);
1661         err = -EAFNOSUPPORT;
1662
1663 out:
1664         return err;
1665 }
1666
1667 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
1668 {
1669         struct net *net = sock_net(skb->sk);
1670         struct ndmsg *ndm;
1671         struct nlattr *tb[NDA_MAX+1];
1672         struct neigh_table *tbl;
1673         struct net_device *dev = NULL;
1674         int err;
1675
1676         ASSERT_RTNL();
1677         err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1678         if (err < 0)
1679                 goto out;
1680
1681         err = -EINVAL;
1682         if (tb[NDA_DST] == NULL)
1683                 goto out;
1684
1685         ndm = nlmsg_data(nlh);
1686         if (ndm->ndm_ifindex) {
1687                 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1688                 if (dev == NULL) {
1689                         err = -ENODEV;
1690                         goto out;
1691                 }
1692
1693                 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1694                         goto out;
1695         }
1696
1697         read_lock(&neigh_tbl_lock);
1698         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1699                 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1700                 struct neighbour *neigh;
1701                 void *dst, *lladdr;
1702
1703                 if (tbl->family != ndm->ndm_family)
1704                         continue;
1705                 read_unlock(&neigh_tbl_lock);
1706
1707                 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1708                         goto out;
1709                 dst = nla_data(tb[NDA_DST]);
1710                 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1711
1712                 if (ndm->ndm_flags & NTF_PROXY) {
1713                         struct pneigh_entry *pn;
1714
1715                         err = -ENOBUFS;
1716                         pn = pneigh_lookup(tbl, net, dst, dev, 1);
1717                         if (pn) {
1718                                 pn->flags = ndm->ndm_flags;
1719                                 err = 0;
1720                         }
1721                         goto out;
1722                 }
1723
1724                 if (dev == NULL)
1725                         goto out;
1726
1727                 neigh = neigh_lookup(tbl, dst, dev);
1728                 if (neigh == NULL) {
1729                         if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1730                                 err = -ENOENT;
1731                                 goto out;
1732                         }
1733
1734                         neigh = __neigh_lookup_errno(tbl, dst, dev);
1735                         if (IS_ERR(neigh)) {
1736                                 err = PTR_ERR(neigh);
1737                                 goto out;
1738                         }
1739                 } else {
1740                         if (nlh->nlmsg_flags & NLM_F_EXCL) {
1741                                 err = -EEXIST;
1742                                 neigh_release(neigh);
1743                                 goto out;
1744                         }
1745
1746                         if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1747                                 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1748                 }
1749
1750                 if (ndm->ndm_flags & NTF_USE) {
1751                         neigh_event_send(neigh, NULL);
1752                         err = 0;
1753                 } else
1754                         err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1755                 neigh_release(neigh);
1756                 goto out;
1757         }
1758
1759         read_unlock(&neigh_tbl_lock);
1760         err = -EAFNOSUPPORT;
1761 out:
1762         return err;
1763 }
1764
1765 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1766 {
1767         struct nlattr *nest;
1768
1769         nest = nla_nest_start(skb, NDTA_PARMS);
1770         if (nest == NULL)
1771                 return -ENOBUFS;
1772
1773         if ((parms->dev &&
1774              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1775             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1776             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1777             /* approximative value for deprecated QUEUE_LEN (in packets) */
1778             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1779                         parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1780             nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1781             nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1782             nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1783             nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1784             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1785             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1786                           parms->base_reachable_time) ||
1787             nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1788             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1789                           parms->delay_probe_time) ||
1790             nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1791             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1792             nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1793             nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1794                 goto nla_put_failure;
1795         return nla_nest_end(skb, nest);
1796
1797 nla_put_failure:
1798         nla_nest_cancel(skb, nest);
1799         return -EMSGSIZE;
1800 }
1801
1802 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1803                               u32 pid, u32 seq, int type, int flags)
1804 {
1805         struct nlmsghdr *nlh;
1806         struct ndtmsg *ndtmsg;
1807
1808         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1809         if (nlh == NULL)
1810                 return -EMSGSIZE;
1811
1812         ndtmsg = nlmsg_data(nlh);
1813
1814         read_lock_bh(&tbl->lock);
1815         ndtmsg->ndtm_family = tbl->family;
1816         ndtmsg->ndtm_pad1   = 0;
1817         ndtmsg->ndtm_pad2   = 0;
1818
1819         if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1820             nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1821             nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1822             nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1823             nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1824                 goto nla_put_failure;
1825         {
1826                 unsigned long now = jiffies;
1827                 unsigned int flush_delta = now - tbl->last_flush;
1828                 unsigned int rand_delta = now - tbl->last_rand;
1829                 struct neigh_hash_table *nht;
1830                 struct ndt_config ndc = {
1831                         .ndtc_key_len           = tbl->key_len,
1832                         .ndtc_entry_size        = tbl->entry_size,
1833                         .ndtc_entries           = atomic_read(&tbl->entries),
1834                         .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1835                         .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1836                         .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1837                 };
1838
1839                 rcu_read_lock_bh();
1840                 nht = rcu_dereference_bh(tbl->nht);
1841                 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1842                 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1843                 rcu_read_unlock_bh();
1844
1845                 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1846                         goto nla_put_failure;
1847         }
1848
1849         {
1850                 int cpu;
1851                 struct ndt_stats ndst;
1852
1853                 memset(&ndst, 0, sizeof(ndst));
1854
1855                 for_each_possible_cpu(cpu) {
1856                         struct neigh_statistics *st;
1857
1858                         st = per_cpu_ptr(tbl->stats, cpu);
1859                         ndst.ndts_allocs                += st->allocs;
1860                         ndst.ndts_destroys              += st->destroys;
1861                         ndst.ndts_hash_grows            += st->hash_grows;
1862                         ndst.ndts_res_failed            += st->res_failed;
1863                         ndst.ndts_lookups               += st->lookups;
1864                         ndst.ndts_hits                  += st->hits;
1865                         ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1866                         ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1867                         ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1868                         ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1869                 }
1870
1871                 if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1872                         goto nla_put_failure;
1873         }
1874
1875         BUG_ON(tbl->parms.dev);
1876         if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1877                 goto nla_put_failure;
1878
1879         read_unlock_bh(&tbl->lock);
1880         return nlmsg_end(skb, nlh);
1881
1882 nla_put_failure:
1883         read_unlock_bh(&tbl->lock);
1884         nlmsg_cancel(skb, nlh);
1885         return -EMSGSIZE;
1886 }
1887
1888 static int neightbl_fill_param_info(struct sk_buff *skb,
1889                                     struct neigh_table *tbl,
1890                                     struct neigh_parms *parms,
1891                                     u32 pid, u32 seq, int type,
1892                                     unsigned int flags)
1893 {
1894         struct ndtmsg *ndtmsg;
1895         struct nlmsghdr *nlh;
1896
1897         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1898         if (nlh == NULL)
1899                 return -EMSGSIZE;
1900
1901         ndtmsg = nlmsg_data(nlh);
1902
1903         read_lock_bh(&tbl->lock);
1904         ndtmsg->ndtm_family = tbl->family;
1905         ndtmsg->ndtm_pad1   = 0;
1906         ndtmsg->ndtm_pad2   = 0;
1907
1908         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1909             neightbl_fill_parms(skb, parms) < 0)
1910                 goto errout;
1911
1912         read_unlock_bh(&tbl->lock);
1913         return nlmsg_end(skb, nlh);
1914 errout:
1915         read_unlock_bh(&tbl->lock);
1916         nlmsg_cancel(skb, nlh);
1917         return -EMSGSIZE;
1918 }
1919
1920 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1921         [NDTA_NAME]             = { .type = NLA_STRING },
1922         [NDTA_THRESH1]          = { .type = NLA_U32 },
1923         [NDTA_THRESH2]          = { .type = NLA_U32 },
1924         [NDTA_THRESH3]          = { .type = NLA_U32 },
1925         [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1926         [NDTA_PARMS]            = { .type = NLA_NESTED },
1927 };
1928
1929 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1930         [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1931         [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1932         [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1933         [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1934         [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1935         [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1936         [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1937         [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1938         [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1939         [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1940         [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1941         [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1942         [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1943 };
1944
1945 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1946 {
1947         struct net *net = sock_net(skb->sk);
1948         struct neigh_table *tbl;
1949         struct ndtmsg *ndtmsg;
1950         struct nlattr *tb[NDTA_MAX+1];
1951         int err;
1952
1953         err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1954                           nl_neightbl_policy);
1955         if (err < 0)
1956                 goto errout;
1957
1958         if (tb[NDTA_NAME] == NULL) {
1959                 err = -EINVAL;
1960                 goto errout;
1961         }
1962
1963         ndtmsg = nlmsg_data(nlh);
1964         read_lock(&neigh_tbl_lock);
1965         for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1966                 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1967                         continue;
1968
1969                 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1970                         break;
1971         }
1972
1973         if (tbl == NULL) {
1974                 err = -ENOENT;
1975                 goto errout_locked;
1976         }
1977
1978         /*
1979          * We acquire tbl->lock to be nice to the periodic timers and
1980          * make sure they always see a consistent set of values.
1981          */
1982         write_lock_bh(&tbl->lock);
1983
1984         if (tb[NDTA_PARMS]) {
1985                 struct nlattr *tbp[NDTPA_MAX+1];
1986                 struct neigh_parms *p;
1987                 int i, ifindex = 0;
1988
1989                 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1990                                        nl_ntbl_parm_policy);
1991                 if (err < 0)
1992                         goto errout_tbl_lock;
1993
1994                 if (tbp[NDTPA_IFINDEX])
1995                         ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1996
1997                 p = lookup_neigh_parms(tbl, net, ifindex);
1998                 if (p == NULL) {
1999                         err = -ENOENT;
2000                         goto errout_tbl_lock;
2001                 }
2002
2003                 for (i = 1; i <= NDTPA_MAX; i++) {
2004                         if (tbp[i] == NULL)
2005                                 continue;
2006
2007                         switch (i) {
2008                         case NDTPA_QUEUE_LEN:
2009                                 p->queue_len_bytes = nla_get_u32(tbp[i]) *
2010                                                      SKB_TRUESIZE(ETH_FRAME_LEN);
2011                                 break;
2012                         case NDTPA_QUEUE_LENBYTES:
2013                                 p->queue_len_bytes = nla_get_u32(tbp[i]);
2014                                 break;
2015                         case NDTPA_PROXY_QLEN:
2016                                 p->proxy_qlen = nla_get_u32(tbp[i]);
2017                                 break;
2018                         case NDTPA_APP_PROBES:
2019                                 p->app_probes = nla_get_u32(tbp[i]);
2020                                 break;
2021                         case NDTPA_UCAST_PROBES:
2022                                 p->ucast_probes = nla_get_u32(tbp[i]);
2023                                 break;
2024                         case NDTPA_MCAST_PROBES:
2025                                 p->mcast_probes = nla_get_u32(tbp[i]);
2026                                 break;
2027                         case NDTPA_BASE_REACHABLE_TIME:
2028                                 p->base_reachable_time = nla_get_msecs(tbp[i]);
2029                                 break;
2030                         case NDTPA_GC_STALETIME:
2031                                 p->gc_staletime = nla_get_msecs(tbp[i]);
2032                                 break;
2033                         case NDTPA_DELAY_PROBE_TIME:
2034                                 p->delay_probe_time = nla_get_msecs(tbp[i]);
2035                                 break;
2036                         case NDTPA_RETRANS_TIME:
2037                                 p->retrans_time = nla_get_msecs(tbp[i]);
2038                                 break;
2039                         case NDTPA_ANYCAST_DELAY:
2040                                 p->anycast_delay = nla_get_msecs(tbp[i]);
2041                                 break;
2042                         case NDTPA_PROXY_DELAY:
2043                                 p->proxy_delay = nla_get_msecs(tbp[i]);
2044                                 break;
2045                         case NDTPA_LOCKTIME:
2046                                 p->locktime = nla_get_msecs(tbp[i]);
2047                                 break;
2048                         }
2049                 }
2050         }
2051
2052         if (tb[NDTA_THRESH1])
2053                 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2054
2055         if (tb[NDTA_THRESH2])
2056                 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2057
2058         if (tb[NDTA_THRESH3])
2059                 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2060
2061         if (tb[NDTA_GC_INTERVAL])
2062                 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2063
2064         err = 0;
2065
2066 errout_tbl_lock:
2067         write_unlock_bh(&tbl->lock);
2068 errout_locked:
2069         read_unlock(&neigh_tbl_lock);
2070 errout:
2071         return err;
2072 }
2073
2074 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2075 {
2076         struct net *net = sock_net(skb->sk);
2077         int family, tidx, nidx = 0;
2078         int tbl_skip = cb->args[0];
2079         int neigh_skip = cb->args[1];
2080         struct neigh_table *tbl;
2081
2082         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2083
2084         read_lock(&neigh_tbl_lock);
2085         for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2086                 struct neigh_parms *p;
2087
2088                 if (tidx < tbl_skip || (family && tbl->family != family))
2089                         continue;
2090
2091                 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2092                                        cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2093                                        NLM_F_MULTI) <= 0)
2094                         break;
2095
2096                 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2097                         if (!net_eq(neigh_parms_net(p), net))
2098                                 continue;
2099
2100                         if (nidx < neigh_skip)
2101                                 goto next;
2102
2103                         if (neightbl_fill_param_info(skb, tbl, p,
2104                                                      NETLINK_CB(cb->skb).portid,
2105                                                      cb->nlh->nlmsg_seq,
2106                                                      RTM_NEWNEIGHTBL,
2107                                                      NLM_F_MULTI) <= 0)
2108                                 goto out;
2109                 next:
2110                         nidx++;
2111                 }
2112
2113                 neigh_skip = 0;
2114         }
2115 out:
2116         read_unlock(&neigh_tbl_lock);
2117         cb->args[0] = tidx;
2118         cb->args[1] = nidx;
2119
2120         return skb->len;
2121 }
2122
2123 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2124                            u32 pid, u32 seq, int type, unsigned int flags)
2125 {
2126         unsigned long now = jiffies;
2127         struct nda_cacheinfo ci;
2128         struct nlmsghdr *nlh;
2129         struct ndmsg *ndm;
2130
2131         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2132         if (nlh == NULL)
2133                 return -EMSGSIZE;
2134
2135         ndm = nlmsg_data(nlh);
2136         ndm->ndm_family  = neigh->ops->family;
2137         ndm->ndm_pad1    = 0;
2138         ndm->ndm_pad2    = 0;
2139         ndm->ndm_flags   = neigh->flags;
2140         ndm->ndm_type    = neigh->type;
2141         ndm->ndm_ifindex = neigh->dev->ifindex;
2142
2143         if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2144                 goto nla_put_failure;
2145
2146         read_lock_bh(&neigh->lock);
2147         ndm->ndm_state   = neigh->nud_state;
2148         if (neigh->nud_state & NUD_VALID) {
2149                 char haddr[MAX_ADDR_LEN];
2150
2151                 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2152                 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2153                         read_unlock_bh(&neigh->lock);
2154                         goto nla_put_failure;
2155                 }
2156         }
2157
2158         ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2159         ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2160         ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2161         ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2162         read_unlock_bh(&neigh->lock);
2163
2164         if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2165             nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2166                 goto nla_put_failure;
2167
2168         return nlmsg_end(skb, nlh);
2169
2170 nla_put_failure:
2171         nlmsg_cancel(skb, nlh);
2172         return -EMSGSIZE;
2173 }
2174
2175 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2176                             u32 pid, u32 seq, int type, unsigned int flags,
2177                             struct neigh_table *tbl)
2178 {
2179         struct nlmsghdr *nlh;
2180         struct ndmsg *ndm;
2181
2182         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2183         if (nlh == NULL)
2184                 return -EMSGSIZE;
2185
2186         ndm = nlmsg_data(nlh);
2187         ndm->ndm_family  = tbl->family;
2188         ndm->ndm_pad1    = 0;
2189         ndm->ndm_pad2    = 0;
2190         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2191         ndm->ndm_type    = NDA_DST;
2192         ndm->ndm_ifindex = pn->dev->ifindex;
2193         ndm->ndm_state   = NUD_NONE;
2194
2195         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2196                 goto nla_put_failure;
2197
2198         return nlmsg_end(skb, nlh);
2199
2200 nla_put_failure:
2201         nlmsg_cancel(skb, nlh);
2202         return -EMSGSIZE;
2203 }
2204
2205 static void neigh_update_notify(struct neighbour *neigh)
2206 {
2207         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2208         __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2209 }
2210
2211 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2212                             struct netlink_callback *cb)
2213 {
2214         struct net *net = sock_net(skb->sk);
2215         struct neighbour *n;
2216         int rc, h, s_h = cb->args[1];
2217         int idx, s_idx = idx = cb->args[2];
2218         struct neigh_hash_table *nht;
2219
2220         rcu_read_lock_bh();
2221         nht = rcu_dereference_bh(tbl->nht);
2222
2223         for (h = s_h; h < (1 << nht->hash_shift); h++) {
2224                 if (h > s_h)
2225                         s_idx = 0;
2226                 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2227                      n != NULL;
2228                      n = rcu_dereference_bh(n->next)) {
2229                         if (!net_eq(dev_net(n->dev), net))
2230                                 continue;
2231                         if (idx < s_idx)
2232                                 goto next;
2233                         if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2234                                             cb->nlh->nlmsg_seq,
2235                                             RTM_NEWNEIGH,
2236                                             NLM_F_MULTI) <= 0) {
2237                                 rc = -1;
2238                                 goto out;
2239                         }
2240 next:
2241                         idx++;
2242                 }
2243         }
2244         rc = skb->len;
2245 out:
2246         rcu_read_unlock_bh();
2247         cb->args[1] = h;
2248         cb->args[2] = idx;
2249         return rc;
2250 }
2251
2252 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2253                              struct netlink_callback *cb)
2254 {
2255         struct pneigh_entry *n;
2256         struct net *net = sock_net(skb->sk);
2257         int rc, h, s_h = cb->args[3];
2258         int idx, s_idx = idx = cb->args[4];
2259
2260         read_lock_bh(&tbl->lock);
2261
2262         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2263                 if (h > s_h)
2264                         s_idx = 0;
2265                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2266                         if (dev_net(n->dev) != net)
2267                                 continue;
2268                         if (idx < s_idx)
2269                                 goto next;
2270                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2271                                             cb->nlh->nlmsg_seq,
2272                                             RTM_NEWNEIGH,
2273                                             NLM_F_MULTI, tbl) <= 0) {
2274                                 read_unlock_bh(&tbl->lock);
2275                                 rc = -1;
2276                                 goto out;
2277                         }
2278                 next:
2279                         idx++;
2280                 }
2281         }
2282
2283         read_unlock_bh(&tbl->lock);
2284         rc = skb->len;
2285 out:
2286         cb->args[3] = h;
2287         cb->args[4] = idx;
2288         return rc;
2289
2290 }
2291
2292 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2293 {
2294         struct neigh_table *tbl;
2295         int t, family, s_t;
2296         int proxy = 0;
2297         int err;
2298
2299         read_lock(&neigh_tbl_lock);
2300         family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2301
2302         /* check for full ndmsg structure presence, family member is
2303          * the same for both structures
2304          */
2305         if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2306             ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2307                 proxy = 1;
2308
2309         s_t = cb->args[0];
2310
2311         for (tbl = neigh_tables, t = 0; tbl;
2312              tbl = tbl->next, t++) {
2313                 if (t < s_t || (family && tbl->family != family))
2314                         continue;
2315                 if (t > s_t)
2316                         memset(&cb->args[1], 0, sizeof(cb->args) -
2317                                                 sizeof(cb->args[0]));
2318                 if (proxy)
2319                         err = pneigh_dump_table(tbl, skb, cb);
2320                 else
2321                         err = neigh_dump_table(tbl, skb, cb);
2322                 if (err < 0)
2323                         break;
2324         }
2325         read_unlock(&neigh_tbl_lock);
2326
2327         cb->args[0] = t;
2328         return skb->len;
2329 }
2330
2331 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2332 {
2333         int chain;
2334         struct neigh_hash_table *nht;
2335
2336         rcu_read_lock_bh();
2337         nht = rcu_dereference_bh(tbl->nht);
2338
2339         read_lock(&tbl->lock); /* avoid resizes */
2340         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2341                 struct neighbour *n;
2342
2343                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2344                      n != NULL;
2345                      n = rcu_dereference_bh(n->next))
2346                         cb(n, cookie);
2347         }
2348         read_unlock(&tbl->lock);
2349         rcu_read_unlock_bh();
2350 }
2351 EXPORT_SYMBOL(neigh_for_each);
2352
2353 /* The tbl->lock must be held as a writer and BH disabled. */
2354 void __neigh_for_each_release(struct neigh_table *tbl,
2355                               int (*cb)(struct neighbour *))
2356 {
2357         int chain;
2358         struct neigh_hash_table *nht;
2359
2360         nht = rcu_dereference_protected(tbl->nht,
2361                                         lockdep_is_held(&tbl->lock));
2362         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2363                 struct neighbour *n;
2364                 struct neighbour __rcu **np;
2365
2366                 np = &nht->hash_buckets[chain];
2367                 while ((n = rcu_dereference_protected(*np,
2368                                         lockdep_is_held(&tbl->lock))) != NULL) {
2369                         int release;
2370
2371                         write_lock(&n->lock);
2372                         release = cb(n);
2373                         if (release) {
2374                                 rcu_assign_pointer(*np,
2375                                         rcu_dereference_protected(n->next,
2376                                                 lockdep_is_held(&tbl->lock)));
2377                                 n->dead = 1;
2378                         } else
2379                                 np = &n->next;
2380                         write_unlock(&n->lock);
2381                         if (release)
2382                                 neigh_cleanup_and_release(n);
2383                 }
2384         }
2385 }
2386 EXPORT_SYMBOL(__neigh_for_each_release);
2387
2388 #ifdef CONFIG_PROC_FS
2389
2390 static struct neighbour *neigh_get_first(struct seq_file *seq)
2391 {
2392         struct neigh_seq_state *state = seq->private;
2393         struct net *net = seq_file_net(seq);
2394         struct neigh_hash_table *nht = state->nht;
2395         struct neighbour *n = NULL;
2396         int bucket = state->bucket;
2397
2398         state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2399         for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2400                 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2401
2402                 while (n) {
2403                         if (!net_eq(dev_net(n->dev), net))
2404                                 goto next;
2405                         if (state->neigh_sub_iter) {
2406                                 loff_t fakep = 0;
2407                                 void *v;
2408
2409                                 v = state->neigh_sub_iter(state, n, &fakep);
2410                                 if (!v)
2411                                         goto next;
2412                         }
2413                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2414                                 break;
2415                         if (n->nud_state & ~NUD_NOARP)
2416                                 break;
2417 next:
2418                         n = rcu_dereference_bh(n->next);
2419                 }
2420
2421                 if (n)
2422                         break;
2423         }
2424         state->bucket = bucket;
2425
2426         return n;
2427 }
2428
2429 static struct neighbour *neigh_get_next(struct seq_file *seq,
2430                                         struct neighbour *n,
2431                                         loff_t *pos)
2432 {
2433         struct neigh_seq_state *state = seq->private;
2434         struct net *net = seq_file_net(seq);
2435         struct neigh_hash_table *nht = state->nht;
2436
2437         if (state->neigh_sub_iter) {
2438                 void *v = state->neigh_sub_iter(state, n, pos);
2439                 if (v)
2440                         return n;
2441         }
2442         n = rcu_dereference_bh(n->next);
2443
2444         while (1) {
2445                 while (n) {
2446                         if (!net_eq(dev_net(n->dev), net))
2447                                 goto next;
2448                         if (state->neigh_sub_iter) {
2449                                 void *v = state->neigh_sub_iter(state, n, pos);
2450                                 if (v)
2451                                         return n;
2452                                 goto next;
2453                         }
2454                         if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2455                                 break;
2456
2457                         if (n->nud_state & ~NUD_NOARP)
2458                                 break;
2459 next:
2460                         n = rcu_dereference_bh(n->next);
2461                 }
2462
2463                 if (n)
2464                         break;
2465
2466                 if (++state->bucket >= (1 << nht->hash_shift))
2467                         break;
2468
2469                 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2470         }
2471
2472         if (n && pos)
2473                 --(*pos);
2474         return n;
2475 }
2476
2477 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2478 {
2479         struct neighbour *n = neigh_get_first(seq);
2480
2481         if (n) {
2482                 --(*pos);
2483                 while (*pos) {
2484                         n = neigh_get_next(seq, n, pos);
2485                         if (!n)
2486                                 break;
2487                 }
2488         }
2489         return *pos ? NULL : n;
2490 }
2491
2492 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2493 {
2494         struct neigh_seq_state *state = seq->private;
2495         struct net *net = seq_file_net(seq);
2496         struct neigh_table *tbl = state->tbl;
2497         struct pneigh_entry *pn = NULL;
2498         int bucket = state->bucket;
2499
2500         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2501         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2502                 pn = tbl->phash_buckets[bucket];
2503                 while (pn && !net_eq(pneigh_net(pn), net))
2504                         pn = pn->next;
2505                 if (pn)
2506                         break;
2507         }
2508         state->bucket = bucket;
2509
2510         return pn;
2511 }
2512
2513 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2514                                             struct pneigh_entry *pn,
2515                                             loff_t *pos)
2516 {
2517         struct neigh_seq_state *state = seq->private;
2518         struct net *net = seq_file_net(seq);
2519         struct neigh_table *tbl = state->tbl;
2520
2521         do {
2522                 pn = pn->next;
2523         } while (pn && !net_eq(pneigh_net(pn), net));
2524
2525         while (!pn) {
2526                 if (++state->bucket > PNEIGH_HASHMASK)
2527                         break;
2528                 pn = tbl->phash_buckets[state->bucket];
2529                 while (pn && !net_eq(pneigh_net(pn), net))
2530                         pn = pn->next;
2531                 if (pn)
2532                         break;
2533         }
2534
2535         if (pn && pos)
2536                 --(*pos);
2537
2538         return pn;
2539 }
2540
2541 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2542 {
2543         struct pneigh_entry *pn = pneigh_get_first(seq);
2544
2545         if (pn) {
2546                 --(*pos);
2547                 while (*pos) {
2548                         pn = pneigh_get_next(seq, pn, pos);
2549                         if (!pn)
2550                                 break;
2551                 }
2552         }
2553         return *pos ? NULL : pn;
2554 }
2555
2556 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2557 {
2558         struct neigh_seq_state *state = seq->private;
2559         void *rc;
2560         loff_t idxpos = *pos;
2561
2562         rc = neigh_get_idx(seq, &idxpos);
2563         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2564                 rc = pneigh_get_idx(seq, &idxpos);
2565
2566         return rc;
2567 }
2568
2569 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2570         __acquires(rcu_bh)
2571 {
2572         struct neigh_seq_state *state = seq->private;
2573
2574         state->tbl = tbl;
2575         state->bucket = 0;
2576         state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2577
2578         rcu_read_lock_bh();
2579         state->nht = rcu_dereference_bh(tbl->nht);
2580
2581         return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2582 }
2583 EXPORT_SYMBOL(neigh_seq_start);
2584
2585 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2586 {
2587         struct neigh_seq_state *state;
2588         void *rc;
2589
2590         if (v == SEQ_START_TOKEN) {
2591                 rc = neigh_get_first(seq);
2592                 goto out;
2593         }
2594
2595         state = seq->private;
2596         if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2597                 rc = neigh_get_next(seq, v, NULL);
2598                 if (rc)
2599                         goto out;
2600                 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2601                         rc = pneigh_get_first(seq);
2602         } else {
2603                 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2604                 rc = pneigh_get_next(seq, v, NULL);
2605         }
2606 out:
2607         ++(*pos);
2608         return rc;
2609 }
2610 EXPORT_SYMBOL(neigh_seq_next);
2611
2612 void neigh_seq_stop(struct seq_file *seq, void *v)
2613         __releases(rcu_bh)
2614 {
2615         rcu_read_unlock_bh();
2616 }
2617 EXPORT_SYMBOL(neigh_seq_stop);
2618
2619 /* statistics via seq_file */
2620
2621 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2622 {
2623         struct neigh_table *tbl = seq->private;
2624         int cpu;
2625
2626         if (*pos == 0)
2627                 return SEQ_START_TOKEN;
2628
2629         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2630                 if (!cpu_possible(cpu))
2631                         continue;
2632                 *pos = cpu+1;
2633                 return per_cpu_ptr(tbl->stats, cpu);
2634         }
2635         return NULL;
2636 }
2637
2638 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2639 {
2640         struct neigh_table *tbl = seq->private;
2641         int cpu;
2642
2643         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2644                 if (!cpu_possible(cpu))
2645                         continue;
2646                 *pos = cpu+1;
2647                 return per_cpu_ptr(tbl->stats, cpu);
2648         }
2649         return NULL;
2650 }
2651
/*
 * seq_file ->stop for the per-CPU statistics file.  The matching start
 * routine acquires nothing, so there is nothing to release here.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2656
2657 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2658 {
2659         struct neigh_table *tbl = seq->private;
2660         struct neigh_statistics *st = v;
2661
2662         if (v == SEQ_START_TOKEN) {
2663                 seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2664                 return 0;
2665         }
2666
2667         seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2668                         "%08lx %08lx  %08lx %08lx %08lx\n",
2669                    atomic_read(&tbl->entries),
2670
2671                    st->allocs,
2672                    st->destroys,
2673                    st->hash_grows,
2674
2675                    st->lookups,
2676                    st->hits,
2677
2678                    st->res_failed,
2679
2680                    st->rcv_probes_mcast,
2681                    st->rcv_probes_ucast,
2682
2683                    st->periodic_gc_runs,
2684                    st->forced_gc_runs,
2685                    st->unres_discards
2686                    );
2687
2688         return 0;
2689 }
2690
2691 static const struct seq_operations neigh_stat_seq_ops = {
2692         .start  = neigh_stat_seq_start,
2693         .next   = neigh_stat_seq_next,
2694         .stop   = neigh_stat_seq_stop,
2695         .show   = neigh_stat_seq_show,
2696 };
2697
2698 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2699 {
2700         int ret = seq_open(file, &neigh_stat_seq_ops);
2701
2702         if (!ret) {
2703                 struct seq_file *sf = file->private_data;
2704                 sf->private = PDE_DATA(inode);
2705         }
2706         return ret;
2707 };
2708
2709 static const struct file_operations neigh_stat_seq_fops = {
2710         .owner   = THIS_MODULE,
2711         .open    = neigh_stat_seq_open,
2712         .read    = seq_read,
2713         .llseek  = seq_lseek,
2714         .release = seq_release,
2715 };
2716
2717 #endif /* CONFIG_PROC_FS */
2718
2719 static inline size_t neigh_nlmsg_size(void)
2720 {
2721         return NLMSG_ALIGN(sizeof(struct ndmsg))
2722                + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2723                + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2724                + nla_total_size(sizeof(struct nda_cacheinfo))
2725                + nla_total_size(4); /* NDA_PROBES */
2726 }
2727
2728 static void __neigh_notify(struct neighbour *n, int type, int flags)
2729 {
2730         struct net *net = dev_net(n->dev);
2731         struct sk_buff *skb;
2732         int err = -ENOBUFS;
2733
2734         skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2735         if (skb == NULL)
2736                 goto errout;
2737
2738         err = neigh_fill_info(skb, n, 0, 0, type, flags);
2739         if (err < 0) {
2740                 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2741                 WARN_ON(err == -EMSGSIZE);
2742                 kfree_skb(skb);
2743                 goto errout;
2744         }
2745         rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2746         return;
2747 errout:
2748         if (err < 0)
2749                 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2750 }
2751
#ifdef CONFIG_ARPD
/**
 * neigh_app_ns - emit an RTM_GETNEIGH request message for a neighbour
 * @n: neighbour entry the message describes
 *
 * Sends the message through __neigh_notify() with NLM_F_REQUEST set.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
#endif /* CONFIG_ARPD */
2759
2760 #ifdef CONFIG_SYSCTL
2761 static int zero;
2762 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2763
2764 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2765                            void __user *buffer, size_t *lenp, loff_t *ppos)
2766 {
2767         int size, ret;
2768         struct ctl_table tmp = *ctl;
2769
2770         tmp.extra1 = &zero;
2771         tmp.extra2 = &unres_qlen_max;
2772         tmp.data = &size;
2773
2774         size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2775         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2776
2777         if (write && !ret)
2778                 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2779         return ret;
2780 }
2781
/*
 * Slot numbers of the entries in the neighbour sysctl table.  The order
 * matters: for per-device tables everything from NEIGH_VAR_GC_INTERVAL on
 * is cut off, so the gc_* slots must stay at the end.
 */
enum {
	NEIGH_VAR_MCAST_PROBE = 0,		/* mcast_solicit */
	NEIGH_VAR_UCAST_PROBE,			/* ucast_solicit */
	NEIGH_VAR_APP_PROBE,			/* app_solicit */
	NEIGH_VAR_RETRANS_TIME,			/* retrans_time */
	NEIGH_VAR_BASE_REACHABLE_TIME,		/* base_reachable_time */
	NEIGH_VAR_DELAY_PROBE_TIME,		/* delay_first_probe_time */
	NEIGH_VAR_GC_STALETIME,			/* gc_stale_time */
	NEIGH_VAR_QUEUE_LEN,			/* unres_qlen */
	NEIGH_VAR_QUEUE_LEN_BYTES,		/* unres_qlen_bytes */
	NEIGH_VAR_PROXY_QLEN,			/* proxy_qlen */
	NEIGH_VAR_ANYCAST_DELAY,		/* anycast_delay */
	NEIGH_VAR_PROXY_DELAY,			/* proxy_delay */
	NEIGH_VAR_LOCKTIME,			/* locktime */
	NEIGH_VAR_RETRANS_TIME_MS,		/* retrans_time_ms */
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,	/* base_reachable_time_ms */
	NEIGH_VAR_GC_INTERVAL,			/* gc_interval */
	NEIGH_VAR_GC_THRESH1,			/* gc_thresh1 */
	NEIGH_VAR_GC_THRESH2,			/* gc_thresh2 */
	NEIGH_VAR_GC_THRESH3,			/* gc_thresh3 */
	NEIGH_VAR_MAX				/* number of slots above */
};
2804
2805 static struct neigh_sysctl_table {
2806         struct ctl_table_header *sysctl_header;
2807         struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2808 } neigh_sysctl_template __read_mostly = {
2809         .neigh_vars = {
2810                 [NEIGH_VAR_MCAST_PROBE] = {
2811                         .procname       = "mcast_solicit",
2812                         .maxlen         = sizeof(int),
2813                         .mode           = 0644,
2814                         .proc_handler   = proc_dointvec,
2815                 },
2816                 [NEIGH_VAR_UCAST_PROBE] = {
2817                         .procname       = "ucast_solicit",
2818                         .maxlen         = sizeof(int),
2819                         .mode           = 0644,
2820                         .proc_handler   = proc_dointvec,
2821                 },
2822                 [NEIGH_VAR_APP_PROBE] = {
2823                         .procname       = "app_solicit",
2824                         .maxlen         = sizeof(int),
2825                         .mode           = 0644,
2826                         .proc_handler   = proc_dointvec,
2827                 },
2828                 [NEIGH_VAR_RETRANS_TIME] = {
2829                         .procname       = "retrans_time",
2830                         .maxlen         = sizeof(int),
2831                         .mode           = 0644,
2832                         .proc_handler   = proc_dointvec_userhz_jiffies,
2833                 },
2834                 [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2835                         .procname       = "base_reachable_time",
2836                         .maxlen         = sizeof(int),
2837                         .mode           = 0644,
2838                         .proc_handler   = proc_dointvec_jiffies,
2839                 },
2840                 [NEIGH_VAR_DELAY_PROBE_TIME] = {
2841                         .procname       = "delay_first_probe_time",
2842                         .maxlen         = sizeof(int),
2843                         .mode           = 0644,
2844                         .proc_handler   = proc_dointvec_jiffies,
2845                 },
2846                 [NEIGH_VAR_GC_STALETIME] = {
2847                         .procname       = "gc_stale_time",
2848                         .maxlen         = sizeof(int),
2849                         .mode           = 0644,
2850                         .proc_handler   = proc_dointvec_jiffies,
2851                 },
2852                 [NEIGH_VAR_QUEUE_LEN] = {
2853                         .procname       = "unres_qlen",
2854                         .maxlen         = sizeof(int),
2855                         .mode           = 0644,
2856                         .proc_handler   = proc_unres_qlen,
2857                 },
2858                 [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2859                         .procname       = "unres_qlen_bytes",
2860                         .maxlen         = sizeof(int),
2861                         .mode           = 0644,
2862                         .extra1         = &zero,
2863                         .proc_handler   = proc_dointvec_minmax,
2864                 },
2865                 [NEIGH_VAR_PROXY_QLEN] = {
2866                         .procname       = "proxy_qlen",
2867                         .maxlen         = sizeof(int),
2868                         .mode           = 0644,
2869                         .proc_handler   = proc_dointvec,
2870                 },
2871                 [NEIGH_VAR_ANYCAST_DELAY] = {
2872                         .procname       = "anycast_delay",
2873                         .maxlen         = sizeof(int),
2874                         .mode           = 0644,
2875                         .proc_handler   = proc_dointvec_userhz_jiffies,
2876                 },
2877                 [NEIGH_VAR_PROXY_DELAY] = {
2878                         .procname       = "proxy_delay",
2879                         .maxlen         = sizeof(int),
2880                         .mode           = 0644,
2881                         .proc_handler   = proc_dointvec_userhz_jiffies,
2882                 },
2883                 [NEIGH_VAR_LOCKTIME] = {
2884                         .procname       = "locktime",
2885                         .maxlen         = sizeof(int),
2886                         .mode           = 0644,
2887                         .proc_handler   = proc_dointvec_userhz_jiffies,
2888                 },
2889                 [NEIGH_VAR_RETRANS_TIME_MS] = {
2890                         .procname       = "retrans_time_ms",
2891                         .maxlen         = sizeof(int),
2892                         .mode           = 0644,
2893                         .proc_handler   = proc_dointvec_ms_jiffies,
2894                 },
2895                 [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2896                         .procname       = "base_reachable_time_ms",
2897                         .maxlen         = sizeof(int),
2898                         .mode           = 0644,
2899                         .proc_handler   = proc_dointvec_ms_jiffies,
2900                 },
2901                 [NEIGH_VAR_GC_INTERVAL] = {
2902                         .procname       = "gc_interval",
2903                         .maxlen         = sizeof(int),
2904                         .mode           = 0644,
2905                         .proc_handler   = proc_dointvec_jiffies,
2906                 },
2907                 [NEIGH_VAR_GC_THRESH1] = {
2908                         .procname       = "gc_thresh1",
2909                         .maxlen         = sizeof(int),
2910                         .mode           = 0644,
2911                         .proc_handler   = proc_dointvec,
2912                 },
2913                 [NEIGH_VAR_GC_THRESH2] = {
2914                         .procname       = "gc_thresh2",
2915                         .maxlen         = sizeof(int),
2916                         .mode           = 0644,
2917                         .proc_handler   = proc_dointvec,
2918                 },
2919                 [NEIGH_VAR_GC_THRESH3] = {
2920                         .procname       = "gc_thresh3",
2921                         .maxlen         = sizeof(int),
2922                         .mode           = 0644,
2923                         .proc_handler   = proc_dointvec,
2924                 },
2925                 {},
2926         },
2927 };
2928
2929 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2930                           char *p_name, proc_handler *handler)
2931 {
2932         struct neigh_sysctl_table *t;
2933         const char *dev_name_source = NULL;
2934         char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
2935
2936         t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2937         if (!t)
2938                 goto err;
2939
2940         t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2941         t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2942         t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2943         t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2944         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2945         t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2946         t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2947         t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2948         t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2949         t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2950         t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2951         t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2952         t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2953         t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2954         t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2955
2956         if (dev) {
2957                 dev_name_source = dev->name;
2958                 /* Terminate the table early */
2959                 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2960                        sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2961         } else {
2962                 dev_name_source = "default";
2963                 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2964                 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2965                 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2966                 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2967         }
2968
2969
2970         if (handler) {
2971                 /* RetransTime */
2972                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2973                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2974                 /* ReachableTime */
2975                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2976                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2977                 /* RetransTime (in milliseconds)*/
2978                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2979                 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2980                 /* ReachableTime (in milliseconds) */
2981                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2982                 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2983         }
2984
2985         /* Don't export sysctls to unprivileged users */
2986         if (neigh_parms_net(p)->user_ns != &init_user_ns)
2987                 t->neigh_vars[0].procname = NULL;
2988
2989         snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
2990                 p_name, dev_name_source);
2991         t->sysctl_header =
2992                 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
2993         if (!t->sysctl_header)
2994                 goto free;
2995
2996         p->sysctl_table = t;
2997         return 0;
2998
2999 free:
3000         kfree(t);
3001 err:
3002         return -ENOBUFS;
3003 }
3004 EXPORT_SYMBOL(neigh_sysctl_register);
3005
3006 void neigh_sysctl_unregister(struct neigh_parms *p)
3007 {
3008         if (p->sysctl_table) {
3009                 struct neigh_sysctl_table *t = p->sysctl_table;
3010                 p->sysctl_table = NULL;
3011                 unregister_net_sysctl_table(t->sysctl_header);
3012                 kfree(t);
3013         }
3014 }
3015 EXPORT_SYMBOL(neigh_sysctl_unregister);
3016
3017 #endif  /* CONFIG_SYSCTL */
3018
3019 static int __init neigh_init(void)
3020 {
3021         rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3022         rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3023         rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3024
3025         rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3026                       NULL);
3027         rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3028
3029         return 0;
3030 }
3031
3032 subsys_initcall(neigh_init);
3033