/* include/linux/netfilter/ipset/ip_set_ahash.h
 * (web-viewer scrape header removed; original path preserved)
 */
1 #ifndef _IP_SET_AHASH_H
2 #define _IP_SET_AHASH_H
3
4 #include <linux/rcupdate.h>
5 #include <linux/jhash.h>
6 #include <linux/netfilter/ipset/ip_set_timeout.h>
7
8 #define CONCAT(a, b, c)         a##b##c
9 #define TOKEN(a, b, c)          CONCAT(a, b, c)
10
11 #define type_pf_next            TOKEN(TYPE, PF, _elem)
12
13 /* Hashing which uses arrays to resolve clashing. The hash table is resized
14  * (doubled) when searching becomes too long.
15  * Internally jhash is used with the assumption that the size of the
16  * stored data is a multiple of sizeof(u32). If storage supports timeout,
17  * the timeout field must be the last one in the data structure - that field
18  * is ignored when computing the hash key.
19  *
20  * Readers and resizing
21  *
22  * Resizing can be triggered by userspace command only, and those
23  * are serialized by the nfnl mutex. During resizing the set is
24  * read-locked, so the only possible concurrent operations are
25  * the kernel side readers. Those must be protected by proper RCU locking.
26  */
27
28 /* Number of elements to store in an initial array block */
29 #define AHASH_INIT_SIZE                 4
30 /* Max number of elements to store in an array block */
31 #define AHASH_MAX_SIZE                  (3*AHASH_INIT_SIZE)
32
33 /* Max number of elements can be tuned */
34 #ifdef IP_SET_HASH_WITH_MULTI
35 #define AHASH_MAX(h)                    ((h)->ahash_max)
36
37 static inline u8
38 tune_ahash_max(u8 curr, u32 multi)
39 {
40         u32 n;
41
42         if (multi < curr)
43                 return curr;
44
45         n = curr + AHASH_INIT_SIZE;
46         /* Currently, at listing one hash bucket must fit into a message.
47          * Therefore we have a hard limit here.
48          */
49         return n > curr && n <= 64 ? n : curr;
50 }
51 #define TUNE_AHASH_MAX(h, multi)        \
52         ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
53 #else
54 #define AHASH_MAX(h)                    AHASH_MAX_SIZE
55 #define TUNE_AHASH_MAX(h, multi)
56 #endif
57
/* A hash bucket: clashing elements are stored in a plain array that
 * is grown in AHASH_INIT_SIZE steps (see type_pf_elem_add) */
struct hbucket {
	void *value;		/* the array of the values */
	u8 size;		/* size of the array */
	u8 pos;			/* position of the first free entry */
};
64
65 /* The hash table: the table size stored here in order to make resizing easy */
66 struct htable {
67         u8 htable_bits;         /* size of hash table == 2^htable_bits */
68         struct hbucket bucket[0]; /* hashtable buckets */
69 };
70
71 #define hbucket(h, i)           (&((h)->bucket[i]))
72
/* Book-keeping of the prefixes added to the set: one slot per distinct
 * cidr value, kept sorted with larger cidr (longer prefix) first
 * (see add_cidr) */
struct ip_set_hash_nets {
	u8 cidr;		/* the different cidr values in the set */
	u32 nets;		/* number of elements per cidr */
};
78
79 /* The generic ip_set hash structure */
80 struct ip_set_hash {
81         struct htable *table;   /* the hash table */
82         u32 maxelem;            /* max elements in the hash */
83         u32 elements;           /* current element (vs timeout) */
84         u32 initval;            /* random jhash init value */
85         u32 timeout;            /* timeout value, if enabled */
86         struct timer_list gc;   /* garbage collection when timeout enabled */
87         struct type_pf_next next; /* temporary storage for uadd */
88 #ifdef IP_SET_HASH_WITH_MULTI
89         u8 ahash_max;           /* max elements in an array block */
90 #endif
91 #ifdef IP_SET_HASH_WITH_NETMASK
92         u8 netmask;             /* netmask value for subnets to store */
93 #endif
94 #ifdef IP_SET_HASH_WITH_RBTREE
95         struct rb_root rbtree;
96 #endif
97 #ifdef IP_SET_HASH_WITH_NETS
98         struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */
99 #endif
100 };
101
102 static size_t
103 htable_size(u8 hbits)
104 {
105         size_t hsize;
106
107         /* We must fit both into u32 in jhash and size_t */
108         if (hbits > 31)
109                 return 0;
110         hsize = jhash_size(hbits);
111         if ((((size_t)-1) - sizeof(struct htable))/sizeof(struct hbucket)
112             < hsize)
113                 return 0;
114
115         return hsize * sizeof(struct hbucket) + sizeof(struct htable);
116 }
117
118 /* Compute htable_bits from the user input parameter hashsize */
119 static u8
120 htable_bits(u32 hashsize)
121 {
122         /* Assume that hashsize == 2^htable_bits */
123         u8 bits = fls(hashsize - 1);
124         if (jhash_size(bits) != hashsize)
125                 /* Round up to the first 2^n value */
126                 bits = fls(hashsize);
127
128         return bits;
129 }
130
131 #ifdef IP_SET_HASH_WITH_NETS
132 #ifdef IP_SET_HASH_WITH_NETS_PACKED
133 /* When cidr is packed with nomatch, cidr - 1 is stored in the entry */
134 #define CIDR(cidr)      (cidr + 1)
135 #else
136 #define CIDR(cidr)      (cidr)
137 #endif
138
139 #define SET_HOST_MASK(family)   (family == AF_INET ? 32 : 128)
140 #ifdef IP_SET_HASH_WITH_MULTI
141 #define NETS_LENGTH(family)     (SET_HOST_MASK(family) + 1)
142 #else
143 #define NETS_LENGTH(family)     SET_HOST_MASK(family)
144 #endif
145
/* Network cidr size book keeping when the hash stores different
 * sized networks. Assumes a free slot is available, i.e. nets_length
 * covers all possible cidr values. */
static void
add_cidr(struct ip_set_hash *h, u8 cidr, u8 nets_length)
{
	/* i walks to the first unused slot, j remembers the insertion point */
	int i, j;

	/* Add in increasing prefix order, so larger cidr first */
	for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) {
		if (j != -1)
			/* Insertion point found: keep advancing i to
			 * locate the first unused slot */
			continue;
		else if (h->nets[i].cidr < cidr)
			j = i;
		else if (h->nets[i].cidr == cidr) {
			/* Prefix already book-kept: bump its counter */
			h->nets[i].nets++;
			return;
		}
	}
	if (j != -1) {
		/* Shift the entries up to open slot j for the new prefix */
		for (; i > j; i--) {
			h->nets[i].cidr = h->nets[i - 1].cidr;
			h->nets[i].nets = h->nets[i - 1].nets;
		}
	}
	/* Record the new prefix with a single user */
	h->nets[i].cidr = cidr;
	h->nets[i].nets = 1;
}
173
/* Drop one user of the given prefix length; when the counter reaches
 * zero, remove the entry and close the gap in the sorted array.
 * NOTE(review): if cidr is not found, the loop stops at the last slot
 * and its counter is decremented anyway — callers must only delete
 * prefixes that were previously added. */
static void
del_cidr(struct ip_set_hash *h, u8 cidr, u8 nets_length)
{
	u8 i, j;

	/* Locate the book-keeping entry of the cidr value */
	for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++)
		;
	h->nets[i].nets--;

	if (h->nets[i].nets != 0)
		/* The prefix still has users */
		return;

	/* Shift the remaining used entries down over the freed slot */
	for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) {
		h->nets[j].cidr = h->nets[j + 1].cidr;
		h->nets[j].nets = h->nets[j + 1].nets;
	}
}
191 #else
192 #define NETS_LENGTH(family)             0
193 #endif
194
195 /* Destroy the hashtable part of the set */
196 static void
197 ahash_destroy(struct htable *t)
198 {
199         struct hbucket *n;
200         u32 i;
201
202         for (i = 0; i < jhash_size(t->htable_bits); i++) {
203                 n = hbucket(t, i);
204                 if (n->size)
205                         /* FIXME: use slab cache */
206                         kfree(n->value);
207         }
208
209         ip_set_free(t);
210 }
211
212 /* Calculate the actual memory size of the set data */
213 static size_t
214 ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 nets_length)
215 {
216         u32 i;
217         struct htable *t = h->table;
218         size_t memsize = sizeof(*h)
219                          + sizeof(*t)
220 #ifdef IP_SET_HASH_WITH_NETS
221                          + sizeof(struct ip_set_hash_nets) * nets_length
222 #endif
223                          + jhash_size(t->htable_bits) * sizeof(struct hbucket);
224
225         for (i = 0; i < jhash_size(t->htable_bits); i++)
226                         memsize += t->bucket[i].size * dsize;
227
228         return memsize;
229 }
230
231 /* Flush a hash type of set: destroy all elements */
232 static void
233 ip_set_hash_flush(struct ip_set *set)
234 {
235         struct ip_set_hash *h = set->data;
236         struct htable *t = h->table;
237         struct hbucket *n;
238         u32 i;
239
240         for (i = 0; i < jhash_size(t->htable_bits); i++) {
241                 n = hbucket(t, i);
242                 if (n->size) {
243                         n->size = n->pos = 0;
244                         /* FIXME: use slab cache */
245                         kfree(n->value);
246                 }
247         }
248 #ifdef IP_SET_HASH_WITH_NETS
249         memset(h->nets, 0, sizeof(struct ip_set_hash_nets)
250                            * NETS_LENGTH(set->family));
251 #endif
252         h->elements = 0;
253 }
254
255 /* Destroy a hash type of set */
256 static void
257 ip_set_hash_destroy(struct ip_set *set)
258 {
259         struct ip_set_hash *h = set->data;
260
261         if (with_timeout(h->timeout))
262                 del_timer_sync(&h->gc);
263
264         ahash_destroy(h->table);
265 #ifdef IP_SET_HASH_WITH_RBTREE
266         rbtree_destroy(&h->rbtree);
267 #endif
268         kfree(h);
269
270         set->data = NULL;
271 }
272
273 #endif /* _IP_SET_AHASH_H */
274
275 #ifndef HKEY_DATALEN
276 #define HKEY_DATALEN    sizeof(struct type_pf_elem)
277 #endif
278
279 #define HKEY(data, initval, htable_bits)                        \
280 (jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval)       \
281         & jhash_mask(htable_bits))
282
283 /* Type/family dependent function prototypes */
284
285 #define type_pf_data_equal      TOKEN(TYPE, PF, _data_equal)
286 #define type_pf_data_isnull     TOKEN(TYPE, PF, _data_isnull)
287 #define type_pf_data_copy       TOKEN(TYPE, PF, _data_copy)
288 #define type_pf_data_zero_out   TOKEN(TYPE, PF, _data_zero_out)
289 #define type_pf_data_netmask    TOKEN(TYPE, PF, _data_netmask)
290 #define type_pf_data_list       TOKEN(TYPE, PF, _data_list)
291 #define type_pf_data_tlist      TOKEN(TYPE, PF, _data_tlist)
292 #define type_pf_data_next       TOKEN(TYPE, PF, _data_next)
293 #define type_pf_data_flags      TOKEN(TYPE, PF, _data_flags)
294 #define type_pf_data_reset_flags TOKEN(TYPE, PF, _data_reset_flags)
295 #ifdef IP_SET_HASH_WITH_NETS
296 #define type_pf_data_match      TOKEN(TYPE, PF, _data_match)
297 #else
298 #define type_pf_data_match(d)   1
299 #endif
300
301 #define type_pf_elem            TOKEN(TYPE, PF, _elem)
302 #define type_pf_telem           TOKEN(TYPE, PF, _telem)
303 #define type_pf_data_timeout    TOKEN(TYPE, PF, _data_timeout)
304 #define type_pf_data_expired    TOKEN(TYPE, PF, _data_expired)
305 #define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set)
306
307 #define type_pf_elem_add        TOKEN(TYPE, PF, _elem_add)
308 #define type_pf_add             TOKEN(TYPE, PF, _add)
309 #define type_pf_del             TOKEN(TYPE, PF, _del)
310 #define type_pf_test_cidrs      TOKEN(TYPE, PF, _test_cidrs)
311 #define type_pf_test            TOKEN(TYPE, PF, _test)
312
313 #define type_pf_elem_tadd       TOKEN(TYPE, PF, _elem_tadd)
314 #define type_pf_del_telem       TOKEN(TYPE, PF, _ahash_del_telem)
315 #define type_pf_expire          TOKEN(TYPE, PF, _expire)
316 #define type_pf_tadd            TOKEN(TYPE, PF, _tadd)
317 #define type_pf_tdel            TOKEN(TYPE, PF, _tdel)
318 #define type_pf_ttest_cidrs     TOKEN(TYPE, PF, _ahash_ttest_cidrs)
319 #define type_pf_ttest           TOKEN(TYPE, PF, _ahash_ttest)
320
321 #define type_pf_resize          TOKEN(TYPE, PF, _resize)
322 #define type_pf_tresize         TOKEN(TYPE, PF, _tresize)
323 #define type_pf_flush           ip_set_hash_flush
324 #define type_pf_destroy         ip_set_hash_destroy
325 #define type_pf_head            TOKEN(TYPE, PF, _head)
326 #define type_pf_list            TOKEN(TYPE, PF, _list)
327 #define type_pf_tlist           TOKEN(TYPE, PF, _tlist)
328 #define type_pf_same_set        TOKEN(TYPE, PF, _same_set)
329 #define type_pf_kadt            TOKEN(TYPE, PF, _kadt)
330 #define type_pf_uadt            TOKEN(TYPE, PF, _uadt)
331 #define type_pf_gc              TOKEN(TYPE, PF, _gc)
332 #define type_pf_gc_init         TOKEN(TYPE, PF, _gc_init)
333 #define type_pf_variant         TOKEN(TYPE, PF, _variant)
334 #define type_pf_tvariant        TOKEN(TYPE, PF, _tvariant)
335
336 /* Flavour without timeout */
337
338 /* Get the ith element from the array block n */
339 #define ahash_data(n, i)        \
340         ((struct type_pf_elem *)((n)->value) + (i))
341
342 /* Add an element to the hash table when resizing the set:
343  * we spare the maintenance of the internal counters. */
344 static int
345 type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value,
346                  u8 ahash_max, u32 cadt_flags)
347 {
348         struct type_pf_elem *data;
349
350         if (n->pos >= n->size) {
351                 void *tmp;
352
353                 if (n->size >= ahash_max)
354                         /* Trigger rehashing */
355                         return -EAGAIN;
356
357                 tmp = kzalloc((n->size + AHASH_INIT_SIZE)
358                               * sizeof(struct type_pf_elem),
359                               GFP_ATOMIC);
360                 if (!tmp)
361                         return -ENOMEM;
362                 if (n->size) {
363                         memcpy(tmp, n->value,
364                                sizeof(struct type_pf_elem) * n->size);
365                         kfree(n->value);
366                 }
367                 n->value = tmp;
368                 n->size += AHASH_INIT_SIZE;
369         }
370         data = ahash_data(n, n->pos++);
371         type_pf_data_copy(data, value);
372 #ifdef IP_SET_HASH_WITH_NETS
373         /* Resizing won't overwrite stored flags */
374         if (cadt_flags)
375                 type_pf_data_flags(data, cadt_flags);
376 #endif
377         return 0;
378 }
379
/* Resize a hash: create a new hash table with doubling the hashsize
 * and inserting the elements to it. Repeat until we succeed or
 * fail due to memory pressures. Only called from userspace commands,
 * which are serialized by the nfnl mutex (see the header comment). */
static int
type_pf_resize(struct ip_set *set, bool retried)
{
	struct ip_set_hash *h = set->data;
	struct htable *t, *orig = h->table;
	u8 htable_bits = orig->htable_bits;
	struct type_pf_elem *data;
	struct hbucket *n, *m;
	u32 i, j, flags = 0;
	int ret;

retry:
	ret = 0;
	htable_bits++;
	pr_debug("attempt to resize set %s from %u to %u, t %p\n",
		 set->name, orig->htable_bits, htable_bits, orig);
	if (!htable_bits) {
		/* In case we have plenty of memory :-) */
		pr_warning("Cannot increase the hashsize of set %s further\n",
			   set->name);
		return -IPSET_ERR_HASH_FULL;
	}
	t = ip_set_alloc(sizeof(*t)
			 + jhash_size(htable_bits) * sizeof(struct hbucket));
	if (!t)
		return -ENOMEM;
	t->htable_bits = htable_bits;

	/* Read-locked during the rehash: kernel side readers may run
	 * concurrently, modifications are excluded. */
	read_lock_bh(&set->lock);
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = hbucket(orig, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_data(n, j);
#ifdef IP_SET_HASH_WITH_NETS
			/* Strip the flags for hashing; remember them to
			 * store in the new entry (or restore on failure) */
			flags = 0;
			type_pf_data_reset_flags(data, &flags);
#endif
			m = hbucket(t, HKEY(data, h->initval, htable_bits));
			ret = type_pf_elem_add(m, data, AHASH_MAX(h), flags);
			if (ret < 0) {
#ifdef IP_SET_HASH_WITH_NETS
				type_pf_data_flags(data, flags);
#endif
				read_unlock_bh(&set->lock);
				ahash_destroy(t);
				if (ret == -EAGAIN)
					/* A bucket overflowed: retry with
					 * a table twice as big */
					goto retry;
				return ret;
			}
		}
	}

	/* Publish the new table to RCU readers */
	rcu_assign_pointer(h->table, t);
	read_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
		 orig->htable_bits, orig, t->htable_bits, t);
	ahash_destroy(orig);

	return 0;
}
447
448 static inline void
449 type_pf_data_next(struct ip_set_hash *h, const struct type_pf_elem *d);
450
/* Add an element to a hash and update the internal counters when succeeded,
 * otherwise report the proper error code. The timeout parameter is unused
 * in this (timeout-less) flavour. */
static int
type_pf_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i, ret = 0;
	u32 key, multi = 0;
	/* The cadt flags travel in the upper 16 bits of flags */
	u32 cadt_flags = flags >> 16;

	if (h->elements >= h->maxelem) {
		if (net_ratelimit())
			pr_warning("Set %s is full, maxelem %u reached\n",
				   set->name, h->maxelem);
		return -IPSET_ERR_HASH_FULL;
	}

	/* The table pointer may be replaced by a concurrent resize:
	 * dereference it under RCU protection */
	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++)
		if (type_pf_data_equal(ahash_data(n, i), d, &multi)) {
#ifdef IP_SET_HASH_WITH_NETS
			if (flags & IPSET_FLAG_EXIST)
				/* Support overwriting just the flags */
				type_pf_data_flags(ahash_data(n, i),
						   cadt_flags);
#endif
			ret = -IPSET_ERR_EXIST;
			goto out;
		}
	TUNE_AHASH_MAX(h, multi);
	ret = type_pf_elem_add(n, value, AHASH_MAX(h), cadt_flags);
	if (ret != 0) {
		if (ret == -EAGAIN)
			/* Bucket full: save the element as the restart
			 * point for the retry after resizing */
			type_pf_data_next(h, d);
		goto out;
	}

#ifdef IP_SET_HASH_WITH_NETS
	add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
#endif
	h->elements++;
out:
	rcu_read_unlock_bh();
	return ret;
}
502
/* Delete an element from the hash: swap it with the last element
 * and free up space if possible. The timeout and flags parameters
 * are unused in this (timeout-less) flavour.
 */
static int
type_pf_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i;
	struct type_pf_elem *data;
	u32 key, multi = 0;

	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_data(n, i);
		if (!type_pf_data_equal(data, d, &multi))
			continue;
		if (i != n->pos - 1)
			/* Not last one: overwrite with the last element */
			type_pf_data_copy(data, ahash_data(n, n->pos - 1));

		n->pos--;
		h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
#endif
		/* Shrink the array when a whole block became free.
		 * Allocation failure is not an error here: the deletion
		 * succeeded, we merely keep the larger array. */
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_elem),
					    GFP_ATOMIC);
			if (!tmp)
				return 0;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_elem));
			kfree(n->value);
			n->value = tmp;
		}
		return 0;
	}

	return -IPSET_ERR_EXIST;
}
549
550 #ifdef IP_SET_HASH_WITH_NETS
551
552 /* Special test function which takes into account the different network
553  * sizes added to the set */
554 static int
555 type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
556 {
557         struct ip_set_hash *h = set->data;
558         struct htable *t = h->table;
559         struct hbucket *n;
560         const struct type_pf_elem *data;
561         int i, j = 0;
562         u32 key, multi = 0;
563         u8 nets_length = NETS_LENGTH(set->family);
564
565         pr_debug("test by nets\n");
566         for (; j < nets_length && h->nets[j].nets && !multi; j++) {
567                 type_pf_data_netmask(d, h->nets[j].cidr);
568                 key = HKEY(d, h->initval, t->htable_bits);
569                 n = hbucket(t, key);
570                 for (i = 0; i < n->pos; i++) {
571                         data = ahash_data(n, i);
572                         if (type_pf_data_equal(data, d, &multi))
573                                 return type_pf_data_match(data);
574                 }
575         }
576         return 0;
577 }
578 #endif
579
580 /* Test whether the element is added to the set */
581 static int
582 type_pf_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
583 {
584         struct ip_set_hash *h = set->data;
585         struct htable *t = h->table;
586         struct type_pf_elem *d = value;
587         struct hbucket *n;
588         const struct type_pf_elem *data;
589         int i;
590         u32 key, multi = 0;
591
592 #ifdef IP_SET_HASH_WITH_NETS
593         /* If we test an IP address and not a network address,
594          * try all possible network sizes */
595         if (CIDR(d->cidr) == SET_HOST_MASK(set->family))
596                 return type_pf_test_cidrs(set, d, timeout);
597 #endif
598
599         key = HKEY(d, h->initval, t->htable_bits);
600         n = hbucket(t, key);
601         for (i = 0; i < n->pos; i++) {
602                 data = ahash_data(n, i);
603                 if (type_pf_data_equal(data, d, &multi))
604                         return type_pf_data_match(data);
605         }
606         return 0;
607 }
608
/* Reply a HEADER request: fill out the header part of the set */
static int
type_pf_head(struct ip_set *set, struct sk_buff *skb)
{
	const struct ip_set_hash *h = set->data;
	struct nlattr *nested;
	size_t memsize;

	/* Compute the memory footprint under the set lock so the table
	 * cannot be resized under us; the element size depends on
	 * whether the timeout variant is active. */
	read_lock_bh(&set->lock);
	memsize = ahash_memsize(h, with_timeout(h->timeout)
					? sizeof(struct type_pf_telem)
					: sizeof(struct type_pf_elem),
				NETS_LENGTH(set->family));
	read_unlock_bh(&set->lock);

	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
	if (!nested)
		goto nla_put_failure;
	if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
			  htonl(jhash_size(h->table->htable_bits))) ||
	    nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
		goto nla_put_failure;
#ifdef IP_SET_HASH_WITH_NETMASK
	/* Report the netmask only when it differs from the default */
	if (h->netmask != HOST_MASK &&
	    nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
		goto nla_put_failure;
#endif
	/* set->ref - 1: presumably excludes the reference held for this
	 * request — TODO confirm against ip_set core */
	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
	    (with_timeout(h->timeout) &&
	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))))
		goto nla_put_failure;
	ipset_nest_end(skb, nested);

	return 0;
nla_put_failure:
	return -EMSGSIZE;
}
647
/* Reply a LIST/SAVE request: dump the elements of the specified set.
 * cb->args[2] is the bucket index at which to resume a multi-message
 * dump; it is reset to 0 when the listing is complete. */
static int
type_pf_list(const struct ip_set *set,
	     struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct ip_set_hash *h = set->data;
	const struct htable *t = h->table;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct type_pf_elem *data;
	u32 first = cb->args[2];
	/* We assume that one hash bucket fills into one page */
	void *incomplete;
	int i;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;
	pr_debug("list hash set %s\n", set->name);
	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
		/* Remember the tail so a partially dumped bucket can be
		 * trimmed and re-dumped in the next message */
		incomplete = skb_tail_pointer(skb);
		n = hbucket(t, cb->args[2]);
		pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
		for (i = 0; i < n->pos; i++) {
			data = ahash_data(n, i);
			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
				 cb->args[2], n, i, data);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[2] == first) {
					/* Not even the first bucket of this
					 * message fits: give up */
					nla_nest_cancel(skb, atd);
					return -EMSGSIZE;
				} else
					goto nla_put_failure;
			}
			if (type_pf_data_list(skb, data))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[2] = 0;

	return 0;

nla_put_failure:
	/* Drop the partially added bucket and finish this message;
	 * the dump continues from cb->args[2] on the next call */
	nlmsg_trim(skb, incomplete);
	ipset_nest_end(skb, atd);
	if (unlikely(first == cb->args[2])) {
		pr_warning("Can't list set %s: one bucket does not fit into "
			   "a message. Please report it!\n", set->name);
		cb->args[2] = 0;
		return -EMSGSIZE;
	}
	return 0;
}
705
706 static int
707 type_pf_kadt(struct ip_set *set, const struct sk_buff *skb,
708              const struct xt_action_param *par,
709              enum ipset_adt adt, const struct ip_set_adt_opt *opt);
710 static int
711 type_pf_uadt(struct ip_set *set, struct nlattr *tb[],
712              enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
713
/* Function vector of the timeout-less flavour of this set type */
static const struct ip_set_type_variant type_pf_variant = {
	.kadt	= type_pf_kadt,
	.uadt	= type_pf_uadt,
	.adt	= {
		[IPSET_ADD] = type_pf_add,
		[IPSET_DEL] = type_pf_del,
		[IPSET_TEST] = type_pf_test,
	},
	.destroy = type_pf_destroy,
	.flush	= type_pf_flush,
	.head	= type_pf_head,
	.list	= type_pf_list,
	.resize	= type_pf_resize,
	.same_set = type_pf_same_set,
};
729
730 /* Flavour with timeout support */
731
732 #define ahash_tdata(n, i) \
733         (struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i))
734
735 static inline u32
736 type_pf_data_timeout(const struct type_pf_elem *data)
737 {
738         const struct type_pf_telem *tdata =
739                 (const struct type_pf_telem *) data;
740
741         return tdata->timeout;
742 }
743
744 static inline bool
745 type_pf_data_expired(const struct type_pf_elem *data)
746 {
747         const struct type_pf_telem *tdata =
748                 (const struct type_pf_telem *) data;
749
750         return ip_set_timeout_expired(tdata->timeout);
751 }
752
753 static inline void
754 type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout)
755 {
756         struct type_pf_telem *tdata = (struct type_pf_telem *) data;
757
758         tdata->timeout = ip_set_timeout_set(timeout);
759 }
760
761 static int
762 type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value,
763                   u8 ahash_max, u32 cadt_flags, u32 timeout)
764 {
765         struct type_pf_elem *data;
766
767         if (n->pos >= n->size) {
768                 void *tmp;
769
770                 if (n->size >= ahash_max)
771                         /* Trigger rehashing */
772                         return -EAGAIN;
773
774                 tmp = kzalloc((n->size + AHASH_INIT_SIZE)
775                               * sizeof(struct type_pf_telem),
776                               GFP_ATOMIC);
777                 if (!tmp)
778                         return -ENOMEM;
779                 if (n->size) {
780                         memcpy(tmp, n->value,
781                                sizeof(struct type_pf_telem) * n->size);
782                         kfree(n->value);
783                 }
784                 n->value = tmp;
785                 n->size += AHASH_INIT_SIZE;
786         }
787         data = ahash_tdata(n, n->pos++);
788         type_pf_data_copy(data, value);
789         type_pf_data_timeout_set(data, timeout);
790 #ifdef IP_SET_HASH_WITH_NETS
791         /* Resizing won't overwrite stored flags */
792         if (cadt_flags)
793                 type_pf_data_flags(data, cadt_flags);
794 #endif
795         return 0;
796 }
797
/* Delete expired elements from the hashtable */
static void
type_pf_expire(struct ip_set_hash *h, u8 nets_length)
{
	struct htable *t = h->table;
	struct hbucket *n;
	struct type_pf_elem *data;
	u32 i;
	int j;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = hbucket(t, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_tdata(n, j);
			if (type_pf_data_expired(data)) {
				pr_debug("expired %u/%u\n", i, j);
#ifdef IP_SET_HASH_WITH_NETS
				del_cidr(h, CIDR(data->cidr), nets_length);
#endif
				/* Same trick as in type_pf_del: overwrite
				 * the expired entry with the last one.
				 * NOTE(review): j is incremented afterwards,
				 * so the swapped-in element is not checked
				 * again in this pass; an expired one would
				 * only be caught by the next gc run —
				 * confirm whether that is intended. */
				if (j != n->pos - 1)
					/* Not last one */
					type_pf_data_copy(data,
						ahash_tdata(n, n->pos - 1));
				n->pos--;
				h->elements--;
			}
		}
		/* Shrink the array when a whole block became free;
		 * allocation failure just keeps the larger array */
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_telem),
					    GFP_ATOMIC);
			if (!tmp)
				/* Still try to delete expired elements */
				continue;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_telem));
			kfree(n->value);
			n->value = tmp;
		}
	}
}
840
/* Resize (double) the hash table of a timeout-aware set until all
 * elements fit, then publish the new table via RCU.
 * Triggered from userspace only (serialized by the nfnl mutex); see the
 * "Readers and resizing" note at the top of this file.
 * Returns 0 on success or a negative error code.
 */
static int
type_pf_tresize(struct ip_set *set, bool retried)
{
	struct ip_set_hash *h = set->data;
	struct htable *t, *orig = h->table;
	u8 htable_bits = orig->htable_bits;
	struct type_pf_elem *data;
	struct hbucket *n, *m;
	u32 i, j, flags = 0;
	int ret;

	/* Try to cleanup once */
	if (!retried) {
		/* If expiring freed at least one slot, skip the resize */
		i = h->elements;
		write_lock_bh(&set->lock);
		type_pf_expire(set->data, NETS_LENGTH(set->family));
		write_unlock_bh(&set->lock);
		if (h->elements <  i)
			return 0;
	}

retry:
	ret = 0;
	htable_bits++;
	pr_debug("attempt to resize set %s from %u to %u, t %p\n",
		 set->name, orig->htable_bits, htable_bits, orig);
	/* htable_bits is u8: wrapping to 0 means we cannot grow further */
	if (!htable_bits) {
		/* In case we have plenty of memory :-) */
		pr_warning("Cannot increase the hashsize of set %s further\n",
			   set->name);
		return -IPSET_ERR_HASH_FULL;
	}
	t = ip_set_alloc(sizeof(*t)
			 + jhash_size(htable_bits) * sizeof(struct hbucket));
	if (!t)
		return -ENOMEM;
	t->htable_bits = htable_bits;

	/* The read lock excludes writers (add/del take the write lock);
	 * concurrent kernel-side readers are handled by RCU. */
	read_lock_bh(&set->lock);
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = hbucket(orig, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_tdata(n, j);
#ifdef IP_SET_HASH_WITH_NETS
			/* Flags are cleared while copying; remember them so
			 * they can be restored on failure below. */
			flags = 0;
			type_pf_data_reset_flags(data, &flags);
#endif
			m = hbucket(t, HKEY(data, h->initval, htable_bits));
			ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), flags,
				ip_set_timeout_get(type_pf_data_timeout(data)));
			if (ret < 0) {
#ifdef IP_SET_HASH_WITH_NETS
				/* Restore the flags of the original element */
				type_pf_data_flags(data, flags);
#endif
				read_unlock_bh(&set->lock);
				ahash_destroy(t);
				/* -EAGAIN: a bucket of the new table is full,
				 * grow one more step and start over */
				if (ret == -EAGAIN)
					goto retry;
				return ret;
			}
		}
	}

	rcu_assign_pointer(h->table, t);
	read_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	ahash_destroy(orig);

	return 0;
}
914
915 static int
916 type_pf_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
917 {
918         struct ip_set_hash *h = set->data;
919         struct htable *t = h->table;
920         const struct type_pf_elem *d = value;
921         struct hbucket *n;
922         struct type_pf_elem *data;
923         int ret = 0, i, j = AHASH_MAX(h) + 1;
924         bool flag_exist = flags & IPSET_FLAG_EXIST;
925         u32 key, multi = 0;
926         u32 cadt_flags = flags >> 16;
927
928         if (h->elements >= h->maxelem)
929                 /* FIXME: when set is full, we slow down here */
930                 type_pf_expire(h, NETS_LENGTH(set->family));
931         if (h->elements >= h->maxelem) {
932                 if (net_ratelimit())
933                         pr_warning("Set %s is full, maxelem %u reached\n",
934                                    set->name, h->maxelem);
935                 return -IPSET_ERR_HASH_FULL;
936         }
937
938         rcu_read_lock_bh();
939         t = rcu_dereference_bh(h->table);
940         key = HKEY(d, h->initval, t->htable_bits);
941         n = hbucket(t, key);
942         for (i = 0; i < n->pos; i++) {
943                 data = ahash_tdata(n, i);
944                 if (type_pf_data_equal(data, d, &multi)) {
945                         if (type_pf_data_expired(data) || flag_exist)
946                                 /* Just timeout value may be updated */
947                                 j = i;
948                         else {
949                                 ret = -IPSET_ERR_EXIST;
950                                 goto out;
951                         }
952                 } else if (j == AHASH_MAX(h) + 1 &&
953                            type_pf_data_expired(data))
954                         j = i;
955         }
956         if (j != AHASH_MAX(h) + 1) {
957                 data = ahash_tdata(n, j);
958 #ifdef IP_SET_HASH_WITH_NETS
959                 del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family));
960                 add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
961 #endif
962                 type_pf_data_copy(data, d);
963                 type_pf_data_timeout_set(data, timeout);
964 #ifdef IP_SET_HASH_WITH_NETS
965                 type_pf_data_flags(data, cadt_flags);
966 #endif
967                 goto out;
968         }
969         TUNE_AHASH_MAX(h, multi);
970         ret = type_pf_elem_tadd(n, d, AHASH_MAX(h), cadt_flags, timeout);
971         if (ret != 0) {
972                 if (ret == -EAGAIN)
973                         type_pf_data_next(h, d);
974                 goto out;
975         }
976
977 #ifdef IP_SET_HASH_WITH_NETS
978         add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
979 #endif
980         h->elements++;
981 out:
982         rcu_read_unlock_bh();
983         return ret;
984 }
985
/* Delete an element from the timeout-aware hash.
 * timeout and flags are unused here; they are part of the common
 * adt method signature.
 * Returns 0 on success, -IPSET_ERR_EXIST if the element is absent
 * or already expired.
 */
static int
type_pf_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i;
	struct type_pf_elem *data;
	u32 key, multi = 0;

	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (!type_pf_data_equal(data, d, &multi))
			continue;
		if (type_pf_data_expired(data))
			return -IPSET_ERR_EXIST;
		if (i != n->pos - 1)
			/* Not last one: fill the gap with the last element */
			type_pf_data_copy(data, ahash_tdata(n, n->pos - 1));

		n->pos--;
		h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
#endif
		/* Opportunistically shrink the bucket array when a whole
		 * block's worth of slots is free; allocation failure just
		 * keeps the larger array - the deletion already succeeded. */
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_telem),
					    GFP_ATOMIC);
			if (!tmp)
				return 0;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_telem));
			kfree(n->value);
			n->value = tmp;
		}
		return 0;
	}

	return -IPSET_ERR_EXIST;
}
1031
1032 #ifdef IP_SET_HASH_WITH_NETS
1033 static int
1034 type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
1035 {
1036         struct ip_set_hash *h = set->data;
1037         struct htable *t = h->table;
1038         struct type_pf_elem *data;
1039         struct hbucket *n;
1040         int i, j = 0;
1041         u32 key, multi = 0;
1042         u8 nets_length = NETS_LENGTH(set->family);
1043
1044         for (; j < nets_length && h->nets[j].nets && !multi; j++) {
1045                 type_pf_data_netmask(d, h->nets[j].cidr);
1046                 key = HKEY(d, h->initval, t->htable_bits);
1047                 n = hbucket(t, key);
1048                 for (i = 0; i < n->pos; i++) {
1049                         data = ahash_tdata(n, i);
1050 #ifdef IP_SET_HASH_WITH_MULTI
1051                         if (type_pf_data_equal(data, d, &multi)) {
1052                                 if (!type_pf_data_expired(data))
1053                                         return type_pf_data_match(data);
1054                                 multi = 0;
1055                         }
1056 #else
1057                         if (type_pf_data_equal(data, d, &multi) &&
1058                             !type_pf_data_expired(data))
1059                                 return type_pf_data_match(data);
1060 #endif
1061                 }
1062         }
1063         return 0;
1064 }
1065 #endif
1066
1067 static int
1068 type_pf_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
1069 {
1070         struct ip_set_hash *h = set->data;
1071         struct htable *t = h->table;
1072         struct type_pf_elem *data, *d = value;
1073         struct hbucket *n;
1074         int i;
1075         u32 key, multi = 0;
1076
1077 #ifdef IP_SET_HASH_WITH_NETS
1078         if (CIDR(d->cidr) == SET_HOST_MASK(set->family))
1079                 return type_pf_ttest_cidrs(set, d, timeout);
1080 #endif
1081         key = HKEY(d, h->initval, t->htable_bits);
1082         n = hbucket(t, key);
1083         for (i = 0; i < n->pos; i++) {
1084                 data = ahash_tdata(n, i);
1085                 if (type_pf_data_equal(data, d, &multi) &&
1086                     !type_pf_data_expired(data))
1087                         return type_pf_data_match(data);
1088         }
1089         return 0;
1090 }
1091
/* Dump the non-expired content of the set via netlink.
 * cb->args[2] is the dump cursor: the next hash bucket to list; it is
 * reset to zero when the whole set has been dumped.
 */
static int
type_pf_tlist(const struct ip_set *set,
	      struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct ip_set_hash *h = set->data;
	const struct htable *t = h->table;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct type_pf_elem *data;
	u32 first = cb->args[2];
	/* We assume that one hash bucket fills into one page */
	void *incomplete;
	int i;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;
	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
		/* Remember the tail so a partially emitted bucket can be
		 * trimmed and re-dumped in the next message */
		incomplete = skb_tail_pointer(skb);
		n = hbucket(t, cb->args[2]);
		for (i = 0; i < n->pos; i++) {
			data = ahash_tdata(n, i);
			pr_debug("list %p %u\n", n, i);
			if (type_pf_data_expired(data))
				continue;
			pr_debug("do list %p %u\n", n, i);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[2] == first) {
					/* Not even the first bucket of this
					 * round fits into the message */
					nla_nest_cancel(skb, atd);
					return -EMSGSIZE;
				} else
					goto nla_put_failure;
			}
			if (type_pf_data_tlist(skb, data))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[2] = 0;

	return 0;

nla_put_failure:
	/* Drop the partial bucket; it will be listed again from the
	 * current cursor position in the next dump message */
	nlmsg_trim(skb, incomplete);
	ipset_nest_end(skb, atd);
	if (unlikely(first == cb->args[2])) {
		pr_warning("Can't list set %s: one bucket does not fit into "
			   "a message. Please report it!\n", set->name);
		cb->args[2] = 0;
		return -EMSGSIZE;
	}
	return 0;
}
1148
/* Method table of the timeout-aware ("t") variant of this hash type;
 * installed as set->variant when the set is created with a timeout. */
static const struct ip_set_type_variant type_pf_tvariant = {
	.kadt	= type_pf_kadt,
	.uadt	= type_pf_uadt,
	.adt	= {
		[IPSET_ADD] = type_pf_tadd,
		[IPSET_DEL] = type_pf_tdel,
		[IPSET_TEST] = type_pf_ttest,
	},
	.destroy = type_pf_destroy,
	.flush	= type_pf_flush,
	.head	= type_pf_head,
	.list	= type_pf_tlist,
	.resize = type_pf_tresize,
	.same_set = type_pf_same_set,
};
1164
/* Garbage collector timer callback: sweep expired elements, then
 * re-arm the timer for the next period.
 * ul_set is the struct ip_set pointer cast to unsigned long by
 * type_pf_gc_init().
 */
static void
type_pf_gc(unsigned long ul_set)
{
	struct ip_set *set = (struct ip_set *) ul_set;
	struct ip_set_hash *h = set->data;

	pr_debug("called\n");
	/* Exclude concurrent add/del while sweeping */
	write_lock_bh(&set->lock);
	type_pf_expire(h, NETS_LENGTH(set->family));
	write_unlock_bh(&set->lock);

	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
	add_timer(&h->gc);
}
1179
1180 static void
1181 type_pf_gc_init(struct ip_set *set)
1182 {
1183         struct ip_set_hash *h = set->data;
1184
1185         init_timer(&h->gc);
1186         h->gc.data = (unsigned long) set;
1187         h->gc.function = type_pf_gc;
1188         h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
1189         add_timer(&h->gc);
1190         pr_debug("gc initialized, run in every %u\n",
1191                  IPSET_GC_PERIOD(h->timeout));
1192 }
1193
/* Undefine every per-type template macro so this header can be
 * re-included to instantiate another set type. */
#undef HKEY_DATALEN
#undef HKEY
#undef type_pf_data_equal
#undef type_pf_data_isnull
#undef type_pf_data_copy
#undef type_pf_data_zero_out
#undef type_pf_data_netmask
#undef type_pf_data_list
#undef type_pf_data_tlist
#undef type_pf_data_next
#undef type_pf_data_flags
#undef type_pf_data_reset_flags
#undef type_pf_data_match

/* Timeout-aware element type and its accessors */
#undef type_pf_elem
#undef type_pf_telem
#undef type_pf_data_timeout
#undef type_pf_data_expired
#undef type_pf_data_timeout_set

/* Methods of the plain (no timeout) variant */
#undef type_pf_elem_add
#undef type_pf_add
#undef type_pf_del
#undef type_pf_test_cidrs
#undef type_pf_test

/* Methods of the timeout-aware variant */
#undef type_pf_elem_tadd
#undef type_pf_del_telem
#undef type_pf_expire
#undef type_pf_tadd
#undef type_pf_tdel
#undef type_pf_ttest_cidrs
#undef type_pf_ttest

/* Set-level methods shared or selected per variant */
#undef type_pf_resize
#undef type_pf_tresize
#undef type_pf_flush
#undef type_pf_destroy
#undef type_pf_head
#undef type_pf_list
#undef type_pf_tlist
#undef type_pf_same_set
#undef type_pf_kadt
#undef type_pf_uadt
#undef type_pf_gc
#undef type_pf_gc_init
#undef type_pf_variant
#undef type_pf_tvariant