]> Pileus Git - ~andy/linux/blob - net/netfilter/nf_conntrack_expect.c
3921e5bc12350e45b70c48e7c7f9e766e6ca3e4d
[~andy/linux] / net / netfilter / nf_conntrack_expect.c
1 /* Expectation handling for nf_conntrack. */
2
3 /* (C) 1999-2001 Paul `Rusty' Russell
4  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11
12 #include <linux/types.h>
13 #include <linux/netfilter.h>
14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h>
16 #include <linux/seq_file.h>
17 #include <linux/stddef.h>
18 #include <linux/slab.h>
19 #include <linux/err.h>
20 #include <linux/percpu.h>
21 #include <linux/kernel.h>
22 #include <linux/jhash.h>
23 #include <linux/moduleparam.h>
24 #include <linux/export.h>
25 #include <net/net_namespace.h>
26
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_core.h>
29 #include <net/netfilter/nf_conntrack_expect.h>
30 #include <net/netfilter/nf_conntrack_helper.h>
31 #include <net/netfilter/nf_conntrack_tuple.h>
32 #include <net/netfilter/nf_conntrack_zones.h>
33
/* Number of buckets in the expectation hash table; defaulted in
 * nf_conntrack_expect_init() or set via the expect_hashsize parameter. */
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

/* Upper bound on net->ct.expect_count (checked in __nf_ct_expect_check()). */
unsigned int nf_ct_expect_max __read_mostly;

/* Slab cache backing struct nf_conntrack_expect allocations. */
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
41 /* nf_conntrack_expect helper functions */
42 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
43                                 u32 pid, int report)
44 {
45         struct nf_conn_help *master_help = nfct_help(exp->master);
46         struct net *net = nf_ct_exp_net(exp);
47
48         NF_CT_ASSERT(master_help);
49         NF_CT_ASSERT(!timer_pending(&exp->timeout));
50
51         hlist_del_rcu(&exp->hnode);
52         net->ct.expect_count--;
53
54         hlist_del(&exp->lnode);
55         master_help->expecting[exp->class]--;
56
57         nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
58         nf_ct_expect_put(exp);
59
60         NF_CT_STAT_INC(net, expect_delete);
61 }
62 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
63
64 static void nf_ct_expectation_timed_out(unsigned long ul_expect)
65 {
66         struct nf_conntrack_expect *exp = (void *)ul_expect;
67
68         spin_lock_bh(&nf_conntrack_lock);
69         nf_ct_unlink_expect(exp);
70         spin_unlock_bh(&nf_conntrack_lock);
71         nf_ct_expect_put(exp);
72 }
73
74 static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
75 {
76         unsigned int hash;
77
78         if (unlikely(!nf_conntrack_hash_rnd)) {
79                 init_nf_conntrack_hash_rnd();
80         }
81
82         hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
83                       (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
84                        (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
85         return ((u64)hash * nf_ct_expect_hsize) >> 32;
86 }
87
88 struct nf_conntrack_expect *
89 __nf_ct_expect_find(struct net *net, u16 zone,
90                     const struct nf_conntrack_tuple *tuple)
91 {
92         struct nf_conntrack_expect *i;
93         struct hlist_node *n;
94         unsigned int h;
95
96         if (!net->ct.expect_count)
97                 return NULL;
98
99         h = nf_ct_expect_dst_hash(tuple);
100         hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
101                 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
102                     nf_ct_zone(i->master) == zone)
103                         return i;
104         }
105         return NULL;
106 }
107 EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
108
109 /* Just find a expectation corresponding to a tuple. */
110 struct nf_conntrack_expect *
111 nf_ct_expect_find_get(struct net *net, u16 zone,
112                       const struct nf_conntrack_tuple *tuple)
113 {
114         struct nf_conntrack_expect *i;
115
116         rcu_read_lock();
117         i = __nf_ct_expect_find(net, zone, tuple);
118         if (i && !atomic_inc_not_zero(&i->use))
119                 i = NULL;
120         rcu_read_unlock();
121
122         return i;
123 }
124 EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
125
/* If an expectation for this connection is found, it is removed from the
 * global list and returned with a reference held for the caller.
 * Non-RCU hash walk — the caller is expected to hold nf_conntrack_lock. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, u16 zone,
		       const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i, *exp = NULL;
	struct hlist_node *n;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
		    nf_ct_zone(i->master) == zone) {
			exp = i;
			break;
		}
	}
	if (!exp)
		return NULL;

	/* If master is not in hash table yet (ie. packet hasn't left
	   this machine yet), how can other end know about expected?
	   Hence these are not the droids you are looking for (if
	   master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		/* Permanent expectations stay in the table; just take a
		 * reference for the caller. */
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		/* We beat the timeout timer: unlink the entry and hand the
		 * timer's reference over to the caller. */
		nf_ct_unlink_expect(exp);
		return exp;
	}
	/* Timer already fired; its callback will unlink the entry. */

	return NULL;
}
169
170 /* delete all expectations for this conntrack */
171 void nf_ct_remove_expectations(struct nf_conn *ct)
172 {
173         struct nf_conn_help *help = nfct_help(ct);
174         struct nf_conntrack_expect *exp;
175         struct hlist_node *n, *next;
176
177         /* Optimization: most connection never expect any others. */
178         if (!help)
179                 return;
180
181         hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
182                 if (del_timer(&exp->timeout)) {
183                         nf_ct_unlink_expect(exp);
184                         nf_ct_expect_put(exp);
185                 }
186         }
187 }
188 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
189
190 /* Would two expected things clash? */
191 static inline int expect_clash(const struct nf_conntrack_expect *a,
192                                const struct nf_conntrack_expect *b)
193 {
194         /* Part covered by intersection of masks must be unequal,
195            otherwise they clash */
196         struct nf_conntrack_tuple_mask intersect_mask;
197         int count;
198
199         intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
200
201         for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
202                 intersect_mask.src.u3.all[count] =
203                         a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
204         }
205
206         return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
207 }
208
209 static inline int expect_matches(const struct nf_conntrack_expect *a,
210                                  const struct nf_conntrack_expect *b)
211 {
212         return a->master == b->master && a->class == b->class &&
213                 nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
214                 nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
215                 nf_ct_zone(a->master) == nf_ct_zone(b->master);
216 }
217
/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
	spin_lock_bh(&nf_conntrack_lock);
	/* Only the del_timer() winner may unlink and drop the timer's ref;
	 * if the timer already fired, its callback does the cleanup. */
	if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
	}
	spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
229
230 /* We don't increase the master conntrack refcount for non-fulfilled
231  * conntracks. During the conntrack destruction, the expectations are
232  * always killed before the conntrack itself */
233 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
234 {
235         struct nf_conntrack_expect *new;
236
237         new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
238         if (!new)
239                 return NULL;
240
241         new->master = me;
242         atomic_set(&new->use, 1);
243         return new;
244 }
245 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
246
247 void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
248                        u_int8_t family,
249                        const union nf_inet_addr *saddr,
250                        const union nf_inet_addr *daddr,
251                        u_int8_t proto, const __be16 *src, const __be16 *dst)
252 {
253         int len;
254
255         if (family == AF_INET)
256                 len = 4;
257         else
258                 len = 16;
259
260         exp->flags = 0;
261         exp->class = class;
262         exp->expectfn = NULL;
263         exp->helper = NULL;
264         exp->tuple.src.l3num = family;
265         exp->tuple.dst.protonum = proto;
266
267         if (saddr) {
268                 memcpy(&exp->tuple.src.u3, saddr, len);
269                 if (sizeof(exp->tuple.src.u3) > len)
270                         /* address needs to be cleared for nf_ct_tuple_equal */
271                         memset((void *)&exp->tuple.src.u3 + len, 0x00,
272                                sizeof(exp->tuple.src.u3) - len);
273                 memset(&exp->mask.src.u3, 0xFF, len);
274                 if (sizeof(exp->mask.src.u3) > len)
275                         memset((void *)&exp->mask.src.u3 + len, 0x00,
276                                sizeof(exp->mask.src.u3) - len);
277         } else {
278                 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
279                 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
280         }
281
282         if (src) {
283                 exp->tuple.src.u.all = *src;
284                 exp->mask.src.u.all = htons(0xFFFF);
285         } else {
286                 exp->tuple.src.u.all = 0;
287                 exp->mask.src.u.all = 0;
288         }
289
290         memcpy(&exp->tuple.dst.u3, daddr, len);
291         if (sizeof(exp->tuple.dst.u3) > len)
292                 /* address needs to be cleared for nf_ct_tuple_equal */
293                 memset((void *)&exp->tuple.dst.u3 + len, 0x00,
294                        sizeof(exp->tuple.dst.u3) - len);
295
296         exp->tuple.dst.u.all = *dst;
297 }
298 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
299
300 static void nf_ct_expect_free_rcu(struct rcu_head *head)
301 {
302         struct nf_conntrack_expect *exp;
303
304         exp = container_of(head, struct nf_conntrack_expect, rcu);
305         kmem_cache_free(nf_ct_expect_cachep, exp);
306 }
307
/* Drop a reference to @exp; the final put frees it after an RCU grace
 * period, since lockless lookups may still be traversing the chain. */
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);
314
/* Link a checked expectation into the hash table and its master's list,
 * then arm the timeout timer.  Caller holds nf_conntrack_lock (see the
 * lockdep_is_held() below).  Always returns 0. */
static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(exp);
	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

	/* two references : one for hash insert, one for the timer */
	atomic_add(2, &exp->use);

	hlist_add_head(&exp->lnode, &master_help->expectations);
	master_help->expecting[exp->class]++;

	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
	net->ct.expect_count++;

	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
		    (unsigned long)exp);
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_lock));
	if (helper) {
		exp->timeout.expires = jiffies +
			helper->expect_policy[exp->class].timeout * HZ;
	}
	/* NOTE(review): when @helper is NULL, timeout.expires is not set in
	 * this function before add_timer() — confirm that callers on that
	 * path (e.g. userspace expectations) initialize it elsewhere. */
	add_timer(&exp->timeout);

	NF_CT_STAT_INC(net, expect_create);
	return 0;
}
344
345 /* Race with expectations being used means we could have none to find; OK. */
346 static void evict_oldest_expect(struct nf_conn *master,
347                                 struct nf_conntrack_expect *new)
348 {
349         struct nf_conn_help *master_help = nfct_help(master);
350         struct nf_conntrack_expect *exp, *last = NULL;
351         struct hlist_node *n;
352
353         hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
354                 if (exp->class == new->class)
355                         last = exp;
356         }
357
358         if (last && del_timer(&last->timeout)) {
359                 nf_ct_unlink_expect(last);
360                 nf_ct_expect_put(last);
361         }
362 }
363
/* Validate @expect against the current table under nf_conntrack_lock:
 * replace an identical pending expectation, reject clashing ones, and
 * enforce the per-helper and global limits.
 * Returns 1 when insertion may proceed, a negative errno otherwise. */
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
	const struct nf_conntrack_expect_policy *p;
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(expect);
	struct hlist_node *n, *next;
	unsigned int h;
	int ret = 1;

	/* No helper extension on the master: nothing may expect. */
	if (!master_help) {
		ret = -ESHUTDOWN;
		goto out;
	}
	h = nf_ct_expect_dst_hash(&expect->tuple);
	hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) {
		if (expect_matches(i, expect)) {
			/* Identical expectation already pending: remove it
			 * (if we win against its timer) so the new one can
			 * take its place. */
			if (del_timer(&i->timeout)) {
				nf_ct_unlink_expect(i);
				nf_ct_expect_put(i);
				break;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}
	/* Will be over limit? */
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_lock));
	if (helper) {
		p = &helper->expect_policy[expect->class];
		if (p->max_expected &&
		    master_help->expecting[expect->class] >= p->max_expected) {
			/* Try to make room by evicting the oldest entry of
			 * this class; fail if the master is still at limit. */
			evict_oldest_expect(master, expect);
			if (master_help->expecting[expect->class]
						>= p->max_expected) {
				ret = -EMFILE;
				goto out;
			}
		}
	}

	/* Global cap across the whole namespace. */
	if (net->ct.expect_count >= nf_ct_expect_max) {
		net_warn_ratelimited("nf_conntrack: expectation table full\n");
		ret = -EMFILE;
	}
out:
	return ret;
}
416
417 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, 
418                                 u32 pid, int report)
419 {
420         int ret;
421
422         spin_lock_bh(&nf_conntrack_lock);
423         ret = __nf_ct_expect_check(expect);
424         if (ret <= 0)
425                 goto out;
426
427         ret = nf_ct_expect_insert(expect);
428         if (ret < 0)
429                 goto out;
430         spin_unlock_bh(&nf_conntrack_lock);
431         nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
432         return ret;
433 out:
434         spin_unlock_bh(&nf_conntrack_lock);
435         return ret;
436 }
437 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
438
439 #ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Private iterator state for the /proc seq_file walk over the hash. */
struct ct_expect_iter_state {
	struct seq_net_private p;
	unsigned int bucket;	/* current hash bucket index */
};
444
445 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
446 {
447         struct net *net = seq_file_net(seq);
448         struct ct_expect_iter_state *st = seq->private;
449         struct hlist_node *n;
450
451         for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
452                 n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
453                 if (n)
454                         return n;
455         }
456         return NULL;
457 }
458
459 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
460                                              struct hlist_node *head)
461 {
462         struct net *net = seq_file_net(seq);
463         struct ct_expect_iter_state *st = seq->private;
464
465         head = rcu_dereference(hlist_next_rcu(head));
466         while (head == NULL) {
467                 if (++st->bucket >= nf_ct_expect_hsize)
468                         return NULL;
469                 head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
470         }
471         return head;
472 }
473
474 static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
475 {
476         struct hlist_node *head = ct_expect_get_first(seq);
477
478         if (head)
479                 while (pos && (head = ct_expect_get_next(seq, head)))
480                         pos--;
481         return pos ? NULL : head;
482 }
483
/* seq_file start: take the RCU read lock for the duration of the walk
 * (released in exp_seq_stop()) and seek to position *pos. */
static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return ct_expect_get_idx(seq, *pos);
}
490
/* seq_file next: bump the position and return the following node. */
static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}
496
/* seq_file stop: drop the RCU read lock taken in exp_seq_start(). */
static void exp_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
502
/* Emit one /proc line for the expectation @v points at: remaining timeout,
 * l3/l4 protocol numbers, the expected tuple, flags, and (when a helper is
 * attached) the helper name and expectation policy name. */
static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect;
	struct nf_conntrack_helper *helper;
	struct hlist_node *n = v;
	char *delim = "";

	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

	/* Seconds until expiry; "-" when no timer function was ever set. */
	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
				       expect->tuple.dst.protonum));

	/* Flags, comma-separated via @delim. */
	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
		seq_printf(s, "PERMANENT");
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_INACTIVE) {
		seq_printf(s, "%sINACTIVE", delim);
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_USERSPACE)
		seq_printf(s, "%sUSERSPACE", delim);

	helper = rcu_dereference(nfct_help(expect->master)->helper);
	if (helper) {
		seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
		if (helper->expect_policy[expect->class].name)
			seq_printf(s, "/%s",
				   helper->expect_policy[expect->class].name);
	}

	return seq_putc(s, '\n');
}
546
/* seq_file callbacks for /proc/net/nf_conntrack_expect. */
static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};
553
/* Open handler: set up per-netns seq_file state for the expectation walk. */
static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &exp_seq_ops,
			sizeof(struct ct_expect_iter_state));
}
559
/* File operations for /proc/net/nf_conntrack_expect. */
static const struct file_operations exp_file_ops = {
	.owner   = THIS_MODULE,
	.open    = exp_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
567 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
568
/* Create the per-netns /proc/net/nf_conntrack_expect entry when procfs
 * support is compiled in.  Returns 0 on success, -ENOMEM on failure. */
static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	if (!proc_create("nf_conntrack_expect", 0440, net->proc_net,
			 &exp_file_ops))
		return -ENOMEM;
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
	return 0;
}
581
/* Remove the per-netns /proc entry (no-op without procfs support). */
static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	remove_proc_entry("nf_conntrack_expect", net->proc_net);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
}
588
/* Expose the hash size as a read-only (0400) module parameter. */
module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
590
591 int nf_conntrack_expect_pernet_init(struct net *net)
592 {
593         int err = -ENOMEM;
594
595         net->ct.expect_count = 0;
596         net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
597         if (net->ct.expect_hash == NULL)
598                 goto err1;
599
600         err = exp_proc_init(net);
601         if (err < 0)
602                 goto err2;
603
604         return 0;
605 err2:
606         nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
607 err1:
608         return err;
609 }
610
/* Per-netns teardown: remove the /proc entry and free the hash table. */
void nf_conntrack_expect_pernet_fini(struct net *net)
{
	exp_proc_remove(net);
	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
}
616
617 int nf_conntrack_expect_init(void)
618 {
619         if (!nf_ct_expect_hsize) {
620                 nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
621                 if (!nf_ct_expect_hsize)
622                         nf_ct_expect_hsize = 1;
623         }
624         nf_ct_expect_max = nf_ct_expect_hsize * 4;
625         nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
626                                 sizeof(struct nf_conntrack_expect),
627                                 0, 0, NULL);
628         if (!nf_ct_expect_cachep)
629                 return -ENOMEM;
630         return 0;
631 }
632
/* Module-wide teardown: flush pending RCU frees, then destroy the cache. */
void nf_conntrack_expect_fini(void)
{
	rcu_barrier(); /* Wait for call_rcu() before destroy */
	kmem_cache_destroy(nf_ct_expect_cachep);
}