/*
 * [XFRM]: Dynamic xfrm_state hash table sizing.
 */
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/bootmem.h>
22 #include <linux/vmalloc.h>
23 #include <linux/cache.h>
24 #include <asm/uaccess.h>
25
/* Netlink socket used to deliver XFRM events to userspace key managers. */
struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);

/* Sysctl: default async-event (aevent) timer setting for new states. */
u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

/* Sysctl: default replay sequence-number delta that triggers an aevent. */
u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35 /* Each xfrm_state may be linked to two tables:
36
37    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38    2. Hash table by daddr to find what SAs exist for given
39       destination/tunnel endpoint. (output)
40  */
41
/* Protects the three state hash tables below, xfrm_state_num, and the
 * chain membership of every installed xfrm_state. */
static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct hlist_head *xfrm_state_bydst __read_mostly;
static struct hlist_head *xfrm_state_bysrc __read_mostly;	/* keyed by source address */
static struct hlist_head *xfrm_state_byspi __read_mostly;	/* keyed by (daddr, spi, proto) */
static unsigned int xfrm_state_hmask __read_mostly;		/* bucket count - 1 (power of two tables) */
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;	/* grow limit for bucket count */
static unsigned int xfrm_state_num;				/* number of installed states */
57 static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask)
58 {
59         unsigned int h;
60         h = ntohl(addr->a4);
61         h = (h ^ (h>>16)) & hmask;
62         return h;
63 }
64
65 static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask)
66 {
67         unsigned int h;
68         h = ntohl(addr->a6[2]^addr->a6[3]);
69         h = (h ^ (h>>16)) & hmask;
70         return h;
71 }
72
/* Source-address hash: same mixing as the IPv4 destination hash. */
static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask)
{
        return __xfrm4_dst_hash(addr, hmask);
}
77
/* Source-address hash: same mixing as the IPv6 destination hash. */
static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask)
{
        return __xfrm6_dst_hash(addr, hmask);
}
82
83 static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,  unsigned int hmask)
84 {
85         switch (family) {
86         case AF_INET:
87                 return __xfrm4_src_hash(addr, hmask);
88         case AF_INET6:
89                 return __xfrm6_src_hash(addr, hmask);
90         }
91         return 0;
92 }
93
/* Source-address hash against the current (live) table mask. */
static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
{
        return __xfrm_src_hash(addr, family, xfrm_state_hmask);
}
98
99 static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
100 {
101         switch (family) {
102         case AF_INET:
103                 return __xfrm4_dst_hash(addr, hmask);
104         case AF_INET6:
105                 return __xfrm6_dst_hash(addr, hmask);
106         }
107         return 0;
108 }
109
/* Destination-address hash against the current (live) table mask. */
static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
{
        return __xfrm_dst_hash(addr, family, xfrm_state_hmask);
}
114
115 static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
116                                         unsigned int hmask)
117 {
118         unsigned int h;
119         h = ntohl(addr->a4^spi^proto);
120         h = (h ^ (h>>10) ^ (h>>20)) & hmask;
121         return h;
122 }
123
124 static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
125                                             unsigned int hmask)
126 {
127         unsigned int h;
128         h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
129         h = (h ^ (h>>10) ^ (h>>20)) & hmask;
130         return h;
131 }
132
133 static inline
134 unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
135                          unsigned int hmask)
136 {
137         switch (family) {
138         case AF_INET:
139                 return __xfrm4_spi_hash(addr, spi, proto, hmask);
140         case AF_INET6:
141                 return __xfrm6_spi_hash(addr, spi, proto, hmask);
142         }
143         return 0;       /*XXX*/
144 }
145
/* SPI hash against the current (live) table mask. */
static inline unsigned int
xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
{
        return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
}
151
152 static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
153 {
154         struct hlist_head *n;
155
156         if (sz <= PAGE_SIZE)
157                 n = kmalloc(sz, GFP_KERNEL);
158         else if (hashdist)
159                 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
160         else
161                 n = (struct hlist_head *)
162                         __get_free_pages(GFP_KERNEL, get_order(sz));
163
164         if (n)
165                 memset(n, 0, sz);
166
167         return n;
168 }
169
170 static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
171 {
172         if (sz <= PAGE_SIZE)
173                 kfree(n);
174         else if (hashdist)
175                 vfree(n);
176         else
177                 free_pages((unsigned long)n, get_order(sz));
178 }
179
180 static void xfrm_hash_transfer(struct hlist_head *list,
181                                struct hlist_head *ndsttable,
182                                struct hlist_head *nsrctable,
183                                struct hlist_head *nspitable,
184                                unsigned int nhashmask)
185 {
186         struct hlist_node *entry, *tmp;
187         struct xfrm_state *x;
188
189         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
190                 unsigned int h;
191
192                 h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask);
193                 hlist_add_head(&x->bydst, ndsttable+h);
194
195                 h = __xfrm_src_hash(&x->props.saddr, x->props.family,
196                                     nhashmask);
197                 hlist_add_head(&x->bysrc, nsrctable+h);
198
199                 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
200                                     x->props.family, nhashmask);
201                 hlist_add_head(&x->byspi, nspitable+h);
202         }
203 }
204
205 static unsigned long xfrm_hash_new_size(void)
206 {
207         return ((xfrm_state_hmask + 1) << 1) *
208                 sizeof(struct hlist_head);
209 }
210
/* Serializes table grow operations: only one resize runs at a time. */
static DEFINE_MUTEX(hash_resize_mutex);

/* Work-queue handler that doubles all three state hash tables.
 *
 * All new tables are allocated up front outside of the state lock; on
 * any allocation failure the resize is abandoned with no change.  The
 * actual transfer runs under xfrm_state_lock so lookups never see a
 * half-populated table; the old tables are freed after the lock is
 * dropped.
 */
static void xfrm_hash_resize(void *__unused)
{
	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
	unsigned long nsize, osize;
	unsigned int nhashmask, ohashmask;
	int i;

	mutex_lock(&hash_resize_mutex);

	nsize = xfrm_hash_new_size();
	ndst = xfrm_state_hash_alloc(nsize);
	if (!ndst)
		goto out_unlock;
	nsrc = xfrm_state_hash_alloc(nsize);
	if (!nsrc) {
		xfrm_state_hash_free(ndst, nsize);
		goto out_unlock;
	}
	nspi = xfrm_state_hash_alloc(nsize);
	if (!nspi) {
		xfrm_state_hash_free(ndst, nsize);
		xfrm_state_hash_free(nsrc, nsize);
		goto out_unlock;
	}

	spin_lock_bh(&xfrm_state_lock);

	/* Walking the old bydst chains visits every state exactly once;
	 * xfrm_hash_transfer relinks each one into all three new tables. */
	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
	for (i = xfrm_state_hmask; i >= 0; i--)
		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
				   nhashmask);

	odst = xfrm_state_bydst;
	osrc = xfrm_state_bysrc;
	ospi = xfrm_state_byspi;
	ohashmask = xfrm_state_hmask;

	xfrm_state_bydst = ndst;
	xfrm_state_bysrc = nsrc;
	xfrm_state_byspi = nspi;
	xfrm_state_hmask = nhashmask;

	spin_unlock_bh(&xfrm_state_lock);

	osize = (ohashmask + 1) * sizeof(struct hlist_head);
	xfrm_state_hash_free(odst, osize);
	xfrm_state_hash_free(osrc, osize);
	xfrm_state_hash_free(ospi, osize);

out_unlock:
	mutex_unlock(&hash_resize_mutex);
}
265
/* Deferred hash table grow; scheduled from __xfrm_state_insert(). */
static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);

/* Key managers sleep here waiting for state events (inserts, expiries,
 * garbage collection). */
DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

/* Deferred destruction of dead states (see __xfrm_state_destroy()). */
static struct work_struct xfrm_state_gc_work;
static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

/* Set when the GC task should also flush cached dst bundles. */
static int xfrm_state_gc_flush_bundles;

int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
287
/* Final teardown of a dead state; runs from the GC work queue.
 * Both timers must already be inactive by now -- a still-pending timer
 * would fire on freed memory, hence the BUG()s. */
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
        if (del_timer(&x->timer))
                BUG();
        if (del_timer(&x->rtimer))
                BUG();
        /* kfree(NULL) is a no-op, so unset optional members are fine. */
        kfree(x->aalg);
        kfree(x->ealg);
        kfree(x->calg);
        kfree(x->encap);
        kfree(x->coaddr);
        if (x->mode)
                xfrm_put_mode(x->mode);
        if (x->type) {
                x->type->destructor(x);
                xfrm_put_type(x->type);
        }
        security_xfrm_state_free(x);
        kfree(x);
}
308
/* Work-queue handler: destroy all states queued on xfrm_state_gc_list
 * and, when requested, flush stale dst bundles first. */
static void xfrm_state_gc_task(void *data)
{
        struct xfrm_state *x;
        struct hlist_node *entry, *tmp;
        struct hlist_head gc_list;

        if (xfrm_state_gc_flush_bundles) {
                xfrm_state_gc_flush_bundles = 0;
                xfrm_flush_bundles();
        }

        /* Steal the whole pending list under the lock, then destroy the
         * entries without holding it. */
        spin_lock_bh(&xfrm_state_gc_lock);
        gc_list.first = xfrm_state_gc_list.first;
        INIT_HLIST_HEAD(&xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);

        /* Dead states are chained via their (otherwise unused) bydst
         * link -- see __xfrm_state_destroy(). */
        hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
                xfrm_state_gc_destroy(x);

        wake_up(&km_waitq);
}
330
331 static inline unsigned long make_jiffies(long secs)
332 {
333         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
334                 return MAX_SCHEDULE_TIMEOUT-1;
335         else
336                 return secs*HZ;
337 }
338
/* Per-state lifetime timer.
 *
 * Enforces the four lifetime deadlines: hard add/use expiry deletes the
 * state, soft add/use expiry only notifies the key manager and marks the
 * state dying.  The handler re-arms itself for the nearest remaining
 * deadline.  The pending timer owns one reference on x, dropped at the
 * end unless mod_timer() re-armed an inactive timer.
 */
static void xfrm_timer_handler(unsigned long data)
{
        struct xfrm_state *x = (struct xfrm_state*)data;
        unsigned long now = (unsigned long)xtime.tv_sec;
        long next = LONG_MAX;   /* seconds until the nearest deadline */
        int warn = 0;           /* non-zero => a soft limit was crossed */

        spin_lock(&x->lock);
        if (x->km.state == XFRM_STATE_DEAD)
                goto out;
        if (x->km.state == XFRM_STATE_EXPIRED)
                goto expired;
        if (x->lft.hard_add_expires_seconds) {
                long tmo = x->lft.hard_add_expires_seconds +
                        x->curlft.add_time - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (x->lft.hard_use_expires_seconds) {
                /* A never-used state (use_time == 0) is timed from now. */
                long tmo = x->lft.hard_use_expires_seconds +
                        (x->curlft.use_time ? : now) - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        /* Once dying is set the key manager was already warned; only
         * hard limits remain interesting. */
        if (x->km.dying)
                goto resched;
        if (x->lft.soft_add_expires_seconds) {
                long tmo = x->lft.soft_add_expires_seconds +
                        x->curlft.add_time - now;
                if (tmo <= 0)
                        warn = 1;
                else if (tmo < next)
                        next = tmo;
        }
        if (x->lft.soft_use_expires_seconds) {
                long tmo = x->lft.soft_use_expires_seconds +
                        (x->curlft.use_time ? : now) - now;
                if (tmo <= 0)
                        warn = 1;
                else if (tmo < next)
                        next = tmo;
        }

        x->km.dying = warn;
        if (warn)
                km_state_expired(x, 0, 0);
resched:
        /* mod_timer() returning 0 means the timer was inactive, so the
         * newly pending timer needs its own reference. */
        if (next != LONG_MAX &&
            !mod_timer(&x->timer, jiffies + make_jiffies(next)))
                xfrm_state_hold(x);
        goto out;

expired:
        /* A larval acquire state (no SPI yet) just flips to EXPIRED and
         * is re-polled shortly, letting km_waitq waiters reap it. */
        if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
                x->km.state = XFRM_STATE_EXPIRED;
                wake_up(&km_waitq);
                next = 2;
                goto resched;
        }
        if (!__xfrm_state_delete(x) && x->id.spi)
                km_state_expired(x, 1, 0);

out:
        spin_unlock(&x->lock);
        xfrm_state_put(x);
}
409
410 static void xfrm_replay_timer_handler(unsigned long data);
411
/* Allocate and minimally initialize a new xfrm_state.
 *
 * The returned state carries one reference (the caller's), is not on
 * any hash chain, has both timers initialized but not armed, and has
 * all byte/packet lifetime limits set to "infinite".  Returns NULL on
 * allocation failure (GFP_ATOMIC).
 */
struct xfrm_state *xfrm_state_alloc(void)
{
        struct xfrm_state *x;

        x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

        if (x) {
                atomic_set(&x->refcnt, 1);
                atomic_set(&x->tunnel_users, 0);
                INIT_HLIST_NODE(&x->bydst);
                INIT_HLIST_NODE(&x->bysrc);
                INIT_HLIST_NODE(&x->byspi);
                init_timer(&x->timer);
                x->timer.function = xfrm_timer_handler;
                x->timer.data     = (unsigned long)x;
                init_timer(&x->rtimer);
                x->rtimer.function = xfrm_replay_timer_handler;
                x->rtimer.data     = (unsigned long)x;
                x->curlft.add_time = (unsigned long)xtime.tv_sec;
                x->lft.soft_byte_limit = XFRM_INF;
                x->lft.soft_packet_limit = XFRM_INF;
                x->lft.hard_byte_limit = XFRM_INF;
                x->lft.hard_packet_limit = XFRM_INF;
                x->replay_maxage = 0;
                x->replay_maxdiff = 0;
                spin_lock_init(&x->lock);
        }
        return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);
442
/* Queue a dead state for deferred destruction on the GC work queue.
 * The state must already be unhashed (XFRM_STATE_DEAD), so its bydst
 * link is free to be reused to chain it onto xfrm_state_gc_list. */
void __xfrm_state_destroy(struct xfrm_state *x)
{
        BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

        spin_lock_bh(&xfrm_state_gc_lock);
        hlist_add_head(&x->bydst, &xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);
        schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
453
/* Mark @x dead and unlink it from all hash tables and timers.
 *
 * Returns 0 if this call performed the transition to XFRM_STATE_DEAD,
 * -ESRCH if the state was already dead.  Caller must hold x->lock and
 * its own reference.  The final kfree happens later via the GC work
 * queue once the refcount drops to zero.
 */
int __xfrm_state_delete(struct xfrm_state *x)
{
        int err = -ESRCH;

        if (x->km.state != XFRM_STATE_DEAD) {
                x->km.state = XFRM_STATE_DEAD;
                spin_lock(&xfrm_state_lock);
                /* Each hash chain link holds one reference; drop it as
                 * the state is unlinked. */
                hlist_del(&x->bydst);
                __xfrm_state_put(x);
                hlist_del(&x->bysrc);
                __xfrm_state_put(x);
                /* Only states with an SPI are in the byspi table. */
                if (x->id.spi) {
                        hlist_del(&x->byspi);
                        __xfrm_state_put(x);
                }
                xfrm_state_num--;
                spin_unlock(&xfrm_state_lock);
                /* A pending timer also holds a reference. */
                if (del_timer(&x->timer))
                        __xfrm_state_put(x);
                if (del_timer(&x->rtimer))
                        __xfrm_state_put(x);

                /* The number two in this test is the reference
                 * mentioned in the comment below plus the reference
                 * our caller holds.  A larger value means that
                 * there are DSTs attached to this xfrm_state.
                 */
                if (atomic_read(&x->refcnt) > 2) {
                        xfrm_state_gc_flush_bundles = 1;
                        schedule_work(&xfrm_state_gc_work);
                }

                /* All xfrm_state objects are created by xfrm_state_alloc.
                 * The xfrm_state_alloc call gives a reference, and that
                 * is what we are dropping here.
                 */
                __xfrm_state_put(x);
                err = 0;
        }

        return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);
497
498 int xfrm_state_delete(struct xfrm_state *x)
499 {
500         int err;
501
502         spin_lock_bh(&x->lock);
503         err = __xfrm_state_delete(x);
504         spin_unlock_bh(&x->lock);
505
506         return err;
507 }
508 EXPORT_SYMBOL(xfrm_state_delete);
509
510 void xfrm_state_flush(u8 proto)
511 {
512         int i;
513
514         spin_lock_bh(&xfrm_state_lock);
515         for (i = 0; i < xfrm_state_hmask; i++) {
516                 struct hlist_node *entry;
517                 struct xfrm_state *x;
518 restart:
519                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
520                         if (!xfrm_state_kern(x) &&
521                             xfrm_id_proto_match(x->id.proto, proto)) {
522                                 xfrm_state_hold(x);
523                                 spin_unlock_bh(&xfrm_state_lock);
524
525                                 xfrm_state_delete(x);
526                                 xfrm_state_put(x);
527
528                                 spin_lock_bh(&xfrm_state_lock);
529                                 goto restart;
530                         }
531                 }
532         }
533         spin_unlock_bh(&xfrm_state_lock);
534         wake_up(&km_waitq);
535 }
536 EXPORT_SYMBOL(xfrm_state_flush);
537
/* Fill in the temporary selector of a larval state via the per-family
 * afinfo hook.  Returns 0 on success, -1 if the family has no afinfo
 * registered. */
static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
                  struct xfrm_tmpl *tmpl,
                  xfrm_address_t *daddr, xfrm_address_t *saddr,
                  unsigned short family)
{
        struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                return -1;
        afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
        xfrm_state_put_afinfo(afinfo);
        return 0;
}
551
552 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
553 {
554         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
555         struct xfrm_state *x;
556         struct hlist_node *entry;
557
558         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
559                 if (x->props.family != family ||
560                     x->id.spi       != spi ||
561                     x->id.proto     != proto)
562                         continue;
563
564                 switch (family) {
565                 case AF_INET:
566                         if (x->id.daddr.a4 != daddr->a4)
567                                 continue;
568                         break;
569                 case AF_INET6:
570                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
571                                              (struct in6_addr *)
572                                              x->id.daddr.a6))
573                                 continue;
574                         break;
575                 };
576
577                 xfrm_state_hold(x);
578                 return x;
579         }
580
581         return NULL;
582 }
583
584 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
585 {
586         unsigned int h = xfrm_src_hash(saddr, family);
587         struct xfrm_state *x;
588         struct hlist_node *entry;
589
590         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
591                 if (x->props.family != family ||
592                     x->id.proto     != proto)
593                         continue;
594
595                 switch (family) {
596                 case AF_INET:
597                         if (x->id.daddr.a4 != daddr->a4 ||
598                             x->props.saddr.a4 != saddr->a4)
599                                 continue;
600                         break;
601                 case AF_INET6:
602                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
603                                              (struct in6_addr *)
604                                              x->id.daddr.a6) ||
605                             !ipv6_addr_equal((struct in6_addr *)saddr,
606                                              (struct in6_addr *)
607                                              x->props.saddr.a6))
608                                 continue;
609                         break;
610                 };
611
612                 xfrm_state_hold(x);
613                 return x;
614         }
615
616         return NULL;
617 }
618
619 static inline struct xfrm_state *
620 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
621 {
622         if (use_spi)
623                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
624                                            x->id.proto, family);
625         else
626                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
627                                                   &x->props.saddr,
628                                                   x->id.proto, family);
629 }
630
/* Output-path state lookup: find (or trigger creation of) a state
 * matching template @tmpl for flow @fl towards @daddr.
 *
 * Among valid candidates, prefer a non-dying state, then the most
 * recently added.  If nothing matches and no acquire is already in
 * flight, a larval ACQ state is installed and the key manager queried.
 * Returns a held state, or NULL with *err set (-EAGAIN when an acquire
 * is pending, otherwise -ENOMEM/-ESRCH/-EEXIST/security error).
 */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                struct flowi *fl, struct xfrm_tmpl *tmpl,
                struct xfrm_policy *pol, int *err,
                unsigned short family)
{
        unsigned int h = xfrm_dst_hash(daddr, family);
        struct hlist_node *entry;
        struct xfrm_state *x, *x0;
        int acquire_in_progress = 0;
        int error = 0;
        struct xfrm_state *best = NULL;

        spin_lock_bh(&xfrm_state_lock);
        hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                if (x->props.family == family &&
                    x->props.reqid == tmpl->reqid &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
                    xfrm_state_addr_check(x, daddr, saddr, family) &&
                    tmpl->mode == x->props.mode &&
                    tmpl->id.proto == x->id.proto &&
                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
                        /* Resolution logic:
                           1. There is a valid state with matching selector.
                              Done.
                           2. Valid state with inappropriate selector. Skip.

                           Entering area of "sysdeps".

                           3. If state is not valid, selector is temporary,
                              it selects only session which triggered
                              previous resolution. Key manager will do
                              something to install a state with proper
                              selector.
                         */
                        if (x->km.state == XFRM_STATE_VALID) {
                                if (!xfrm_selector_match(&x->sel, fl, family) ||
                                    !security_xfrm_state_pol_flow_match(x, pol, fl))
                                        continue;
                                /* Prefer non-dying, then newest, state. */
                                if (!best ||
                                    best->km.dying > x->km.dying ||
                                    (best->km.dying == x->km.dying &&
                                     best->curlft.add_time < x->curlft.add_time))
                                        best = x;
                        } else if (x->km.state == XFRM_STATE_ACQ) {
                                acquire_in_progress = 1;
                        } else if (x->km.state == XFRM_STATE_ERROR ||
                                   x->km.state == XFRM_STATE_EXPIRED) {
                                if (xfrm_selector_match(&x->sel, fl, family) &&
                                    security_xfrm_state_pol_flow_match(x, pol, fl))
                                        error = -ESRCH;
                        }
                }
        }

        x = best;
        if (!x && !error && !acquire_in_progress) {
                /* A state with the requested SPI exists but did not
                 * match above: refuse to create a duplicate. */
                if (tmpl->id.spi &&
                    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
                                              tmpl->id.proto, family)) != NULL) {
                        xfrm_state_put(x0);
                        error = -EEXIST;
                        goto out;
                }
                x = xfrm_state_alloc();
                if (x == NULL) {
                        error = -ENOMEM;
                        goto out;
                }
                /* Initialize temporary selector matching only
                 * to current session. */
                xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

                error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
                if (error) {
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
                        x = NULL;
                        goto out;
                }

                /* Ask the key manager to negotiate a real SA; meanwhile
                 * install the larval ACQ state.  Each hash link and the
                 * pending expiry timer takes its own reference. */
                if (km_query(x, tmpl, pol) == 0) {
                        x->km.state = XFRM_STATE_ACQ;
                        hlist_add_head(&x->bydst, xfrm_state_bydst+h);
                        xfrm_state_hold(x);
                        h = xfrm_src_hash(saddr, family);
                        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                        xfrm_state_hold(x);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
                                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
                                xfrm_state_hold(x);
                        }
                        x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
                        xfrm_state_hold(x);
                        x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
                        add_timer(&x->timer);
                } else {
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
                        x = NULL;
                        error = -ESRCH;
                }
        }
out:
        if (x)
                xfrm_state_hold(x);
        else
                *err = acquire_in_progress ? -EAGAIN : error;
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
743
744 static void __xfrm_state_insert(struct xfrm_state *x)
745 {
746         unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family);
747
748         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
749         xfrm_state_hold(x);
750
751         h = xfrm_src_hash(&x->props.saddr, x->props.family);
752
753         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
754         xfrm_state_hold(x);
755
756         if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
757                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
758                                   x->props.family);
759
760                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
761                 xfrm_state_hold(x);
762         }
763
764         if (!mod_timer(&x->timer, jiffies + HZ))
765                 xfrm_state_hold(x);
766
767         if (x->replay_maxage &&
768             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
769                 xfrm_state_hold(x);
770
771         wake_up(&km_waitq);
772
773         xfrm_state_num++;
774
775         if (x->bydst.next != NULL &&
776             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
777             xfrm_state_num > xfrm_state_hmask)
778                 schedule_work(&xfrm_hash_work);
779 }
780
781 void xfrm_state_insert(struct xfrm_state *x)
782 {
783         spin_lock_bh(&xfrm_state_lock);
784         __xfrm_state_insert(x);
785         spin_unlock_bh(&xfrm_state_lock);
786
787         xfrm_flush_all_bundles();
788 }
789 EXPORT_SYMBOL(xfrm_state_insert);
790
/* xfrm_state_lock is held */
/* Find an existing larval (ACQ, zero-SPI) state matching the given
 * identity; when none exists and @create is set, allocate and install
 * one with a temporary host-to-host selector and the acquire expiry
 * timer armed.  Returns a held state, or NULL. */
static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
{
        unsigned int h = xfrm_dst_hash(daddr, family);
        struct hlist_node *entry;
        struct xfrm_state *x;

        hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                if (x->props.reqid  != reqid ||
                    x->props.mode   != mode ||
                    x->props.family != family ||
                    x->km.state     != XFRM_STATE_ACQ ||
                    x->id.spi       != 0)
                        continue;

                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4    != daddr->a4 ||
                            x->props.saddr.a4 != saddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
                                             (struct in6_addr *)daddr) ||
                            !ipv6_addr_equal((struct in6_addr *)
                                             x->props.saddr.a6,
                                             (struct in6_addr *)saddr))
                                continue;
                        break;
                };

                xfrm_state_hold(x);
                return x;
        }

        if (!create)
                return NULL;

        x = xfrm_state_alloc();
        if (likely(x)) {
                /* Temporary selector matches exactly the one host pair
                 * (full-length prefixes). */
                switch (family) {
                case AF_INET:
                        x->sel.daddr.a4 = daddr->a4;
                        x->sel.saddr.a4 = saddr->a4;
                        x->sel.prefixlen_d = 32;
                        x->sel.prefixlen_s = 32;
                        x->props.saddr.a4 = saddr->a4;
                        x->id.daddr.a4 = daddr->a4;
                        break;

                case AF_INET6:
                        ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
                                       (struct in6_addr *)daddr);
                        ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
                                       (struct in6_addr *)saddr);
                        x->sel.prefixlen_d = 128;
                        x->sel.prefixlen_s = 128;
                        ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
                                       (struct in6_addr *)saddr);
                        ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
                                       (struct in6_addr *)daddr);
                        break;
                };

                x->km.state = XFRM_STATE_ACQ;
                x->id.proto = proto;
                x->props.family = family;
                x->props.mode = mode;
                x->props.reqid = reqid;
                x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
                /* Each of: the pending timer, the bydst link and the
                 * bysrc link takes its own reference.  No byspi link
                 * yet -- the SPI is still zero. */
                xfrm_state_hold(x);
                x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
                add_timer(&x->timer);
                xfrm_state_hold(x);
                hlist_add_head(&x->bydst, xfrm_state_bydst+h);
                h = xfrm_src_hash(saddr, family);
                xfrm_state_hold(x);
                hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                wake_up(&km_waitq);
        }

        return x;
}
874
/* Defined below; forward-declared for use by xfrm_state_add(). */
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
876
/* Insert a fully-specified SA into the global tables.
 *
 * Fails with -EEXIST if an identical SA is already installed.  A
 * matching larval ACQUIRE state -- found either by km sequence number
 * or by (mode, reqid, proto, daddr, saddr) -- is deleted after the new
 * state has been inserted.  On success stale bundles are flushed.
 */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		/* Only keep the ACQ state if it is for the same destination. */
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (!err)
		xfrm_flush_all_bundles();

	/* Retire the larval state (if any) now that x is installed. */
	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
926
/* Update an installed SA with the parameters carried in @x.
 *
 * Returns -ESRCH if no matching state exists and -EEXIST if the match
 * is a kernel-internal state.  If the match is still a larval ACQ
 * state, @x is inserted outright and the larva is deleted.  Otherwise
 * the existing state's encap, care-of address, selector and lifetimes
 * are overwritten in place under x1->lock; -EINVAL if it was no
 * longer VALID at that point.
 */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		/* Larval state: install @x; the larva is deleted below. */
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		/* mod_timer() returns 0 when the timer was idle, so the
		 * timer now owns a fresh reference. */
		if (!mod_timer(&x1->timer, jiffies + HZ))
			xfrm_state_hold(x1);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
991
/* Enforce lifetime limits on @x.  Stamps the first-use time, hard-
 * expires the state via an immediate timer run when a hard byte or
 * packet limit is hit, and sends one soft-expire notification when a
 * soft limit is first crossed.  Returns 0 if the state remains
 * usable, -EINVAL otherwise.
 */
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		x->km.state = XFRM_STATE_EXPIRED;
		/* Fire the state timer now; if it was idle, it takes a
		 * new reference on x. */
		if (!mod_timer(&x->timer, jiffies))
			xfrm_state_hold(x);
		return -EINVAL;
	}

	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit)) {
		/* km.dying guards against repeated soft-expire events. */
		x->km.dying = 1;
		km_state_expired(x, 0, 0);
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
1017
1018 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1019 {
1020         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1021                 - skb_headroom(skb);
1022
1023         if (nhead > 0)
1024                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1025
1026         /* Check tail too... */
1027         return 0;
1028 }
1029
/* Validate that @x may be used to transform @skb: the state must not
 * have expired and the skb must have (or gain) sufficient headroom.
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err < 0)
		return err;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
1040
1041 struct xfrm_state *
1042 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
1043                   unsigned short family)
1044 {
1045         struct xfrm_state *x;
1046
1047         spin_lock_bh(&xfrm_state_lock);
1048         x = __xfrm_state_lookup(daddr, spi, proto, family);
1049         spin_unlock_bh(&xfrm_state_lock);
1050         return x;
1051 }
1052 EXPORT_SYMBOL(xfrm_state_lookup);
1053
1054 struct xfrm_state *
1055 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1056                          u8 proto, unsigned short family)
1057 {
1058         struct xfrm_state *x;
1059
1060         spin_lock_bh(&xfrm_state_lock);
1061         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1062         spin_unlock_bh(&xfrm_state_lock);
1063         return x;
1064 }
1065 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1066
1067 struct xfrm_state *
1068 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
1069               xfrm_address_t *daddr, xfrm_address_t *saddr, 
1070               int create, unsigned short family)
1071 {
1072         struct xfrm_state *x;
1073
1074         spin_lock_bh(&xfrm_state_lock);
1075         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1076         spin_unlock_bh(&xfrm_state_lock);
1077
1078         return x;
1079 }
1080 EXPORT_SYMBOL(xfrm_find_acq);
1081
1082 #ifdef CONFIG_XFRM_SUB_POLICY
1083 int
1084 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1085                unsigned short family)
1086 {
1087         int err = 0;
1088         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1089         if (!afinfo)
1090                 return -EAFNOSUPPORT;
1091
1092         spin_lock_bh(&xfrm_state_lock);
1093         if (afinfo->tmpl_sort)
1094                 err = afinfo->tmpl_sort(dst, src, n);
1095         spin_unlock_bh(&xfrm_state_lock);
1096         xfrm_state_put_afinfo(afinfo);
1097         return err;
1098 }
1099 EXPORT_SYMBOL(xfrm_tmpl_sort);
1100
1101 int
1102 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1103                 unsigned short family)
1104 {
1105         int err = 0;
1106         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1107         if (!afinfo)
1108                 return -EAFNOSUPPORT;
1109
1110         spin_lock_bh(&xfrm_state_lock);
1111         if (afinfo->state_sort)
1112                 err = afinfo->state_sort(dst, src, n);
1113         spin_unlock_bh(&xfrm_state_lock);
1114         xfrm_state_put_afinfo(afinfo);
1115         return err;
1116 }
1117 EXPORT_SYMBOL(xfrm_state_sort);
1118 #endif
1119
/* Silly enough, but I'm lazy to build resolution list */

/* Linear scan of every bydst hash chain for an ACQUIRE state carrying
 * km sequence number @seq -- there is no by-seq index, so this is
 * O(table size).  Caller must hold xfrm_state_lock.  Returns a held
 * reference or NULL.
 */
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
	int i;

	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;

		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (x->km.seq == seq &&
			    x->km.state == XFRM_STATE_ACQ) {
				xfrm_state_hold(x);
				return x;
			}
		}
	}
	return NULL;
}
1140
1141 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1142 {
1143         struct xfrm_state *x;
1144
1145         spin_lock_bh(&xfrm_state_lock);
1146         x = __xfrm_find_acq_byseq(seq);
1147         spin_unlock_bh(&xfrm_state_lock);
1148         return x;
1149 }
1150 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1151
1152 u32 xfrm_get_acqseq(void)
1153 {
1154         u32 res;
1155         static u32 acqseq;
1156         static DEFINE_SPINLOCK(acqseq_lock);
1157
1158         spin_lock_bh(&acqseq_lock);
1159         res = (++acqseq ? : ++acqseq);
1160         spin_unlock_bh(&acqseq_lock);
1161         return res;
1162 }
1163 EXPORT_SYMBOL(xfrm_get_acqseq);
1164
/* Assign an SPI to @x if it does not already have one.
 *
 * When minspi == maxspi the single requested value is used iff free.
 * Otherwise up to (maxspi - minspi + 1) random probes are made within
 * [minspi, maxspi] for an unused value.  On success the state is
 * linked into the byspi hash and km_waitq is woken; if no free SPI is
 * found, x->id.spi is simply left at zero.
 */
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
	unsigned int h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			/* Requested SPI already taken. */
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		/* Work on host-order bounds for the arithmetic below. */
		minspi = ntohl(minspi);
		maxspi = ntohl(maxspi);
		for (h=0; h<maxspi-minspi+1; h++) {
			spi = minspi + net_random()%(maxspi-minspi+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		/* Reference owned by the byspi hash chain. */
		xfrm_state_hold(x);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);
1205
/* Invoke @func on every state whose protocol matches @proto.
 *
 * A first pass counts the matches so @func receives a countdown
 * (count-1 ... 0) as its second argument; returns -ENOENT when
 * nothing matches.  The entire walk runs under xfrm_state_lock, so
 * @func must not sleep or retake that lock.  A nonzero return from
 * @func aborts the walk and is propagated to the caller.
 */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	struct hlist_node *entry;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	/* Pass 1: count matching states. */
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto))
				count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}

	/* Pass 2: visit them, counting down. */
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_id_proto_match(x->id.proto, proto))
				continue;
			err = func(x, --count, data);
			if (err)
				goto out;
		}
	}
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
1241
1242
/* Emit an XFRM_MSG_NEWAE replay-counter notification for @x when the
 * rules below warrant it, snapshot the counters into x->preplay, and
 * rearm the replay timer.
 */
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
	struct km_event c;
	/* we send notify messages in case
	 *  1. we updated one of the sequence numbers, and the seqno difference
	 *     is at least x->replay_maxdiff, in this case we also update the
	 *     timeout of our timer function
	 *  2. if x->replay_maxage has elapsed since last update,
	 *     and there were changes
	 *
	 *  The state structure must be locked!
	 */

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		/* Still below the diff threshold: stay quiet, unless a
		 * deferred timeout notification is pending. */
		if (x->replay_maxdiff &&
		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
			if (x->xflags & XFRM_TIME_DEFER)
				event = XFRM_REPLAY_TIMEOUT;
			else
				return;
		}

		break;

	case XFRM_REPLAY_TIMEOUT:
		/* Nothing changed since the last snapshot: defer. */
		if ((x->replay.seq == x->preplay.seq) &&
		    (x->replay.bitmap == x->preplay.bitmap) &&
		    (x->replay.oseq == x->preplay.oseq)) {
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}

		break;
	}

	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
		/* Timer was idle: it now owns a fresh reference. */
		xfrm_state_hold(x);
		x->xflags &= ~XFRM_TIME_DEFER;
	}
}
EXPORT_SYMBOL(xfrm_replay_notify);
1292
/* Replay (rtimer) expiry: if aevents are enabled, push a timeout
 * notification; otherwise just mark it deferred until the next
 * update.  Drops the reference taken when the timer was armed.
 */
static void xfrm_replay_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;

	spin_lock(&x->lock);

	if (x->km.state == XFRM_STATE_VALID) {
		if (xfrm_aevent_is_on())
			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
		else
			x->xflags |= XFRM_TIME_DEFER;
	}

	spin_unlock(&x->lock);
	xfrm_state_put(x);
}
1309
/* Validate inbound sequence number @seq (network byte order) against
 * the anti-replay window of @x.  Returns 0 if acceptable; -EINVAL if
 * the number is zero, older than the window, or already seen, bumping
 * the corresponding replay statistics counter.
 */
int xfrm_replay_check(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	if (unlikely(seq == 0))
		return -EINVAL;

	/* Ahead of the right window edge: always fresh. */
	if (likely(seq > x->replay.seq))
		return 0;

	diff = x->replay.seq - seq;
	if (diff >= x->props.replay_window) {
		/* Too old: fell off the left edge of the window. */
		x->stats.replay_window++;
		return -EINVAL;
	}

	if (x->replay.bitmap & (1U << diff)) {
		/* Duplicate within the window. */
		x->stats.replay++;
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_replay_check);
1335
/* Record inbound sequence number @seq (network byte order) in the
 * replay window after it passed xfrm_replay_check().  Slides the
 * bitmap left when @seq advances the right edge; otherwise marks the
 * bit for this older-but-in-window number.  Notifies key managers
 * when aevents are enabled.
 */
void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	if (seq > x->replay.seq) {
		diff = seq - x->replay.seq;
		if (diff < x->props.replay_window)
			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
		else
			x->replay.bitmap = 1;	/* jumped past the window */
		x->replay.seq = seq;
	} else {
		diff = x->replay.seq - seq;
		x->replay.bitmap |= (1U << diff);
	}

	if (xfrm_aevent_is_on())
		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
EXPORT_SYMBOL(xfrm_replay_advance);
1358
/* Registered key managers and the rwlock guarding the list. */
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
1361
1362 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1363 {
1364         struct xfrm_mgr *km;
1365
1366         read_lock(&xfrm_km_lock);
1367         list_for_each_entry(km, &xfrm_km_list, list)
1368                 if (km->notify_policy)
1369                         km->notify_policy(xp, dir, c);
1370         read_unlock(&xfrm_km_lock);
1371 }
1372
1373 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1374 {
1375         struct xfrm_mgr *km;
1376         read_lock(&xfrm_km_lock);
1377         list_for_each_entry(km, &xfrm_km_list, list)
1378                 if (km->notify)
1379                         km->notify(x, c);
1380         read_unlock(&xfrm_km_lock);
1381 }
1382
1383 EXPORT_SYMBOL(km_policy_notify);
1384 EXPORT_SYMBOL(km_state_notify);
1385
1386 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1387 {
1388         struct km_event c;
1389
1390         c.data.hard = hard;
1391         c.pid = pid;
1392         c.event = XFRM_MSG_EXPIRE;
1393         km_state_notify(x, &c);
1394
1395         if (hard)
1396                 wake_up(&km_waitq);
1397 }
1398
1399 EXPORT_SYMBOL(km_state_expired);
1400 /*
1401  * We send to all registered managers regardless of failure
1402  * We are happy with one success
1403 */
1404 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1405 {
1406         int err = -EINVAL, acqret;
1407         struct xfrm_mgr *km;
1408
1409         read_lock(&xfrm_km_lock);
1410         list_for_each_entry(km, &xfrm_km_list, list) {
1411                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1412                 if (!acqret)
1413                         err = acqret;
1414         }
1415         read_unlock(&xfrm_km_lock);
1416         return err;
1417 }
1418 EXPORT_SYMBOL(km_query);
1419
/* Report a NAT-T address/port change for @x to the key managers.
 * Managers are tried in registration order; the loop stops after the
 * first new_mapping() callback that reports success.
 */
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		if (km->new_mapping)
			err = km->new_mapping(x, ipaddr, sport);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_new_mapping);
1436
1437 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1438 {
1439         struct km_event c;
1440
1441         c.data.hard = hard;
1442         c.pid = pid;
1443         c.event = XFRM_MSG_POLEXPIRE;
1444         km_policy_notify(pol, dir, &c);
1445
1446         if (hard)
1447                 wake_up(&km_waitq);
1448 }
1449 EXPORT_SYMBOL(km_policy_expired);
1450
1451 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1452 {
1453         int err = -EINVAL;
1454         int ret;
1455         struct xfrm_mgr *km;
1456
1457         read_lock(&xfrm_km_lock);
1458         list_for_each_entry(km, &xfrm_km_list, list) {
1459                 if (km->report) {
1460                         ret = km->report(proto, sel, addr);
1461                         if (!ret)
1462                                 err = ret;
1463                 }
1464         }
1465         read_unlock(&xfrm_km_lock);
1466         return err;
1467 }
1468 EXPORT_SYMBOL(km_report);
1469
/* Handle the per-socket xfrm policy setsockopt: copy the policy blob
 * from userspace, let each registered key manager try to compile it,
 * and install the resulting per-socket policy.  A compile_policy()
 * result of err >= 0 carries the policy direction; negative err is an
 * error.
 */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		/* First manager that understands the blob wins. */
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		/* err holds the direction returned by compile_policy(). */
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
1509
/* Register a key manager for xfrm event callbacks.  Always succeeds. */
int xfrm_register_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_add_tail(&km->list, &xfrm_km_list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_register_km);
1518
/* Remove a previously registered key manager.  Always succeeds. */
int xfrm_unregister_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_del(&km->list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);
1527
1528 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1529 {
1530         int err = 0;
1531         if (unlikely(afinfo == NULL))
1532                 return -EINVAL;
1533         if (unlikely(afinfo->family >= NPROTO))
1534                 return -EAFNOSUPPORT;
1535         write_lock_bh(&xfrm_state_afinfo_lock);
1536         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1537                 err = -ENOBUFS;
1538         else
1539                 xfrm_state_afinfo[afinfo->family] = afinfo;
1540         write_unlock_bh(&xfrm_state_afinfo_lock);
1541         return err;
1542 }
1543 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1544
1545 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1546 {
1547         int err = 0;
1548         if (unlikely(afinfo == NULL))
1549                 return -EINVAL;
1550         if (unlikely(afinfo->family >= NPROTO))
1551                 return -EAFNOSUPPORT;
1552         write_lock_bh(&xfrm_state_afinfo_lock);
1553         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1554                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1555                         err = -EINVAL;
1556                 else
1557                         xfrm_state_afinfo[afinfo->family] = NULL;
1558         }
1559         write_unlock_bh(&xfrm_state_afinfo_lock);
1560         return err;
1561 }
1562 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1563
/* Look up the per-family state ops.  On success the afinfo read lock
 * is LEFT HELD and must be released with xfrm_state_put_afinfo();
 * on failure (bad family or empty slot) the lock is dropped and NULL
 * is returned.
 */
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
1575
/* Release the read lock held since a successful xfrm_state_get_afinfo(). */
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	read_unlock(&xfrm_state_afinfo_lock);
}
1580
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Detach @x from the tunnel state it rides on, dropping our user
 * count and reference.  When tunnel_users == 2 the tunnel state is
 * deleted as well -- presumably the two remaining users are the
 * tables and this state; TODO confirm against the tunnel code.
 */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1595
/*
 * This function is NOT optimal.  For example, with ESP it will give an
 * MTU that's usually two bytes short of being optimal.  However, it will
 * usually give an answer that's a multiple of 4 provided the input is
 * also a multiple of 4.
 */
/* Compute the largest payload size whose transformed packet fits in
 * @mtu: shrink a candidate iteratively until the type's
 * get_max_size() (or a plain header_len addition for non-VALID
 * states) no longer exceeds @mtu.  Never returns less than 68.
 */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
	int res = mtu;

	res -= x->props.header_len;

	for (;;) {
		int m = res;

		if (m < 68)
			return 68;

		spin_lock_bh(&x->lock);
		if (x->km.state == XFRM_STATE_VALID &&
		    x->type && x->type->get_max_size)
			m = x->type->get_max_size(x, m);
		else
			m += x->props.header_len;
		spin_unlock_bh(&x->lock);

		if (m <= mtu)
			break;
		/* Overshot: shrink by the excess and try again. */
		res -= (m - mtu);
	}

	return res;
}
1629
1630 int xfrm_init_state(struct xfrm_state *x)
1631 {
1632         struct xfrm_state_afinfo *afinfo;
1633         int family = x->props.family;
1634         int err;
1635
1636         err = -EAFNOSUPPORT;
1637         afinfo = xfrm_state_get_afinfo(family);
1638         if (!afinfo)
1639                 goto error;
1640
1641         err = 0;
1642         if (afinfo->init_flags)
1643                 err = afinfo->init_flags(x);
1644
1645         xfrm_state_put_afinfo(afinfo);
1646
1647         if (err)
1648                 goto error;
1649
1650         err = -EPROTONOSUPPORT;
1651         x->type = xfrm_get_type(x->id.proto, family);
1652         if (x->type == NULL)
1653                 goto error;
1654
1655         err = x->type->init_state(x);
1656         if (err)
1657                 goto error;
1658
1659         x->mode = xfrm_get_mode(x->props.mode, family);
1660         if (x->mode == NULL)
1661                 goto error;
1662
1663         x->km.state = XFRM_STATE_VALID;
1664
1665 error:
1666         return err;
1667 }
1668
1669 EXPORT_SYMBOL(xfrm_init_state);
1670  
/* Boot-time initialization: allocate the three state hash tables
 * (8 buckets each initially; the tables are sized dynamically later)
 * and set up the state garbage-collection work.  Panics on allocation
 * failure since xfrm cannot operate without its tables.
 */
void __init xfrm_state_init(void)
{
	unsigned int sz;

	sz = sizeof(struct hlist_head) * 8;

	xfrm_state_bydst = xfrm_state_hash_alloc(sz);
	xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
	xfrm_state_byspi = xfrm_state_hash_alloc(sz);
	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
	/* hmask = bucket count - 1, used as the hash modulus mask. */
	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
}
1686