]> Pileus Git - ~andy/linux/blob - net/xfrm/xfrm_state.c
[XFRM]: Put more keys into destination hash function.
[~andy/linux] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/bootmem.h>
22 #include <linux/vmalloc.h>
23 #include <linux/cache.h>
24 #include <asm/uaccess.h>
25
/* Netlink socket used by the xfrm netlink interface; exported for af_key/xfrm_user. */
struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);

/* Sysctl defaults for async (aevent) notifications; see XFRM_AE_ETIME. */
u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

/* Sysctl default replay-sequence threshold; see XFRM_AE_SEQT_SIZE. */
u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35 /* Each xfrm_state may be linked to two tables:
36
37    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
39       destination/tunnel endpoint. (output)
40  */
41
/* Protects all three state hash tables plus xfrm_state_num/genid below. */
static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct hlist_head *xfrm_state_bydst __read_mostly;
static struct hlist_head *xfrm_state_bysrc __read_mostly;	/* keyed by source address */
static struct hlist_head *xfrm_state_byspi __read_mostly;	/* keyed by (daddr, spi, proto) */
static unsigned int xfrm_state_hmask __read_mostly;		/* bucket count - 1 */
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;	/* growth ceiling */
static unsigned int xfrm_state_num;	/* number of installed states */
static unsigned int xfrm_state_genid;	/* bumped on every insert */
57
/* Hash contribution of an IPv4 address: the address in host order. */
static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
{
        return ntohl(addr->a4);
}
62
/* Hash contribution of an IPv6 address: fold the low 64 bits (words 2, 3). */
static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
{
        return ntohl(addr->a6[2]^addr->a6[3]);
}
67
68 static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr,
69                                            u32 reqid, unsigned short family,
70                                            unsigned int hmask)
71 {
72         unsigned int h = family ^ reqid;
73         switch (family) {
74         case AF_INET:
75                 h ^= __xfrm4_addr_hash(addr);
76                 break;
77         case AF_INET6:
78                 h ^= __xfrm6_addr_hash(addr);
79                 break;
80         };
81         return (h ^ (h >> 16)) & hmask;
82 }
83
/* bydst bucket for the current global table size. */
static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid,
                                         unsigned short family)
{
        return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask);
}
89
90 static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,
91                                        unsigned int hmask)
92 {
93         unsigned int h = family;
94         switch (family) {
95         case AF_INET:
96                 h ^= __xfrm4_addr_hash(addr);
97                 break;
98         case AF_INET6:
99                 h ^= __xfrm6_addr_hash(addr);
100                 break;
101         };
102         return (h ^ (h >> 16)) & hmask;
103 }
104
/* bysrc bucket for the current global table size. */
static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
{
        return __xfrm_src_hash(addr, family, xfrm_state_hmask);
}
109
/* byspi bucket for an IPv4 SA: mix daddr, SPI and protocol, then fold. */
static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
                                        unsigned int hmask)
{
        unsigned int h;
        h = ntohl(addr->a4^spi^proto);
        h = (h ^ (h>>10) ^ (h>>20)) & hmask;
        return h;
}
118
/* byspi bucket for an IPv6 SA: mix low 64 addr bits, SPI and protocol. */
static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
                                            unsigned int hmask)
{
        unsigned int h;
        h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
        h = (h ^ (h>>10) ^ (h>>20)) & hmask;
        return h;
}
127
128 static inline
129 unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
130                          unsigned int hmask)
131 {
132         switch (family) {
133         case AF_INET:
134                 return __xfrm4_spi_hash(addr, spi, proto, hmask);
135         case AF_INET6:
136                 return __xfrm6_spi_hash(addr, spi, proto, hmask);
137         }
138         return 0;       /*XXX*/
139 }
140
/* byspi bucket for the current global table size. */
static inline unsigned int
xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
{
        return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
}
146
147 static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
148 {
149         struct hlist_head *n;
150
151         if (sz <= PAGE_SIZE)
152                 n = kmalloc(sz, GFP_KERNEL);
153         else if (hashdist)
154                 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
155         else
156                 n = (struct hlist_head *)
157                         __get_free_pages(GFP_KERNEL, get_order(sz));
158
159         if (n)
160                 memset(n, 0, sz);
161
162         return n;
163 }
164
/* Release a table from xfrm_state_hash_alloc(); @sz must match the
 * allocation size so the same branch (kfree/vfree/free_pages) is taken.
 */
static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
{
        if (sz <= PAGE_SIZE)
                kfree(n);
        else if (hashdist)
                vfree(n);
        else
                free_pages((unsigned long)n, get_order(sz));
}
174
/* Rehash every state on one old bydst chain into the three new tables.
 * The bydst chain reaches each state exactly once; _safe iteration is
 * needed because hlist_add_head() rewrites the node links as we walk.
 * Caller holds xfrm_state_lock; the old tables are freed wholesale
 * afterwards, so stale links left behind do not matter.
 */
static void xfrm_hash_transfer(struct hlist_head *list,
                               struct hlist_head *ndsttable,
                               struct hlist_head *nsrctable,
                               struct hlist_head *nspitable,
                               unsigned int nhashmask)
{
        struct hlist_node *entry, *tmp;
        struct xfrm_state *x;

        hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
                unsigned int h;

                h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid,
                                    x->props.family, nhashmask);
                hlist_add_head(&x->bydst, ndsttable+h);

                h = __xfrm_src_hash(&x->props.saddr, x->props.family,
                                    nhashmask);
                hlist_add_head(&x->bysrc, nsrctable+h);

                h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
                                    x->props.family, nhashmask);
                hlist_add_head(&x->byspi, nspitable+h);
        }
}
200
201 static unsigned long xfrm_hash_new_size(void)
202 {
203         return ((xfrm_state_hmask + 1) << 1) *
204                 sizeof(struct hlist_head);
205 }
206
/* Serializes concurrent resize requests from the workqueue. */
static DEFINE_MUTEX(hash_resize_mutex);

/* Double all three state hash tables.  The new tables are allocated
 * up front (bailing out on failure), states are moved while holding
 * xfrm_state_lock, and the old tables are freed after the lock is
 * dropped.
 */
static void xfrm_hash_resize(void *__unused)
{
        struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
        unsigned long nsize, osize;
        unsigned int nhashmask, ohashmask;
        int i;

        mutex_lock(&hash_resize_mutex);

        nsize = xfrm_hash_new_size();
        ndst = xfrm_state_hash_alloc(nsize);
        if (!ndst)
                goto out_unlock;
        nsrc = xfrm_state_hash_alloc(nsize);
        if (!nsrc) {
                xfrm_state_hash_free(ndst, nsize);
                goto out_unlock;
        }
        nspi = xfrm_state_hash_alloc(nsize);
        if (!nspi) {
                xfrm_state_hash_free(ndst, nsize);
                xfrm_state_hash_free(nsrc, nsize);
                goto out_unlock;
        }

        spin_lock_bh(&xfrm_state_lock);

        nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
        /* Buckets are indexed 0..hmask inclusive. */
        for (i = xfrm_state_hmask; i >= 0; i--)
                xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
                                   nhashmask);

        odst = xfrm_state_bydst;
        osrc = xfrm_state_bysrc;
        ospi = xfrm_state_byspi;
        ohashmask = xfrm_state_hmask;

        xfrm_state_bydst = ndst;
        xfrm_state_bysrc = nsrc;
        xfrm_state_byspi = nspi;
        xfrm_state_hmask = nhashmask;

        spin_unlock_bh(&xfrm_state_lock);

        osize = (ohashmask + 1) * sizeof(struct hlist_head);
        xfrm_state_hash_free(odst, osize);
        xfrm_state_hash_free(osrc, osize);
        xfrm_state_hash_free(ospi, osize);

out_unlock:
        mutex_unlock(&hash_resize_mutex);
}
261
/* Deferred table growth, scheduled from __xfrm_state_insert(). */
static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);

/* Key managers and flush/GC paths sleep/wake on state changes here. */
DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

/* Deferred destruction of dead states (see __xfrm_state_destroy). */
static struct work_struct xfrm_state_gc_work;
static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

/* When set, the GC task also flushes cached dst bundles. */
static int xfrm_state_gc_flush_bundles;

int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
283
284 static void xfrm_state_gc_destroy(struct xfrm_state *x)
285 {
286         if (del_timer(&x->timer))
287                 BUG();
288         if (del_timer(&x->rtimer))
289                 BUG();
290         kfree(x->aalg);
291         kfree(x->ealg);
292         kfree(x->calg);
293         kfree(x->encap);
294         kfree(x->coaddr);
295         if (x->mode)
296                 xfrm_put_mode(x->mode);
297         if (x->type) {
298                 x->type->destructor(x);
299                 xfrm_put_type(x->type);
300         }
301         security_xfrm_state_free(x);
302         kfree(x);
303 }
304
/* GC worker: optionally flush dst bundles, then splice the pending
 * list under the GC lock and destroy each state outside it.
 */
static void xfrm_state_gc_task(void *data)
{
        struct xfrm_state *x;
        struct hlist_node *entry, *tmp;
        struct hlist_head gc_list;

        if (xfrm_state_gc_flush_bundles) {
                xfrm_state_gc_flush_bundles = 0;
                xfrm_flush_bundles();
        }

        spin_lock_bh(&xfrm_state_gc_lock);
        gc_list.first = xfrm_state_gc_list.first;
        INIT_HLIST_HEAD(&xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);

        /* Dead states were queued via their bydst node
         * (see __xfrm_state_destroy), so walk gc_list through bydst.
         */
        hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
                xfrm_state_gc_destroy(x);

        wake_up(&km_waitq);
}
326
327 static inline unsigned long make_jiffies(long secs)
328 {
329         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
330                 return MAX_SCHEDULE_TIMEOUT-1;
331         else
332                 return secs*HZ;
333 }
334
/* Per-state lifetime timer.  Checks hard expiries (which kill the
 * state), then soft expiries (which only warn the key manager), and
 * re-arms itself for the nearest future deadline.  Holds x->lock
 * throughout; the trailing xfrm_state_put drops the reference the
 * pending timer held.
 */
static void xfrm_timer_handler(unsigned long data)
{
        struct xfrm_state *x = (struct xfrm_state*)data;
        unsigned long now = (unsigned long)xtime.tv_sec;
        long next = LONG_MAX;   /* seconds until the nearest deadline */
        int warn = 0;

        spin_lock(&x->lock);
        if (x->km.state == XFRM_STATE_DEAD)
                goto out;
        if (x->km.state == XFRM_STATE_EXPIRED)
                goto expired;
        if (x->lft.hard_add_expires_seconds) {
                long tmo = x->lft.hard_add_expires_seconds +
                        x->curlft.add_time - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        if (x->lft.hard_use_expires_seconds) {
                /* (a ? : b) — never-used states count from now. */
                long tmo = x->lft.hard_use_expires_seconds +
                        (x->curlft.use_time ? : now) - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        /* Soft expiry already signalled earlier; just reschedule. */
        if (x->km.dying)
                goto resched;
        if (x->lft.soft_add_expires_seconds) {
                long tmo = x->lft.soft_add_expires_seconds +
                        x->curlft.add_time - now;
                if (tmo <= 0)
                        warn = 1;
                else if (tmo < next)
                        next = tmo;
        }
        if (x->lft.soft_use_expires_seconds) {
                long tmo = x->lft.soft_use_expires_seconds +
                        (x->curlft.use_time ? : now) - now;
                if (tmo <= 0)
                        warn = 1;
                else if (tmo < next)
                        next = tmo;
        }

        x->km.dying = warn;
        if (warn)
                km_state_expired(x, 0, 0);
resched:
        /* mod_timer returning 0 means the timer was idle, so the
         * newly-pending timer needs its own reference.
         */
        if (next != LONG_MAX &&
            !mod_timer(&x->timer, jiffies + make_jiffies(next)))
                xfrm_state_hold(x);
        goto out;

expired:
        /* Larval (ACQ, no SPI) states just flip to EXPIRED and poll. */
        if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
                x->km.state = XFRM_STATE_EXPIRED;
                wake_up(&km_waitq);
                next = 2;
                goto resched;
        }
        if (!__xfrm_state_delete(x) && x->id.spi)
                km_state_expired(x, 1, 0);

out:
        spin_unlock(&x->lock);
        xfrm_state_put(x);
}
405
406 static void xfrm_replay_timer_handler(unsigned long data);
407
408 struct xfrm_state *xfrm_state_alloc(void)
409 {
410         struct xfrm_state *x;
411
412         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
413
414         if (x) {
415                 atomic_set(&x->refcnt, 1);
416                 atomic_set(&x->tunnel_users, 0);
417                 INIT_HLIST_NODE(&x->bydst);
418                 INIT_HLIST_NODE(&x->bysrc);
419                 INIT_HLIST_NODE(&x->byspi);
420                 init_timer(&x->timer);
421                 x->timer.function = xfrm_timer_handler;
422                 x->timer.data     = (unsigned long)x;
423                 init_timer(&x->rtimer);
424                 x->rtimer.function = xfrm_replay_timer_handler;
425                 x->rtimer.data     = (unsigned long)x;
426                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
427                 x->lft.soft_byte_limit = XFRM_INF;
428                 x->lft.soft_packet_limit = XFRM_INF;
429                 x->lft.hard_byte_limit = XFRM_INF;
430                 x->lft.hard_packet_limit = XFRM_INF;
431                 x->replay_maxage = 0;
432                 x->replay_maxdiff = 0;
433                 spin_lock_init(&x->lock);
434         }
435         return x;
436 }
437 EXPORT_SYMBOL(xfrm_state_alloc);
438
/* Queue a dead (already unhashed) state for deferred destruction by
 * the GC worker.  The bydst node is reused as the GC-list link, which
 * is safe because the state is no longer in any hash table.
 */
void __xfrm_state_destroy(struct xfrm_state *x)
{
        BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

        spin_lock_bh(&xfrm_state_gc_lock);
        hlist_add_head(&x->bydst, &xfrm_state_gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);
        schedule_work(&xfrm_state_gc_work);
}
448 EXPORT_SYMBOL(__xfrm_state_destroy);
449
/* Mark a state dead and unlink it from all hash tables and timers,
 * dropping the reference each of those held.  Caller holds x->lock
 * and its own reference.  Returns 0 if this call performed the
 * delete, -ESRCH if the state was already dead.
 */
int __xfrm_state_delete(struct xfrm_state *x)
{
        int err = -ESRCH;

        if (x->km.state != XFRM_STATE_DEAD) {
                x->km.state = XFRM_STATE_DEAD;
                spin_lock(&xfrm_state_lock);
                hlist_del(&x->bydst);
                __xfrm_state_put(x);
                hlist_del(&x->bysrc);
                __xfrm_state_put(x);
                if (x->id.spi) {
                        hlist_del(&x->byspi);
                        __xfrm_state_put(x);
                }
                xfrm_state_num--;
                spin_unlock(&xfrm_state_lock);
                /* del_timer != 0 means a timer was pending and owned
                 * a reference; drop it.
                 */
                if (del_timer(&x->timer))
                        __xfrm_state_put(x);
                if (del_timer(&x->rtimer))
                        __xfrm_state_put(x);

                /* The number two in this test is the reference
                 * mentioned in the comment below plus the reference
                 * our caller holds.  A larger value means that
                 * there are DSTs attached to this xfrm_state.
                 */
                if (atomic_read(&x->refcnt) > 2) {
                        xfrm_state_gc_flush_bundles = 1;
                        schedule_work(&xfrm_state_gc_work);
                }

                /* All xfrm_state objects are created by xfrm_state_alloc.
                 * The xfrm_state_alloc call gives a reference, and that
                 * is what we are dropping here.
                 */
                __xfrm_state_put(x);
                err = 0;
        }

        return err;
}
492 EXPORT_SYMBOL(__xfrm_state_delete);
493
494 int xfrm_state_delete(struct xfrm_state *x)
495 {
496         int err;
497
498         spin_lock_bh(&x->lock);
499         err = __xfrm_state_delete(x);
500         spin_unlock_bh(&x->lock);
501
502         return err;
503 }
504 EXPORT_SYMBOL(xfrm_state_delete);
505
506 void xfrm_state_flush(u8 proto)
507 {
508         int i;
509
510         spin_lock_bh(&xfrm_state_lock);
511         for (i = 0; i < xfrm_state_hmask; i++) {
512                 struct hlist_node *entry;
513                 struct xfrm_state *x;
514 restart:
515                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
516                         if (!xfrm_state_kern(x) &&
517                             xfrm_id_proto_match(x->id.proto, proto)) {
518                                 xfrm_state_hold(x);
519                                 spin_unlock_bh(&xfrm_state_lock);
520
521                                 xfrm_state_delete(x);
522                                 xfrm_state_put(x);
523
524                                 spin_lock_bh(&xfrm_state_lock);
525                                 goto restart;
526                         }
527                 }
528         }
529         spin_unlock_bh(&xfrm_state_lock);
530         wake_up(&km_waitq);
531 }
532 EXPORT_SYMBOL(xfrm_state_flush);
533
534 static int
535 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
536                   struct xfrm_tmpl *tmpl,
537                   xfrm_address_t *daddr, xfrm_address_t *saddr,
538                   unsigned short family)
539 {
540         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
541         if (!afinfo)
542                 return -1;
543         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
544         xfrm_state_put_afinfo(afinfo);
545         return 0;
546 }
547
/* Find a state by (daddr, spi, proto) in the byspi table.  Caller
 * holds xfrm_state_lock.  Returns a held reference or NULL.
 */
static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
{
        unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
        struct xfrm_state *x;
        struct hlist_node *entry;

        hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
                if (x->props.family != family ||
                    x->id.spi       != spi ||
                    x->id.proto     != proto)
                        continue;

                /* Family-specific full address compare. */
                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4 != daddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)daddr,
                                             (struct in6_addr *)
                                             x->id.daddr.a6))
                                continue;
                        break;
                };

                xfrm_state_hold(x);
                return x;
        }

        return NULL;
}
579
/* Find a state by (daddr, saddr, proto), hashing on the source address
 * (bysrc table) but matching both addresses.  Caller holds
 * xfrm_state_lock.  Returns a held reference or NULL.
 */
static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
{
        unsigned int h = xfrm_src_hash(saddr, family);
        struct xfrm_state *x;
        struct hlist_node *entry;

        hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
                if (x->props.family != family ||
                    x->id.proto     != proto)
                        continue;

                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4 != daddr->a4 ||
                            x->props.saddr.a4 != saddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)daddr,
                                             (struct in6_addr *)
                                             x->id.daddr.a6) ||
                            !ipv6_addr_equal((struct in6_addr *)saddr,
                                             (struct in6_addr *)
                                             x->props.saddr.a6))
                                continue;
                        break;
                };

                xfrm_state_hold(x);
                return x;
        }

        return NULL;
}
614
615 static inline struct xfrm_state *
616 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
617 {
618         if (use_spi)
619                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
620                                            x->id.proto, family);
621         else
622                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
623                                                   &x->props.saddr,
624                                                   x->id.proto, family);
625 }
626
/* Resolve the best state for an outgoing flow matching template @tmpl
 * under policy @pol.  On miss (and when no acquire is in flight),
 * create a larval XFRM_STATE_ACQ state and ask the key manager to
 * negotiate it.  Returns a held state, or NULL with *err set
 * (-EAGAIN while an acquire is pending).
 */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                struct flowi *fl, struct xfrm_tmpl *tmpl,
                struct xfrm_policy *pol, int *err,
                unsigned short family)
{
        unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family);
        struct hlist_node *entry;
        struct xfrm_state *x, *x0;
        int acquire_in_progress = 0;
        int error = 0;
        struct xfrm_state *best = NULL;

        spin_lock_bh(&xfrm_state_lock);
        hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                if (x->props.family == family &&
                    x->props.reqid == tmpl->reqid &&
                    !(x->props.flags & XFRM_STATE_WILDRECV) &&
                    xfrm_state_addr_check(x, daddr, saddr, family) &&
                    tmpl->mode == x->props.mode &&
                    tmpl->id.proto == x->id.proto &&
                    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
                        /* Resolution logic:
                           1. There is a valid state with matching selector.
                              Done.
                           2. Valid state with inappropriate selector. Skip.

                           Entering area of "sysdeps".

                           3. If state is not valid, selector is temporary,
                              it selects only session which triggered
                              previous resolution. Key manager will do
                              something to install a state with proper
                              selector.
                         */
                        if (x->km.state == XFRM_STATE_VALID) {
                                if (!xfrm_selector_match(&x->sel, fl, family) ||
                                    !security_xfrm_state_pol_flow_match(x, pol, fl))
                                        continue;
                                /* Prefer non-dying, then newest. */
                                if (!best ||
                                    best->km.dying > x->km.dying ||
                                    (best->km.dying == x->km.dying &&
                                     best->curlft.add_time < x->curlft.add_time))
                                        best = x;
                        } else if (x->km.state == XFRM_STATE_ACQ) {
                                acquire_in_progress = 1;
                        } else if (x->km.state == XFRM_STATE_ERROR ||
                                   x->km.state == XFRM_STATE_EXPIRED) {
                                if (xfrm_selector_match(&x->sel, fl, family) &&
                                    security_xfrm_state_pol_flow_match(x, pol, fl))
                                        error = -ESRCH;
                        }
                }
        }

        x = best;
        if (!x && !error && !acquire_in_progress) {
                /* A fixed-SPI template that already has a state is a
                 * conflict, not a miss.
                 */
                if (tmpl->id.spi &&
                    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
                                              tmpl->id.proto, family)) != NULL) {
                        xfrm_state_put(x0);
                        error = -EEXIST;
                        goto out;
                }
                x = xfrm_state_alloc();
                if (x == NULL) {
                        error = -ENOMEM;
                        goto out;
                }
                /* Initialize temporary selector matching only
                 * to current session. */
                xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

                error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
                if (error) {
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
                        x = NULL;
                        goto out;
                }

                if (km_query(x, tmpl, pol) == 0) {
                        /* Hash the larval state; each table link and
                         * the pending timer holds a reference.
                         */
                        x->km.state = XFRM_STATE_ACQ;
                        hlist_add_head(&x->bydst, xfrm_state_bydst+h);
                        xfrm_state_hold(x);
                        h = xfrm_src_hash(saddr, family);
                        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                        xfrm_state_hold(x);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
                                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
                                xfrm_state_hold(x);
                        }
                        x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
                        xfrm_state_hold(x);
                        x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
                        add_timer(&x->timer);
                } else {
                        x->km.state = XFRM_STATE_DEAD;
                        xfrm_state_put(x);
                        x = NULL;
                        error = -ESRCH;
                }
        }
out:
        if (x)
                xfrm_state_hold(x);
        else
                *err = acquire_in_progress ? -EAGAIN : error;
        spin_unlock_bh(&xfrm_state_lock);
        return x;
}
739
/* Link @x into all hash tables and arm its timers.  Caller holds
 * xfrm_state_lock.  Each table link and each pending timer takes its
 * own reference.  May schedule a deferred table grow.
 */
static void __xfrm_state_insert(struct xfrm_state *x)
{
        unsigned int h;

        x->genid = ++xfrm_state_genid;

        h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family);
        hlist_add_head(&x->bydst, xfrm_state_bydst+h);
        xfrm_state_hold(x);

        h = xfrm_src_hash(&x->props.saddr, x->props.family);
        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
        xfrm_state_hold(x);

        if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
                                  x->props.family);

                hlist_add_head(&x->byspi, xfrm_state_byspi+h);
                xfrm_state_hold(x);
        }

        /* Kick the lifetime timer shortly to evaluate expiries;
         * mod_timer returning 0 means it was idle, so hold a ref.
         */
        if (!mod_timer(&x->timer, jiffies + HZ))
                xfrm_state_hold(x);

        if (x->replay_maxage &&
            !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
                xfrm_state_hold(x);

        wake_up(&km_waitq);

        xfrm_state_num++;

        /* Grow the tables when this chain collided, load factor
         * exceeds one state per bucket, and we are under the cap.
         */
        if (x->bydst.next != NULL &&
            (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
            xfrm_state_num > xfrm_state_hmask)
                schedule_work(&xfrm_hash_work);
}
778
/* Insert a fully configured state under xfrm_state_lock, then drop
 * cached bundles so the new SA can take effect.
 */
void xfrm_state_insert(struct xfrm_state *x)
{
        spin_lock_bh(&xfrm_state_lock);
        __xfrm_state_insert(x);
        spin_unlock_bh(&xfrm_state_lock);

        xfrm_flush_all_bundles();
}
787 EXPORT_SYMBOL(xfrm_state_insert);
788
/* xfrm_state_lock is held.
 * Find a larval (XFRM_STATE_ACQ, spi == 0) state matching the given
 * key; when none exists and @create is set, allocate, initialize and
 * hash a new one with an acquire-expiry timer.  Returns a held
 * reference or NULL.
 */
static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
{
        unsigned int h = xfrm_dst_hash(daddr, reqid, family);
        struct hlist_node *entry;
        struct xfrm_state *x;

        hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
                if (x->props.reqid  != reqid ||
                    x->props.mode   != mode ||
                    x->props.family != family ||
                    x->km.state     != XFRM_STATE_ACQ ||
                    x->id.spi       != 0)
                        continue;

                switch (family) {
                case AF_INET:
                        if (x->id.daddr.a4    != daddr->a4 ||
                            x->props.saddr.a4 != saddr->a4)
                                continue;
                        break;
                case AF_INET6:
                        if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
                                             (struct in6_addr *)daddr) ||
                            !ipv6_addr_equal((struct in6_addr *)
                                             x->props.saddr.a6,
                                             (struct in6_addr *)saddr))
                                continue;
                        break;
                };

                xfrm_state_hold(x);
                return x;
        }

        if (!create)
                return NULL;

        x = xfrm_state_alloc();
        if (likely(x)) {
                /* Temporary selector matches exactly this host pair. */
                switch (family) {
                case AF_INET:
                        x->sel.daddr.a4 = daddr->a4;
                        x->sel.saddr.a4 = saddr->a4;
                        x->sel.prefixlen_d = 32;
                        x->sel.prefixlen_s = 32;
                        x->props.saddr.a4 = saddr->a4;
                        x->id.daddr.a4 = daddr->a4;
                        break;

                case AF_INET6:
                        ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
                                       (struct in6_addr *)daddr);
                        ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
                                       (struct in6_addr *)saddr);
                        x->sel.prefixlen_d = 128;
                        x->sel.prefixlen_s = 128;
                        ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
                                       (struct in6_addr *)saddr);
                        ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
                                       (struct in6_addr *)daddr);
                        break;
                };

                x->km.state = XFRM_STATE_ACQ;
                x->id.proto = proto;
                x->props.family = family;
                x->props.mode = mode;
                x->props.reqid = reqid;
                x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
                /* One reference for the timer, one per hash link. */
                xfrm_state_hold(x);
                x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
                add_timer(&x->timer);
                xfrm_state_hold(x);
                hlist_add_head(&x->bydst, xfrm_state_bydst+h);
                h = xfrm_src_hash(saddr, family);
                xfrm_state_hold(x);
                hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                wake_up(&km_waitq);
        }

        return x;
}
872
873 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
874
/* Add a new, fully keyed SA to the global state tables.
 *
 * Returns -EEXIST if an equivalent SA is already installed.  If a
 * larval (ACQ) state exists for the same acquire -- located first by
 * km.seq, then by (mode, reqid, proto, daddr, saddr) -- it is deleted
 * after the new SA has been inserted, replacing the pending acquire.
 *
 * Returns 0 on success or a negative errno.
 */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	/* presumably true for protocols that carry an SPI -- TODO confirm
	 * against xfrm_id_proto_match() */
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	/* Reject exact duplicates. */
	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	/* Find the larval ACQ state this SA resolves, if any. */
	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		/* seq matched but destination differs: not our acquire */
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (!err)
		xfrm_flush_all_bundles();

	/* Retire the superseded larval state outside the table lock. */
	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
924
/* Update an installed SA in place with data from @x.
 *
 * Locates the installed twin of @x.  If the twin is a larval ACQ
 * state, @x is inserted to replace it and the larval state is deleted.
 * Otherwise the installed state's encap, care-of address, selector and
 * lifetime are refreshed from @x under the state's own lock.
 *
 * Returns 0 on success, -ESRCH if no matching state exists, -EEXIST if
 * the match is kernel-owned, -EINVAL if the match is not VALID.
 */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	/* Kernel-owned states (e.g. referenced by a tunnel) may not be
	 * replaced from here. */
	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	/* A larval state is superseded wholesale: insert @x and flag
	 * (via x == NULL below) that x1 must be deleted. */
	if (x1->km.state == XFRM_STATE_ACQ) {
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		/* Replaced a larval state: drop it and we are done. */
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		/* selector only matters for SPI-less protocols */
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		/* re-arm the state timer; take a reference if it was
		 * not already pending */
		if (!mod_timer(&x1->timer, jiffies + HZ))
			xfrm_state_hold(x1);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
989
/* Check @x's lifetime counters against its configured limits.
 *
 * Stamps the first-use time on first call.  If a hard byte/packet
 * limit is reached the state is marked EXPIRED and its timer is fired
 * immediately; returns -EINVAL (also for any non-VALID state).
 * Crossing a soft limit notifies key managers once (km.dying latches).
 */
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		x->km.state = XFRM_STATE_EXPIRED;
		/* fire the timer now; hold a ref if it was idle */
		if (!mod_timer(&x->timer, jiffies))
			xfrm_state_hold(x);
		return -EINVAL;
	}

	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit)) {
		/* soft expiry: warn key managers exactly once */
		x->km.dying = 1;
		km_state_expired(x, 0, 0);
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
1015
1016 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1017 {
1018         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1019                 - skb_headroom(skb);
1020
1021         if (nhead > 0)
1022                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1023
1024         /* Check tail too... */
1025         return 0;
1026 }
1027
/* Pre-output check: verify @x has not expired, then ensure @skb has
 * room for the transform header. */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err;

	err = xfrm_state_check_expire(x);
	if (err < 0)
		return err;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
1038
1039 struct xfrm_state *
1040 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
1041                   unsigned short family)
1042 {
1043         struct xfrm_state *x;
1044
1045         spin_lock_bh(&xfrm_state_lock);
1046         x = __xfrm_state_lookup(daddr, spi, proto, family);
1047         spin_unlock_bh(&xfrm_state_lock);
1048         return x;
1049 }
1050 EXPORT_SYMBOL(xfrm_state_lookup);
1051
1052 struct xfrm_state *
1053 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1054                          u8 proto, unsigned short family)
1055 {
1056         struct xfrm_state *x;
1057
1058         spin_lock_bh(&xfrm_state_lock);
1059         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1060         spin_unlock_bh(&xfrm_state_lock);
1061         return x;
1062 }
1063 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1064
1065 struct xfrm_state *
1066 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
1067               xfrm_address_t *daddr, xfrm_address_t *saddr, 
1068               int create, unsigned short family)
1069 {
1070         struct xfrm_state *x;
1071
1072         spin_lock_bh(&xfrm_state_lock);
1073         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1074         spin_unlock_bh(&xfrm_state_lock);
1075
1076         return x;
1077 }
1078 EXPORT_SYMBOL(xfrm_find_acq);
1079
1080 #ifdef CONFIG_XFRM_SUB_POLICY
1081 int
1082 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1083                unsigned short family)
1084 {
1085         int err = 0;
1086         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1087         if (!afinfo)
1088                 return -EAFNOSUPPORT;
1089
1090         spin_lock_bh(&xfrm_state_lock);
1091         if (afinfo->tmpl_sort)
1092                 err = afinfo->tmpl_sort(dst, src, n);
1093         spin_unlock_bh(&xfrm_state_lock);
1094         xfrm_state_put_afinfo(afinfo);
1095         return err;
1096 }
1097 EXPORT_SYMBOL(xfrm_tmpl_sort);
1098
1099 int
1100 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1101                 unsigned short family)
1102 {
1103         int err = 0;
1104         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1105         if (!afinfo)
1106                 return -EAFNOSUPPORT;
1107
1108         spin_lock_bh(&xfrm_state_lock);
1109         if (afinfo->state_sort)
1110                 err = afinfo->state_sort(dst, src, n);
1111         spin_unlock_bh(&xfrm_state_lock);
1112         xfrm_state_put_afinfo(afinfo);
1113         return err;
1114 }
1115 EXPORT_SYMBOL(xfrm_state_sort);
1116 #endif
1117
1118 /* Silly enough, but I'm lazy to build resolution list */
1119
1120 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1121 {
1122         int i;
1123
1124         for (i = 0; i <= xfrm_state_hmask; i++) {
1125                 struct hlist_node *entry;
1126                 struct xfrm_state *x;
1127
1128                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1129                         if (x->km.seq == seq &&
1130                             x->km.state == XFRM_STATE_ACQ) {
1131                                 xfrm_state_hold(x);
1132                                 return x;
1133                         }
1134                 }
1135         }
1136         return NULL;
1137 }
1138
1139 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1140 {
1141         struct xfrm_state *x;
1142
1143         spin_lock_bh(&xfrm_state_lock);
1144         x = __xfrm_find_acq_byseq(seq);
1145         spin_unlock_bh(&xfrm_state_lock);
1146         return x;
1147 }
1148 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1149
1150 u32 xfrm_get_acqseq(void)
1151 {
1152         u32 res;
1153         static u32 acqseq;
1154         static DEFINE_SPINLOCK(acqseq_lock);
1155
1156         spin_lock_bh(&acqseq_lock);
1157         res = (++acqseq ? : ++acqseq);
1158         spin_unlock_bh(&acqseq_lock);
1159         return res;
1160 }
1161 EXPORT_SYMBOL(xfrm_get_acqseq);
1162
/* Assign an SPI to @x from [@minspi, @maxspi] and hash it by SPI.
 *
 * No-op if the state already has an SPI.  With minspi == maxspi the
 * single value is used iff it is free; otherwise up to range-size
 * random probes are made (duplicate picks are possible, so allocation
 * may fail even while free SPIs remain).  On success the state is
 * added to the byspi hash and km_waitq is woken.
 */
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
	unsigned int h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			/* requested SPI already in use */
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		/* bounds arrive in network order; probe in host order */
		minspi = ntohl(minspi);
		maxspi = ntohl(maxspi);
		for (h=0; h<maxspi-minspi+1; h++) {
			spi = minspi + net_random()%(maxspi-minspi+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		/* publish the SPI in the byspi hash, holding a ref for
		 * the hash linkage */
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		xfrm_state_hold(x);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);
1203
/* Apply @func to every state whose protocol matches @proto.
 *
 * Two passes under xfrm_state_lock: the first counts matches so the
 * second can hand @func a countdown index (the final call gets 0).
 * @func must therefore not sleep or recurse into the state tables.
 * Returns -ENOENT if nothing matched, otherwise the first non-zero
 * value returned by @func, otherwise 0.
 */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	struct hlist_node *entry;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	/* pass 1: count matches */
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto))
				count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}

	/* pass 2: invoke @func with a countdown index */
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_id_proto_match(x->id.proto, proto))
				continue;
			err = func(x, --count, data);
			if (err)
				goto out;
		}
	}
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
1239
1240
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
	struct km_event c;
	/* we send notify messages in case
	 *  1. we updated one of the sequence numbers, and the seqno difference
	 *     is at least x->replay_maxdiff, in this case we also update the
	 *     timeout of our timer function
	 *  2. if x->replay_maxage has elapsed since last update,
	 *     and there were changes
	 *
	 *  The state structure must be locked!
	 */

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		/* change still below the notification threshold: defer,
		 * or convert to a timeout event if one was deferred */
		if (x->replay_maxdiff &&
		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
			if (x->xflags & XFRM_TIME_DEFER)
				event = XFRM_REPLAY_TIMEOUT;
			else
				return;
		}

		break;

	case XFRM_REPLAY_TIMEOUT:
		/* nothing changed since the last notification: defer */
		if ((x->replay.seq == x->preplay.seq) &&
		    (x->replay.bitmap == x->preplay.bitmap) &&
		    (x->replay.oseq == x->preplay.oseq)) {
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}

		break;
	}

	/* snapshot the replay state we are about to report */
	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	/* re-arm the aging timer; take a ref if it was not pending */
	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
		xfrm_state_hold(x);
		x->xflags &= ~XFRM_TIME_DEFER;
	}
}
EXPORT_SYMBOL(xfrm_replay_notify);
1290
1291 static void xfrm_replay_timer_handler(unsigned long data)
1292 {
1293         struct xfrm_state *x = (struct xfrm_state*)data;
1294
1295         spin_lock(&x->lock);
1296
1297         if (x->km.state == XFRM_STATE_VALID) {
1298                 if (xfrm_aevent_is_on())
1299                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1300                 else
1301                         x->xflags |= XFRM_TIME_DEFER;
1302         }
1303
1304         spin_unlock(&x->lock);
1305         xfrm_state_put(x);
1306 }
1307
/* Anti-replay validation of inbound sequence number @seq (network
 * byte order).  Returns 0 if @seq is new and inside the window,
 * -EINVAL if it is zero, older than the window, or already seen.
 * Pure check: the window is only advanced by xfrm_replay_advance().
 */
int xfrm_replay_check(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	/* IPsec sequence numbers start at 1; 0 is never valid */
	if (unlikely(seq == 0))
		return -EINVAL;

	/* ahead of the window: always acceptable */
	if (likely(seq > x->replay.seq))
		return 0;

	diff = x->replay.seq - seq;
	if (diff >= x->props.replay_window) {
		x->stats.replay_window++;
		return -EINVAL;
	}

	/* bit set means this sequence number was already received */
	if (x->replay.bitmap & (1U << diff)) {
		x->stats.replay++;
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_replay_check);
1333
/* Record inbound sequence number @seq (network byte order) in the
 * replay window; call only after xfrm_replay_check() accepted it.
 * A new highest @seq shifts the bitmap left (resetting it when the
 * jump exceeds the window); an older in-window @seq sets its bit.
 * Notifies key managers when async events are enabled.
 */
void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	if (seq > x->replay.seq) {
		diff = seq - x->replay.seq;
		if (diff < x->props.replay_window)
			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
		else
			/* jumped past the whole window: only bit 0 set */
			x->replay.bitmap = 1;
		x->replay.seq = seq;
	} else {
		diff = x->replay.seq - seq;
		x->replay.bitmap |= (1U << diff);
	}

	if (xfrm_aevent_is_on())
		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
EXPORT_SYMBOL(xfrm_replay_advance);
1356
1357 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1358 static DEFINE_RWLOCK(xfrm_km_lock);
1359
1360 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1361 {
1362         struct xfrm_mgr *km;
1363
1364         read_lock(&xfrm_km_lock);
1365         list_for_each_entry(km, &xfrm_km_list, list)
1366                 if (km->notify_policy)
1367                         km->notify_policy(xp, dir, c);
1368         read_unlock(&xfrm_km_lock);
1369 }
1370
1371 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1372 {
1373         struct xfrm_mgr *km;
1374         read_lock(&xfrm_km_lock);
1375         list_for_each_entry(km, &xfrm_km_list, list)
1376                 if (km->notify)
1377                         km->notify(x, c);
1378         read_unlock(&xfrm_km_lock);
1379 }
1380
1381 EXPORT_SYMBOL(km_policy_notify);
1382 EXPORT_SYMBOL(km_state_notify);
1383
1384 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1385 {
1386         struct km_event c;
1387
1388         c.data.hard = hard;
1389         c.pid = pid;
1390         c.event = XFRM_MSG_EXPIRE;
1391         km_state_notify(x, &c);
1392
1393         if (hard)
1394                 wake_up(&km_waitq);
1395 }
1396
1397 EXPORT_SYMBOL(km_state_expired);
1398 /*
1399  * We send to all registered managers regardless of failure
1400  * We are happy with one success
1401 */
1402 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1403 {
1404         int err = -EINVAL, acqret;
1405         struct xfrm_mgr *km;
1406
1407         read_lock(&xfrm_km_lock);
1408         list_for_each_entry(km, &xfrm_km_list, list) {
1409                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1410                 if (!acqret)
1411                         err = acqret;
1412         }
1413         read_unlock(&xfrm_km_lock);
1414         return err;
1415 }
1416 EXPORT_SYMBOL(km_query);
1417
1418 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1419 {
1420         int err = -EINVAL;
1421         struct xfrm_mgr *km;
1422
1423         read_lock(&xfrm_km_lock);
1424         list_for_each_entry(km, &xfrm_km_list, list) {
1425                 if (km->new_mapping)
1426                         err = km->new_mapping(x, ipaddr, sport);
1427                 if (!err)
1428                         break;
1429         }
1430         read_unlock(&xfrm_km_lock);
1431         return err;
1432 }
1433 EXPORT_SYMBOL(km_new_mapping);
1434
1435 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1436 {
1437         struct km_event c;
1438
1439         c.data.hard = hard;
1440         c.pid = pid;
1441         c.event = XFRM_MSG_POLEXPIRE;
1442         km_policy_notify(pol, dir, &c);
1443
1444         if (hard)
1445                 wake_up(&km_waitq);
1446 }
1447 EXPORT_SYMBOL(km_policy_expired);
1448
1449 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1450 {
1451         int err = -EINVAL;
1452         int ret;
1453         struct xfrm_mgr *km;
1454
1455         read_lock(&xfrm_km_lock);
1456         list_for_each_entry(km, &xfrm_km_list, list) {
1457                 if (km->report) {
1458                         ret = km->report(proto, sel, addr);
1459                         if (!ret)
1460                                 err = ret;
1461                 }
1462         }
1463         read_unlock(&xfrm_km_lock);
1464         return err;
1465 }
1466 EXPORT_SYMBOL(km_report);
1467
/* setsockopt() back end: build a per-socket policy from a key-manager
 * specific blob and attach it to @sk.
 *
 * The user blob (0 < optlen <= PAGE_SIZE) is copied into kernel memory
 * and offered to each registered key manager's compile_policy().  A
 * non-negative err from compile_policy() means success and doubles as
 * the policy direction passed to xfrm_sk_policy_insert().  Returns 0
 * on success or a negative errno.
 */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		/* err is the policy direction here */
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
1507
1508 int xfrm_register_km(struct xfrm_mgr *km)
1509 {
1510         write_lock_bh(&xfrm_km_lock);
1511         list_add_tail(&km->list, &xfrm_km_list);
1512         write_unlock_bh(&xfrm_km_lock);
1513         return 0;
1514 }
1515 EXPORT_SYMBOL(xfrm_register_km);
1516
1517 int xfrm_unregister_km(struct xfrm_mgr *km)
1518 {
1519         write_lock_bh(&xfrm_km_lock);
1520         list_del(&km->list);
1521         write_unlock_bh(&xfrm_km_lock);
1522         return 0;
1523 }
1524 EXPORT_SYMBOL(xfrm_unregister_km);
1525
1526 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1527 {
1528         int err = 0;
1529         if (unlikely(afinfo == NULL))
1530                 return -EINVAL;
1531         if (unlikely(afinfo->family >= NPROTO))
1532                 return -EAFNOSUPPORT;
1533         write_lock_bh(&xfrm_state_afinfo_lock);
1534         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1535                 err = -ENOBUFS;
1536         else
1537                 xfrm_state_afinfo[afinfo->family] = afinfo;
1538         write_unlock_bh(&xfrm_state_afinfo_lock);
1539         return err;
1540 }
1541 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1542
1543 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1544 {
1545         int err = 0;
1546         if (unlikely(afinfo == NULL))
1547                 return -EINVAL;
1548         if (unlikely(afinfo->family >= NPROTO))
1549                 return -EAFNOSUPPORT;
1550         write_lock_bh(&xfrm_state_afinfo_lock);
1551         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1552                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1553                         err = -EINVAL;
1554                 else
1555                         xfrm_state_afinfo[afinfo->family] = NULL;
1556         }
1557         write_unlock_bh(&xfrm_state_afinfo_lock);
1558         return err;
1559 }
1560 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1561
/* Look up the per-family afinfo entry.
 *
 * On success, returns with xfrm_state_afinfo_lock held for reading;
 * the caller must release it via xfrm_state_put_afinfo().  Returns
 * NULL (lock released) when the family is out of range or has no
 * registered afinfo.
 */
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	/* keep the lock held only when returning a valid entry */
	if (unlikely(!afinfo))
		read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
1573
/* Release the read lock taken by a successful xfrm_state_get_afinfo().
 * @afinfo itself is unused; the parameter documents the pairing. */
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	read_unlock(&xfrm_state_afinfo_lock);
}
1578
1579 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1580 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1581 {
1582         if (x->tunnel) {
1583                 struct xfrm_state *t = x->tunnel;
1584
1585                 if (atomic_read(&t->tunnel_users) == 2)
1586                         xfrm_state_delete(t);
1587                 atomic_dec(&t->tunnel_users);
1588                 xfrm_state_put(t);
1589                 x->tunnel = NULL;
1590         }
1591 }
1592 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1593
/*
 * This function is NOT optimal.  For example, with ESP it will give an
 * MTU that's usually two bytes short of being optimal.  However, it will
 * usually give an answer that's a multiple of 4 provided the input is
 * also a multiple of 4.
 */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
	/* Find the largest payload (floored at 68, the IPv4 minimum
	 * MTU) whose transformed size still fits in @mtu; iterates
	 * because get_max_size() need not be an exact inverse. */
	int res = mtu;

	res -= x->props.header_len;

	for (;;) {
		int m = res;

		if (m < 68)
			return 68;

		spin_lock_bh(&x->lock);
		if (x->km.state == XFRM_STATE_VALID &&
		    x->type && x->type->get_max_size)
			m = x->type->get_max_size(x, m);
		else
			m += x->props.header_len;
		spin_unlock_bh(&x->lock);

		if (m <= mtu)
			break;
		/* overshoot: shrink the candidate by the excess */
		res -= (m - mtu);
	}

	return res;
}
1627
1628 int xfrm_init_state(struct xfrm_state *x)
1629 {
1630         struct xfrm_state_afinfo *afinfo;
1631         int family = x->props.family;
1632         int err;
1633
1634         err = -EAFNOSUPPORT;
1635         afinfo = xfrm_state_get_afinfo(family);
1636         if (!afinfo)
1637                 goto error;
1638
1639         err = 0;
1640         if (afinfo->init_flags)
1641                 err = afinfo->init_flags(x);
1642
1643         xfrm_state_put_afinfo(afinfo);
1644
1645         if (err)
1646                 goto error;
1647
1648         err = -EPROTONOSUPPORT;
1649         x->type = xfrm_get_type(x->id.proto, family);
1650         if (x->type == NULL)
1651                 goto error;
1652
1653         err = x->type->init_state(x);
1654         if (err)
1655                 goto error;
1656
1657         x->mode = xfrm_get_mode(x->props.mode, family);
1658         if (x->mode == NULL)
1659                 goto error;
1660
1661         x->km.state = XFRM_STATE_VALID;
1662
1663 error:
1664         return err;
1665 }
1666
1667 EXPORT_SYMBOL(xfrm_init_state);
1668  
1669 void __init xfrm_state_init(void)
1670 {
1671         unsigned int sz;
1672
1673         sz = sizeof(struct hlist_head) * 8;
1674
1675         xfrm_state_bydst = xfrm_state_hash_alloc(sz);
1676         xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
1677         xfrm_state_byspi = xfrm_state_hash_alloc(sz);
1678         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1679                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1680         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1681
1682         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1683 }
1684