1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/bootmem.h>
22 #include <linux/vmalloc.h>
23 #include <linux/cache.h>
24 #include <asm/uaccess.h>
25
26 struct sock *xfrm_nl;
27 EXPORT_SYMBOL(xfrm_nl);
28
29 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
31
32 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
33 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35 /* Each xfrm_state may be linked to two tables:
36
37    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
39       destination/tunnel endpoint. (output)
40  */
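/* Note: besides the two tables described above, each state is also kept
 * on a by-source-address hash (xfrm_state_bysrc below), which is used by
 * __xfrm_state_lookup_byaddr() for (daddr,saddr,proto) lookups and which
 * ACQUIRE entries are inserted into as well.
 */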
41
42 static DEFINE_SPINLOCK(xfrm_state_lock);
43
44 /* Hash table to find appropriate SA towards given target (endpoint
45  * of tunnel or destination of transport mode) allowed by selector.
46  *
47  * Main use is finding SA after policy selected tunnel or transport mode.
48  * Also, it can be used by ah/esp icmp error handler to find offending SA.
49  */
50 static struct hlist_head *xfrm_state_bydst __read_mostly;
51 static struct hlist_head *xfrm_state_bysrc __read_mostly;
52 static struct hlist_head *xfrm_state_byspi __read_mostly;
53 static unsigned int xfrm_state_hmask __read_mostly;
54 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
55 static unsigned int xfrm_state_num;
56 static unsigned int xfrm_state_genid;
57
58 static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
59 {
60         return ntohl(addr->a4);
61 }
62
63 static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
64 {
65         return ntohl(addr->a6[2]^addr->a6[3]);
66 }
67
68 static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr,
69                                            u32 reqid, unsigned short family,
70                                            unsigned int hmask)
71 {
72         unsigned int h = family ^ reqid;
73         switch (family) {
74         case AF_INET:
75                 h ^= __xfrm4_addr_hash(addr);
76                 break;
77         case AF_INET6:
78                 h ^= __xfrm6_addr_hash(addr);
79                 break;
80         }
81         return (h ^ (h >> 16)) & hmask;
82 }
83
84 static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid,
85                                          unsigned short family)
86 {
87         return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask);
88 }
89
90 static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,
91                                        unsigned int hmask)
92 {
93         unsigned int h = family;
94         switch (family) {
95         case AF_INET:
96                 h ^= __xfrm4_addr_hash(addr);
97                 break;
98         case AF_INET6:
99                 h ^= __xfrm6_addr_hash(addr);
100                 break;
101         }
102         return (h ^ (h >> 16)) & hmask;
103 }
104
105 static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
106 {
107         return __xfrm_src_hash(addr, family, xfrm_state_hmask);
108 }
109
110 static inline unsigned int
111 __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
112                 unsigned int hmask)
113 {
114         unsigned int h = spi ^ proto;
115         switch (family) {
116         case AF_INET:
117                 h ^= __xfrm4_addr_hash(addr);
118                 break;
119         case AF_INET6:
120                 h ^= __xfrm6_addr_hash(addr);
121                 break;
122         }
123         return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
124 }
125
126 static inline unsigned int
127 xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
128 {
129         return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
130 }
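/* Rough worked example of the SPI hash above (IPv4 case): with daddr
 * 10.0.0.1 (0x0a000001 after ntohl), spi 0x1234 and proto 50 (ESP),
 * h = 0x1234 ^ 50 ^ 0x0a000001, which is then folded with
 * (h ^ (h >> 10) ^ (h >> 20)) and masked by xfrm_state_hmask
 * (table size minus one) to select the bucket.
 */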
131
132 static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
133 {
134         struct hlist_head *n;
135
136         if (sz <= PAGE_SIZE)
137                 n = kmalloc(sz, GFP_KERNEL);
138         else if (hashdist)
139                 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
140         else
141                 n = (struct hlist_head *)
142                         __get_free_pages(GFP_KERNEL, get_order(sz));
143
144         if (n)
145                 memset(n, 0, sz);
146
147         return n;
148 }
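/* Allocation strategy above: tables that fit in a single page come from
 * kmalloc(); larger ones use __vmalloc() when hashdist is set (spreading
 * pages across NUMA nodes), otherwise contiguous pages via
 * __get_free_pages(). xfrm_state_hash_free() below mirrors this choice.
 */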
149
150 static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
151 {
152         if (sz <= PAGE_SIZE)
153                 kfree(n);
154         else if (hashdist)
155                 vfree(n);
156         else
157                 free_pages((unsigned long)n, get_order(sz));
158 }
159
160 static void xfrm_hash_transfer(struct hlist_head *list,
161                                struct hlist_head *ndsttable,
162                                struct hlist_head *nsrctable,
163                                struct hlist_head *nspitable,
164                                unsigned int nhashmask)
165 {
166         struct hlist_node *entry, *tmp;
167         struct xfrm_state *x;
168
169         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
170                 unsigned int h;
171
172                 h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid,
173                                     x->props.family, nhashmask);
174                 hlist_add_head(&x->bydst, ndsttable+h);
175
176                 h = __xfrm_src_hash(&x->props.saddr, x->props.family,
177                                     nhashmask);
178                 hlist_add_head(&x->bysrc, nsrctable+h);
179
180                 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
181                                     x->props.family, nhashmask);
182                 hlist_add_head(&x->byspi, nspitable+h);
183         }
184 }
185
186 static unsigned long xfrm_hash_new_size(void)
187 {
188         return ((xfrm_state_hmask + 1) << 1) *
189                 sizeof(struct hlist_head);
190 }
191
192 static DEFINE_MUTEX(hash_resize_mutex);
193
194 static void xfrm_hash_resize(void *__unused)
195 {
196         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
197         unsigned long nsize, osize;
198         unsigned int nhashmask, ohashmask;
199         int i;
200
201         mutex_lock(&hash_resize_mutex);
202
203         nsize = xfrm_hash_new_size();
204         ndst = xfrm_state_hash_alloc(nsize);
205         if (!ndst)
206                 goto out_unlock;
207         nsrc = xfrm_state_hash_alloc(nsize);
208         if (!nsrc) {
209                 xfrm_state_hash_free(ndst, nsize);
210                 goto out_unlock;
211         }
212         nspi = xfrm_state_hash_alloc(nsize);
213         if (!nspi) {
214                 xfrm_state_hash_free(ndst, nsize);
215                 xfrm_state_hash_free(nsrc, nsize);
216                 goto out_unlock;
217         }
218
219         spin_lock_bh(&xfrm_state_lock);
220
221         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
222         for (i = xfrm_state_hmask; i >= 0; i--)
223                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
224                                    nhashmask);
225
226         odst = xfrm_state_bydst;
227         osrc = xfrm_state_bysrc;
228         ospi = xfrm_state_byspi;
229         ohashmask = xfrm_state_hmask;
230
231         xfrm_state_bydst = ndst;
232         xfrm_state_bysrc = nsrc;
233         xfrm_state_byspi = nspi;
234         xfrm_state_hmask = nhashmask;
235
236         spin_unlock_bh(&xfrm_state_lock);
237
238         osize = (ohashmask + 1) * sizeof(struct hlist_head);
239         xfrm_state_hash_free(odst, osize);
240         xfrm_state_hash_free(osrc, osize);
241         xfrm_state_hash_free(ospi, osize);
242
243 out_unlock:
244         mutex_unlock(&hash_resize_mutex);
245 }
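/* Resize sequence, roughly: allocate three new tables of double the
 * current size outside the lock, then under xfrm_state_lock rehash every
 * chain with xfrm_hash_transfer() and swap in the new table pointers and
 * hmask, and finally free the old tables after dropping the lock.
 */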
246
247 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
248
249 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
250 EXPORT_SYMBOL(km_waitq);
251
252 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
253 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
254
255 static struct work_struct xfrm_state_gc_work;
256 static HLIST_HEAD(xfrm_state_gc_list);
257 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
258
259 static int xfrm_state_gc_flush_bundles;
260
261 int __xfrm_state_delete(struct xfrm_state *x);
262
263 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
264 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
265
266 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
267 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
268
269 static void xfrm_state_gc_destroy(struct xfrm_state *x)
270 {
271         if (del_timer(&x->timer))
272                 BUG();
273         if (del_timer(&x->rtimer))
274                 BUG();
275         kfree(x->aalg);
276         kfree(x->ealg);
277         kfree(x->calg);
278         kfree(x->encap);
279         kfree(x->coaddr);
280         if (x->mode)
281                 xfrm_put_mode(x->mode);
282         if (x->type) {
283                 x->type->destructor(x);
284                 xfrm_put_type(x->type);
285         }
286         security_xfrm_state_free(x);
287         kfree(x);
288 }
289
290 static void xfrm_state_gc_task(void *data)
291 {
292         struct xfrm_state *x;
293         struct hlist_node *entry, *tmp;
294         struct hlist_head gc_list;
295
296         if (xfrm_state_gc_flush_bundles) {
297                 xfrm_state_gc_flush_bundles = 0;
298                 xfrm_flush_bundles();
299         }
300
301         spin_lock_bh(&xfrm_state_gc_lock);
302         gc_list.first = xfrm_state_gc_list.first;
303         INIT_HLIST_HEAD(&xfrm_state_gc_list);
304         spin_unlock_bh(&xfrm_state_gc_lock);
305
306         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
307                 xfrm_state_gc_destroy(x);
308
309         wake_up(&km_waitq);
310 }
311
312 static inline unsigned long make_jiffies(long secs)
313 {
314         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
315                 return MAX_SCHEDULE_TIMEOUT-1;
316         else
317                 return secs*HZ;
318 }
319
320 static void xfrm_timer_handler(unsigned long data)
321 {
322         struct xfrm_state *x = (struct xfrm_state*)data;
323         unsigned long now = (unsigned long)xtime.tv_sec;
324         long next = LONG_MAX;
325         int warn = 0;
326
327         spin_lock(&x->lock);
328         if (x->km.state == XFRM_STATE_DEAD)
329                 goto out;
330         if (x->km.state == XFRM_STATE_EXPIRED)
331                 goto expired;
332         if (x->lft.hard_add_expires_seconds) {
333                 long tmo = x->lft.hard_add_expires_seconds +
334                         x->curlft.add_time - now;
335                 if (tmo <= 0)
336                         goto expired;
337                 if (tmo < next)
338                         next = tmo;
339         }
340         if (x->lft.hard_use_expires_seconds) {
341                 long tmo = x->lft.hard_use_expires_seconds +
342                         (x->curlft.use_time ? : now) - now;
343                 if (tmo <= 0)
344                         goto expired;
345                 if (tmo < next)
346                         next = tmo;
347         }
348         if (x->km.dying)
349                 goto resched;
350         if (x->lft.soft_add_expires_seconds) {
351                 long tmo = x->lft.soft_add_expires_seconds +
352                         x->curlft.add_time - now;
353                 if (tmo <= 0)
354                         warn = 1;
355                 else if (tmo < next)
356                         next = tmo;
357         }
358         if (x->lft.soft_use_expires_seconds) {
359                 long tmo = x->lft.soft_use_expires_seconds +
360                         (x->curlft.use_time ? : now) - now;
361                 if (tmo <= 0)
362                         warn = 1;
363                 else if (tmo < next)
364                         next = tmo;
365         }
366
367         x->km.dying = warn;
368         if (warn)
369                 km_state_expired(x, 0, 0);
370 resched:
371         if (next != LONG_MAX &&
372             !mod_timer(&x->timer, jiffies + make_jiffies(next)))
373                 xfrm_state_hold(x);
374         goto out;
375
376 expired:
377         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
378                 x->km.state = XFRM_STATE_EXPIRED;
379                 wake_up(&km_waitq);
380                 next = 2;
381                 goto resched;
382         }
383         if (!__xfrm_state_delete(x) && x->id.spi)
384                 km_state_expired(x, 1, 0);
385
386 out:
387         spin_unlock(&x->lock);
388         xfrm_state_put(x);
389 }
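/* Reference counting convention used with the timers above: a pending
 * timer owns one reference on the state. mod_timer() returning 0 means
 * the timer was not previously pending, so xfrm_state_hold() is called to
 * give it one; conversely, a successful del_timer() drops that reference
 * via __xfrm_state_put() in __xfrm_state_delete().
 */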
390
391 static void xfrm_replay_timer_handler(unsigned long data);
392
393 struct xfrm_state *xfrm_state_alloc(void)
394 {
395         struct xfrm_state *x;
396
397         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
398
399         if (x) {
400                 atomic_set(&x->refcnt, 1);
401                 atomic_set(&x->tunnel_users, 0);
402                 INIT_HLIST_NODE(&x->bydst);
403                 INIT_HLIST_NODE(&x->bysrc);
404                 INIT_HLIST_NODE(&x->byspi);
405                 init_timer(&x->timer);
406                 x->timer.function = xfrm_timer_handler;
407                 x->timer.data     = (unsigned long)x;
408                 init_timer(&x->rtimer);
409                 x->rtimer.function = xfrm_replay_timer_handler;
410                 x->rtimer.data     = (unsigned long)x;
411                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
412                 x->lft.soft_byte_limit = XFRM_INF;
413                 x->lft.soft_packet_limit = XFRM_INF;
414                 x->lft.hard_byte_limit = XFRM_INF;
415                 x->lft.hard_packet_limit = XFRM_INF;
416                 x->replay_maxage = 0;
417                 x->replay_maxdiff = 0;
418                 spin_lock_init(&x->lock);
419         }
420         return x;
421 }
422 EXPORT_SYMBOL(xfrm_state_alloc);
423
424 void __xfrm_state_destroy(struct xfrm_state *x)
425 {
426         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
427
428         spin_lock_bh(&xfrm_state_gc_lock);
429         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
430         spin_unlock_bh(&xfrm_state_gc_lock);
431         schedule_work(&xfrm_state_gc_work);
432 }
433 EXPORT_SYMBOL(__xfrm_state_destroy);
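/* __xfrm_state_destroy() above defers freeing to xfrm_state_gc_task():
 * the state is already unhashed by the time its refcount reaches zero, so
 * its bydst node is reused to queue it on xfrm_state_gc_list, and the
 * actual teardown (timers, algorithms, type/mode, security state) happens
 * in process context via xfrm_state_gc_destroy().
 */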
434
435 int __xfrm_state_delete(struct xfrm_state *x)
436 {
437         int err = -ESRCH;
438
439         if (x->km.state != XFRM_STATE_DEAD) {
440                 x->km.state = XFRM_STATE_DEAD;
441                 spin_lock(&xfrm_state_lock);
442                 hlist_del(&x->bydst);
443                 __xfrm_state_put(x);
444                 hlist_del(&x->bysrc);
445                 __xfrm_state_put(x);
446                 if (x->id.spi) {
447                         hlist_del(&x->byspi);
448                         __xfrm_state_put(x);
449                 }
450                 xfrm_state_num--;
451                 spin_unlock(&xfrm_state_lock);
452                 if (del_timer(&x->timer))
453                         __xfrm_state_put(x);
454                 if (del_timer(&x->rtimer))
455                         __xfrm_state_put(x);
456
457                 /* The number two in this test is the reference
458                  * mentioned in the comment below plus the reference
459                  * our caller holds.  A larger value means that
460                  * there are DSTs attached to this xfrm_state.
461                  */
462                 if (atomic_read(&x->refcnt) > 2) {
463                         xfrm_state_gc_flush_bundles = 1;
464                         schedule_work(&xfrm_state_gc_work);
465                 }
466
467                 /* All xfrm_state objects are created by xfrm_state_alloc.
468                  * The xfrm_state_alloc call gives a reference, and that
469                  * is what we are dropping here.
470                  */
471                 __xfrm_state_put(x);
472                 err = 0;
473         }
474
475         return err;
476 }
477 EXPORT_SYMBOL(__xfrm_state_delete);
478
479 int xfrm_state_delete(struct xfrm_state *x)
480 {
481         int err;
482
483         spin_lock_bh(&x->lock);
484         err = __xfrm_state_delete(x);
485         spin_unlock_bh(&x->lock);
486
487         return err;
488 }
489 EXPORT_SYMBOL(xfrm_state_delete);
490
491 void xfrm_state_flush(u8 proto)
492 {
493         int i;
494
495         spin_lock_bh(&xfrm_state_lock);
496         for (i = 0; i <= xfrm_state_hmask; i++) {
497                 struct hlist_node *entry;
498                 struct xfrm_state *x;
499 restart:
500                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
501                         if (!xfrm_state_kern(x) &&
502                             xfrm_id_proto_match(x->id.proto, proto)) {
503                                 xfrm_state_hold(x);
504                                 spin_unlock_bh(&xfrm_state_lock);
505
506                                 xfrm_state_delete(x);
507                                 xfrm_state_put(x);
508
509                                 spin_lock_bh(&xfrm_state_lock);
510                                 goto restart;
511                         }
512                 }
513         }
514         spin_unlock_bh(&xfrm_state_lock);
515         wake_up(&km_waitq);
516 }
517 EXPORT_SYMBOL(xfrm_state_flush);
518
519 static int
520 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
521                   struct xfrm_tmpl *tmpl,
522                   xfrm_address_t *daddr, xfrm_address_t *saddr,
523                   unsigned short family)
524 {
525         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
526         if (!afinfo)
527                 return -1;
528         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
529         xfrm_state_put_afinfo(afinfo);
530         return 0;
531 }
532
533 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
534 {
535         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
536         struct xfrm_state *x;
537         struct hlist_node *entry;
538
539         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
540                 if (x->props.family != family ||
541                     x->id.spi       != spi ||
542                     x->id.proto     != proto)
543                         continue;
544
545                 switch (family) {
546                 case AF_INET:
547                         if (x->id.daddr.a4 != daddr->a4)
548                                 continue;
549                         break;
550                 case AF_INET6:
551                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
552                                              (struct in6_addr *)
553                                              x->id.daddr.a6))
554                                 continue;
555                         break;
556                 }
557
558                 xfrm_state_hold(x);
559                 return x;
560         }
561
562         return NULL;
563 }
564
565 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
566 {
567         unsigned int h = xfrm_src_hash(saddr, family);
568         struct xfrm_state *x;
569         struct hlist_node *entry;
570
571         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
572                 if (x->props.family != family ||
573                     x->id.proto     != proto)
574                         continue;
575
576                 switch (family) {
577                 case AF_INET:
578                         if (x->id.daddr.a4 != daddr->a4 ||
579                             x->props.saddr.a4 != saddr->a4)
580                                 continue;
581                         break;
582                 case AF_INET6:
583                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
584                                              (struct in6_addr *)
585                                              x->id.daddr.a6) ||
586                             !ipv6_addr_equal((struct in6_addr *)saddr,
587                                              (struct in6_addr *)
588                                              x->props.saddr.a6))
589                                 continue;
590                         break;
591                 }
592
593                 xfrm_state_hold(x);
594                 return x;
595         }
596
597         return NULL;
598 }
599
600 static inline struct xfrm_state *
601 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
602 {
603         if (use_spi)
604                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
605                                            x->id.proto, family);
606         else
607                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
608                                                   &x->props.saddr,
609                                                   x->id.proto, family);
610 }
611
612 struct xfrm_state *
613 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
614                 struct flowi *fl, struct xfrm_tmpl *tmpl,
615                 struct xfrm_policy *pol, int *err,
616                 unsigned short family)
617 {
618         unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family);
619         struct hlist_node *entry;
620         struct xfrm_state *x, *x0;
621         int acquire_in_progress = 0;
622         int error = 0;
623         struct xfrm_state *best = NULL;
624         
625         spin_lock_bh(&xfrm_state_lock);
626         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
627                 if (x->props.family == family &&
628                     x->props.reqid == tmpl->reqid &&
629                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
630                     xfrm_state_addr_check(x, daddr, saddr, family) &&
631                     tmpl->mode == x->props.mode &&
632                     tmpl->id.proto == x->id.proto &&
633                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
634                         /* Resolution logic:
635                            1. There is a valid state with matching selector.
636                               Done.
637                            2. Valid state with inappropriate selector. Skip.
638
639                            Entering area of "sysdeps".
640
641                            3. If state is not valid, selector is temporary,
642                               it selects only session which triggered
643                               previous resolution. Key manager will do
644                               something to install a state with proper
645                               selector.
646                          */
647                         if (x->km.state == XFRM_STATE_VALID) {
648                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
649                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
650                                         continue;
651                                 if (!best ||
652                                     best->km.dying > x->km.dying ||
653                                     (best->km.dying == x->km.dying &&
654                                      best->curlft.add_time < x->curlft.add_time))
655                                         best = x;
656                         } else if (x->km.state == XFRM_STATE_ACQ) {
657                                 acquire_in_progress = 1;
658                         } else if (x->km.state == XFRM_STATE_ERROR ||
659                                    x->km.state == XFRM_STATE_EXPIRED) {
660                                 if (xfrm_selector_match(&x->sel, fl, family) &&
661                                     security_xfrm_state_pol_flow_match(x, pol, fl))
662                                         error = -ESRCH;
663                         }
664                 }
665         }
666
667         x = best;
668         if (!x && !error && !acquire_in_progress) {
669                 if (tmpl->id.spi &&
670                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
671                                               tmpl->id.proto, family)) != NULL) {
672                         xfrm_state_put(x0);
673                         error = -EEXIST;
674                         goto out;
675                 }
676                 x = xfrm_state_alloc();
677                 if (x == NULL) {
678                         error = -ENOMEM;
679                         goto out;
680                 }
681                 /* Initialize temporary selector matching only
682                  * to current session. */
683                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
684
685                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
686                 if (error) {
687                         x->km.state = XFRM_STATE_DEAD;
688                         xfrm_state_put(x);
689                         x = NULL;
690                         goto out;
691                 }
692
693                 if (km_query(x, tmpl, pol) == 0) {
694                         x->km.state = XFRM_STATE_ACQ;
695                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
696                         xfrm_state_hold(x);
697                         h = xfrm_src_hash(saddr, family);
698                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
699                         xfrm_state_hold(x);
700                         if (x->id.spi) {
701                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
702                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
703                                 xfrm_state_hold(x);
704                         }
705                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
706                         xfrm_state_hold(x);
707                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
708                         add_timer(&x->timer);
709                 } else {
710                         x->km.state = XFRM_STATE_DEAD;
711                         xfrm_state_put(x);
712                         x = NULL;
713                         error = -ESRCH;
714                 }
715         }
716 out:
717         if (x)
718                 xfrm_state_hold(x);
719         else
720                 *err = acquire_in_progress ? -EAGAIN : error;
721         spin_unlock_bh(&xfrm_state_lock);
722         return x;
723 }
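/* When xfrm_state_find() above finds neither a usable state nor a pending
 * acquire, it creates a temporary XFRM_STATE_ACQ entry whose selector is
 * narrowed to the triggering flow, arms its timer for XFRM_ACQ_EXPIRES
 * seconds and calls km_query() so a key manager can negotiate the real SA;
 * callers see -EAGAIN while an acquire is in progress.
 */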
724
725 static void __xfrm_state_insert(struct xfrm_state *x)
726 {
727         unsigned int h;
728
729         x->genid = ++xfrm_state_genid;
730
731         h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family);
732         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
733         xfrm_state_hold(x);
734
735         h = xfrm_src_hash(&x->props.saddr, x->props.family);
736         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
737         xfrm_state_hold(x);
738
739         if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
740                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
741                                   x->props.family);
742
743                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
744                 xfrm_state_hold(x);
745         }
746
747         if (!mod_timer(&x->timer, jiffies + HZ))
748                 xfrm_state_hold(x);
749
750         if (x->replay_maxage &&
751             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
752                 xfrm_state_hold(x);
753
754         wake_up(&km_waitq);
755
756         xfrm_state_num++;
757
758         if (x->bydst.next != NULL &&
759             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
760             xfrm_state_num > xfrm_state_hmask)
761                 schedule_work(&xfrm_hash_work);
762 }
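/* Growth heuristic above: once the number of states exceeds the number of
 * buckets (xfrm_state_num > xfrm_state_hmask, i.e. average chain length
 * above one) and the chain just inserted into was not empty, the resize
 * work is scheduled, bounded by xfrm_state_hashmax buckets.
 */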
763
764 void xfrm_state_insert(struct xfrm_state *x)
765 {
766         spin_lock_bh(&xfrm_state_lock);
767         __xfrm_state_insert(x);
768         spin_unlock_bh(&xfrm_state_lock);
769
770         xfrm_flush_all_bundles();
771 }
772 EXPORT_SYMBOL(xfrm_state_insert);
773
774 /* xfrm_state_lock is held */
775 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
776 {
777         unsigned int h = xfrm_dst_hash(daddr, reqid, family);
778         struct hlist_node *entry;
779         struct xfrm_state *x;
780
781         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
782                 if (x->props.reqid  != reqid ||
783                     x->props.mode   != mode ||
784                     x->props.family != family ||
785                     x->km.state     != XFRM_STATE_ACQ ||
786                     x->id.spi       != 0)
787                         continue;
788
789                 switch (family) {
790                 case AF_INET:
791                         if (x->id.daddr.a4    != daddr->a4 ||
792                             x->props.saddr.a4 != saddr->a4)
793                                 continue;
794                         break;
795                 case AF_INET6:
796                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
797                                              (struct in6_addr *)daddr) ||
798                             !ipv6_addr_equal((struct in6_addr *)
799                                              x->props.saddr.a6,
800                                              (struct in6_addr *)saddr))
801                                 continue;
802                         break;
803                 }
804
805                 xfrm_state_hold(x);
806                 return x;
807         }
808
809         if (!create)
810                 return NULL;
811
812         x = xfrm_state_alloc();
813         if (likely(x)) {
814                 switch (family) {
815                 case AF_INET:
816                         x->sel.daddr.a4 = daddr->a4;
817                         x->sel.saddr.a4 = saddr->a4;
818                         x->sel.prefixlen_d = 32;
819                         x->sel.prefixlen_s = 32;
820                         x->props.saddr.a4 = saddr->a4;
821                         x->id.daddr.a4 = daddr->a4;
822                         break;
823
824                 case AF_INET6:
825                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
826                                        (struct in6_addr *)daddr);
827                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
828                                        (struct in6_addr *)saddr);
829                         x->sel.prefixlen_d = 128;
830                         x->sel.prefixlen_s = 128;
831                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
832                                        (struct in6_addr *)saddr);
833                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
834                                        (struct in6_addr *)daddr);
835                         break;
836                 }
837
838                 x->km.state = XFRM_STATE_ACQ;
839                 x->id.proto = proto;
840                 x->props.family = family;
841                 x->props.mode = mode;
842                 x->props.reqid = reqid;
843                 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
844                 xfrm_state_hold(x);
845                 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
846                 add_timer(&x->timer);
847                 xfrm_state_hold(x);
848                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
849                 h = xfrm_src_hash(saddr, family);
850                 xfrm_state_hold(x);
851                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
852                 wake_up(&km_waitq);
853         }
854
855         return x;
856 }
857
858 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
859
860 int xfrm_state_add(struct xfrm_state *x)
861 {
862         struct xfrm_state *x1;
863         int family;
864         int err;
865         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
866
867         family = x->props.family;
868
869         spin_lock_bh(&xfrm_state_lock);
870
871         x1 = __xfrm_state_locate(x, use_spi, family);
872         if (x1) {
873                 xfrm_state_put(x1);
874                 x1 = NULL;
875                 err = -EEXIST;
876                 goto out;
877         }
878
879         if (use_spi && x->km.seq) {
880                 x1 = __xfrm_find_acq_byseq(x->km.seq);
881                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
882                         xfrm_state_put(x1);
883                         x1 = NULL;
884                 }
885         }
886
887         if (use_spi && !x1)
888                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
889                                      x->id.proto,
890                                      &x->id.daddr, &x->props.saddr, 0);
891
892         __xfrm_state_insert(x);
893         err = 0;
894
895 out:
896         spin_unlock_bh(&xfrm_state_lock);
897
898         if (!err)
899                 xfrm_flush_all_bundles();
900
901         if (x1) {
902                 xfrm_state_delete(x1);
903                 xfrm_state_put(x1);
904         }
905
906         return err;
907 }
908 EXPORT_SYMBOL(xfrm_state_add);
909
910 int xfrm_state_update(struct xfrm_state *x)
911 {
912         struct xfrm_state *x1;
913         int err;
914         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
915
916         spin_lock_bh(&xfrm_state_lock);
917         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
918
919         err = -ESRCH;
920         if (!x1)
921                 goto out;
922
923         if (xfrm_state_kern(x1)) {
924                 xfrm_state_put(x1);
925                 err = -EEXIST;
926                 goto out;
927         }
928
929         if (x1->km.state == XFRM_STATE_ACQ) {
930                 __xfrm_state_insert(x);
931                 x = NULL;
932         }
933         err = 0;
934
935 out:
936         spin_unlock_bh(&xfrm_state_lock);
937
938         if (err)
939                 return err;
940
941         if (!x) {
942                 xfrm_state_delete(x1);
943                 xfrm_state_put(x1);
944                 return 0;
945         }
946
947         err = -EINVAL;
948         spin_lock_bh(&x1->lock);
949         if (likely(x1->km.state == XFRM_STATE_VALID)) {
950                 if (x->encap && x1->encap)
951                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
952                 if (x->coaddr && x1->coaddr) {
953                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
954                 }
955                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
956                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
957                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
958                 x1->km.dying = 0;
959
960                 if (!mod_timer(&x1->timer, jiffies + HZ))
961                         xfrm_state_hold(x1);
962                 if (x1->curlft.use_time)
963                         xfrm_state_check_expire(x1);
964
965                 err = 0;
966         }
967         spin_unlock_bh(&x1->lock);
968
969         xfrm_state_put(x1);
970
971         return err;
972 }
973 EXPORT_SYMBOL(xfrm_state_update);
974
975 int xfrm_state_check_expire(struct xfrm_state *x)
976 {
977         if (!x->curlft.use_time)
978                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
979
980         if (x->km.state != XFRM_STATE_VALID)
981                 return -EINVAL;
982
983         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
984             x->curlft.packets >= x->lft.hard_packet_limit) {
985                 x->km.state = XFRM_STATE_EXPIRED;
986                 if (!mod_timer(&x->timer, jiffies))
987                         xfrm_state_hold(x);
988                 return -EINVAL;
989         }
990
991         if (!x->km.dying &&
992             (x->curlft.bytes >= x->lft.soft_byte_limit ||
993              x->curlft.packets >= x->lft.soft_packet_limit)) {
994                 x->km.dying = 1;
995                 km_state_expired(x, 0, 0);
996         }
997         return 0;
998 }
999 EXPORT_SYMBOL(xfrm_state_check_expire);
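/* In xfrm_state_check_expire() above, hitting a hard byte/packet limit
 * marks the state XFRM_STATE_EXPIRED and fires its timer immediately,
 * while crossing only a soft limit sets km.dying and sends a soft expire
 * notification (km_state_expired() with hard == 0), giving the key
 * manager a chance to rekey before the hard limit is reached.
 */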
1000
1001 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1002 {
1003         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1004                 - skb_headroom(skb);
1005
1006         if (nhead > 0)
1007                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1008
1009         /* Check tail too... */
1010         return 0;
1011 }
1012
1013 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
1014 {
1015         int err = xfrm_state_check_expire(x);
1016         if (err < 0)
1017                 goto err;
1018         err = xfrm_state_check_space(x, skb);
1019 err:
1020         return err;
1021 }
1022 EXPORT_SYMBOL(xfrm_state_check);
1023
1024 struct xfrm_state *
1025 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
1026                   unsigned short family)
1027 {
1028         struct xfrm_state *x;
1029
1030         spin_lock_bh(&xfrm_state_lock);
1031         x = __xfrm_state_lookup(daddr, spi, proto, family);
1032         spin_unlock_bh(&xfrm_state_lock);
1033         return x;
1034 }
1035 EXPORT_SYMBOL(xfrm_state_lookup);
1036
1037 struct xfrm_state *
1038 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1039                          u8 proto, unsigned short family)
1040 {
1041         struct xfrm_state *x;
1042
1043         spin_lock_bh(&xfrm_state_lock);
1044         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1045         spin_unlock_bh(&xfrm_state_lock);
1046         return x;
1047 }
1048 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1049
1050 struct xfrm_state *
1051 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
1052               xfrm_address_t *daddr, xfrm_address_t *saddr, 
1053               int create, unsigned short family)
1054 {
1055         struct xfrm_state *x;
1056
1057         spin_lock_bh(&xfrm_state_lock);
1058         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1059         spin_unlock_bh(&xfrm_state_lock);
1060
1061         return x;
1062 }
1063 EXPORT_SYMBOL(xfrm_find_acq);
1064
1065 #ifdef CONFIG_XFRM_SUB_POLICY
1066 int
1067 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1068                unsigned short family)
1069 {
1070         int err = 0;
1071         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1072         if (!afinfo)
1073                 return -EAFNOSUPPORT;
1074
1075         spin_lock_bh(&xfrm_state_lock);
1076         if (afinfo->tmpl_sort)
1077                 err = afinfo->tmpl_sort(dst, src, n);
1078         spin_unlock_bh(&xfrm_state_lock);
1079         xfrm_state_put_afinfo(afinfo);
1080         return err;
1081 }
1082 EXPORT_SYMBOL(xfrm_tmpl_sort);
1083
1084 int
1085 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1086                 unsigned short family)
1087 {
1088         int err = 0;
1089         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1090         if (!afinfo)
1091                 return -EAFNOSUPPORT;
1092
1093         spin_lock_bh(&xfrm_state_lock);
1094         if (afinfo->state_sort)
1095                 err = afinfo->state_sort(dst, src, n);
1096         spin_unlock_bh(&xfrm_state_lock);
1097         xfrm_state_put_afinfo(afinfo);
1098         return err;
1099 }
1100 EXPORT_SYMBOL(xfrm_state_sort);
1101 #endif
1102
1103 /* Silly enough, but I'm too lazy to build a resolution list */
1104
1105 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1106 {
1107         int i;
1108
1109         for (i = 0; i <= xfrm_state_hmask; i++) {
1110                 struct hlist_node *entry;
1111                 struct xfrm_state *x;
1112
1113                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1114                         if (x->km.seq == seq &&
1115                             x->km.state == XFRM_STATE_ACQ) {
1116                                 xfrm_state_hold(x);
1117                                 return x;
1118                         }
1119                 }
1120         }
1121         return NULL;
1122 }
1123
1124 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1125 {
1126         struct xfrm_state *x;
1127
1128         spin_lock_bh(&xfrm_state_lock);
1129         x = __xfrm_find_acq_byseq(seq);
1130         spin_unlock_bh(&xfrm_state_lock);
1131         return x;
1132 }
1133 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1134
1135 u32 xfrm_get_acqseq(void)
1136 {
1137         u32 res;
1138         static u32 acqseq;
1139         static DEFINE_SPINLOCK(acqseq_lock);
1140
1141         spin_lock_bh(&acqseq_lock);
1142         res = (++acqseq ? : ++acqseq);
1143         spin_unlock_bh(&acqseq_lock);
1144         return res;
1145 }
1146 EXPORT_SYMBOL(xfrm_get_acqseq);
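/* xfrm_get_acqseq() above relies on the GNU "?:" extension: if ++acqseq
 * wraps to zero it is incremented once more, so the returned acquire
 * sequence number is never 0.
 */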
1147
1148 void
1149 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
1150 {
1151         unsigned int h;
1152         struct xfrm_state *x0;
1153
1154         if (x->id.spi)
1155                 return;
1156
1157         if (minspi == maxspi) {
1158                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1159                 if (x0) {
1160                         xfrm_state_put(x0);
1161                         return;
1162                 }
1163                 x->id.spi = minspi;
1164         } else {
1165                 u32 spi = 0;
1166                 minspi = ntohl(minspi);
1167                 maxspi = ntohl(maxspi);
1168                 for (h=0; h<maxspi-minspi+1; h++) {
1169                         spi = minspi + net_random()%(maxspi-minspi+1);
1170                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1171                         if (x0 == NULL) {
1172                                 x->id.spi = htonl(spi);
1173                                 break;
1174                         }
1175                         xfrm_state_put(x0);
1176                 }
1177         }
1178         if (x->id.spi) {
1179                 spin_lock_bh(&xfrm_state_lock);
1180                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1181                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1182                 xfrm_state_hold(x);
1183                 spin_unlock_bh(&xfrm_state_lock);
1184                 wake_up(&km_waitq);
1185         }
1186 }
1187 EXPORT_SYMBOL(xfrm_alloc_spi);
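/* xfrm_alloc_spi() above: minspi == maxspi requests one specific SPI,
 * used only if no existing state already owns it; otherwise up to
 * maxspi - minspi + 1 random probes are made inside the range and the
 * first unused value wins. On success the state is linked into the
 * by-SPI hash and km_waitq is woken.
 */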
1188
1189 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1190                     void *data)
1191 {
1192         int i;
1193         struct xfrm_state *x;
1194         struct hlist_node *entry;
1195         int count = 0;
1196         int err = 0;
1197
1198         spin_lock_bh(&xfrm_state_lock);
1199         for (i = 0; i <= xfrm_state_hmask; i++) {
1200                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1201                         if (xfrm_id_proto_match(x->id.proto, proto))
1202                                 count++;
1203                 }
1204         }
1205         if (count == 0) {
1206                 err = -ENOENT;
1207                 goto out;
1208         }
1209
1210         for (i = 0; i <= xfrm_state_hmask; i++) {
1211                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1212                         if (!xfrm_id_proto_match(x->id.proto, proto))
1213                                 continue;
1214                         err = func(x, --count, data);
1215                         if (err)
1216                                 goto out;
1217                 }
1218         }
1219 out:
1220         spin_unlock_bh(&xfrm_state_lock);
1221         return err;
1222 }
1223 EXPORT_SYMBOL(xfrm_state_walk);
1224
1225
1226 void xfrm_replay_notify(struct xfrm_state *x, int event)
1227 {
1228         struct km_event c;
1229         /* we send notify messages in case
1230          *  1. we updated one of the sequence numbers, and the seqno difference
1231          *     is at least x->replay_maxdiff, in this case we also update the
1232          *     timeout of our timer function
1233          *  2. if x->replay_maxage has elapsed since last update,
1234          *     and there were changes
1235          *
1236          *  The state structure must be locked!
1237          */
1238
1239         switch (event) {
1240         case XFRM_REPLAY_UPDATE:
1241                 if (x->replay_maxdiff &&
1242                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1243                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1244                         if (x->xflags & XFRM_TIME_DEFER)
1245                                 event = XFRM_REPLAY_TIMEOUT;
1246                         else
1247                                 return;
1248                 }
1249
1250                 break;
1251
1252         case XFRM_REPLAY_TIMEOUT:
1253                 if ((x->replay.seq == x->preplay.seq) &&
1254                     (x->replay.bitmap == x->preplay.bitmap) &&
1255                     (x->replay.oseq == x->preplay.oseq)) {
1256                         x->xflags |= XFRM_TIME_DEFER;
1257                         return;
1258                 }
1259
1260                 break;
1261         }
1262
1263         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1264         c.event = XFRM_MSG_NEWAE;
1265         c.data.aevent = event;
1266         km_state_notify(x, &c);
1267
1268         if (x->replay_maxage &&
1269             !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
1270                 xfrm_state_hold(x);
1271                 x->xflags &= ~XFRM_TIME_DEFER;
1272         }
1273 }
1274 EXPORT_SYMBOL(xfrm_replay_notify);
1275
1276 static void xfrm_replay_timer_handler(unsigned long data)
1277 {
1278         struct xfrm_state *x = (struct xfrm_state*)data;
1279
1280         spin_lock(&x->lock);
1281
1282         if (x->km.state == XFRM_STATE_VALID) {
1283                 if (xfrm_aevent_is_on())
1284                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1285                 else
1286                         x->xflags |= XFRM_TIME_DEFER;
1287         }
1288
1289         spin_unlock(&x->lock);
1290         xfrm_state_put(x);
1291 }
1292
1293 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
1294 {
1295         u32 diff;
1296
1297         seq = ntohl(seq);
1298
1299         if (unlikely(seq == 0))
1300                 return -EINVAL;
1301
1302         if (likely(seq > x->replay.seq))
1303                 return 0;
1304
1305         diff = x->replay.seq - seq;
1306         if (diff >= x->props.replay_window) {
1307                 x->stats.replay_window++;
1308                 return -EINVAL;
1309         }
1310
1311         if (x->replay.bitmap & (1U << diff)) {
1312                 x->stats.replay++;
1313                 return -EINVAL;
1314         }
1315         return 0;
1316 }
1317 EXPORT_SYMBOL(xfrm_replay_check);
1318
1319 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
1320 {
1321         u32 diff;
1322
1323         seq = ntohl(seq);
1324
1325         if (seq > x->replay.seq) {
1326                 diff = seq - x->replay.seq;
1327                 if (diff < x->props.replay_window)
1328                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1329                 else
1330                         x->replay.bitmap = 1;
1331                 x->replay.seq = seq;
1332         } else {
1333                 diff = x->replay.seq - seq;
1334                 x->replay.bitmap |= (1U << diff);
1335         }
1336
1337         if (xfrm_aevent_is_on())
1338                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1339 }
1340 EXPORT_SYMBOL(xfrm_replay_advance);
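/* Replay window illustration for the two functions above, assuming a
 * 32-entry window: with replay.seq == 100, an incoming seq of 101 shifts
 * the bitmap left by one and sets bit 0; a seq of 90 only sets bit 10;
 * a seq of 60 (diff >= replay_window) or a repeat of a seq whose bit is
 * already set is rejected by xfrm_replay_check().
 */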
1341
1342 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1343 static DEFINE_RWLOCK(xfrm_km_lock);
1344
1345 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1346 {
1347         struct xfrm_mgr *km;
1348
1349         read_lock(&xfrm_km_lock);
1350         list_for_each_entry(km, &xfrm_km_list, list)
1351                 if (km->notify_policy)
1352                         km->notify_policy(xp, dir, c);
1353         read_unlock(&xfrm_km_lock);
1354 }
1355
1356 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1357 {
1358         struct xfrm_mgr *km;
1359         read_lock(&xfrm_km_lock);
1360         list_for_each_entry(km, &xfrm_km_list, list)
1361                 if (km->notify)
1362                         km->notify(x, c);
1363         read_unlock(&xfrm_km_lock);
1364 }
1365
1366 EXPORT_SYMBOL(km_policy_notify);
1367 EXPORT_SYMBOL(km_state_notify);
1368
1369 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1370 {
1371         struct km_event c;
1372
1373         c.data.hard = hard;
1374         c.pid = pid;
1375         c.event = XFRM_MSG_EXPIRE;
1376         km_state_notify(x, &c);
1377
1378         if (hard)
1379                 wake_up(&km_waitq);
1380 }
1381
1382 EXPORT_SYMBOL(km_state_expired);
1383 /*
1384  * We send to all registered managers regardless of failure;
1385  * we are happy with one success.
1386  */
1387 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1388 {
1389         int err = -EINVAL, acqret;
1390         struct xfrm_mgr *km;
1391
1392         read_lock(&xfrm_km_lock);
1393         list_for_each_entry(km, &xfrm_km_list, list) {
1394                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1395                 if (!acqret)
1396                         err = acqret;
1397         }
1398         read_unlock(&xfrm_km_lock);
1399         return err;
1400 }
1401 EXPORT_SYMBOL(km_query);
1402
1403 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1404 {
1405         int err = -EINVAL;
1406         struct xfrm_mgr *km;
1407
1408         read_lock(&xfrm_km_lock);
1409         list_for_each_entry(km, &xfrm_km_list, list) {
1410                 if (km->new_mapping)
1411                         err = km->new_mapping(x, ipaddr, sport);
1412                 if (!err)
1413                         break;
1414         }
1415         read_unlock(&xfrm_km_lock);
1416         return err;
1417 }
1418 EXPORT_SYMBOL(km_new_mapping);
1419
1420 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1421 {
1422         struct km_event c;
1423
1424         c.data.hard = hard;
1425         c.pid = pid;
1426         c.event = XFRM_MSG_POLEXPIRE;
1427         km_policy_notify(pol, dir, &c);
1428
1429         if (hard)
1430                 wake_up(&km_waitq);
1431 }
1432 EXPORT_SYMBOL(km_policy_expired);
1433
1434 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1435 {
1436         int err = -EINVAL;
1437         int ret;
1438         struct xfrm_mgr *km;
1439
1440         read_lock(&xfrm_km_lock);
1441         list_for_each_entry(km, &xfrm_km_list, list) {
1442                 if (km->report) {
1443                         ret = km->report(proto, sel, addr);
1444                         if (!ret)
1445                                 err = ret;
1446                 }
1447         }
1448         read_unlock(&xfrm_km_lock);
1449         return err;
1450 }
1451 EXPORT_SYMBOL(km_report);
1452
1453 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1454 {
1455         int err;
1456         u8 *data;
1457         struct xfrm_mgr *km;
1458         struct xfrm_policy *pol = NULL;
1459
1460         if (optlen <= 0 || optlen > PAGE_SIZE)
1461                 return -EMSGSIZE;
1462
1463         data = kmalloc(optlen, GFP_KERNEL);
1464         if (!data)
1465                 return -ENOMEM;
1466
1467         err = -EFAULT;
1468         if (copy_from_user(data, optval, optlen))
1469                 goto out;
1470
1471         err = -EINVAL;
1472         read_lock(&xfrm_km_lock);
1473         list_for_each_entry(km, &xfrm_km_list, list) {
1474                 pol = km->compile_policy(sk, optname, data,
1475                                          optlen, &err);
1476                 if (err >= 0)
1477                         break;
1478         }
1479         read_unlock(&xfrm_km_lock);
1480
1481         if (err >= 0) {
1482                 xfrm_sk_policy_insert(sk, err, pol);
1483                 xfrm_pol_put(pol);
1484                 err = 0;
1485         }
1486
1487 out:
1488         kfree(data);
1489         return err;
1490 }
1491 EXPORT_SYMBOL(xfrm_user_policy);
1492
1493 int xfrm_register_km(struct xfrm_mgr *km)
1494 {
1495         write_lock_bh(&xfrm_km_lock);
1496         list_add_tail(&km->list, &xfrm_km_list);
1497         write_unlock_bh(&xfrm_km_lock);
1498         return 0;
1499 }
1500 EXPORT_SYMBOL(xfrm_register_km);
1501
1502 int xfrm_unregister_km(struct xfrm_mgr *km)
1503 {
1504         write_lock_bh(&xfrm_km_lock);
1505         list_del(&km->list);
1506         write_unlock_bh(&xfrm_km_lock);
1507         return 0;
1508 }
1509 EXPORT_SYMBOL(xfrm_unregister_km);
1510
1511 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1512 {
1513         int err = 0;
1514         if (unlikely(afinfo == NULL))
1515                 return -EINVAL;
1516         if (unlikely(afinfo->family >= NPROTO))
1517                 return -EAFNOSUPPORT;
1518         write_lock_bh(&xfrm_state_afinfo_lock);
1519         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1520                 err = -ENOBUFS;
1521         else
1522                 xfrm_state_afinfo[afinfo->family] = afinfo;
1523         write_unlock_bh(&xfrm_state_afinfo_lock);
1524         return err;
1525 }
1526 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1527
1528 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1529 {
1530         int err = 0;
1531         if (unlikely(afinfo == NULL))
1532                 return -EINVAL;
1533         if (unlikely(afinfo->family >= NPROTO))
1534                 return -EAFNOSUPPORT;
1535         write_lock_bh(&xfrm_state_afinfo_lock);
1536         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1537                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1538                         err = -EINVAL;
1539                 else
1540                         xfrm_state_afinfo[afinfo->family] = NULL;
1541         }
1542         write_unlock_bh(&xfrm_state_afinfo_lock);
1543         return err;
1544 }
1545 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1546
1547 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1548 {
1549         struct xfrm_state_afinfo *afinfo;
1550         if (unlikely(family >= NPROTO))
1551                 return NULL;
1552         read_lock(&xfrm_state_afinfo_lock);
1553         afinfo = xfrm_state_afinfo[family];
1554         if (unlikely(!afinfo))
1555                 read_unlock(&xfrm_state_afinfo_lock);
1556         return afinfo;
1557 }
1558
1559 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1560 {
1561         read_unlock(&xfrm_state_afinfo_lock);
1562 }
1563
1564 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1565 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1566 {
1567         if (x->tunnel) {
1568                 struct xfrm_state *t = x->tunnel;
1569
1570                 if (atomic_read(&t->tunnel_users) == 2)
1571                         xfrm_state_delete(t);
1572                 atomic_dec(&t->tunnel_users);
1573                 xfrm_state_put(t);
1574                 x->tunnel = NULL;
1575         }
1576 }
1577 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1578
1579 /*
1580  * This function is NOT optimal.  For example, with ESP it will give an
1581  * MTU that's usually two bytes short of being optimal.  However, it will
1582  * usually give an answer that's a multiple of 4 provided the input is
1583  * also a multiple of 4.
1584  */
1585 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1586 {
1587         int res = mtu;
1588
1589         res -= x->props.header_len;
1590
1591         for (;;) {
1592                 int m = res;
1593
1594                 if (m < 68)
1595                         return 68;
1596
1597                 spin_lock_bh(&x->lock);
1598                 if (x->km.state == XFRM_STATE_VALID &&
1599                     x->type && x->type->get_max_size)
1600                         m = x->type->get_max_size(x, m);
1601                 else
1602                         m += x->props.header_len;
1603                 spin_unlock_bh(&x->lock);
1604
1605                 if (m <= mtu)
1606                         break;
1607                 res -= (m - mtu);
1608         }
1609
1610         return res;
1611 }
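/* Sketch of the loop above: starting from mtu minus the header length, it
 * repeatedly asks the transform (x->type->get_max_size) how large the
 * packet would grow and shrinks the candidate payload until the
 * transformed size no longer exceeds the original mtu, never returning
 * less than 68 (the minimum IPv4 MTU).
 */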
1612
1613 int xfrm_init_state(struct xfrm_state *x)
1614 {
1615         struct xfrm_state_afinfo *afinfo;
1616         int family = x->props.family;
1617         int err;
1618
1619         err = -EAFNOSUPPORT;
1620         afinfo = xfrm_state_get_afinfo(family);
1621         if (!afinfo)
1622                 goto error;
1623
1624         err = 0;
1625         if (afinfo->init_flags)
1626                 err = afinfo->init_flags(x);
1627
1628         xfrm_state_put_afinfo(afinfo);
1629
1630         if (err)
1631                 goto error;
1632
1633         err = -EPROTONOSUPPORT;
1634         x->type = xfrm_get_type(x->id.proto, family);
1635         if (x->type == NULL)
1636                 goto error;
1637
1638         err = x->type->init_state(x);
1639         if (err)
1640                 goto error;
1641
1642         x->mode = xfrm_get_mode(x->props.mode, family);
1643         if (x->mode == NULL)
1644                 goto error;
1645
1646         x->km.state = XFRM_STATE_VALID;
1647
1648 error:
1649         return err;
1650 }
1651
1652 EXPORT_SYMBOL(xfrm_init_state);
1653  
1654 void __init xfrm_state_init(void)
1655 {
1656         unsigned int sz;
1657
1658         sz = sizeof(struct hlist_head) * 8;
1659
1660         xfrm_state_bydst = xfrm_state_hash_alloc(sz);
1661         xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
1662         xfrm_state_byspi = xfrm_state_hash_alloc(sz);
1663         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1664                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1665         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1666
1667         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1668 }
1669