[XFRM]: Convert xfrm_state hash linkage to hlists.
net/xfrm/xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <asm/uaccess.h>
22
23 struct sock *xfrm_nl;
24 EXPORT_SYMBOL(xfrm_nl);
25
26 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
27 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
28
29 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
31
32 /* Each xfrm_state may be linked to three tables:
33
34    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
35    2. Hash table by daddr to find what SAs exist for given
36       destination/tunnel endpoint. (output)
37    3. Hash table by saddr to find SAs by source address.  */
38
39 static DEFINE_SPINLOCK(xfrm_state_lock);
40
41 #define XFRM_DST_HSIZE          1024
42
43 /* Hash table to find the appropriate SA towards a given target (endpoint
44  * of a tunnel or destination of transport mode) allowed by the selector.
45  *
46  * Main use is finding an SA after the policy selected tunnel or transport mode.
47  * It can also be used by the ah/esp icmp error handler to find the offending SA.
48  */
49 static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE];
50 static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE];
51 static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE];
52
53 static __inline__
54 unsigned __xfrm4_dst_hash(xfrm_address_t *addr)
55 {
56         unsigned h;
57         h = ntohl(addr->a4);
58         h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
59         return h;
60 }
61
62 static __inline__
63 unsigned __xfrm6_dst_hash(xfrm_address_t *addr)
64 {
65         unsigned h;
66         h = ntohl(addr->a6[2]^addr->a6[3]);
67         h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
68         return h;
69 }
70
71 static __inline__
72 unsigned __xfrm4_src_hash(xfrm_address_t *addr)
73 {
74         return __xfrm4_dst_hash(addr);
75 }
76
77 static __inline__
78 unsigned __xfrm6_src_hash(xfrm_address_t *addr)
79 {
80         return __xfrm6_dst_hash(addr);
81 }
82
83 static __inline__
84 unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
85 {
86         switch (family) {
87         case AF_INET:
88                 return __xfrm4_src_hash(addr);
89         case AF_INET6:
90                 return __xfrm6_src_hash(addr);
91         }
92         return 0;
93 }
94
95 static __inline__
96 unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
97 {
98         switch (family) {
99         case AF_INET:
100                 return __xfrm4_dst_hash(addr);
101         case AF_INET6:
102                 return __xfrm6_dst_hash(addr);
103         }
104         return 0;
105 }
106
107 static __inline__
108 unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
109 {
110         unsigned h;
111         h = ntohl(addr->a4^spi^proto);
112         h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
113         return h;
114 }
115
116 static __inline__
117 unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
118 {
119         unsigned h;
120         h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
121         h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
122         return h;
123 }
124
125 static __inline__
126 unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
127 {
128         switch (family) {
129         case AF_INET:
130                 return __xfrm4_spi_hash(addr, spi, proto);
131         case AF_INET6:
132                 return __xfrm6_spi_hash(addr, spi, proto);
133         }
134         return 0;       /*XXX*/
135 }
136
137 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
138 EXPORT_SYMBOL(km_waitq);
139
140 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
141 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
142
143 static struct work_struct xfrm_state_gc_work;
144 static HLIST_HEAD(xfrm_state_gc_list);
145 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
146
147 static int xfrm_state_gc_flush_bundles;
148
149 int __xfrm_state_delete(struct xfrm_state *x);
150
151 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
152 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
153
154 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
155 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
156
157 static void xfrm_state_gc_destroy(struct xfrm_state *x)
158 {
159         if (del_timer(&x->timer))
160                 BUG();
161         if (del_timer(&x->rtimer))
162                 BUG();
163         kfree(x->aalg);
164         kfree(x->ealg);
165         kfree(x->calg);
166         kfree(x->encap);
167         kfree(x->coaddr);
168         if (x->mode)
169                 xfrm_put_mode(x->mode);
170         if (x->type) {
171                 x->type->destructor(x);
172                 xfrm_put_type(x->type);
173         }
174         security_xfrm_state_free(x);
175         kfree(x);
176 }
177
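/* Work queue handler: splice the pending gc list off under the gc lock,
 * then destroy each dead state outside of it.  Flushing of stale bundles
 * is piggy-backed here when __xfrm_state_delete() has requested it.
 */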
178 static void xfrm_state_gc_task(void *data)
179 {
180         struct xfrm_state *x;
181         struct hlist_node *entry, *tmp;
182         struct hlist_head gc_list;
183
184         if (xfrm_state_gc_flush_bundles) {
185                 xfrm_state_gc_flush_bundles = 0;
186                 xfrm_flush_bundles();
187         }
188
189         spin_lock_bh(&xfrm_state_gc_lock);
190         gc_list.first = xfrm_state_gc_list.first;
191         INIT_HLIST_HEAD(&xfrm_state_gc_list);
192         spin_unlock_bh(&xfrm_state_gc_lock);
193
194         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
195                 xfrm_state_gc_destroy(x);
196
197         wake_up(&km_waitq);
198 }
199
200 static inline unsigned long make_jiffies(long secs)
201 {
202         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
203                 return MAX_SCHEDULE_TIMEOUT-1;
204         else
205                 return secs*HZ;
206 }
207
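/* Per-state lifetime timer.  Hard add/use expiry kills the state (larval
 * ACQ entries are only marked EXPIRED), while soft expiry just marks it
 * as dying and notifies the key managers via km_state_expired().
 */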
208 static void xfrm_timer_handler(unsigned long data)
209 {
210         struct xfrm_state *x = (struct xfrm_state*)data;
211         unsigned long now = (unsigned long)xtime.tv_sec;
212         long next = LONG_MAX;
213         int warn = 0;
214
215         spin_lock(&x->lock);
216         if (x->km.state == XFRM_STATE_DEAD)
217                 goto out;
218         if (x->km.state == XFRM_STATE_EXPIRED)
219                 goto expired;
220         if (x->lft.hard_add_expires_seconds) {
221                 long tmo = x->lft.hard_add_expires_seconds +
222                         x->curlft.add_time - now;
223                 if (tmo <= 0)
224                         goto expired;
225                 if (tmo < next)
226                         next = tmo;
227         }
228         if (x->lft.hard_use_expires_seconds) {
229                 long tmo = x->lft.hard_use_expires_seconds +
230                         (x->curlft.use_time ? : now) - now;
231                 if (tmo <= 0)
232                         goto expired;
233                 if (tmo < next)
234                         next = tmo;
235         }
236         if (x->km.dying)
237                 goto resched;
238         if (x->lft.soft_add_expires_seconds) {
239                 long tmo = x->lft.soft_add_expires_seconds +
240                         x->curlft.add_time - now;
241                 if (tmo <= 0)
242                         warn = 1;
243                 else if (tmo < next)
244                         next = tmo;
245         }
246         if (x->lft.soft_use_expires_seconds) {
247                 long tmo = x->lft.soft_use_expires_seconds +
248                         (x->curlft.use_time ? : now) - now;
249                 if (tmo <= 0)
250                         warn = 1;
251                 else if (tmo < next)
252                         next = tmo;
253         }
254
255         x->km.dying = warn;
256         if (warn)
257                 km_state_expired(x, 0, 0);
258 resched:
259         if (next != LONG_MAX &&
260             !mod_timer(&x->timer, jiffies + make_jiffies(next)))
261                 xfrm_state_hold(x);
262         goto out;
263
264 expired:
265         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
266                 x->km.state = XFRM_STATE_EXPIRED;
267                 wake_up(&km_waitq);
268                 next = 2;
269                 goto resched;
270         }
271         if (!__xfrm_state_delete(x) && x->id.spi)
272                 km_state_expired(x, 1, 0);
273
274 out:
275         spin_unlock(&x->lock);
276         xfrm_state_put(x);
277 }
278
279 static void xfrm_replay_timer_handler(unsigned long data);
280
281 struct xfrm_state *xfrm_state_alloc(void)
282 {
283         struct xfrm_state *x;
284
285         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
286
287         if (x) {
288                 atomic_set(&x->refcnt, 1);
289                 atomic_set(&x->tunnel_users, 0);
290                 INIT_HLIST_NODE(&x->bydst);
291                 INIT_HLIST_NODE(&x->bysrc);
292                 INIT_HLIST_NODE(&x->byspi);
293                 init_timer(&x->timer);
294                 x->timer.function = xfrm_timer_handler;
295                 x->timer.data     = (unsigned long)x;
296                 init_timer(&x->rtimer);
297                 x->rtimer.function = xfrm_replay_timer_handler;
298                 x->rtimer.data     = (unsigned long)x;
299                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
300                 x->lft.soft_byte_limit = XFRM_INF;
301                 x->lft.soft_packet_limit = XFRM_INF;
302                 x->lft.hard_byte_limit = XFRM_INF;
303                 x->lft.hard_packet_limit = XFRM_INF;
304                 x->replay_maxage = 0;
305                 x->replay_maxdiff = 0;
306                 spin_lock_init(&x->lock);
307         }
308         return x;
309 }
310 EXPORT_SYMBOL(xfrm_state_alloc);
311
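/* Freeing is deferred to the gc work queue: the dead state is queued on
 * xfrm_state_gc_list (reusing its now-unlinked bydst node) and destroyed
 * later by xfrm_state_gc_task().
 */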
312 void __xfrm_state_destroy(struct xfrm_state *x)
313 {
314         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
315
316         spin_lock_bh(&xfrm_state_gc_lock);
317         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
318         spin_unlock_bh(&xfrm_state_gc_lock);
319         schedule_work(&xfrm_state_gc_work);
320 }
321 EXPORT_SYMBOL(__xfrm_state_destroy);
322
323 int __xfrm_state_delete(struct xfrm_state *x)
324 {
325         int err = -ESRCH;
326
327         if (x->km.state != XFRM_STATE_DEAD) {
328                 x->km.state = XFRM_STATE_DEAD;
329                 spin_lock(&xfrm_state_lock);
330                 hlist_del(&x->bydst);
331                 __xfrm_state_put(x);
332                 hlist_del(&x->bysrc);
333                 __xfrm_state_put(x);
334                 if (x->id.spi) {
335                         hlist_del(&x->byspi);
336                         __xfrm_state_put(x);
337                 }
338                 spin_unlock(&xfrm_state_lock);
339                 if (del_timer(&x->timer))
340                         __xfrm_state_put(x);
341                 if (del_timer(&x->rtimer))
342                         __xfrm_state_put(x);
343
344                 /* The number two in this test is the reference
345                  * mentioned in the comment below plus the reference
346                  * our caller holds.  A larger value means that
347                  * there are DSTs attached to this xfrm_state.
348                  */
349                 if (atomic_read(&x->refcnt) > 2) {
350                         xfrm_state_gc_flush_bundles = 1;
351                         schedule_work(&xfrm_state_gc_work);
352                 }
353
354                 /* All xfrm_state objects are created by xfrm_state_alloc.
355                  * The xfrm_state_alloc call gives a reference, and that
356                  * is what we are dropping here.
357                  */
358                 __xfrm_state_put(x);
359                 err = 0;
360         }
361
362         return err;
363 }
364 EXPORT_SYMBOL(__xfrm_state_delete);
365
366 int xfrm_state_delete(struct xfrm_state *x)
367 {
368         int err;
369
370         spin_lock_bh(&x->lock);
371         err = __xfrm_state_delete(x);
372         spin_unlock_bh(&x->lock);
373
374         return err;
375 }
376 EXPORT_SYMBOL(xfrm_state_delete);
377
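/* Delete every non-kernel state whose protocol matches proto.  The state
 * lock is dropped around each xfrm_state_delete() call, so the scan of
 * the current bucket is restarted afterwards.
 */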
378 void xfrm_state_flush(u8 proto)
379 {
380         int i;
381
382         spin_lock_bh(&xfrm_state_lock);
383         for (i = 0; i < XFRM_DST_HSIZE; i++) {
384                 struct hlist_node *entry;
385                 struct xfrm_state *x;
386 restart:
387                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
388                         if (!xfrm_state_kern(x) &&
389                             xfrm_id_proto_match(x->id.proto, proto)) {
390                                 xfrm_state_hold(x);
391                                 spin_unlock_bh(&xfrm_state_lock);
392
393                                 xfrm_state_delete(x);
394                                 xfrm_state_put(x);
395
396                                 spin_lock_bh(&xfrm_state_lock);
397                                 goto restart;
398                         }
399                 }
400         }
401         spin_unlock_bh(&xfrm_state_lock);
402         wake_up(&km_waitq);
403 }
404 EXPORT_SYMBOL(xfrm_state_flush);
405
406 static int
407 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
408                   struct xfrm_tmpl *tmpl,
409                   xfrm_address_t *daddr, xfrm_address_t *saddr,
410                   unsigned short family)
411 {
412         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
413         if (!afinfo)
414                 return -1;
415         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
416         xfrm_state_put_afinfo(afinfo);
417         return 0;
418 }
419
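/* Look up a state by (daddr, spi, proto) in the byspi hash.  Called with
 * xfrm_state_lock held; returns a held reference or NULL.
 */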
420 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
421 {
422         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
423         struct xfrm_state *x;
424         struct hlist_node *entry;
425
426         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
427                 if (x->props.family != family ||
428                     x->id.spi       != spi ||
429                     x->id.proto     != proto)
430                         continue;
431
432                 switch (family) {
433                 case AF_INET:
434                         if (x->id.daddr.a4 != daddr->a4)
435                                 continue;
436                         break;
437                 case AF_INET6:
438                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
439                                              (struct in6_addr *)
440                                              x->id.daddr.a6))
441                                 continue;
442                         break;
443                 }
444
445                 xfrm_state_hold(x);
446                 return x;
447         }
448
449         return NULL;
450 }
451
452 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
453 {
454         unsigned int h = xfrm_src_hash(saddr, family);
455         struct xfrm_state *x;
456         struct hlist_node *entry;
457
458         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
459                 if (x->props.family != family ||
460                     x->id.proto     != proto)
461                         continue;
462
463                 switch (family) {
464                 case AF_INET:
465                         if (x->id.daddr.a4 != daddr->a4 ||
466                             x->props.saddr.a4 != saddr->a4)
467                                 continue;
468                         break;
469                 case AF_INET6:
470                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
471                                              (struct in6_addr *)
472                                              x->id.daddr.a6) ||
473                             !ipv6_addr_equal((struct in6_addr *)saddr,
474                                              (struct in6_addr *)
475                                              x->props.saddr.a6))
476                                 continue;
477                         break;
478                 }
479
480                 xfrm_state_hold(x);
481                 return x;
482         }
483
484         return NULL;
485 }
486
487 static inline struct xfrm_state *
488 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
489 {
490         if (use_spi)
491                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
492                                            x->id.proto, family);
493         else
494                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
495                                                   &x->props.saddr,
496                                                   x->id.proto, family);
497 }
498
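/* Find (or create) a state usable for output for the given template and
 * policy: pick the best matching VALID state from the bydst hash; if none
 * exists and no acquire is in progress, allocate a larval XFRM_STATE_ACQ
 * entry and ask the key managers (km_query) to negotiate a real SA.
 */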
499 struct xfrm_state *
500 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
501                 struct flowi *fl, struct xfrm_tmpl *tmpl,
502                 struct xfrm_policy *pol, int *err,
503                 unsigned short family)
504 {
505         unsigned int h = xfrm_dst_hash(daddr, family);
506         struct hlist_node *entry;
507         struct xfrm_state *x, *x0;
508         int acquire_in_progress = 0;
509         int error = 0;
510         struct xfrm_state *best = NULL;
511         
512         spin_lock_bh(&xfrm_state_lock);
513         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
514                 if (x->props.family == family &&
515                     x->props.reqid == tmpl->reqid &&
516                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
517                     xfrm_state_addr_check(x, daddr, saddr, family) &&
518                     tmpl->mode == x->props.mode &&
519                     tmpl->id.proto == x->id.proto &&
520                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
521                         /* Resolution logic:
522                            1. There is a valid state with a matching selector.
523                               Done.
524                            2. Valid state with an inappropriate selector. Skip.
525
526                            Entering area of "sysdeps".
527
528                            3. If the state is not valid, its selector is
529                               temporary and matches only the session which
530                               triggered the previous resolution.  The key
531                               manager will do something to install a state
532                               with the proper selector.
533                          */
534                         if (x->km.state == XFRM_STATE_VALID) {
535                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
536                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
537                                         continue;
538                                 if (!best ||
539                                     best->km.dying > x->km.dying ||
540                                     (best->km.dying == x->km.dying &&
541                                      best->curlft.add_time < x->curlft.add_time))
542                                         best = x;
543                         } else if (x->km.state == XFRM_STATE_ACQ) {
544                                 acquire_in_progress = 1;
545                         } else if (x->km.state == XFRM_STATE_ERROR ||
546                                    x->km.state == XFRM_STATE_EXPIRED) {
547                                 if (xfrm_selector_match(&x->sel, fl, family) &&
548                                     security_xfrm_state_pol_flow_match(x, pol, fl))
549                                         error = -ESRCH;
550                         }
551                 }
552         }
553
554         x = best;
555         if (!x && !error && !acquire_in_progress) {
556                 if (tmpl->id.spi &&
557                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
558                                               tmpl->id.proto, family)) != NULL) {
559                         xfrm_state_put(x0);
560                         error = -EEXIST;
561                         goto out;
562                 }
563                 x = xfrm_state_alloc();
564                 if (x == NULL) {
565                         error = -ENOMEM;
566                         goto out;
567                 }
568                 /* Initialize temporary selector matching only
569                  * to current session. */
570                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
571
572                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
573                 if (error) {
574                         x->km.state = XFRM_STATE_DEAD;
575                         xfrm_state_put(x);
576                         x = NULL;
577                         goto out;
578                 }
579
580                 if (km_query(x, tmpl, pol) == 0) {
581                         x->km.state = XFRM_STATE_ACQ;
582                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
583                         xfrm_state_hold(x);
584                         h = xfrm_src_hash(saddr, family);
585                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
586                         xfrm_state_hold(x);
587                         if (x->id.spi) {
588                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
589                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
590                                 xfrm_state_hold(x);
591                         }
592                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
593                         xfrm_state_hold(x);
594                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
595                         add_timer(&x->timer);
596                 } else {
597                         x->km.state = XFRM_STATE_DEAD;
598                         xfrm_state_put(x);
599                         x = NULL;
600                         error = -ESRCH;
601                 }
602         }
603 out:
604         if (x)
605                 xfrm_state_hold(x);
606         else
607                 *err = acquire_in_progress ? -EAGAIN : error;
608         spin_unlock_bh(&xfrm_state_lock);
609         return x;
610 }
611
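/* xfrm_state_lock is held.  Link the state into the bydst and bysrc
 * hashes and, for SPI-carrying protocols (AH/ESP/IPCOMP), into the byspi
 * hash, taking one reference per linkage, then arm the lifetime and
 * replay timers.
 */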
612 static void __xfrm_state_insert(struct xfrm_state *x)
613 {
614         unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
615
616         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
617         xfrm_state_hold(x);
618
619         h = xfrm_src_hash(&x->props.saddr, x->props.family);
620
621         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
622         xfrm_state_hold(x);
623
624         if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
625                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
626                                   x->props.family);
627
628                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
629                 xfrm_state_hold(x);
630         }
631
632         if (!mod_timer(&x->timer, jiffies + HZ))
633                 xfrm_state_hold(x);
634
635         if (x->replay_maxage &&
636             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
637                 xfrm_state_hold(x);
638
639         wake_up(&km_waitq);
640 }
641
642 void xfrm_state_insert(struct xfrm_state *x)
643 {
644         spin_lock_bh(&xfrm_state_lock);
645         __xfrm_state_insert(x);
646         spin_unlock_bh(&xfrm_state_lock);
647
648         xfrm_flush_all_bundles();
649 }
650 EXPORT_SYMBOL(xfrm_state_insert);
651
652 /* xfrm_state_lock is held */
653 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
654 {
655         unsigned int h = xfrm_dst_hash(daddr, family);
656         struct hlist_node *entry;
657         struct xfrm_state *x;
658
659         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
660                 if (x->props.reqid  != reqid ||
661                     x->props.mode   != mode ||
662                     x->props.family != family ||
663                     x->km.state     != XFRM_STATE_ACQ ||
664                     x->id.spi       != 0)
665                         continue;
666
667                 switch (family) {
668                 case AF_INET:
669                         if (x->id.daddr.a4    != daddr->a4 ||
670                             x->props.saddr.a4 != saddr->a4)
671                                 continue;
672                         break;
673                 case AF_INET6:
674                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
675                                              (struct in6_addr *)daddr) ||
676                             !ipv6_addr_equal((struct in6_addr *)
677                                              x->props.saddr.a6,
678                                              (struct in6_addr *)saddr))
679                                 continue;
680                         break;
681                 }
682
683                 xfrm_state_hold(x);
684                 return x;
685         }
686
687         if (!create)
688                 return NULL;
689
690         x = xfrm_state_alloc();
691         if (likely(x)) {
692                 switch (family) {
693                 case AF_INET:
694                         x->sel.daddr.a4 = daddr->a4;
695                         x->sel.saddr.a4 = saddr->a4;
696                         x->sel.prefixlen_d = 32;
697                         x->sel.prefixlen_s = 32;
698                         x->props.saddr.a4 = saddr->a4;
699                         x->id.daddr.a4 = daddr->a4;
700                         break;
701
702                 case AF_INET6:
703                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
704                                        (struct in6_addr *)daddr);
705                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
706                                        (struct in6_addr *)saddr);
707                         x->sel.prefixlen_d = 128;
708                         x->sel.prefixlen_s = 128;
709                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
710                                        (struct in6_addr *)saddr);
711                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
712                                        (struct in6_addr *)daddr);
713                         break;
714                 }
715
716                 x->km.state = XFRM_STATE_ACQ;
717                 x->id.proto = proto;
718                 x->props.family = family;
719                 x->props.mode = mode;
720                 x->props.reqid = reqid;
721                 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
722                 xfrm_state_hold(x);
723                 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
724                 add_timer(&x->timer);
725                 xfrm_state_hold(x);
726                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
727                 h = xfrm_src_hash(saddr, family);
728                 xfrm_state_hold(x);
729                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
730                 wake_up(&km_waitq);
731         }
732
733         return x;
734 }
735
736 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
737
738 int xfrm_state_add(struct xfrm_state *x)
739 {
740         struct xfrm_state *x1;
741         int family;
742         int err;
743         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
744
745         family = x->props.family;
746
747         spin_lock_bh(&xfrm_state_lock);
748
749         x1 = __xfrm_state_locate(x, use_spi, family);
750         if (x1) {
751                 xfrm_state_put(x1);
752                 x1 = NULL;
753                 err = -EEXIST;
754                 goto out;
755         }
756
757         if (use_spi && x->km.seq) {
758                 x1 = __xfrm_find_acq_byseq(x->km.seq);
759                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
760                         xfrm_state_put(x1);
761                         x1 = NULL;
762                 }
763         }
764
765         if (use_spi && !x1)
766                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
767                                      x->id.proto,
768                                      &x->id.daddr, &x->props.saddr, 0);
769
770         __xfrm_state_insert(x);
771         err = 0;
772
773 out:
774         spin_unlock_bh(&xfrm_state_lock);
775
776         if (!err)
777                 xfrm_flush_all_bundles();
778
779         if (x1) {
780                 xfrm_state_delete(x1);
781                 xfrm_state_put(x1);
782         }
783
784         return err;
785 }
786 EXPORT_SYMBOL(xfrm_state_add);
787
788 int xfrm_state_update(struct xfrm_state *x)
789 {
790         struct xfrm_state *x1;
791         int err;
792         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
793
794         spin_lock_bh(&xfrm_state_lock);
795         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
796
797         err = -ESRCH;
798         if (!x1)
799                 goto out;
800
801         if (xfrm_state_kern(x1)) {
802                 xfrm_state_put(x1);
803                 err = -EEXIST;
804                 goto out;
805         }
806
807         if (x1->km.state == XFRM_STATE_ACQ) {
808                 __xfrm_state_insert(x);
809                 x = NULL;
810         }
811         err = 0;
812
813 out:
814         spin_unlock_bh(&xfrm_state_lock);
815
816         if (err)
817                 return err;
818
819         if (!x) {
820                 xfrm_state_delete(x1);
821                 xfrm_state_put(x1);
822                 return 0;
823         }
824
825         err = -EINVAL;
826         spin_lock_bh(&x1->lock);
827         if (likely(x1->km.state == XFRM_STATE_VALID)) {
828                 if (x->encap && x1->encap)
829                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
830                 if (x->coaddr && x1->coaddr) {
831                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
832                 }
833                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
834                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
835                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
836                 x1->km.dying = 0;
837
838                 if (!mod_timer(&x1->timer, jiffies + HZ))
839                         xfrm_state_hold(x1);
840                 if (x1->curlft.use_time)
841                         xfrm_state_check_expire(x1);
842
843                 err = 0;
844         }
845         spin_unlock_bh(&x1->lock);
846
847         xfrm_state_put(x1);
848
849         return err;
850 }
851 EXPORT_SYMBOL(xfrm_state_update);
852
853 int xfrm_state_check_expire(struct xfrm_state *x)
854 {
855         if (!x->curlft.use_time)
856                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
857
858         if (x->km.state != XFRM_STATE_VALID)
859                 return -EINVAL;
860
861         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
862             x->curlft.packets >= x->lft.hard_packet_limit) {
863                 x->km.state = XFRM_STATE_EXPIRED;
864                 if (!mod_timer(&x->timer, jiffies))
865                         xfrm_state_hold(x);
866                 return -EINVAL;
867         }
868
869         if (!x->km.dying &&
870             (x->curlft.bytes >= x->lft.soft_byte_limit ||
871              x->curlft.packets >= x->lft.soft_packet_limit)) {
872                 x->km.dying = 1;
873                 km_state_expired(x, 0, 0);
874         }
875         return 0;
876 }
877 EXPORT_SYMBOL(xfrm_state_check_expire);
878
879 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
880 {
881         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
882                 - skb_headroom(skb);
883
884         if (nhead > 0)
885                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
886
887         /* Check tail too... */
888         return 0;
889 }
890
891 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
892 {
893         int err = xfrm_state_check_expire(x);
894         if (err < 0)
895                 goto err;
896         err = xfrm_state_check_space(x, skb);
897 err:
898         return err;
899 }
900 EXPORT_SYMBOL(xfrm_state_check);
901
902 struct xfrm_state *
903 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
904                   unsigned short family)
905 {
906         struct xfrm_state *x;
907
908         spin_lock_bh(&xfrm_state_lock);
909         x = __xfrm_state_lookup(daddr, spi, proto, family);
910         spin_unlock_bh(&xfrm_state_lock);
911         return x;
912 }
913 EXPORT_SYMBOL(xfrm_state_lookup);
914
915 struct xfrm_state *
916 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
917                          u8 proto, unsigned short family)
918 {
919         struct xfrm_state *x;
920
921         spin_lock_bh(&xfrm_state_lock);
922         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
923         spin_unlock_bh(&xfrm_state_lock);
924         return x;
925 }
926 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
927
928 struct xfrm_state *
929 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
930               xfrm_address_t *daddr, xfrm_address_t *saddr, 
931               int create, unsigned short family)
932 {
933         struct xfrm_state *x;
934
935         spin_lock_bh(&xfrm_state_lock);
936         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
937         spin_unlock_bh(&xfrm_state_lock);
938
939         return x;
940 }
941 EXPORT_SYMBOL(xfrm_find_acq);
942
943 #ifdef CONFIG_XFRM_SUB_POLICY
944 int
945 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
946                unsigned short family)
947 {
948         int err = 0;
949         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
950         if (!afinfo)
951                 return -EAFNOSUPPORT;
952
953         spin_lock_bh(&xfrm_state_lock);
954         if (afinfo->tmpl_sort)
955                 err = afinfo->tmpl_sort(dst, src, n);
956         spin_unlock_bh(&xfrm_state_lock);
957         xfrm_state_put_afinfo(afinfo);
958         return err;
959 }
960 EXPORT_SYMBOL(xfrm_tmpl_sort);
961
962 int
963 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
964                 unsigned short family)
965 {
966         int err = 0;
967         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
968         if (!afinfo)
969                 return -EAFNOSUPPORT;
970
971         spin_lock_bh(&xfrm_state_lock);
972         if (afinfo->state_sort)
973                 err = afinfo->state_sort(dst, src, n);
974         spin_unlock_bh(&xfrm_state_lock);
975         xfrm_state_put_afinfo(afinfo);
976         return err;
977 }
978 EXPORT_SYMBOL(xfrm_state_sort);
979 #endif
980
981 /* Silly enough, but I'm too lazy to build a resolution list. */
982
983 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
984 {
985         int i;
986
987         for (i = 0; i < XFRM_DST_HSIZE; i++) {
988                 struct hlist_node *entry;
989                 struct xfrm_state *x;
990
991                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
992                         if (x->km.seq == seq &&
993                             x->km.state == XFRM_STATE_ACQ) {
994                                 xfrm_state_hold(x);
995                                 return x;
996                         }
997                 }
998         }
999         return NULL;
1000 }
1001
1002 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1003 {
1004         struct xfrm_state *x;
1005
1006         spin_lock_bh(&xfrm_state_lock);
1007         x = __xfrm_find_acq_byseq(seq);
1008         spin_unlock_bh(&xfrm_state_lock);
1009         return x;
1010 }
1011 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1012
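/* Hand out the next acquire sequence number; zero is skipped on wrap. */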
1013 u32 xfrm_get_acqseq(void)
1014 {
1015         u32 res;
1016         static u32 acqseq;
1017         static DEFINE_SPINLOCK(acqseq_lock);
1018
1019         spin_lock_bh(&acqseq_lock);
1020         res = (++acqseq ? : ++acqseq);
1021         spin_unlock_bh(&acqseq_lock);
1022         return res;
1023 }
1024 EXPORT_SYMBOL(xfrm_get_acqseq);
1025
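/* Assign an SPI in [minspi, maxspi] to the state.  A single-value range
 * is used directly if free; otherwise random values are probed until an
 * unused one is found.  On success the state is linked into the byspi
 * hash and km_waitq is woken.
 */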
1026 void
1027 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
1028 {
1029         u32 h;
1030         struct xfrm_state *x0;
1031
1032         if (x->id.spi)
1033                 return;
1034
1035         if (minspi == maxspi) {
1036                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1037                 if (x0) {
1038                         xfrm_state_put(x0);
1039                         return;
1040                 }
1041                 x->id.spi = minspi;
1042         } else {
1043                 u32 spi = 0;
1044                 minspi = ntohl(minspi);
1045                 maxspi = ntohl(maxspi);
1046                 for (h=0; h<maxspi-minspi+1; h++) {
1047                         spi = minspi + net_random()%(maxspi-minspi+1);
1048                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1049                         if (x0 == NULL) {
1050                                 x->id.spi = htonl(spi);
1051                                 break;
1052                         }
1053                         xfrm_state_put(x0);
1054                 }
1055         }
1056         if (x->id.spi) {
1057                 spin_lock_bh(&xfrm_state_lock);
1058                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1059                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1060                 xfrm_state_hold(x);
1061                 spin_unlock_bh(&xfrm_state_lock);
1062                 wake_up(&km_waitq);
1063         }
1064 }
1065 EXPORT_SYMBOL(xfrm_alloc_spi);
1066
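/* Invoke func for every state matching proto.  A first pass counts the
 * matches so that the callback receives a decreasing index (the last call
 * gets 0); -ENOENT is returned when nothing matches.
 */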
1067 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1068                     void *data)
1069 {
1070         int i;
1071         struct xfrm_state *x;
1072         struct hlist_node *entry;
1073         int count = 0;
1074         int err = 0;
1075
1076         spin_lock_bh(&xfrm_state_lock);
1077         for (i = 0; i < XFRM_DST_HSIZE; i++) {
1078                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1079                         if (xfrm_id_proto_match(x->id.proto, proto))
1080                                 count++;
1081                 }
1082         }
1083         if (count == 0) {
1084                 err = -ENOENT;
1085                 goto out;
1086         }
1087
1088         for (i = 0; i < XFRM_DST_HSIZE; i++) {
1089                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1090                         if (!xfrm_id_proto_match(x->id.proto, proto))
1091                                 continue;
1092                         err = func(x, --count, data);
1093                         if (err)
1094                                 goto out;
1095                 }
1096         }
1097 out:
1098         spin_unlock_bh(&xfrm_state_lock);
1099         return err;
1100 }
1101 EXPORT_SYMBOL(xfrm_state_walk);
1102
1103
1104 void xfrm_replay_notify(struct xfrm_state *x, int event)
1105 {
1106         struct km_event c;
1107         /* We send notify messages in case
1108          *  1. we updated one of the sequence numbers, and the seqno
1109          *     difference is at least x->replay_maxdiff; in this case we
1110          *     also update the timeout of our timer function
1111          *  2. x->replay_maxage has elapsed since the last update,
1112          *     and there were changes
1113          *
1114          *  The state structure must be locked!
1115          */
1116
1117         switch (event) {
1118         case XFRM_REPLAY_UPDATE:
1119                 if (x->replay_maxdiff &&
1120                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1121                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1122                         if (x->xflags & XFRM_TIME_DEFER)
1123                                 event = XFRM_REPLAY_TIMEOUT;
1124                         else
1125                                 return;
1126                 }
1127
1128                 break;
1129
1130         case XFRM_REPLAY_TIMEOUT:
1131                 if ((x->replay.seq == x->preplay.seq) &&
1132                     (x->replay.bitmap == x->preplay.bitmap) &&
1133                     (x->replay.oseq == x->preplay.oseq)) {
1134                         x->xflags |= XFRM_TIME_DEFER;
1135                         return;
1136                 }
1137
1138                 break;
1139         }
1140
1141         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1142         c.event = XFRM_MSG_NEWAE;
1143         c.data.aevent = event;
1144         km_state_notify(x, &c);
1145
1146         if (x->replay_maxage &&
1147             !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
1148                 xfrm_state_hold(x);
1149                 x->xflags &= ~XFRM_TIME_DEFER;
1150         }
1151 }
1152 EXPORT_SYMBOL(xfrm_replay_notify);
1153
1154 static void xfrm_replay_timer_handler(unsigned long data)
1155 {
1156         struct xfrm_state *x = (struct xfrm_state*)data;
1157
1158         spin_lock(&x->lock);
1159
1160         if (x->km.state == XFRM_STATE_VALID) {
1161                 if (xfrm_aevent_is_on())
1162                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1163                 else
1164                         x->xflags |= XFRM_TIME_DEFER;
1165         }
1166
1167         spin_unlock(&x->lock);
1168         xfrm_state_put(x);
1169 }
1170
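/* Anti-replay handling: x->replay.seq holds the highest sequence number
 * seen so far and x->replay.bitmap records which of the preceding
 * replay_window sequence numbers have already been accepted.
 */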
1171 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
1172 {
1173         u32 diff;
1174
1175         seq = ntohl(seq);
1176
1177         if (unlikely(seq == 0))
1178                 return -EINVAL;
1179
1180         if (likely(seq > x->replay.seq))
1181                 return 0;
1182
1183         diff = x->replay.seq - seq;
1184         if (diff >= x->props.replay_window) {
1185                 x->stats.replay_window++;
1186                 return -EINVAL;
1187         }
1188
1189         if (x->replay.bitmap & (1U << diff)) {
1190                 x->stats.replay++;
1191                 return -EINVAL;
1192         }
1193         return 0;
1194 }
1195 EXPORT_SYMBOL(xfrm_replay_check);
1196
1197 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
1198 {
1199         u32 diff;
1200
1201         seq = ntohl(seq);
1202
1203         if (seq > x->replay.seq) {
1204                 diff = seq - x->replay.seq;
1205                 if (diff < x->props.replay_window)
1206                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1207                 else
1208                         x->replay.bitmap = 1;
1209                 x->replay.seq = seq;
1210         } else {
1211                 diff = x->replay.seq - seq;
1212                 x->replay.bitmap |= (1U << diff);
1213         }
1214
1215         if (xfrm_aevent_is_on())
1216                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1217 }
1218 EXPORT_SYMBOL(xfrm_replay_advance);
1219
1220 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1221 static DEFINE_RWLOCK(xfrm_km_lock);
1222
1223 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1224 {
1225         struct xfrm_mgr *km;
1226
1227         read_lock(&xfrm_km_lock);
1228         list_for_each_entry(km, &xfrm_km_list, list)
1229                 if (km->notify_policy)
1230                         km->notify_policy(xp, dir, c);
1231         read_unlock(&xfrm_km_lock);
1232 }
1233
1234 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1235 {
1236         struct xfrm_mgr *km;
1237         read_lock(&xfrm_km_lock);
1238         list_for_each_entry(km, &xfrm_km_list, list)
1239                 if (km->notify)
1240                         km->notify(x, c);
1241         read_unlock(&xfrm_km_lock);
1242 }
1243
1244 EXPORT_SYMBOL(km_policy_notify);
1245 EXPORT_SYMBOL(km_state_notify);
1246
1247 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1248 {
1249         struct km_event c;
1250
1251         c.data.hard = hard;
1252         c.pid = pid;
1253         c.event = XFRM_MSG_EXPIRE;
1254         km_state_notify(x, &c);
1255
1256         if (hard)
1257                 wake_up(&km_waitq);
1258 }
1259
1260 EXPORT_SYMBOL(km_state_expired);
1261 /*
1262  * We send to all registered managers regardless of failure;
1263  * we are happy with one success.
1264  */
1265 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1266 {
1267         int err = -EINVAL, acqret;
1268         struct xfrm_mgr *km;
1269
1270         read_lock(&xfrm_km_lock);
1271         list_for_each_entry(km, &xfrm_km_list, list) {
1272                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1273                 if (!acqret)
1274                         err = acqret;
1275         }
1276         read_unlock(&xfrm_km_lock);
1277         return err;
1278 }
1279 EXPORT_SYMBOL(km_query);
1280
1281 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1282 {
1283         int err = -EINVAL;
1284         struct xfrm_mgr *km;
1285
1286         read_lock(&xfrm_km_lock);
1287         list_for_each_entry(km, &xfrm_km_list, list) {
1288                 if (km->new_mapping)
1289                         err = km->new_mapping(x, ipaddr, sport);
1290                 if (!err)
1291                         break;
1292         }
1293         read_unlock(&xfrm_km_lock);
1294         return err;
1295 }
1296 EXPORT_SYMBOL(km_new_mapping);
1297
1298 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1299 {
1300         struct km_event c;
1301
1302         c.data.hard = hard;
1303         c.pid = pid;
1304         c.event = XFRM_MSG_POLEXPIRE;
1305         km_policy_notify(pol, dir, &c);
1306
1307         if (hard)
1308                 wake_up(&km_waitq);
1309 }
1310 EXPORT_SYMBOL(km_policy_expired);
1311
1312 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1313 {
1314         int err = -EINVAL;
1315         int ret;
1316         struct xfrm_mgr *km;
1317
1318         read_lock(&xfrm_km_lock);
1319         list_for_each_entry(km, &xfrm_km_list, list) {
1320                 if (km->report) {
1321                         ret = km->report(proto, sel, addr);
1322                         if (!ret)
1323                                 err = ret;
1324                 }
1325         }
1326         read_unlock(&xfrm_km_lock);
1327         return err;
1328 }
1329 EXPORT_SYMBOL(km_report);
1330
1331 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1332 {
1333         int err;
1334         u8 *data;
1335         struct xfrm_mgr *km;
1336         struct xfrm_policy *pol = NULL;
1337
1338         if (optlen <= 0 || optlen > PAGE_SIZE)
1339                 return -EMSGSIZE;
1340
1341         data = kmalloc(optlen, GFP_KERNEL);
1342         if (!data)
1343                 return -ENOMEM;
1344
1345         err = -EFAULT;
1346         if (copy_from_user(data, optval, optlen))
1347                 goto out;
1348
1349         err = -EINVAL;
1350         read_lock(&xfrm_km_lock);
1351         list_for_each_entry(km, &xfrm_km_list, list) {
1352                 pol = km->compile_policy(sk, optname, data,
1353                                          optlen, &err);
1354                 if (err >= 0)
1355                         break;
1356         }
1357         read_unlock(&xfrm_km_lock);
1358
1359         if (err >= 0) {
1360                 xfrm_sk_policy_insert(sk, err, pol);
1361                 xfrm_pol_put(pol);
1362                 err = 0;
1363         }
1364
1365 out:
1366         kfree(data);
1367         return err;
1368 }
1369 EXPORT_SYMBOL(xfrm_user_policy);
1370
1371 int xfrm_register_km(struct xfrm_mgr *km)
1372 {
1373         write_lock_bh(&xfrm_km_lock);
1374         list_add_tail(&km->list, &xfrm_km_list);
1375         write_unlock_bh(&xfrm_km_lock);
1376         return 0;
1377 }
1378 EXPORT_SYMBOL(xfrm_register_km);
1379
1380 int xfrm_unregister_km(struct xfrm_mgr *km)
1381 {
1382         write_lock_bh(&xfrm_km_lock);
1383         list_del(&km->list);
1384         write_unlock_bh(&xfrm_km_lock);
1385         return 0;
1386 }
1387 EXPORT_SYMBOL(xfrm_unregister_km);
1388
1389 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1390 {
1391         int err = 0;
1392         if (unlikely(afinfo == NULL))
1393                 return -EINVAL;
1394         if (unlikely(afinfo->family >= NPROTO))
1395                 return -EAFNOSUPPORT;
1396         write_lock_bh(&xfrm_state_afinfo_lock);
1397         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1398                 err = -ENOBUFS;
1399         else
1400                 xfrm_state_afinfo[afinfo->family] = afinfo;
1401         write_unlock_bh(&xfrm_state_afinfo_lock);
1402         return err;
1403 }
1404 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1405
1406 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1407 {
1408         int err = 0;
1409         if (unlikely(afinfo == NULL))
1410                 return -EINVAL;
1411         if (unlikely(afinfo->family >= NPROTO))
1412                 return -EAFNOSUPPORT;
1413         write_lock_bh(&xfrm_state_afinfo_lock);
1414         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1415                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1416                         err = -EINVAL;
1417                 else
1418                         xfrm_state_afinfo[afinfo->family] = NULL;
1419         }
1420         write_unlock_bh(&xfrm_state_afinfo_lock);
1421         return err;
1422 }
1423 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1424
1425 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1426 {
1427         struct xfrm_state_afinfo *afinfo;
1428         if (unlikely(family >= NPROTO))
1429                 return NULL;
1430         read_lock(&xfrm_state_afinfo_lock);
1431         afinfo = xfrm_state_afinfo[family];
1432         if (unlikely(!afinfo))
1433                 read_unlock(&xfrm_state_afinfo_lock);
1434         return afinfo;
1435 }
1436
1437 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1438 {
1439         read_unlock(&xfrm_state_afinfo_lock);
1440 }
1441
1442 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1443 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1444 {
1445         if (x->tunnel) {
1446                 struct xfrm_state *t = x->tunnel;
1447
1448                 if (atomic_read(&t->tunnel_users) == 2)
1449                         xfrm_state_delete(t);
1450                 atomic_dec(&t->tunnel_users);
1451                 xfrm_state_put(t);
1452                 x->tunnel = NULL;
1453         }
1454 }
1455 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1456
1457 /*
1458  * This function is NOT optimal.  For example, with ESP it will give an
1459  * MTU that's usually two bytes short of being optimal.  However, it will
1460  * usually give an answer that's a multiple of 4 provided the input is
1461  * also a multiple of 4.
1462  */
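/* The payload estimate is shrunk iteratively until the transformed size
 * reported by the type's get_max_size() fits within the given mtu.
 */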
1463 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1464 {
1465         int res = mtu;
1466
1467         res -= x->props.header_len;
1468
1469         for (;;) {
1470                 int m = res;
1471
1472                 if (m < 68)
1473                         return 68;
1474
1475                 spin_lock_bh(&x->lock);
1476                 if (x->km.state == XFRM_STATE_VALID &&
1477                     x->type && x->type->get_max_size)
1478                         m = x->type->get_max_size(x, m);
1479                 else
1480                         m += x->props.header_len;
1481                 spin_unlock_bh(&x->lock);
1482
1483                 if (m <= mtu)
1484                         break;
1485                 res -= (m - mtu);
1486         }
1487
1488         return res;
1489 }
1490
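/* Finish constructing a state: apply the per-family init flags, resolve
 * and initialise the transform type and mode for x->id.proto and
 * x->props.mode, then mark the state XFRM_STATE_VALID.
 */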
1491 int xfrm_init_state(struct xfrm_state *x)
1492 {
1493         struct xfrm_state_afinfo *afinfo;
1494         int family = x->props.family;
1495         int err;
1496
1497         err = -EAFNOSUPPORT;
1498         afinfo = xfrm_state_get_afinfo(family);
1499         if (!afinfo)
1500                 goto error;
1501
1502         err = 0;
1503         if (afinfo->init_flags)
1504                 err = afinfo->init_flags(x);
1505
1506         xfrm_state_put_afinfo(afinfo);
1507
1508         if (err)
1509                 goto error;
1510
1511         err = -EPROTONOSUPPORT;
1512         x->type = xfrm_get_type(x->id.proto, family);
1513         if (x->type == NULL)
1514                 goto error;
1515
1516         err = x->type->init_state(x);
1517         if (err)
1518                 goto error;
1519
1520         x->mode = xfrm_get_mode(x->props.mode, family);
1521         if (x->mode == NULL)
1522                 goto error;
1523
1524         x->km.state = XFRM_STATE_VALID;
1525
1526 error:
1527         return err;
1528 }
1529
1530 EXPORT_SYMBOL(xfrm_init_state);
1531  
1532 void __init xfrm_state_init(void)
1533 {
1534         int i;
1535
1536         for (i=0; i<XFRM_DST_HSIZE; i++) {
1537                 INIT_HLIST_HEAD(&xfrm_state_bydst[i]);
1538                 INIT_HLIST_HEAD(&xfrm_state_bysrc[i]);
1539                 INIT_HLIST_HEAD(&xfrm_state_byspi[i]);
1540         }
1541         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1542 }
1543