]> Pileus Git - ~andy/linux/blob - net/xfrm/xfrm_state.c
[XFRM]: Pull xfrm_state_by{spi,src} hash table knowledge out of afinfo.
[~andy/linux] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <asm/uaccess.h>
22
23 struct sock *xfrm_nl;
24 EXPORT_SYMBOL(xfrm_nl);
25
26 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
27 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
28
29 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
31
/* Each xfrm_state may be linked to three tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by daddr to find what SAs exist for given
      destination/tunnel endpoint. (output)
   3. Hash table by saddr to find SA by (daddr,saddr,proto) when
      the lookup is done by address pair instead of by SPI.
 */
38
39 static DEFINE_SPINLOCK(xfrm_state_lock);
40
41 #define XFRM_DST_HSIZE          1024
42
43 /* Hash table to find appropriate SA towards given target (endpoint
44  * of tunnel or destination of transport mode) allowed by selector.
45  *
46  * Main use is finding SA after policy selected tunnel or transport mode.
47  * Also, it can be used by ah/esp icmp error handler to find offending SA.
48  */
49 static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
50 static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE];
51 static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
52
53 static __inline__
54 unsigned __xfrm4_dst_hash(xfrm_address_t *addr)
55 {
56         unsigned h;
57         h = ntohl(addr->a4);
58         h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
59         return h;
60 }
61
62 static __inline__
63 unsigned __xfrm6_dst_hash(xfrm_address_t *addr)
64 {
65         unsigned h;
66         h = ntohl(addr->a6[2]^addr->a6[3]);
67         h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
68         return h;
69 }
70
71 static __inline__
72 unsigned __xfrm4_src_hash(xfrm_address_t *addr)
73 {
74         return __xfrm4_dst_hash(addr);
75 }
76
77 static __inline__
78 unsigned __xfrm6_src_hash(xfrm_address_t *addr)
79 {
80         return __xfrm6_dst_hash(addr);
81 }
82
83 static __inline__
84 unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
85 {
86         switch (family) {
87         case AF_INET:
88                 return __xfrm4_src_hash(addr);
89         case AF_INET6:
90                 return __xfrm6_src_hash(addr);
91         }
92         return 0;
93 }
94
95 static __inline__
96 unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
97 {
98         switch (family) {
99         case AF_INET:
100                 return __xfrm4_dst_hash(addr);
101         case AF_INET6:
102                 return __xfrm6_dst_hash(addr);
103         }
104         return 0;
105 }
106
107 static __inline__
108 unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
109 {
110         unsigned h;
111         h = ntohl(addr->a4^spi^proto);
112         h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
113         return h;
114 }
115
116 static __inline__
117 unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
118 {
119         unsigned h;
120         h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
121         h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
122         return h;
123 }
124
125 static __inline__
126 unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
127 {
128         switch (family) {
129         case AF_INET:
130                 return __xfrm4_spi_hash(addr, spi, proto);
131         case AF_INET6:
132                 return __xfrm6_spi_hash(addr, spi, proto);
133         }
134         return 0;       /*XXX*/
135 }
136
137 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
138 EXPORT_SYMBOL(km_waitq);
139
140 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
141 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
142
143 static struct work_struct xfrm_state_gc_work;
144 static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
145 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
146
147 static int xfrm_state_gc_flush_bundles;
148
149 int __xfrm_state_delete(struct xfrm_state *x);
150
151 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
152 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
153
154 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
155 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
156
157 static void xfrm_state_gc_destroy(struct xfrm_state *x)
158 {
159         if (del_timer(&x->timer))
160                 BUG();
161         if (del_timer(&x->rtimer))
162                 BUG();
163         kfree(x->aalg);
164         kfree(x->ealg);
165         kfree(x->calg);
166         kfree(x->encap);
167         kfree(x->coaddr);
168         if (x->mode)
169                 xfrm_put_mode(x->mode);
170         if (x->type) {
171                 x->type->destructor(x);
172                 xfrm_put_type(x->type);
173         }
174         security_xfrm_state_free(x);
175         kfree(x);
176 }
177
178 static void xfrm_state_gc_task(void *data)
179 {
180         struct xfrm_state *x;
181         struct list_head *entry, *tmp;
182         struct list_head gc_list = LIST_HEAD_INIT(gc_list);
183
184         if (xfrm_state_gc_flush_bundles) {
185                 xfrm_state_gc_flush_bundles = 0;
186                 xfrm_flush_bundles();
187         }
188
189         spin_lock_bh(&xfrm_state_gc_lock);
190         list_splice_init(&xfrm_state_gc_list, &gc_list);
191         spin_unlock_bh(&xfrm_state_gc_lock);
192
193         list_for_each_safe(entry, tmp, &gc_list) {
194                 x = list_entry(entry, struct xfrm_state, bydst);
195                 xfrm_state_gc_destroy(x);
196         }
197         wake_up(&km_waitq);
198 }
199
200 static inline unsigned long make_jiffies(long secs)
201 {
202         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
203                 return MAX_SCHEDULE_TIMEOUT-1;
204         else
205                 return secs*HZ;
206 }
207
208 static void xfrm_timer_handler(unsigned long data)
209 {
210         struct xfrm_state *x = (struct xfrm_state*)data;
211         unsigned long now = (unsigned long)xtime.tv_sec;
212         long next = LONG_MAX;
213         int warn = 0;
214
215         spin_lock(&x->lock);
216         if (x->km.state == XFRM_STATE_DEAD)
217                 goto out;
218         if (x->km.state == XFRM_STATE_EXPIRED)
219                 goto expired;
220         if (x->lft.hard_add_expires_seconds) {
221                 long tmo = x->lft.hard_add_expires_seconds +
222                         x->curlft.add_time - now;
223                 if (tmo <= 0)
224                         goto expired;
225                 if (tmo < next)
226                         next = tmo;
227         }
228         if (x->lft.hard_use_expires_seconds) {
229                 long tmo = x->lft.hard_use_expires_seconds +
230                         (x->curlft.use_time ? : now) - now;
231                 if (tmo <= 0)
232                         goto expired;
233                 if (tmo < next)
234                         next = tmo;
235         }
236         if (x->km.dying)
237                 goto resched;
238         if (x->lft.soft_add_expires_seconds) {
239                 long tmo = x->lft.soft_add_expires_seconds +
240                         x->curlft.add_time - now;
241                 if (tmo <= 0)
242                         warn = 1;
243                 else if (tmo < next)
244                         next = tmo;
245         }
246         if (x->lft.soft_use_expires_seconds) {
247                 long tmo = x->lft.soft_use_expires_seconds +
248                         (x->curlft.use_time ? : now) - now;
249                 if (tmo <= 0)
250                         warn = 1;
251                 else if (tmo < next)
252                         next = tmo;
253         }
254
255         x->km.dying = warn;
256         if (warn)
257                 km_state_expired(x, 0, 0);
258 resched:
259         if (next != LONG_MAX &&
260             !mod_timer(&x->timer, jiffies + make_jiffies(next)))
261                 xfrm_state_hold(x);
262         goto out;
263
264 expired:
265         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
266                 x->km.state = XFRM_STATE_EXPIRED;
267                 wake_up(&km_waitq);
268                 next = 2;
269                 goto resched;
270         }
271         if (!__xfrm_state_delete(x) && x->id.spi)
272                 km_state_expired(x, 1, 0);
273
274 out:
275         spin_unlock(&x->lock);
276         xfrm_state_put(x);
277 }
278
279 static void xfrm_replay_timer_handler(unsigned long data);
280
281 struct xfrm_state *xfrm_state_alloc(void)
282 {
283         struct xfrm_state *x;
284
285         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
286
287         if (x) {
288                 atomic_set(&x->refcnt, 1);
289                 atomic_set(&x->tunnel_users, 0);
290                 INIT_LIST_HEAD(&x->bydst);
291                 INIT_LIST_HEAD(&x->bysrc);
292                 INIT_LIST_HEAD(&x->byspi);
293                 init_timer(&x->timer);
294                 x->timer.function = xfrm_timer_handler;
295                 x->timer.data     = (unsigned long)x;
296                 init_timer(&x->rtimer);
297                 x->rtimer.function = xfrm_replay_timer_handler;
298                 x->rtimer.data     = (unsigned long)x;
299                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
300                 x->lft.soft_byte_limit = XFRM_INF;
301                 x->lft.soft_packet_limit = XFRM_INF;
302                 x->lft.hard_byte_limit = XFRM_INF;
303                 x->lft.hard_packet_limit = XFRM_INF;
304                 x->replay_maxage = 0;
305                 x->replay_maxdiff = 0;
306                 spin_lock_init(&x->lock);
307         }
308         return x;
309 }
310 EXPORT_SYMBOL(xfrm_state_alloc);
311
312 void __xfrm_state_destroy(struct xfrm_state *x)
313 {
314         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
315
316         spin_lock_bh(&xfrm_state_gc_lock);
317         list_add(&x->bydst, &xfrm_state_gc_list);
318         spin_unlock_bh(&xfrm_state_gc_lock);
319         schedule_work(&xfrm_state_gc_work);
320 }
321 EXPORT_SYMBOL(__xfrm_state_destroy);
322
323 int __xfrm_state_delete(struct xfrm_state *x)
324 {
325         int err = -ESRCH;
326
327         if (x->km.state != XFRM_STATE_DEAD) {
328                 x->km.state = XFRM_STATE_DEAD;
329                 spin_lock(&xfrm_state_lock);
330                 list_del(&x->bydst);
331                 __xfrm_state_put(x);
332                 list_del(&x->bysrc);
333                 __xfrm_state_put(x);
334                 if (x->id.spi) {
335                         list_del(&x->byspi);
336                         __xfrm_state_put(x);
337                 }
338                 spin_unlock(&xfrm_state_lock);
339                 if (del_timer(&x->timer))
340                         __xfrm_state_put(x);
341                 if (del_timer(&x->rtimer))
342                         __xfrm_state_put(x);
343
344                 /* The number two in this test is the reference
345                  * mentioned in the comment below plus the reference
346                  * our caller holds.  A larger value means that
347                  * there are DSTs attached to this xfrm_state.
348                  */
349                 if (atomic_read(&x->refcnt) > 2) {
350                         xfrm_state_gc_flush_bundles = 1;
351                         schedule_work(&xfrm_state_gc_work);
352                 }
353
354                 /* All xfrm_state objects are created by xfrm_state_alloc.
355                  * The xfrm_state_alloc call gives a reference, and that
356                  * is what we are dropping here.
357                  */
358                 __xfrm_state_put(x);
359                 err = 0;
360         }
361
362         return err;
363 }
364 EXPORT_SYMBOL(__xfrm_state_delete);
365
366 int xfrm_state_delete(struct xfrm_state *x)
367 {
368         int err;
369
370         spin_lock_bh(&x->lock);
371         err = __xfrm_state_delete(x);
372         spin_unlock_bh(&x->lock);
373
374         return err;
375 }
376 EXPORT_SYMBOL(xfrm_state_delete);
377
378 void xfrm_state_flush(u8 proto)
379 {
380         int i;
381         struct xfrm_state *x;
382
383         spin_lock_bh(&xfrm_state_lock);
384         for (i = 0; i < XFRM_DST_HSIZE; i++) {
385 restart:
386                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
387                         if (!xfrm_state_kern(x) &&
388                             xfrm_id_proto_match(x->id.proto, proto)) {
389                                 xfrm_state_hold(x);
390                                 spin_unlock_bh(&xfrm_state_lock);
391
392                                 xfrm_state_delete(x);
393                                 xfrm_state_put(x);
394
395                                 spin_lock_bh(&xfrm_state_lock);
396                                 goto restart;
397                         }
398                 }
399         }
400         spin_unlock_bh(&xfrm_state_lock);
401         wake_up(&km_waitq);
402 }
403 EXPORT_SYMBOL(xfrm_state_flush);
404
405 static int
406 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
407                   struct xfrm_tmpl *tmpl,
408                   xfrm_address_t *daddr, xfrm_address_t *saddr,
409                   unsigned short family)
410 {
411         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
412         if (!afinfo)
413                 return -1;
414         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
415         xfrm_state_put_afinfo(afinfo);
416         return 0;
417 }
418
419 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
420 {
421         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
422         struct xfrm_state *x;
423
424         list_for_each_entry(x, xfrm_state_byspi+h, byspi) {
425                 if (x->props.family != family ||
426                     x->id.spi       != spi ||
427                     x->id.proto     != proto)
428                         continue;
429
430                 switch (family) {
431                 case AF_INET:
432                         if (x->id.daddr.a4 != daddr->a4)
433                                 continue;
434                         break;
435                 case AF_INET6:
436                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
437                                              (struct in6_addr *)
438                                              x->id.daddr.a6))
439                                 continue;
440                         break;
441                 };
442
443                 xfrm_state_hold(x);
444                 return x;
445         }
446
447         return NULL;
448 }
449
450 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
451 {
452         unsigned int h = xfrm_src_hash(saddr, family);
453         struct xfrm_state *x;
454
455         list_for_each_entry(x, xfrm_state_bysrc+h, bysrc) {
456                 if (x->props.family != family ||
457                     x->id.proto     != proto)
458                         continue;
459
460                 switch (family) {
461                 case AF_INET:
462                         if (x->id.daddr.a4 != daddr->a4 ||
463                             x->props.saddr.a4 != saddr->a4)
464                                 continue;
465                         break;
466                 case AF_INET6:
467                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
468                                              (struct in6_addr *)
469                                              x->id.daddr.a6) ||
470                             !ipv6_addr_equal((struct in6_addr *)saddr,
471                                              (struct in6_addr *)
472                                              x->props.saddr.a6))
473                                 continue;
474                         break;
475                 };
476
477                 xfrm_state_hold(x);
478                 return x;
479         }
480
481         return NULL;
482 }
483
484 static inline struct xfrm_state *
485 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
486 {
487         if (use_spi)
488                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
489                                            x->id.proto, family);
490         else
491                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
492                                                   &x->props.saddr,
493                                                   x->id.proto, family);
494 }
495
496 struct xfrm_state *
497 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
498                 struct flowi *fl, struct xfrm_tmpl *tmpl,
499                 struct xfrm_policy *pol, int *err,
500                 unsigned short family)
501 {
502         unsigned h = xfrm_dst_hash(daddr, family);
503         struct xfrm_state *x, *x0;
504         int acquire_in_progress = 0;
505         int error = 0;
506         struct xfrm_state *best = NULL;
507         
508         spin_lock_bh(&xfrm_state_lock);
509         list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
510                 if (x->props.family == family &&
511                     x->props.reqid == tmpl->reqid &&
512                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
513                     xfrm_state_addr_check(x, daddr, saddr, family) &&
514                     tmpl->mode == x->props.mode &&
515                     tmpl->id.proto == x->id.proto &&
516                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
517                         /* Resolution logic:
518                            1. There is a valid state with matching selector.
519                               Done.
520                            2. Valid state with inappropriate selector. Skip.
521
522                            Entering area of "sysdeps".
523
524                            3. If state is not valid, selector is temporary,
525                               it selects only session which triggered
526                               previous resolution. Key manager will do
527                               something to install a state with proper
528                               selector.
529                          */
530                         if (x->km.state == XFRM_STATE_VALID) {
531                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
532                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
533                                         continue;
534                                 if (!best ||
535                                     best->km.dying > x->km.dying ||
536                                     (best->km.dying == x->km.dying &&
537                                      best->curlft.add_time < x->curlft.add_time))
538                                         best = x;
539                         } else if (x->km.state == XFRM_STATE_ACQ) {
540                                 acquire_in_progress = 1;
541                         } else if (x->km.state == XFRM_STATE_ERROR ||
542                                    x->km.state == XFRM_STATE_EXPIRED) {
543                                 if (xfrm_selector_match(&x->sel, fl, family) &&
544                                     security_xfrm_state_pol_flow_match(x, pol, fl))
545                                         error = -ESRCH;
546                         }
547                 }
548         }
549
550         x = best;
551         if (!x && !error && !acquire_in_progress) {
552                 if (tmpl->id.spi &&
553                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
554                                               tmpl->id.proto, family)) != NULL) {
555                         xfrm_state_put(x0);
556                         error = -EEXIST;
557                         goto out;
558                 }
559                 x = xfrm_state_alloc();
560                 if (x == NULL) {
561                         error = -ENOMEM;
562                         goto out;
563                 }
564                 /* Initialize temporary selector matching only
565                  * to current session. */
566                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
567
568                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
569                 if (error) {
570                         x->km.state = XFRM_STATE_DEAD;
571                         xfrm_state_put(x);
572                         x = NULL;
573                         goto out;
574                 }
575
576                 if (km_query(x, tmpl, pol) == 0) {
577                         x->km.state = XFRM_STATE_ACQ;
578                         list_add_tail(&x->bydst, xfrm_state_bydst+h);
579                         xfrm_state_hold(x);
580                         list_add_tail(&x->bysrc, xfrm_state_bysrc+h);
581                         xfrm_state_hold(x);
582                         if (x->id.spi) {
583                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
584                                 list_add(&x->byspi, xfrm_state_byspi+h);
585                                 xfrm_state_hold(x);
586                         }
587                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
588                         xfrm_state_hold(x);
589                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
590                         add_timer(&x->timer);
591                 } else {
592                         x->km.state = XFRM_STATE_DEAD;
593                         xfrm_state_put(x);
594                         x = NULL;
595                         error = -ESRCH;
596                 }
597         }
598 out:
599         if (x)
600                 xfrm_state_hold(x);
601         else
602                 *err = acquire_in_progress ? -EAGAIN : error;
603         spin_unlock_bh(&xfrm_state_lock);
604         return x;
605 }
606
607 static void __xfrm_state_insert(struct xfrm_state *x)
608 {
609         unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
610
611         list_add(&x->bydst, xfrm_state_bydst+h);
612         xfrm_state_hold(x);
613
614         h = xfrm_src_hash(&x->props.saddr, x->props.family);
615
616         list_add(&x->bysrc, xfrm_state_bysrc+h);
617         xfrm_state_hold(x);
618
619         if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
620                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
621                                   x->props.family);
622
623                 list_add(&x->byspi, xfrm_state_byspi+h);
624                 xfrm_state_hold(x);
625         }
626
627         if (!mod_timer(&x->timer, jiffies + HZ))
628                 xfrm_state_hold(x);
629
630         if (x->replay_maxage &&
631             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
632                 xfrm_state_hold(x);
633
634         wake_up(&km_waitq);
635 }
636
637 void xfrm_state_insert(struct xfrm_state *x)
638 {
639         spin_lock_bh(&xfrm_state_lock);
640         __xfrm_state_insert(x);
641         spin_unlock_bh(&xfrm_state_lock);
642
643         xfrm_flush_all_bundles();
644 }
645 EXPORT_SYMBOL(xfrm_state_insert);
646
647 /* xfrm_state_lock is held */
648 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
649 {
650         unsigned int h = xfrm_dst_hash(daddr, family);
651         struct xfrm_state *x;
652
653         list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
654                 if (x->props.reqid  != reqid ||
655                     x->props.mode   != mode ||
656                     x->props.family != family ||
657                     x->km.state     != XFRM_STATE_ACQ ||
658                     x->id.spi       != 0)
659                         continue;
660
661                 switch (family) {
662                 case AF_INET:
663                         if (x->id.daddr.a4    != daddr->a4 ||
664                             x->props.saddr.a4 != saddr->a4)
665                                 continue;
666                         break;
667                 case AF_INET6:
668                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
669                                              (struct in6_addr *)daddr) ||
670                             !ipv6_addr_equal((struct in6_addr *)
671                                              x->props.saddr.a6,
672                                              (struct in6_addr *)saddr))
673                                 continue;
674                         break;
675                 };
676
677                 xfrm_state_hold(x);
678                 return x;
679         }
680
681         if (!create)
682                 return NULL;
683
684         x = xfrm_state_alloc();
685         if (likely(x)) {
686                 switch (family) {
687                 case AF_INET:
688                         x->sel.daddr.a4 = daddr->a4;
689                         x->sel.saddr.a4 = saddr->a4;
690                         x->sel.prefixlen_d = 32;
691                         x->sel.prefixlen_s = 32;
692                         x->props.saddr.a4 = saddr->a4;
693                         x->id.daddr.a4 = daddr->a4;
694                         break;
695
696                 case AF_INET6:
697                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
698                                        (struct in6_addr *)daddr);
699                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
700                                        (struct in6_addr *)saddr);
701                         x->sel.prefixlen_d = 128;
702                         x->sel.prefixlen_s = 128;
703                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
704                                        (struct in6_addr *)saddr);
705                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
706                                        (struct in6_addr *)daddr);
707                         break;
708                 };
709
710                 x->km.state = XFRM_STATE_ACQ;
711                 x->id.proto = proto;
712                 x->props.family = family;
713                 x->props.mode = mode;
714                 x->props.reqid = reqid;
715                 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
716                 xfrm_state_hold(x);
717                 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
718                 add_timer(&x->timer);
719                 xfrm_state_hold(x);
720                 list_add_tail(&x->bydst, xfrm_state_bydst+h);
721                 h = xfrm_src_hash(saddr, family);
722                 xfrm_state_hold(x);
723                 list_add_tail(&x->bysrc, xfrm_state_bysrc+h);
724                 wake_up(&km_waitq);
725         }
726
727         return x;
728 }
729
730 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
731
732 int xfrm_state_add(struct xfrm_state *x)
733 {
734         struct xfrm_state *x1;
735         int family;
736         int err;
737         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
738
739         family = x->props.family;
740
741         spin_lock_bh(&xfrm_state_lock);
742
743         x1 = __xfrm_state_locate(x, use_spi, family);
744         if (x1) {
745                 xfrm_state_put(x1);
746                 x1 = NULL;
747                 err = -EEXIST;
748                 goto out;
749         }
750
751         if (use_spi && x->km.seq) {
752                 x1 = __xfrm_find_acq_byseq(x->km.seq);
753                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
754                         xfrm_state_put(x1);
755                         x1 = NULL;
756                 }
757         }
758
759         if (use_spi && !x1)
760                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
761                                      x->id.proto,
762                                      &x->id.daddr, &x->props.saddr, 0);
763
764         __xfrm_state_insert(x);
765         err = 0;
766
767 out:
768         spin_unlock_bh(&xfrm_state_lock);
769
770         if (!err)
771                 xfrm_flush_all_bundles();
772
773         if (x1) {
774                 xfrm_state_delete(x1);
775                 xfrm_state_put(x1);
776         }
777
778         return err;
779 }
780 EXPORT_SYMBOL(xfrm_state_add);
781
782 int xfrm_state_update(struct xfrm_state *x)
783 {
784         struct xfrm_state *x1;
785         int err;
786         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
787
788         spin_lock_bh(&xfrm_state_lock);
789         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
790
791         err = -ESRCH;
792         if (!x1)
793                 goto out;
794
795         if (xfrm_state_kern(x1)) {
796                 xfrm_state_put(x1);
797                 err = -EEXIST;
798                 goto out;
799         }
800
801         if (x1->km.state == XFRM_STATE_ACQ) {
802                 __xfrm_state_insert(x);
803                 x = NULL;
804         }
805         err = 0;
806
807 out:
808         spin_unlock_bh(&xfrm_state_lock);
809
810         if (err)
811                 return err;
812
813         if (!x) {
814                 xfrm_state_delete(x1);
815                 xfrm_state_put(x1);
816                 return 0;
817         }
818
819         err = -EINVAL;
820         spin_lock_bh(&x1->lock);
821         if (likely(x1->km.state == XFRM_STATE_VALID)) {
822                 if (x->encap && x1->encap)
823                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
824                 if (x->coaddr && x1->coaddr) {
825                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
826                 }
827                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
828                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
829                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
830                 x1->km.dying = 0;
831
832                 if (!mod_timer(&x1->timer, jiffies + HZ))
833                         xfrm_state_hold(x1);
834                 if (x1->curlft.use_time)
835                         xfrm_state_check_expire(x1);
836
837                 err = 0;
838         }
839         spin_unlock_bh(&x1->lock);
840
841         xfrm_state_put(x1);
842
843         return err;
844 }
845 EXPORT_SYMBOL(xfrm_state_update);
846
847 int xfrm_state_check_expire(struct xfrm_state *x)
848 {
849         if (!x->curlft.use_time)
850                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
851
852         if (x->km.state != XFRM_STATE_VALID)
853                 return -EINVAL;
854
855         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
856             x->curlft.packets >= x->lft.hard_packet_limit) {
857                 x->km.state = XFRM_STATE_EXPIRED;
858                 if (!mod_timer(&x->timer, jiffies))
859                         xfrm_state_hold(x);
860                 return -EINVAL;
861         }
862
863         if (!x->km.dying &&
864             (x->curlft.bytes >= x->lft.soft_byte_limit ||
865              x->curlft.packets >= x->lft.soft_packet_limit)) {
866                 x->km.dying = 1;
867                 km_state_expired(x, 0, 0);
868         }
869         return 0;
870 }
871 EXPORT_SYMBOL(xfrm_state_check_expire);
872
873 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
874 {
875         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
876                 - skb_headroom(skb);
877
878         if (nhead > 0)
879                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
880
881         /* Check tail too... */
882         return 0;
883 }
884
/*
 * Run the expiry check on @x and, if that does not fail, the headroom
 * check on @skb.  Returns the first negative result, otherwise the result
 * of xfrm_state_check_space().
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int rc = xfrm_state_check_expire(x);

	if (rc >= 0)
		rc = xfrm_state_check_space(x, skb);

	return rc;
}
894 EXPORT_SYMBOL(xfrm_state_check);
895
896 struct xfrm_state *
897 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
898                   unsigned short family)
899 {
900         struct xfrm_state *x;
901
902         spin_lock_bh(&xfrm_state_lock);
903         x = __xfrm_state_lookup(daddr, spi, proto, family);
904         spin_unlock_bh(&xfrm_state_lock);
905         return x;
906 }
907 EXPORT_SYMBOL(xfrm_state_lookup);
908
909 struct xfrm_state *
910 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
911                          u8 proto, unsigned short family)
912 {
913         struct xfrm_state *x;
914
915         spin_lock_bh(&xfrm_state_lock);
916         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
917         spin_unlock_bh(&xfrm_state_lock);
918         return x;
919 }
920 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
921
922 struct xfrm_state *
923 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
924               xfrm_address_t *daddr, xfrm_address_t *saddr, 
925               int create, unsigned short family)
926 {
927         struct xfrm_state *x;
928
929         spin_lock_bh(&xfrm_state_lock);
930         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
931         spin_unlock_bh(&xfrm_state_lock);
932
933         return x;
934 }
935 EXPORT_SYMBOL(xfrm_find_acq);
936
937 #ifdef CONFIG_XFRM_SUB_POLICY
938 int
939 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
940                unsigned short family)
941 {
942         int err = 0;
943         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
944         if (!afinfo)
945                 return -EAFNOSUPPORT;
946
947         spin_lock_bh(&xfrm_state_lock);
948         if (afinfo->tmpl_sort)
949                 err = afinfo->tmpl_sort(dst, src, n);
950         spin_unlock_bh(&xfrm_state_lock);
951         xfrm_state_put_afinfo(afinfo);
952         return err;
953 }
954 EXPORT_SYMBOL(xfrm_tmpl_sort);
955
956 int
957 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
958                 unsigned short family)
959 {
960         int err = 0;
961         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
962         if (!afinfo)
963                 return -EAFNOSUPPORT;
964
965         spin_lock_bh(&xfrm_state_lock);
966         if (afinfo->state_sort)
967                 err = afinfo->state_sort(dst, src, n);
968         spin_unlock_bh(&xfrm_state_lock);
969         xfrm_state_put_afinfo(afinfo);
970         return err;
971 }
972 EXPORT_SYMBOL(xfrm_state_sort);
973 #endif
974
975 /* Silly enough, but I'm lazy to build resolution list */
976
977 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
978 {
979         int i;
980         struct xfrm_state *x;
981
982         for (i = 0; i < XFRM_DST_HSIZE; i++) {
983                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
984                         if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) {
985                                 xfrm_state_hold(x);
986                                 return x;
987                         }
988                 }
989         }
990         return NULL;
991 }
992
993 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
994 {
995         struct xfrm_state *x;
996
997         spin_lock_bh(&xfrm_state_lock);
998         x = __xfrm_find_acq_byseq(seq);
999         spin_unlock_bh(&xfrm_state_lock);
1000         return x;
1001 }
1002 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1003
1004 u32 xfrm_get_acqseq(void)
1005 {
1006         u32 res;
1007         static u32 acqseq;
1008         static DEFINE_SPINLOCK(acqseq_lock);
1009
1010         spin_lock_bh(&acqseq_lock);
1011         res = (++acqseq ? : ++acqseq);
1012         spin_unlock_bh(&acqseq_lock);
1013         return res;
1014 }
1015 EXPORT_SYMBOL(xfrm_get_acqseq);
1016
1017 void
1018 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
1019 {
1020         u32 h;
1021         struct xfrm_state *x0;
1022
1023         if (x->id.spi)
1024                 return;
1025
1026         if (minspi == maxspi) {
1027                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1028                 if (x0) {
1029                         xfrm_state_put(x0);
1030                         return;
1031                 }
1032                 x->id.spi = minspi;
1033         } else {
1034                 u32 spi = 0;
1035                 minspi = ntohl(minspi);
1036                 maxspi = ntohl(maxspi);
1037                 for (h=0; h<maxspi-minspi+1; h++) {
1038                         spi = minspi + net_random()%(maxspi-minspi+1);
1039                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1040                         if (x0 == NULL) {
1041                                 x->id.spi = htonl(spi);
1042                                 break;
1043                         }
1044                         xfrm_state_put(x0);
1045                 }
1046         }
1047         if (x->id.spi) {
1048                 spin_lock_bh(&xfrm_state_lock);
1049                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1050                 list_add(&x->byspi, xfrm_state_byspi+h);
1051                 xfrm_state_hold(x);
1052                 spin_unlock_bh(&xfrm_state_lock);
1053                 wake_up(&km_waitq);
1054         }
1055 }
1056 EXPORT_SYMBOL(xfrm_alloc_spi);
1057
1058 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1059                     void *data)
1060 {
1061         int i;
1062         struct xfrm_state *x;
1063         int count = 0;
1064         int err = 0;
1065
1066         spin_lock_bh(&xfrm_state_lock);
1067         for (i = 0; i < XFRM_DST_HSIZE; i++) {
1068                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
1069                         if (xfrm_id_proto_match(x->id.proto, proto))
1070                                 count++;
1071                 }
1072         }
1073         if (count == 0) {
1074                 err = -ENOENT;
1075                 goto out;
1076         }
1077
1078         for (i = 0; i < XFRM_DST_HSIZE; i++) {
1079                 list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
1080                         if (!xfrm_id_proto_match(x->id.proto, proto))
1081                                 continue;
1082                         err = func(x, --count, data);
1083                         if (err)
1084                                 goto out;
1085                 }
1086         }
1087 out:
1088         spin_unlock_bh(&xfrm_state_lock);
1089         return err;
1090 }
1091 EXPORT_SYMBOL(xfrm_state_walk);
1092
1093
1094 void xfrm_replay_notify(struct xfrm_state *x, int event)
1095 {
1096         struct km_event c;
1097         /* we send notify messages in case
1098          *  1. we updated on of the sequence numbers, and the seqno difference
1099          *     is at least x->replay_maxdiff, in this case we also update the
1100          *     timeout of our timer function
1101          *  2. if x->replay_maxage has elapsed since last update,
1102          *     and there were changes
1103          *
1104          *  The state structure must be locked!
1105          */
1106
1107         switch (event) {
1108         case XFRM_REPLAY_UPDATE:
1109                 if (x->replay_maxdiff &&
1110                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1111                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1112                         if (x->xflags & XFRM_TIME_DEFER)
1113                                 event = XFRM_REPLAY_TIMEOUT;
1114                         else
1115                                 return;
1116                 }
1117
1118                 break;
1119
1120         case XFRM_REPLAY_TIMEOUT:
1121                 if ((x->replay.seq == x->preplay.seq) &&
1122                     (x->replay.bitmap == x->preplay.bitmap) &&
1123                     (x->replay.oseq == x->preplay.oseq)) {
1124                         x->xflags |= XFRM_TIME_DEFER;
1125                         return;
1126                 }
1127
1128                 break;
1129         }
1130
1131         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1132         c.event = XFRM_MSG_NEWAE;
1133         c.data.aevent = event;
1134         km_state_notify(x, &c);
1135
1136         if (x->replay_maxage &&
1137             !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
1138                 xfrm_state_hold(x);
1139                 x->xflags &= ~XFRM_TIME_DEFER;
1140         }
1141 }
1142 EXPORT_SYMBOL(xfrm_replay_notify);
1143
1144 static void xfrm_replay_timer_handler(unsigned long data)
1145 {
1146         struct xfrm_state *x = (struct xfrm_state*)data;
1147
1148         spin_lock(&x->lock);
1149
1150         if (x->km.state == XFRM_STATE_VALID) {
1151                 if (xfrm_aevent_is_on())
1152                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1153                 else
1154                         x->xflags |= XFRM_TIME_DEFER;
1155         }
1156
1157         spin_unlock(&x->lock);
1158         xfrm_state_put(x);
1159 }
1160
1161 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
1162 {
1163         u32 diff;
1164
1165         seq = ntohl(seq);
1166
1167         if (unlikely(seq == 0))
1168                 return -EINVAL;
1169
1170         if (likely(seq > x->replay.seq))
1171                 return 0;
1172
1173         diff = x->replay.seq - seq;
1174         if (diff >= x->props.replay_window) {
1175                 x->stats.replay_window++;
1176                 return -EINVAL;
1177         }
1178
1179         if (x->replay.bitmap & (1U << diff)) {
1180                 x->stats.replay++;
1181                 return -EINVAL;
1182         }
1183         return 0;
1184 }
1185 EXPORT_SYMBOL(xfrm_replay_check);
1186
1187 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
1188 {
1189         u32 diff;
1190
1191         seq = ntohl(seq);
1192
1193         if (seq > x->replay.seq) {
1194                 diff = seq - x->replay.seq;
1195                 if (diff < x->props.replay_window)
1196                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1197                 else
1198                         x->replay.bitmap = 1;
1199                 x->replay.seq = seq;
1200         } else {
1201                 diff = x->replay.seq - seq;
1202                 x->replay.bitmap |= (1U << diff);
1203         }
1204
1205         if (xfrm_aevent_is_on())
1206                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1207 }
1208 EXPORT_SYMBOL(xfrm_replay_advance);
1209
/* Registered key managers (struct xfrm_mgr) and the rwlock guarding the list. */
static LIST_HEAD(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
1212
1213 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1214 {
1215         struct xfrm_mgr *km;
1216
1217         read_lock(&xfrm_km_lock);
1218         list_for_each_entry(km, &xfrm_km_list, list)
1219                 if (km->notify_policy)
1220                         km->notify_policy(xp, dir, c);
1221         read_unlock(&xfrm_km_lock);
1222 }
1223
1224 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1225 {
1226         struct xfrm_mgr *km;
1227         read_lock(&xfrm_km_lock);
1228         list_for_each_entry(km, &xfrm_km_list, list)
1229                 if (km->notify)
1230                         km->notify(x, c);
1231         read_unlock(&xfrm_km_lock);
1232 }
1233
1234 EXPORT_SYMBOL(km_policy_notify);
1235 EXPORT_SYMBOL(km_state_notify);
1236
1237 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1238 {
1239         struct km_event c;
1240
1241         c.data.hard = hard;
1242         c.pid = pid;
1243         c.event = XFRM_MSG_EXPIRE;
1244         km_state_notify(x, &c);
1245
1246         if (hard)
1247                 wake_up(&km_waitq);
1248 }
1249
1250 EXPORT_SYMBOL(km_state_expired);
1251 /*
1252  * We send to all registered managers regardless of failure
1253  * We are happy with one success
1254 */
1255 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1256 {
1257         int err = -EINVAL, acqret;
1258         struct xfrm_mgr *km;
1259
1260         read_lock(&xfrm_km_lock);
1261         list_for_each_entry(km, &xfrm_km_list, list) {
1262                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1263                 if (!acqret)
1264                         err = acqret;
1265         }
1266         read_unlock(&xfrm_km_lock);
1267         return err;
1268 }
1269 EXPORT_SYMBOL(km_query);
1270
1271 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1272 {
1273         int err = -EINVAL;
1274         struct xfrm_mgr *km;
1275
1276         read_lock(&xfrm_km_lock);
1277         list_for_each_entry(km, &xfrm_km_list, list) {
1278                 if (km->new_mapping)
1279                         err = km->new_mapping(x, ipaddr, sport);
1280                 if (!err)
1281                         break;
1282         }
1283         read_unlock(&xfrm_km_lock);
1284         return err;
1285 }
1286 EXPORT_SYMBOL(km_new_mapping);
1287
1288 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1289 {
1290         struct km_event c;
1291
1292         c.data.hard = hard;
1293         c.pid = pid;
1294         c.event = XFRM_MSG_POLEXPIRE;
1295         km_policy_notify(pol, dir, &c);
1296
1297         if (hard)
1298                 wake_up(&km_waitq);
1299 }
1300 EXPORT_SYMBOL(km_policy_expired);
1301
1302 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1303 {
1304         int err = -EINVAL;
1305         int ret;
1306         struct xfrm_mgr *km;
1307
1308         read_lock(&xfrm_km_lock);
1309         list_for_each_entry(km, &xfrm_km_list, list) {
1310                 if (km->report) {
1311                         ret = km->report(proto, sel, addr);
1312                         if (!ret)
1313                                 err = ret;
1314                 }
1315         }
1316         read_unlock(&xfrm_km_lock);
1317         return err;
1318 }
1319 EXPORT_SYMBOL(km_report);
1320
1321 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1322 {
1323         int err;
1324         u8 *data;
1325         struct xfrm_mgr *km;
1326         struct xfrm_policy *pol = NULL;
1327
1328         if (optlen <= 0 || optlen > PAGE_SIZE)
1329                 return -EMSGSIZE;
1330
1331         data = kmalloc(optlen, GFP_KERNEL);
1332         if (!data)
1333                 return -ENOMEM;
1334
1335         err = -EFAULT;
1336         if (copy_from_user(data, optval, optlen))
1337                 goto out;
1338
1339         err = -EINVAL;
1340         read_lock(&xfrm_km_lock);
1341         list_for_each_entry(km, &xfrm_km_list, list) {
1342                 pol = km->compile_policy(sk, optname, data,
1343                                          optlen, &err);
1344                 if (err >= 0)
1345                         break;
1346         }
1347         read_unlock(&xfrm_km_lock);
1348
1349         if (err >= 0) {
1350                 xfrm_sk_policy_insert(sk, err, pol);
1351                 xfrm_pol_put(pol);
1352                 err = 0;
1353         }
1354
1355 out:
1356         kfree(data);
1357         return err;
1358 }
1359 EXPORT_SYMBOL(xfrm_user_policy);
1360
/*
 * Append key manager @km to the tail of xfrm_km_list under the write
 * side of xfrm_km_lock.  Always returns 0.
 */
int xfrm_register_km(struct xfrm_mgr *km)
{
        write_lock_bh(&xfrm_km_lock);
        list_add_tail(&km->list, &xfrm_km_list);
        write_unlock_bh(&xfrm_km_lock);
        return 0;
}
1368 EXPORT_SYMBOL(xfrm_register_km);
1369
/*
 * Unlink key manager @km from xfrm_km_list under the write side of
 * xfrm_km_lock.  Always returns 0.
 */
int xfrm_unregister_km(struct xfrm_mgr *km)
{
        write_lock_bh(&xfrm_km_lock);
        list_del(&km->list);
        write_unlock_bh(&xfrm_km_lock);
        return 0;
}
1377 EXPORT_SYMBOL(xfrm_unregister_km);
1378
1379 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1380 {
1381         int err = 0;
1382         if (unlikely(afinfo == NULL))
1383                 return -EINVAL;
1384         if (unlikely(afinfo->family >= NPROTO))
1385                 return -EAFNOSUPPORT;
1386         write_lock_bh(&xfrm_state_afinfo_lock);
1387         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1388                 err = -ENOBUFS;
1389         else
1390                 xfrm_state_afinfo[afinfo->family] = afinfo;
1391         write_unlock_bh(&xfrm_state_afinfo_lock);
1392         return err;
1393 }
1394 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1395
1396 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1397 {
1398         int err = 0;
1399         if (unlikely(afinfo == NULL))
1400                 return -EINVAL;
1401         if (unlikely(afinfo->family >= NPROTO))
1402                 return -EAFNOSUPPORT;
1403         write_lock_bh(&xfrm_state_afinfo_lock);
1404         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1405                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1406                         err = -EINVAL;
1407                 else
1408                         xfrm_state_afinfo[afinfo->family] = NULL;
1409         }
1410         write_unlock_bh(&xfrm_state_afinfo_lock);
1411         return err;
1412 }
1413 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1414
1415 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1416 {
1417         struct xfrm_state_afinfo *afinfo;
1418         if (unlikely(family >= NPROTO))
1419                 return NULL;
1420         read_lock(&xfrm_state_afinfo_lock);
1421         afinfo = xfrm_state_afinfo[family];
1422         if (unlikely(!afinfo))
1423                 read_unlock(&xfrm_state_afinfo_lock);
1424         return afinfo;
1425 }
1426
/*
 * Counterpart of xfrm_state_get_afinfo(): drops the read lock on
 * xfrm_state_afinfo_lock.  @afinfo itself is not used.
 */
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
        read_unlock(&xfrm_state_afinfo_lock);
}
1431
1432 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1433 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1434 {
1435         if (x->tunnel) {
1436                 struct xfrm_state *t = x->tunnel;
1437
1438                 if (atomic_read(&t->tunnel_users) == 2)
1439                         xfrm_state_delete(t);
1440                 atomic_dec(&t->tunnel_users);
1441                 xfrm_state_put(t);
1442                 x->tunnel = NULL;
1443         }
1444 }
1445 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1446
1447 /*
1448  * This function is NOT optimal.  For example, with ESP it will give an
1449  * MTU that's usually two bytes short of being optimal.  However, it will
1450  * usually give an answer that's a multiple of 4 provided the input is
1451  * also a multiple of 4.
1452  */
1453 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1454 {
1455         int res = mtu;
1456
1457         res -= x->props.header_len;
1458
1459         for (;;) {
1460                 int m = res;
1461
1462                 if (m < 68)
1463                         return 68;
1464
1465                 spin_lock_bh(&x->lock);
1466                 if (x->km.state == XFRM_STATE_VALID &&
1467                     x->type && x->type->get_max_size)
1468                         m = x->type->get_max_size(x, m);
1469                 else
1470                         m += x->props.header_len;
1471                 spin_unlock_bh(&x->lock);
1472
1473                 if (m <= mtu)
1474                         break;
1475                 res -= (m - mtu);
1476         }
1477
1478         return res;
1479 }
1480
1481 int xfrm_init_state(struct xfrm_state *x)
1482 {
1483         struct xfrm_state_afinfo *afinfo;
1484         int family = x->props.family;
1485         int err;
1486
1487         err = -EAFNOSUPPORT;
1488         afinfo = xfrm_state_get_afinfo(family);
1489         if (!afinfo)
1490                 goto error;
1491
1492         err = 0;
1493         if (afinfo->init_flags)
1494                 err = afinfo->init_flags(x);
1495
1496         xfrm_state_put_afinfo(afinfo);
1497
1498         if (err)
1499                 goto error;
1500
1501         err = -EPROTONOSUPPORT;
1502         x->type = xfrm_get_type(x->id.proto, family);
1503         if (x->type == NULL)
1504                 goto error;
1505
1506         err = x->type->init_state(x);
1507         if (err)
1508                 goto error;
1509
1510         x->mode = xfrm_get_mode(x->props.mode, family);
1511         if (x->mode == NULL)
1512                 goto error;
1513
1514         x->km.state = XFRM_STATE_VALID;
1515
1516 error:
1517         return err;
1518 }
1519
1520 EXPORT_SYMBOL(xfrm_init_state);
1521  
1522 void __init xfrm_state_init(void)
1523 {
1524         int i;
1525
1526         for (i=0; i<XFRM_DST_HSIZE; i++) {
1527                 INIT_LIST_HEAD(&xfrm_state_bydst[i]);
1528                 INIT_LIST_HEAD(&xfrm_state_bysrc[i]);
1529                 INIT_LIST_HEAD(&xfrm_state_byspi[i]);
1530         }
1531         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1532 }
1533