/*
 * net/xfrm/xfrm_state.c
 * (imported from the "Pileus Git" web view of ~andy/linux;
 *  commit subject: "[XFRM]: Hash xfrm_state objects by source address too.")
 */
/*
 * xfrm_state.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific functions
 *	Derek Atkins <derek@ihtfp.com>
 *		Add UDP Encapsulation
 *
 */
15
#include <linux/workqueue.h>
#include <net/xfrm.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/module.h>
#include <linux/bootmem.h>
#include <linux/vmalloc.h>
#include <linux/cache.h>
#include <asm/uaccess.h>

/* Netlink socket exported for the xfrm userspace interface.
 * NOTE(review): presumably set up by xfrm_user on init -- not visible here. */
struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);

/* Tunable defaults for async-event (aevent) notification throttling. */
u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
/* Each xfrm_state may be linked to two tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
      destination/tunnel endpoint. (output)
 */

/* Protects all three hash tables, xfrm_state_num and xfrm_state_genid. */
static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct hlist_head *xfrm_state_bydst __read_mostly;
static struct hlist_head *xfrm_state_bysrc __read_mostly;
static struct hlist_head *xfrm_state_byspi __read_mostly;
/* Current table mask (nr_buckets - 1); all three tables share one size. */
static unsigned int xfrm_state_hmask __read_mostly;
/* Upper bound on bucket count; resizing stops once this is reached. */
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
/* Number of states currently installed (drives resize heuristic). */
static unsigned int xfrm_state_num;
/* Generation counter bumped on insert; see __xfrm_state_bump_genids(). */
static unsigned int xfrm_state_genid;
58 static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
59 {
60         return ntohl(addr->a4);
61 }
62
63 static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
64 {
65         return ntohl(addr->a6[2]^addr->a6[3]);
66 }
67
68 static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
69 {
70         return ntohl(daddr->a4 ^ saddr->a4);
71 }
72
73 static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
74 {
75         return ntohl(daddr->a6[2] ^ daddr->a6[3] ^
76                      saddr->a6[2] ^ saddr->a6[3]);
77 }
78
79 static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr,
80                                            xfrm_address_t *saddr,
81                                            u32 reqid, unsigned short family,
82                                            unsigned int hmask)
83 {
84         unsigned int h = family ^ reqid;
85         switch (family) {
86         case AF_INET:
87                 h ^= __xfrm4_daddr_saddr_hash(daddr, saddr);
88                 break;
89         case AF_INET6:
90                 h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
91                 break;
92         };
93         return (h ^ (h >> 16)) & hmask;
94 }
95
96 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
97                                          xfrm_address_t *saddr,
98                                          u32 reqid,
99                                          unsigned short family)
100 {
101         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
102 }
103
104 static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,
105                                        unsigned int hmask)
106 {
107         unsigned int h = family;
108         switch (family) {
109         case AF_INET:
110                 h ^= __xfrm4_addr_hash(addr);
111                 break;
112         case AF_INET6:
113                 h ^= __xfrm6_addr_hash(addr);
114                 break;
115         };
116         return (h ^ (h >> 16)) & hmask;
117 }
118
119 static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
120 {
121         return __xfrm_src_hash(addr, family, xfrm_state_hmask);
122 }
123
124 static inline unsigned int
125 __xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto,
126                 unsigned short family, unsigned int hmask)
127 {
128         unsigned int h = spi ^ proto;
129         switch (family) {
130         case AF_INET:
131                 h ^= __xfrm4_addr_hash(daddr);
132                 break;
133         case AF_INET6:
134                 h ^= __xfrm6_addr_hash(daddr);
135                 break;
136         }
137         return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
138 }
139
140 static inline unsigned int
141 xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
142 {
143         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
144 }
145
146 static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
147 {
148         struct hlist_head *n;
149
150         if (sz <= PAGE_SIZE)
151                 n = kmalloc(sz, GFP_KERNEL);
152         else if (hashdist)
153                 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
154         else
155                 n = (struct hlist_head *)
156                         __get_free_pages(GFP_KERNEL, get_order(sz));
157
158         if (n)
159                 memset(n, 0, sz);
160
161         return n;
162 }
163
164 static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
165 {
166         if (sz <= PAGE_SIZE)
167                 kfree(n);
168         else if (hashdist)
169                 vfree(n);
170         else
171                 free_pages((unsigned long)n, get_order(sz));
172 }
173
/* Move every state on one old bydst chain into the three new tables,
 * rehashing with the new mask.  Uses the _safe iterator because each
 * hlist_add_head() unlinks the node from the list being walked.
 * Caller holds xfrm_state_lock.
 */
static void xfrm_hash_transfer(struct hlist_head *list,
			       struct hlist_head *ndsttable,
			       struct hlist_head *nsrctable,
			       struct hlist_head *nspitable,
			       unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_state *x;

	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
		unsigned int h;

		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
				    x->props.reqid, x->props.family,
				    nhashmask);
		hlist_add_head(&x->bydst, ndsttable+h);

		h = __xfrm_src_hash(&x->props.saddr, x->props.family,
				    nhashmask);
		hlist_add_head(&x->bysrc, nsrctable+h);

		/* Note: states are moved to the new byspi table even when
		 * spi == 0 (they were never on the old byspi table). */
		h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
				    x->props.family, nhashmask);
		hlist_add_head(&x->byspi, nspitable+h);
	}
}
200
201 static unsigned long xfrm_hash_new_size(void)
202 {
203         return ((xfrm_state_hmask + 1) << 1) *
204                 sizeof(struct hlist_head);
205 }
206
207 static DEFINE_MUTEX(hash_resize_mutex);
208
209 static void xfrm_hash_resize(void *__unused)
210 {
211         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
212         unsigned long nsize, osize;
213         unsigned int nhashmask, ohashmask;
214         int i;
215
216         mutex_lock(&hash_resize_mutex);
217
218         nsize = xfrm_hash_new_size();
219         ndst = xfrm_state_hash_alloc(nsize);
220         if (!ndst)
221                 goto out_unlock;
222         nsrc = xfrm_state_hash_alloc(nsize);
223         if (!nsrc) {
224                 xfrm_state_hash_free(ndst, nsize);
225                 goto out_unlock;
226         }
227         nspi = xfrm_state_hash_alloc(nsize);
228         if (!nspi) {
229                 xfrm_state_hash_free(ndst, nsize);
230                 xfrm_state_hash_free(nsrc, nsize);
231                 goto out_unlock;
232         }
233
234         spin_lock_bh(&xfrm_state_lock);
235
236         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
237         for (i = xfrm_state_hmask; i >= 0; i--)
238                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
239                                    nhashmask);
240
241         odst = xfrm_state_bydst;
242         osrc = xfrm_state_bysrc;
243         ospi = xfrm_state_byspi;
244         ohashmask = xfrm_state_hmask;
245
246         xfrm_state_bydst = ndst;
247         xfrm_state_bysrc = nsrc;
248         xfrm_state_byspi = nspi;
249         xfrm_state_hmask = nhashmask;
250
251         spin_unlock_bh(&xfrm_state_lock);
252
253         osize = (ohashmask + 1) * sizeof(struct hlist_head);
254         xfrm_state_hash_free(odst, osize);
255         xfrm_state_hash_free(osrc, osize);
256         xfrm_state_hash_free(ospi, osize);
257
258 out_unlock:
259         mutex_unlock(&hash_resize_mutex);
260 }
261
/* Deferred hash-table growth; scheduled from __xfrm_state_insert(). */
static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);

/* Woken on state insert/expire/gc so key-manager waiters can re-check. */
DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

/* Per-family (AF_INET/AF_INET6) state operations, registered elsewhere. */
static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

/* Deferred destruction: dead states are parked on gc_list and torn
 * down from process context (destructors may sleep). */
static struct work_struct xfrm_state_gc_work;
static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
281
/* Final teardown of a dead state, run from the gc work queue.
 * Order matters: both timers must be fully stopped (del_timer_sync may
 * sleep) before any memory they might touch is freed, and the type
 * destructor runs before the type module reference is dropped.
 */
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
	del_timer_sync(&x->timer);
	del_timer_sync(&x->rtimer);
	kfree(x->aalg);
	kfree(x->ealg);
	kfree(x->calg);
	kfree(x->encap);
	kfree(x->coaddr);
	if (x->mode)
		xfrm_put_mode(x->mode);
	if (x->type) {
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	security_xfrm_state_free(x);
	kfree(x);
}
300
/* Work handler: splice the pending gc list out under the lock, then
 * destroy each state without holding it (destruction may sleep).
 * Dead states sit on the gc list via their (now unused) bydst node.
 */
static void xfrm_state_gc_task(void *data)
{
	struct xfrm_state *x;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	spin_lock_bh(&xfrm_state_gc_lock);
	gc_list.first = xfrm_state_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
		xfrm_state_gc_destroy(x);

	/* Let waiters (e.g. flush callers) observe the freed states. */
	wake_up(&km_waitq);
}
317
318 static inline unsigned long make_jiffies(long secs)
319 {
320         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
321                 return MAX_SCHEDULE_TIMEOUT-1;
322         else
323                 return secs*HZ;
324 }
325
/* Per-state lifetime timer.  Checks hard and soft expiry limits under
 * x->lock and either reschedules itself for the nearest deadline,
 * notifies the key manager (soft limit), or deletes the state (hard
 * limit).
 */
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	/* Lifetimes are kept as absolute wall-clock seconds. */
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;	/* seconds until the next re-check */
	int warn = 0;

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	/* Hard limits: reaching one expires the state outright. */
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		/* use_time == 0 means never used; treat as "now". */
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	/* Soft expiry already signalled; only hard limits matter now. */
	if (x->km.dying)
		goto resched;
	/* Soft limits: warn the key manager, state stays usable. */
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0, 0);
resched:
	if (next != LONG_MAX)
		mod_timer(&x->timer, jiffies + make_jiffies(next));

	goto out;

expired:
	/* Larval (ACQ, no SPI yet) state: mark expired, re-check shortly. */
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}
	if (!__xfrm_state_delete(x) && x->id.spi)
		km_state_expired(x, 1, 0);

out:
	spin_unlock(&x->lock);
}
395
static void xfrm_replay_timer_handler(unsigned long data);

/* Allocate and minimally initialize a new xfrm_state with one reference
 * held.  Byte/packet lifetime limits default to "infinite"; the caller
 * fills in addresses, algorithms and the remaining lifetime config.
 * Returns NULL on allocation failure (GFP_ATOMIC).
 */
struct xfrm_state *xfrm_state_alloc(void)
{
	struct xfrm_state *x;

	x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

	if (x) {
		atomic_set(&x->refcnt, 1);
		atomic_set(&x->tunnel_users, 0);
		INIT_HLIST_NODE(&x->bydst);
		INIT_HLIST_NODE(&x->bysrc);
		INIT_HLIST_NODE(&x->byspi);
		init_timer(&x->timer);
		x->timer.function = xfrm_timer_handler;
		x->timer.data     = (unsigned long)x;
		init_timer(&x->rtimer);
		x->rtimer.function = xfrm_replay_timer_handler;
		x->rtimer.data     = (unsigned long)x;
		x->curlft.add_time = (unsigned long)xtime.tv_sec;
		x->lft.soft_byte_limit = XFRM_INF;
		x->lft.soft_packet_limit = XFRM_INF;
		x->lft.hard_byte_limit = XFRM_INF;
		x->lft.hard_packet_limit = XFRM_INF;
		x->replay_maxage = 0;
		x->replay_maxdiff = 0;
		spin_lock_init(&x->lock);
	}
	return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);
428
/* Called when the last reference is dropped: queue the (already dead)
 * state for deferred destruction.  The bydst node is reused as the gc
 * list link -- the state was unhashed in __xfrm_state_delete().
 */
void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
439
/* Mark a state dead and unhash it from all tables.  Caller holds
 * x->lock.  Returns 0 if this call performed the deletion, -ESRCH if
 * the state was already dead.
 */
int __xfrm_state_delete(struct xfrm_state *x)
{
	int err = -ESRCH;

	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		hlist_del(&x->bydst);
		hlist_del(&x->bysrc);
		/* Only states with an SPI were ever put on the byspi table. */
		if (x->id.spi)
			hlist_del(&x->byspi);
		xfrm_state_num--;
		spin_unlock(&xfrm_state_lock);

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		__xfrm_state_put(x);
		err = 0;
	}

	return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);
465
466 int xfrm_state_delete(struct xfrm_state *x)
467 {
468         int err;
469
470         spin_lock_bh(&x->lock);
471         err = __xfrm_state_delete(x);
472         spin_unlock_bh(&x->lock);
473
474         return err;
475 }
476 EXPORT_SYMBOL(xfrm_state_delete);
477
478 void xfrm_state_flush(u8 proto)
479 {
480         int i;
481
482         spin_lock_bh(&xfrm_state_lock);
483         for (i = 0; i < xfrm_state_hmask; i++) {
484                 struct hlist_node *entry;
485                 struct xfrm_state *x;
486 restart:
487                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
488                         if (!xfrm_state_kern(x) &&
489                             xfrm_id_proto_match(x->id.proto, proto)) {
490                                 xfrm_state_hold(x);
491                                 spin_unlock_bh(&xfrm_state_lock);
492
493                                 xfrm_state_delete(x);
494                                 xfrm_state_put(x);
495
496                                 spin_lock_bh(&xfrm_state_lock);
497                                 goto restart;
498                         }
499                 }
500         }
501         spin_unlock_bh(&xfrm_state_lock);
502         wake_up(&km_waitq);
503 }
504 EXPORT_SYMBOL(xfrm_state_flush);
505
506 static int
507 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
508                   struct xfrm_tmpl *tmpl,
509                   xfrm_address_t *daddr, xfrm_address_t *saddr,
510                   unsigned short family)
511 {
512         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
513         if (!afinfo)
514                 return -1;
515         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
516         xfrm_state_put_afinfo(afinfo);
517         return 0;
518 }
519
520 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
521 {
522         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
523         struct xfrm_state *x;
524         struct hlist_node *entry;
525
526         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
527                 if (x->props.family != family ||
528                     x->id.spi       != spi ||
529                     x->id.proto     != proto)
530                         continue;
531
532                 switch (family) {
533                 case AF_INET:
534                         if (x->id.daddr.a4 != daddr->a4)
535                                 continue;
536                         break;
537                 case AF_INET6:
538                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
539                                              (struct in6_addr *)
540                                              x->id.daddr.a6))
541                                 continue;
542                         break;
543                 };
544
545                 xfrm_state_hold(x);
546                 return x;
547         }
548
549         return NULL;
550 }
551
552 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
553 {
554         unsigned int h = xfrm_src_hash(saddr, family);
555         struct xfrm_state *x;
556         struct hlist_node *entry;
557
558         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
559                 if (x->props.family != family ||
560                     x->id.proto     != proto)
561                         continue;
562
563                 switch (family) {
564                 case AF_INET:
565                         if (x->id.daddr.a4 != daddr->a4 ||
566                             x->props.saddr.a4 != saddr->a4)
567                                 continue;
568                         break;
569                 case AF_INET6:
570                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
571                                              (struct in6_addr *)
572                                              x->id.daddr.a6) ||
573                             !ipv6_addr_equal((struct in6_addr *)saddr,
574                                              (struct in6_addr *)
575                                              x->props.saddr.a6))
576                                 continue;
577                         break;
578                 };
579
580                 xfrm_state_hold(x);
581                 return x;
582         }
583
584         return NULL;
585 }
586
587 static inline struct xfrm_state *
588 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
589 {
590         if (use_spi)
591                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
592                                            x->id.proto, family);
593         else
594                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
595                                                   &x->props.saddr,
596                                                   x->id.proto, family);
597 }
598
/* Find (or begin acquiring) a state for the given flow/template/policy.
 * Returns a referenced state on success; on failure returns NULL with
 * *err set (-EAGAIN while an acquire is pending, -ESRCH/-EEXIST/-ENOMEM
 * otherwise).  If nothing matches, a larval ACQ state is created and
 * the key manager is queried.
 */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x, *x0;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;
	
	spin_lock_bh(&xfrm_state_lock);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
			/* Resolution logic:
			   1. There is a valid state with matching selector.
			      Done.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family) ||
				    !security_xfrm_state_pol_flow_match(x, pol, fl))
					continue;
				/* Prefer non-dying states; among equals,
				 * the most recently added one. */
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, family) &&
				    security_xfrm_state_pol_flow_match(x, pol, fl))
					error = -ESRCH;
			}
		}
	}

	x = best;
	if (!x && !error && !acquire_in_progress) {
		/* A state with this exact SPI already exists (on a
		 * different chain) -- refuse to create a duplicate. */
		if (tmpl->id.spi &&
		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
					      tmpl->id.proto, family)) != NULL) {
			xfrm_state_put(x0);
			error = -EEXIST;
			goto out;
		}
		x = xfrm_state_alloc();
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
		if (error) {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			goto out;
		}

		if (km_query(x, tmpl, pol) == 0) {
			/* Hash the larval state so concurrent lookups see
			 * the pending acquire; expire it if the key
			 * manager never resolves it. */
			x->km.state = XFRM_STATE_ACQ;
			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
			h = xfrm_src_hash(saddr, family);
			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
			}
			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
			add_timer(&x->timer);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	if (x)
		xfrm_state_hold(x);
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
707
/* Hash a fully configured state into all applicable tables and start
 * its timers.  Caller holds xfrm_state_lock.  May schedule a table
 * resize when the load factor exceeds one.
 */
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned int h;

	x->genid = ++xfrm_state_genid;

	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
			  x->props.reqid, x->props.family);
	hlist_add_head(&x->bydst, xfrm_state_bydst+h);

	h = xfrm_src_hash(&x->props.saddr, x->props.family);
	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);

	/* Only SPI-carrying protocols go on the byspi table. */
	if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
				  x->props.family);

		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
	}

	/* Kick the lifetime timer soon so expiry checks start running. */
	mod_timer(&x->timer, jiffies + HZ);
	if (x->replay_maxage)
		mod_timer(&x->rtimer, jiffies + x->replay_maxage);

	wake_up(&km_waitq);

	xfrm_state_num++;

	/* Grow the tables when this chain has a collision and the number
	 * of states exceeds the number of buckets (load factor > 1). */
	if (x->bydst.next != NULL &&
	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
	    xfrm_state_num > xfrm_state_hmask)
		schedule_work(&xfrm_hash_work);
}
741
/* xfrm_state_lock is held */
/* Bump the generation id of every existing state that clashes with
 * @xnew (same family/reqid/address pair), so cached bundles referring
 * to them are invalidated in favor of the new state.
 */
static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
{
	unsigned short family = xnew->props.family;
	u32 reqid = xnew->props.reqid;
	struct xfrm_state *x;
	struct hlist_node *entry;
	unsigned int h;

	h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family     == family &&
		    x->props.reqid      == reqid &&
		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
			x->genid = xfrm_state_genid;
	}
}
760
761 void xfrm_state_insert(struct xfrm_state *x)
762 {
763         spin_lock_bh(&xfrm_state_lock);
764         __xfrm_state_bump_genids(x);
765         __xfrm_state_insert(x);
766         spin_unlock_bh(&xfrm_state_lock);
767 }
768 EXPORT_SYMBOL(xfrm_state_insert);
769
/* xfrm_state_lock is held */
/* Find a larval (ACQ, spi==0) state matching the given tuple; if none
 * exists and @create is set, allocate one with a host-only temporary
 * selector, hash it in (bydst/bysrc only -- no SPI yet) and start its
 * acquire-expiry timer.  Returns a referenced state or NULL.
 */
static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x;

	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.reqid  != reqid ||
		    x->props.mode   != mode ||
		    x->props.family != family ||
		    x->km.state     != XFRM_STATE_ACQ ||
		    x->id.spi       != 0)
			continue;

		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4    != daddr->a4 ||
			    x->props.saddr.a4 != saddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
					     (struct in6_addr *)daddr) ||
			    !ipv6_addr_equal((struct in6_addr *)
					     x->props.saddr.a6,
					     (struct in6_addr *)saddr))
				continue;
			break;
		};

		xfrm_state_hold(x);
		return x;
	}

	if (!create)
		return NULL;

	x = xfrm_state_alloc();
	if (likely(x)) {
		/* Temporary selector: exact-host match on both addresses. */
		switch (family) {
		case AF_INET:
			x->sel.daddr.a4 = daddr->a4;
			x->sel.saddr.a4 = saddr->a4;
			x->sel.prefixlen_d = 32;
			x->sel.prefixlen_s = 32;
			x->props.saddr.a4 = saddr->a4;
			x->id.daddr.a4 = daddr->a4;
			break;

		case AF_INET6:
			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
				       (struct in6_addr *)daddr);
			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
				       (struct in6_addr *)saddr);
			x->sel.prefixlen_d = 128;
			x->sel.prefixlen_s = 128;
			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
				       (struct in6_addr *)saddr);
			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
				       (struct in6_addr *)daddr);
			break;
		};

		x->km.state = XFRM_STATE_ACQ;
		x->id.proto = proto;
		x->props.family = family;
		x->props.mode = mode;
		x->props.reqid = reqid;
		x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
		/* Extra hold: one reference for the hash tables, one
		 * returned to the caller. */
		xfrm_state_hold(x);
		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
		add_timer(&x->timer);
		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
		h = xfrm_src_hash(saddr, family);
		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
		wake_up(&km_waitq);
	}

	return x;
}
851
852 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
853
/* Insert a fully-specified state @x into the state tables.
 *
 * Returns 0 on success, -EEXIST if an identical state is already
 * installed.  If a matching ACQUIRE (larval) state exists — found by
 * km.seq or by reqid/mode/proto/addresses — it is deleted after @x is
 * inserted, since @x resolves it.
 */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	/* use_spi: protocol identifies SAs by SPI (AH/ESP/comp-style). */
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		/* An equivalent state already exists; refuse the add. */
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	/* Try to find the larval ACQ state this SA resolves, first by
	 * the key-manager sequence number ... */
	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			/* seq matched but destination differs: not ours. */
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	/* ... then by tunnel parameters (create=0: lookup only). */
	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	/* Retire the larval state outside the table lock. */
	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
901
/* Update an existing state with the parameters carried in @x.
 *
 * Returns 0 on success, -ESRCH if no matching state exists, -EEXIST if
 * the installed state is kernel-internal, -EINVAL if the installed
 * state is no longer VALID.  If the installed state is a larval ACQ,
 * @x simply replaces it (is inserted); otherwise selected fields of
 * the installed state are refreshed from @x under its own lock.
 */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		/* Kernel-owned states may not be replaced from userland. */
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		/* Larval state: @x takes its place in the tables.
		 * x = NULL signals the replace path below. */
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		/* @x was inserted above; retire the larval original. */
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	/* In-place update of the live state, under its per-state lock. */
	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		/* Re-arm the timer so new lifetimes take effect soon. */
		mod_timer(&x1->timer, jiffies + HZ);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
965
966 int xfrm_state_check_expire(struct xfrm_state *x)
967 {
968         if (!x->curlft.use_time)
969                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
970
971         if (x->km.state != XFRM_STATE_VALID)
972                 return -EINVAL;
973
974         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
975             x->curlft.packets >= x->lft.hard_packet_limit) {
976                 x->km.state = XFRM_STATE_EXPIRED;
977                 mod_timer(&x->timer, jiffies);
978                 return -EINVAL;
979         }
980
981         if (!x->km.dying &&
982             (x->curlft.bytes >= x->lft.soft_byte_limit ||
983              x->curlft.packets >= x->lft.soft_packet_limit)) {
984                 x->km.dying = 1;
985                 km_state_expired(x, 0, 0);
986         }
987         return 0;
988 }
989 EXPORT_SYMBOL(xfrm_state_check_expire);
990
991 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
992 {
993         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
994                 - skb_headroom(skb);
995
996         if (nhead > 0)
997                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
998
999         /* Check tail too... */
1000         return 0;
1001 }
1002
/* Combined pre-output check: lifetime expiry first, then headroom. */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err < 0)
		return err;
	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
1013
1014 struct xfrm_state *
1015 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
1016                   unsigned short family)
1017 {
1018         struct xfrm_state *x;
1019
1020         spin_lock_bh(&xfrm_state_lock);
1021         x = __xfrm_state_lookup(daddr, spi, proto, family);
1022         spin_unlock_bh(&xfrm_state_lock);
1023         return x;
1024 }
1025 EXPORT_SYMBOL(xfrm_state_lookup);
1026
1027 struct xfrm_state *
1028 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1029                          u8 proto, unsigned short family)
1030 {
1031         struct xfrm_state *x;
1032
1033         spin_lock_bh(&xfrm_state_lock);
1034         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1035         spin_unlock_bh(&xfrm_state_lock);
1036         return x;
1037 }
1038 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1039
1040 struct xfrm_state *
1041 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
1042               xfrm_address_t *daddr, xfrm_address_t *saddr, 
1043               int create, unsigned short family)
1044 {
1045         struct xfrm_state *x;
1046
1047         spin_lock_bh(&xfrm_state_lock);
1048         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1049         spin_unlock_bh(&xfrm_state_lock);
1050
1051         return x;
1052 }
1053 EXPORT_SYMBOL(xfrm_find_acq);
1054
1055 #ifdef CONFIG_XFRM_SUB_POLICY
1056 int
1057 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1058                unsigned short family)
1059 {
1060         int err = 0;
1061         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1062         if (!afinfo)
1063                 return -EAFNOSUPPORT;
1064
1065         spin_lock_bh(&xfrm_state_lock);
1066         if (afinfo->tmpl_sort)
1067                 err = afinfo->tmpl_sort(dst, src, n);
1068         spin_unlock_bh(&xfrm_state_lock);
1069         xfrm_state_put_afinfo(afinfo);
1070         return err;
1071 }
1072 EXPORT_SYMBOL(xfrm_tmpl_sort);
1073
1074 int
1075 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1076                 unsigned short family)
1077 {
1078         int err = 0;
1079         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1080         if (!afinfo)
1081                 return -EAFNOSUPPORT;
1082
1083         spin_lock_bh(&xfrm_state_lock);
1084         if (afinfo->state_sort)
1085                 err = afinfo->state_sort(dst, src, n);
1086         spin_unlock_bh(&xfrm_state_lock);
1087         xfrm_state_put_afinfo(afinfo);
1088         return err;
1089 }
1090 EXPORT_SYMBOL(xfrm_state_sort);
1091 #endif
1092
/* Brute-force scan; no dedicated by-sequence resolution list exists yet. */
1094
1095 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1096 {
1097         int i;
1098
1099         for (i = 0; i <= xfrm_state_hmask; i++) {
1100                 struct hlist_node *entry;
1101                 struct xfrm_state *x;
1102
1103                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1104                         if (x->km.seq == seq &&
1105                             x->km.state == XFRM_STATE_ACQ) {
1106                                 xfrm_state_hold(x);
1107                                 return x;
1108                         }
1109                 }
1110         }
1111         return NULL;
1112 }
1113
1114 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1115 {
1116         struct xfrm_state *x;
1117
1118         spin_lock_bh(&xfrm_state_lock);
1119         x = __xfrm_find_acq_byseq(seq);
1120         spin_unlock_bh(&xfrm_state_lock);
1121         return x;
1122 }
1123 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1124
1125 u32 xfrm_get_acqseq(void)
1126 {
1127         u32 res;
1128         static u32 acqseq;
1129         static DEFINE_SPINLOCK(acqseq_lock);
1130
1131         spin_lock_bh(&acqseq_lock);
1132         res = (++acqseq ? : ++acqseq);
1133         spin_unlock_bh(&acqseq_lock);
1134         return res;
1135 }
1136 EXPORT_SYMBOL(xfrm_get_acqseq);
1137
/* Assign an SPI to @x from the range [minspi, maxspi] (both network
 * byte order), avoiding SPIs already in use for the same
 * daddr/proto/family.  No-op if @x already has an SPI.  On success
 * the state is hashed into the byspi table and km_waitq is woken.
 *
 * NOTE(review): for minspi != maxspi only range-size random probes are
 * made, so a free SPI can be missed in a nearly-full range — confirm
 * this best-effort behavior is acceptable to callers.
 */
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
	unsigned int h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		/* Single-value range: take it only if unused. */
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		/* Convert to host order for the range arithmetic. */
		minspi = ntohl(minspi);
		maxspi = ntohl(maxspi);
		for (h=0; h<maxspi-minspi+1; h++) {
			spi = minspi + net_random()%(maxspi-minspi+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		/* SPI chosen: make the state findable by SPI. */
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);
1177
/* Invoke @func on every state whose protocol matches @proto, holding
 * xfrm_state_lock for the whole walk.  A first pass counts matches so
 * @func receives the number of entries remaining after the current one
 * (the last call gets 0).  Returns -ENOENT if nothing matches, or the
 * first non-zero value returned by @func, else 0.
 */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	struct hlist_node *entry;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	/* Pass 1: count matching states under the same lock. */
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto))
				count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}

	/* Pass 2: deliver, counting down so callbacks can detect the
	 * final entry (count reaches 0). */
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_id_proto_match(x->id.proto, proto))
				continue;
			err = func(x, --count, data);
			if (err)
				goto out;
		}
	}
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
1213
1214
/* Decide whether a replay-counter change on @x warrants an
 * XFRM_MSG_NEWAE notification to the key managers, and send it if so.
 */
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
	struct km_event c;
	/* we send notify messages in case
	 *  1. we updated one of the sequence numbers, and the seqno difference
	 *     is at least x->replay_maxdiff, in this case we also update the
	 *     timeout of our timer function
	 *  2. if x->replay_maxage has elapsed since last update,
	 *     and there were changes
	 *
	 *  The state structure must be locked!
	 */

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		/* Change still below threshold: stay quiet, unless a
		 * timer-driven notification was previously deferred. */
		if (x->replay_maxdiff &&
		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
			if (x->xflags & XFRM_TIME_DEFER)
				event = XFRM_REPLAY_TIMEOUT;
			else
				return;
		}

		break;

	case XFRM_REPLAY_TIMEOUT:
		/* Timer fired but nothing changed since the last
		 * notification: defer until something does. */
		if ((x->replay.seq == x->preplay.seq) &&
		    (x->replay.bitmap == x->preplay.bitmap) &&
		    (x->replay.oseq == x->preplay.oseq)) {
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}

		break;
	}

	/* Snapshot the counters we are about to report. */
	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
		x->xflags &= ~XFRM_TIME_DEFER;
}
EXPORT_SYMBOL(xfrm_replay_notify);
1262
1263 static void xfrm_replay_timer_handler(unsigned long data)
1264 {
1265         struct xfrm_state *x = (struct xfrm_state*)data;
1266
1267         spin_lock(&x->lock);
1268
1269         if (x->km.state == XFRM_STATE_VALID) {
1270                 if (xfrm_aevent_is_on())
1271                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1272                 else
1273                         x->xflags |= XFRM_TIME_DEFER;
1274         }
1275
1276         spin_unlock(&x->lock);
1277 }
1278
1279 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
1280 {
1281         u32 diff;
1282
1283         seq = ntohl(seq);
1284
1285         if (unlikely(seq == 0))
1286                 return -EINVAL;
1287
1288         if (likely(seq > x->replay.seq))
1289                 return 0;
1290
1291         diff = x->replay.seq - seq;
1292         if (diff >= x->props.replay_window) {
1293                 x->stats.replay_window++;
1294                 return -EINVAL;
1295         }
1296
1297         if (x->replay.bitmap & (1U << diff)) {
1298                 x->stats.replay++;
1299                 return -EINVAL;
1300         }
1301         return 0;
1302 }
1303 EXPORT_SYMBOL(xfrm_replay_check);
1304
1305 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
1306 {
1307         u32 diff;
1308
1309         seq = ntohl(seq);
1310
1311         if (seq > x->replay.seq) {
1312                 diff = seq - x->replay.seq;
1313                 if (diff < x->props.replay_window)
1314                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1315                 else
1316                         x->replay.bitmap = 1;
1317                 x->replay.seq = seq;
1318         } else {
1319                 diff = x->replay.seq - seq;
1320                 x->replay.bitmap |= (1U << diff);
1321         }
1322
1323         if (xfrm_aevent_is_on())
1324                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1325 }
1326 EXPORT_SYMBOL(xfrm_replay_advance);
1327
1328 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1329 static DEFINE_RWLOCK(xfrm_km_lock);
1330
1331 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1332 {
1333         struct xfrm_mgr *km;
1334
1335         read_lock(&xfrm_km_lock);
1336         list_for_each_entry(km, &xfrm_km_list, list)
1337                 if (km->notify_policy)
1338                         km->notify_policy(xp, dir, c);
1339         read_unlock(&xfrm_km_lock);
1340 }
1341
1342 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1343 {
1344         struct xfrm_mgr *km;
1345         read_lock(&xfrm_km_lock);
1346         list_for_each_entry(km, &xfrm_km_list, list)
1347                 if (km->notify)
1348                         km->notify(x, c);
1349         read_unlock(&xfrm_km_lock);
1350 }
1351
1352 EXPORT_SYMBOL(km_policy_notify);
1353 EXPORT_SYMBOL(km_state_notify);
1354
1355 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1356 {
1357         struct km_event c;
1358
1359         c.data.hard = hard;
1360         c.pid = pid;
1361         c.event = XFRM_MSG_EXPIRE;
1362         km_state_notify(x, &c);
1363
1364         if (hard)
1365                 wake_up(&km_waitq);
1366 }
1367
1368 EXPORT_SYMBOL(km_state_expired);
1369 /*
1370  * We send to all registered managers regardless of failure
1371  * We are happy with one success
1372 */
1373 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1374 {
1375         int err = -EINVAL, acqret;
1376         struct xfrm_mgr *km;
1377
1378         read_lock(&xfrm_km_lock);
1379         list_for_each_entry(km, &xfrm_km_list, list) {
1380                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1381                 if (!acqret)
1382                         err = acqret;
1383         }
1384         read_unlock(&xfrm_km_lock);
1385         return err;
1386 }
1387 EXPORT_SYMBOL(km_query);
1388
1389 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1390 {
1391         int err = -EINVAL;
1392         struct xfrm_mgr *km;
1393
1394         read_lock(&xfrm_km_lock);
1395         list_for_each_entry(km, &xfrm_km_list, list) {
1396                 if (km->new_mapping)
1397                         err = km->new_mapping(x, ipaddr, sport);
1398                 if (!err)
1399                         break;
1400         }
1401         read_unlock(&xfrm_km_lock);
1402         return err;
1403 }
1404 EXPORT_SYMBOL(km_new_mapping);
1405
1406 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1407 {
1408         struct km_event c;
1409
1410         c.data.hard = hard;
1411         c.pid = pid;
1412         c.event = XFRM_MSG_POLEXPIRE;
1413         km_policy_notify(pol, dir, &c);
1414
1415         if (hard)
1416                 wake_up(&km_waitq);
1417 }
1418 EXPORT_SYMBOL(km_policy_expired);
1419
1420 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1421 {
1422         int err = -EINVAL;
1423         int ret;
1424         struct xfrm_mgr *km;
1425
1426         read_lock(&xfrm_km_lock);
1427         list_for_each_entry(km, &xfrm_km_list, list) {
1428                 if (km->report) {
1429                         ret = km->report(proto, sel, addr);
1430                         if (!ret)
1431                                 err = ret;
1432                 }
1433         }
1434         read_unlock(&xfrm_km_lock);
1435         return err;
1436 }
1437 EXPORT_SYMBOL(km_report);
1438
/* Handle a per-socket IPsec policy setsockopt: copy the userland blob,
 * let the first key manager that understands @optname compile it into
 * an xfrm_policy, then attach that policy to @sk.
 *
 * Returns 0 on success, -EMSGSIZE for a bad length, -ENOMEM/-EFAULT on
 * copy failures, or -EINVAL if no manager could compile the policy.
 */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	/* First manager to compile the blob wins; on success err holds
	 * the policy direction (>= 0) chosen by the manager. */
	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		/* Insert takes its own reference; drop the compile ref. */
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
1478
/* Register key manager @km to receive xfrm notifications.
 * Always succeeds (returns 0).
 */
int xfrm_register_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_add_tail(&km->list, &xfrm_km_list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_register_km);
1487
/* Unregister key manager @km.  Always succeeds (returns 0). */
int xfrm_unregister_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_del(&km->list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);
1496
1497 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1498 {
1499         int err = 0;
1500         if (unlikely(afinfo == NULL))
1501                 return -EINVAL;
1502         if (unlikely(afinfo->family >= NPROTO))
1503                 return -EAFNOSUPPORT;
1504         write_lock_bh(&xfrm_state_afinfo_lock);
1505         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1506                 err = -ENOBUFS;
1507         else
1508                 xfrm_state_afinfo[afinfo->family] = afinfo;
1509         write_unlock_bh(&xfrm_state_afinfo_lock);
1510         return err;
1511 }
1512 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1513
1514 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1515 {
1516         int err = 0;
1517         if (unlikely(afinfo == NULL))
1518                 return -EINVAL;
1519         if (unlikely(afinfo->family >= NPROTO))
1520                 return -EAFNOSUPPORT;
1521         write_lock_bh(&xfrm_state_afinfo_lock);
1522         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1523                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1524                         err = -EINVAL;
1525                 else
1526                         xfrm_state_afinfo[afinfo->family] = NULL;
1527         }
1528         write_unlock_bh(&xfrm_state_afinfo_lock);
1529         return err;
1530 }
1531 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1532
/* Look up the per-family state operations.  On success the afinfo read
 * lock is deliberately LEFT HELD and must be released with
 * xfrm_state_put_afinfo(); on failure (family out of range or nothing
 * registered) the lock is dropped here and NULL is returned.
 */
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
1544
/* Release the read lock held since a successful xfrm_state_get_afinfo(). */
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	read_unlock(&xfrm_state_afinfo_lock);
}
1549
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Drop @x's reference on its tunnel state.  The tunnel state itself is
 * deleted when tunnel_users is exactly 2 (presumably: this last user
 * plus the tunnel's own base reference — TODO confirm against the
 * tunnel_users accounting in the tunnel setup path).
 */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1564
1565 /*
1566  * This function is NOT optimal.  For example, with ESP it will give an
1567  * MTU that's usually two bytes short of being optimal.  However, it will
1568  * usually give an answer that's a multiple of 4 provided the input is
1569  * also a multiple of 4.
1570  */
1571 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1572 {
1573         int res = mtu;
1574
1575         res -= x->props.header_len;
1576
1577         for (;;) {
1578                 int m = res;
1579
1580                 if (m < 68)
1581                         return 68;
1582
1583                 spin_lock_bh(&x->lock);
1584                 if (x->km.state == XFRM_STATE_VALID &&
1585                     x->type && x->type->get_max_size)
1586                         m = x->type->get_max_size(x, m);
1587                 else
1588                         m += x->props.header_len;
1589                 spin_unlock_bh(&x->lock);
1590
1591                 if (m <= mtu)
1592                         break;
1593                 res -= (m - mtu);
1594         }
1595
1596         return res;
1597 }
1598
1599 int xfrm_init_state(struct xfrm_state *x)
1600 {
1601         struct xfrm_state_afinfo *afinfo;
1602         int family = x->props.family;
1603         int err;
1604
1605         err = -EAFNOSUPPORT;
1606         afinfo = xfrm_state_get_afinfo(family);
1607         if (!afinfo)
1608                 goto error;
1609
1610         err = 0;
1611         if (afinfo->init_flags)
1612                 err = afinfo->init_flags(x);
1613
1614         xfrm_state_put_afinfo(afinfo);
1615
1616         if (err)
1617                 goto error;
1618
1619         err = -EPROTONOSUPPORT;
1620         x->type = xfrm_get_type(x->id.proto, family);
1621         if (x->type == NULL)
1622                 goto error;
1623
1624         err = x->type->init_state(x);
1625         if (err)
1626                 goto error;
1627
1628         x->mode = xfrm_get_mode(x->props.mode, family);
1629         if (x->mode == NULL)
1630                 goto error;
1631
1632         x->km.state = XFRM_STATE_VALID;
1633
1634 error:
1635         return err;
1636 }
1637
1638 EXPORT_SYMBOL(xfrm_init_state);
1639  
1640 void __init xfrm_state_init(void)
1641 {
1642         unsigned int sz;
1643
1644         sz = sizeof(struct hlist_head) * 8;
1645
1646         xfrm_state_bydst = xfrm_state_hash_alloc(sz);
1647         xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
1648         xfrm_state_byspi = xfrm_state_hash_alloc(sz);
1649         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1650                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1651         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1652
1653         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1654 }
1655