/*
 * xfrm_state.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 * 	Kazunori MIYAZAWA @USAGI
 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 * 		IPv6 support
 * 	YOSHIFUJI Hideaki @USAGI
 * 		Split up af-specific functions
 *	Derek Atkins <derek@ihtfp.com>
 *		Add UDP Encapsulation
 *
 */
#include <linux/workqueue.h>
#include <net/xfrm.h>
#include <linux/pfkeyv2.h>
#include <linux/ipsec.h>
#include <linux/module.h>
#include <linux/bootmem.h>
#include <linux/vmalloc.h>
#include <linux/cache.h>
#include <asm/uaccess.h>

struct sock *xfrm_nl;
EXPORT_SYMBOL(xfrm_nl);
u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);

u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
/* Each xfrm_state may be linked to three hash tables:

   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
      destination/tunnel endpoint. (output)
   3. Hash table by (saddr,family) to find SAs by address pair, used by
      __xfrm_state_lookup_byaddr(). (ctl)
 */
static DEFINE_SPINLOCK(xfrm_state_lock);

/* Hash table to find appropriate SA towards given target (endpoint
 * of tunnel or destination of transport mode) allowed by selector.
 *
 * Main use is finding SA after policy selected tunnel or transport mode.
 * Also, it can be used by ah/esp icmp error handler to find offending SA.
 */
static struct hlist_head *xfrm_state_bydst __read_mostly;
static struct hlist_head *xfrm_state_bysrc __read_mostly;
static struct hlist_head *xfrm_state_byspi __read_mostly;
static unsigned int xfrm_state_hmask __read_mostly;
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
static unsigned int xfrm_state_num;
static unsigned int xfrm_state_genid;
static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr)
{
	return ntohl(addr->a4);
}

static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
{
	return ntohl(addr->a6[2] ^ addr->a6[3]);
}
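
/* These helpers fold an address to 32 bits before mixing: all of IPv4,
 * but only the low 64 bits of IPv6 (a6[2] ^ a6[3]), on the assumption
 * that the interface identifier holds most of the entropy.  For
 * example, __xfrm4_addr_hash() on 10.0.0.1 yields 0x0a000001.
 */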
static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr,
					   u32 reqid, unsigned short family,
					   unsigned int hmask)
{
	unsigned int h = family ^ reqid;
	switch (family) {
	case AF_INET:
		h ^= __xfrm4_addr_hash(addr);
		break;
	case AF_INET6:
		h ^= __xfrm6_addr_hash(addr);
		break;
	}
	return (h ^ (h >> 16)) & hmask;
}
static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, u32 reqid,
					 unsigned short family)
{
	return __xfrm_dst_hash(addr, reqid, family, xfrm_state_hmask);
}
static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family,
				       unsigned int hmask)
{
	unsigned int h = family;
	switch (family) {
	case AF_INET:
		h ^= __xfrm4_addr_hash(addr);
		break;
	case AF_INET6:
		h ^= __xfrm6_addr_hash(addr);
		break;
	}
	return (h ^ (h >> 16)) & hmask;
}
static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
{
	return __xfrm_src_hash(addr, family, xfrm_state_hmask);
}
static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
					    unsigned int hmask)
{
	unsigned int h;
	h = ntohl(addr->a4 ^ spi ^ proto);
	h = (h ^ (h >> 10) ^ (h >> 20)) & hmask;
	return h;
}

static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
					    unsigned int hmask)
{
	unsigned int h;
	h = ntohl(addr->a6[2] ^ addr->a6[3] ^ spi ^ proto);
	h = (h ^ (h >> 10) ^ (h >> 20)) & hmask;
	return h;
}
static inline
unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
			 unsigned int hmask)
{
	switch (family) {
	case AF_INET:
		return __xfrm4_spi_hash(addr, spi, proto, hmask);
	case AF_INET6:
		return __xfrm6_spi_hash(addr, spi, proto, hmask);
	}
	return 0;
}
static inline unsigned int
xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
{
	return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
}
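
/* hmask is always a power of two minus one, so only the low bits pick a
 * bucket; the h ^ (h >> 16) and h ^ (h >> 10) ^ (h >> 20) steps fold
 * the high bits down first so addresses differing only in their upper
 * bits still spread across buckets.
 */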
static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
{
	struct hlist_head *n;

	if (sz <= PAGE_SIZE)
		n = kmalloc(sz, GFP_KERNEL);
	else if (hashdist)
		n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
	else
		n = (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(sz));

	if (n)
		memset(n, 0, sz);

	return n;
}
static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
{
	if (sz <= PAGE_SIZE)
		kfree(n);
	else if (hashdist)
		vfree(n);
	else
		free_pages((unsigned long)n, get_order(sz));
}
static void xfrm_hash_transfer(struct hlist_head *list,
			       struct hlist_head *ndsttable,
			       struct hlist_head *nsrctable,
			       struct hlist_head *nspitable,
			       unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_state *x;

	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
		unsigned int h;

		h = __xfrm_dst_hash(&x->id.daddr, x->props.reqid,
				    x->props.family, nhashmask);
		hlist_add_head(&x->bydst, ndsttable+h);

		h = __xfrm_src_hash(&x->props.saddr, x->props.family,
				    nhashmask);
		hlist_add_head(&x->bysrc, nsrctable+h);

		h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
				    x->props.family, nhashmask);
		hlist_add_head(&x->byspi, nspitable+h);
	}
}
static unsigned long xfrm_hash_new_size(void)
{
	return ((xfrm_state_hmask + 1) << 1) *
		sizeof(struct hlist_head);
}
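
/* The table doubles on every resize: starting from the 8 buckets set up
 * in xfrm_state_init(), successive grows yield 16, 32, 64, ... buckets,
 * bounded above by xfrm_state_hashmax.
 */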
static DEFINE_MUTEX(hash_resize_mutex);
static void xfrm_hash_resize(void *__unused)
{
	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
	unsigned long nsize, osize;
	unsigned int nhashmask, ohashmask;
	int i;

	mutex_lock(&hash_resize_mutex);

	nsize = xfrm_hash_new_size();
	ndst = xfrm_state_hash_alloc(nsize);
	if (!ndst)
		goto out_unlock;
	nsrc = xfrm_state_hash_alloc(nsize);
	if (!nsrc) {
		xfrm_state_hash_free(ndst, nsize);
		goto out_unlock;
	}
	nspi = xfrm_state_hash_alloc(nsize);
	if (!nspi) {
		xfrm_state_hash_free(ndst, nsize);
		xfrm_state_hash_free(nsrc, nsize);
		goto out_unlock;
	}

	spin_lock_bh(&xfrm_state_lock);

	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
	for (i = xfrm_state_hmask; i >= 0; i--)
		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
				   nhashmask);

	odst = xfrm_state_bydst;
	osrc = xfrm_state_bysrc;
	ospi = xfrm_state_byspi;
	ohashmask = xfrm_state_hmask;

	xfrm_state_bydst = ndst;
	xfrm_state_bysrc = nsrc;
	xfrm_state_byspi = nspi;
	xfrm_state_hmask = nhashmask;

	spin_unlock_bh(&xfrm_state_lock);

	osize = (ohashmask + 1) * sizeof(struct hlist_head);
	xfrm_state_hash_free(odst, osize);
	xfrm_state_hash_free(osrc, osize);
	xfrm_state_hash_free(ospi, osize);

out_unlock:
	mutex_unlock(&hash_resize_mutex);
}
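
/* Resize protocol: the three replacement tables are allocated outside
 * xfrm_state_lock (GFP_KERNEL may sleep), every state is rehashed from
 * the old bydst chains into the new tables with the lock held, and the
 * old tables are freed only after the lock is dropped.  Lookups thus
 * never see a partially populated table, and hash_resize_mutex keeps
 * concurrent resize requests serialized.
 */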
static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);

DECLARE_WAIT_QUEUE_HEAD(km_waitq);
EXPORT_SYMBOL(km_waitq);

static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];

static struct work_struct xfrm_state_gc_work;
static HLIST_HEAD(xfrm_state_gc_list);
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

static int xfrm_state_gc_flush_bundles;

int __xfrm_state_delete(struct xfrm_state *x);

static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);

int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
	if (del_timer(&x->timer))
		BUG();
	if (del_timer(&x->rtimer))
		BUG();
	kfree(x->aalg);
	kfree(x->ealg);
	kfree(x->calg);
	kfree(x->encap);
	kfree(x->coaddr);
	if (x->mode)
		xfrm_put_mode(x->mode);
	if (x->type) {
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	security_xfrm_state_free(x);
	kfree(x);
}
static void xfrm_state_gc_task(void *data)
{
	struct xfrm_state *x;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	if (xfrm_state_gc_flush_bundles) {
		xfrm_state_gc_flush_bundles = 0;
		xfrm_flush_bundles();
	}

	spin_lock_bh(&xfrm_state_gc_lock);
	gc_list.first = xfrm_state_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
		xfrm_state_gc_destroy(x);

	wake_up(&km_waitq);
}
static inline unsigned long make_jiffies(long secs)
{
	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return MAX_SCHEDULE_TIMEOUT-1;
	else
		return secs*HZ;
}
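
/* E.g. make_jiffies(30) is simply 30*HZ; only timeouts large enough to
 * overflow the scheduler's range saturate at MAX_SCHEDULE_TIMEOUT-1.
 */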
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	unsigned long now = (unsigned long)xtime.tv_sec;
	long next = LONG_MAX;
	int warn = 0;

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->km.dying)
		goto resched;
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0, 0);
resched:
	if (next != LONG_MAX &&
	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
		xfrm_state_hold(x);
	goto out;

expired:
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}
	if (!__xfrm_state_delete(x) && x->id.spi)
		km_state_expired(x, 1, 0);

out:
	spin_unlock(&x->lock);
	xfrm_state_put(x);
}
static void xfrm_replay_timer_handler(unsigned long data);
struct xfrm_state *xfrm_state_alloc(void)
{
	struct xfrm_state *x;

	x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

	if (x) {
		atomic_set(&x->refcnt, 1);
		atomic_set(&x->tunnel_users, 0);
		INIT_HLIST_NODE(&x->bydst);
		INIT_HLIST_NODE(&x->bysrc);
		INIT_HLIST_NODE(&x->byspi);
		init_timer(&x->timer);
		x->timer.function = xfrm_timer_handler;
		x->timer.data	  = (unsigned long)x;
		init_timer(&x->rtimer);
		x->rtimer.function = xfrm_replay_timer_handler;
		x->rtimer.data     = (unsigned long)x;
		x->curlft.add_time = (unsigned long)xtime.tv_sec;
		x->lft.soft_byte_limit = XFRM_INF;
		x->lft.soft_packet_limit = XFRM_INF;
		x->lft.hard_byte_limit = XFRM_INF;
		x->lft.hard_packet_limit = XFRM_INF;
		x->replay_maxage = 0;
		x->replay_maxdiff = 0;
		spin_lock_init(&x->lock);
	}
	return x;
}
EXPORT_SYMBOL(xfrm_state_alloc);
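
/* Rough lifecycle sketch (error handling omitted; callers normally go
 * through the pfkey or xfrm netlink key managers rather than calling
 * these directly):
 *
 *	struct xfrm_state *x = xfrm_state_alloc();
 *	// fill in x->id, x->props, x->sel, algorithms ...
 *	if (xfrm_init_state(x) == 0)
 *		xfrm_state_add(x);	// or xfrm_state_update(x)
 *	...
 *	xfrm_state_delete(x);	// mark DEAD, unhash, drop creator's ref
 *	xfrm_state_put(x);	// last put hands x to the GC worker
 */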
void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
EXPORT_SYMBOL(__xfrm_state_destroy);
int __xfrm_state_delete(struct xfrm_state *x)
{
	int err = -ESRCH;

	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		hlist_del(&x->bydst);
		__xfrm_state_put(x);
		hlist_del(&x->bysrc);
		__xfrm_state_put(x);
		if (x->id.spi) {
			hlist_del(&x->byspi);
			__xfrm_state_put(x);
		}
		xfrm_state_num--;
		spin_unlock(&xfrm_state_lock);
		if (del_timer(&x->timer))
			__xfrm_state_put(x);
		if (del_timer(&x->rtimer))
			__xfrm_state_put(x);

		/* The number two in this test is the reference
		 * mentioned in the comment below plus the reference
		 * our caller holds.  A larger value means that
		 * there are DSTs attached to this xfrm_state.
		 */
		if (atomic_read(&x->refcnt) > 2) {
			xfrm_state_gc_flush_bundles = 1;
			schedule_work(&xfrm_state_gc_work);
		}

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		__xfrm_state_put(x);
		err = 0;
	}

	return err;
}
EXPORT_SYMBOL(__xfrm_state_delete);
int xfrm_state_delete(struct xfrm_state *x)
{
	int err;

	spin_lock_bh(&x->lock);
	err = __xfrm_state_delete(x);
	spin_unlock_bh(&x->lock);

	return err;
}
EXPORT_SYMBOL(xfrm_state_delete);
void xfrm_state_flush(u8 proto)
{
	int i;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;
restart:
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    xfrm_id_proto_match(x->id.proto, proto)) {
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				xfrm_state_delete(x);
				xfrm_state_put(x);

				spin_lock_bh(&xfrm_state_lock);
				goto restart;
			}
		}
	}
	spin_unlock_bh(&xfrm_state_lock);
	wake_up(&km_waitq);
}
EXPORT_SYMBOL(xfrm_state_flush);
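
/* The hold/unlock/delete/relock dance above is required because
 * xfrm_state_delete() acquires xfrm_state_lock itself; the extra
 * reference keeps x alive across the unlocked window, and the bucket
 * scan restarts since the chain may have changed meanwhile.
 */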
static int
xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
		  struct xfrm_tmpl *tmpl,
		  xfrm_address_t *daddr, xfrm_address_t *saddr,
		  unsigned short family)
{
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -1;
	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
	xfrm_state_put_afinfo(afinfo);
	return 0;
}
static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
{
	unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
	struct xfrm_state *x;
	struct hlist_node *entry;

	hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
		if (x->props.family != family ||
		    x->id.spi       != spi ||
		    x->id.proto     != proto)
			continue;

		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4 != daddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)daddr,
					     (struct in6_addr *)
					     x->id.daddr.a6))
				continue;
			break;
		}

		xfrm_state_hold(x);
		return x;
	}

	return NULL;
}
static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
{
	unsigned int h = xfrm_src_hash(saddr, family);
	struct xfrm_state *x;
	struct hlist_node *entry;

	hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
		if (x->props.family != family ||
		    x->id.proto     != proto)
			continue;

		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4    != daddr->a4 ||
			    x->props.saddr.a4 != saddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)daddr,
					     (struct in6_addr *)
					     x->id.daddr.a6) ||
			    !ipv6_addr_equal((struct in6_addr *)saddr,
					     (struct in6_addr *)
					     x->props.saddr.a6))
				continue;
			break;
		}

		xfrm_state_hold(x);
		return x;
	}

	return NULL;
}
static inline struct xfrm_state *
__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
{
	if (use_spi)
		return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
					   x->id.proto, family);
	else
		return __xfrm_state_lookup_byaddr(&x->id.daddr,
						  &x->props.saddr,
						  x->id.proto, family);
}
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned int h = xfrm_dst_hash(daddr, tmpl->reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x, *x0;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;

	spin_lock_bh(&xfrm_state_lock);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
			/* Resolution logic:
			   1. There is a valid state with matching selector.
			      Done.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family) ||
				    !security_xfrm_state_pol_flow_match(x, pol, fl))
					continue;
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, family) &&
				    security_xfrm_state_pol_flow_match(x, pol, fl))
					error = -ESRCH;
			}
		}
	}

	x = best;
	if (!x && !error && !acquire_in_progress) {
		if (tmpl->id.spi &&
		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
					      tmpl->id.proto, family)) != NULL) {
			xfrm_state_put(x0);
			error = -EEXIST;
			goto out;
		}
		x = xfrm_state_alloc();
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
		if (error) {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			goto out;
		}

		if (km_query(x, tmpl, pol) == 0) {
			x->km.state = XFRM_STATE_ACQ;
			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
			xfrm_state_hold(x);
			h = xfrm_src_hash(saddr, family);
			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
			xfrm_state_hold(x);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
				xfrm_state_hold(x);
			}
			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
			xfrm_state_hold(x);
			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
			add_timer(&x->timer);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	if (x)
		xfrm_state_hold(x);
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
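
/* In short: a VALID state whose selector and security context match the
 * flow wins (preferring non-dying, then most recently added); a matching
 * larval (ACQ) state makes the caller retry with -EAGAIN; otherwise a
 * new larval state is created here and the key managers are asked to
 * resolve it via km_query().
 */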
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned int h;

	x->genid = ++xfrm_state_genid;

	h = xfrm_dst_hash(&x->id.daddr, x->props.reqid, x->props.family);
	hlist_add_head(&x->bydst, xfrm_state_bydst+h);
	xfrm_state_hold(x);

	h = xfrm_src_hash(&x->props.saddr, x->props.family);
	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
	xfrm_state_hold(x);

	if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
				  x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		xfrm_state_hold(x);
	}

	if (!mod_timer(&x->timer, jiffies + HZ))
		xfrm_state_hold(x);

	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
		xfrm_state_hold(x);

	wake_up(&km_waitq);

	xfrm_state_num++;

	if (x->bydst.next != NULL &&
	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
	    xfrm_state_num > xfrm_state_hmask)
		schedule_work(&xfrm_hash_work);
}
void xfrm_state_insert(struct xfrm_state *x)
{
	spin_lock_bh(&xfrm_state_lock);
	__xfrm_state_insert(x);
	spin_unlock_bh(&xfrm_state_lock);

	xfrm_flush_all_bundles();
}
EXPORT_SYMBOL(xfrm_state_insert);
/* xfrm_state_lock is held */
static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
{
	unsigned int h = xfrm_dst_hash(daddr, reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x;

	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.reqid  != reqid ||
		    x->props.mode   != mode ||
		    x->props.family != family ||
		    x->km.state     != XFRM_STATE_ACQ ||
		    x->id.spi       != 0)
			continue;

		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4    != daddr->a4 ||
			    x->props.saddr.a4 != saddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
					     (struct in6_addr *)daddr) ||
			    !ipv6_addr_equal((struct in6_addr *)
					     x->props.saddr.a6,
					     (struct in6_addr *)saddr))
				continue;
			break;
		}

		xfrm_state_hold(x);
		return x;
	}

	if (!create)
		return NULL;

	x = xfrm_state_alloc();
	if (likely(x)) {
		switch (family) {
		case AF_INET:
			x->sel.daddr.a4 = daddr->a4;
			x->sel.saddr.a4 = saddr->a4;
			x->sel.prefixlen_d = 32;
			x->sel.prefixlen_s = 32;
			x->props.saddr.a4 = saddr->a4;
			x->id.daddr.a4 = daddr->a4;
			break;

		case AF_INET6:
			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
				       (struct in6_addr *)daddr);
			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
				       (struct in6_addr *)saddr);
			x->sel.prefixlen_d = 128;
			x->sel.prefixlen_s = 128;
			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
				       (struct in6_addr *)saddr);
			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
				       (struct in6_addr *)daddr);
			break;
		}

		x->km.state = XFRM_STATE_ACQ;
		x->id.proto = proto;
		x->props.family = family;
		x->props.mode = mode;
		x->props.reqid = reqid;
		x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
		xfrm_state_hold(x);
		x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
		add_timer(&x->timer);
		xfrm_state_hold(x);
		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
		h = xfrm_src_hash(saddr, family);
		xfrm_state_hold(x);
		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
		wake_up(&km_waitq);
	}

	return x;
}
static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (!err)
		xfrm_flush_all_bundles();

	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
EXPORT_SYMBOL(xfrm_state_add);
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	if (x1->km.state == XFRM_STATE_ACQ) {
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		if (!mod_timer(&x1->timer, jiffies + HZ))
			xfrm_state_hold(x1);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
EXPORT_SYMBOL(xfrm_state_update);
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = (unsigned long)xtime.tv_sec;

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		x->km.state = XFRM_STATE_EXPIRED;
		if (!mod_timer(&x->timer, jiffies))
			xfrm_state_hold(x);
		return -EINVAL;
	}

	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit)) {
		x->km.dying = 1;
		km_state_expired(x, 0, 0);
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_state_check_expire);
static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
{
	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
		- skb_headroom(skb);

	if (nhead > 0)
		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);

	/* Check tail too... */
	return 0;
}
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);
	if (err < 0)
		goto err;
	err = xfrm_state_check_space(x, skb);
err:
	return err;
}
EXPORT_SYMBOL(xfrm_state_check);
struct xfrm_state *
xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
		  unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_state_lookup(daddr, spi, proto, family);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_state_lookup);
struct xfrm_state *
xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
			 u8 proto, unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
struct xfrm_state *
xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
	      xfrm_address_t *daddr, xfrm_address_t *saddr,
	      int create, unsigned short family)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
	spin_unlock_bh(&xfrm_state_lock);

	return x;
}
EXPORT_SYMBOL(xfrm_find_acq);
#ifdef CONFIG_XFRM_SUB_POLICY
int
xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
	       unsigned short family)
{
	int err = 0;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	if (afinfo->tmpl_sort)
		err = afinfo->tmpl_sort(dst, src, n);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_tmpl_sort);

int
xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
		unsigned short family)
{
	int err = 0;
	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		return -EAFNOSUPPORT;

	spin_lock_bh(&xfrm_state_lock);
	if (afinfo->state_sort)
		err = afinfo->state_sort(dst, src, n);
	spin_unlock_bh(&xfrm_state_lock);
	xfrm_state_put_afinfo(afinfo);
	return err;
}
EXPORT_SYMBOL(xfrm_state_sort);
#endif
/* Silly enough, but I'm lazy to build resolution list */

static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
{
	int i;

	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;

		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (x->km.seq == seq &&
			    x->km.state == XFRM_STATE_ACQ) {
				xfrm_state_hold(x);
				return x;
			}
		}
	}
	return NULL;
}
struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
{
	struct xfrm_state *x;

	spin_lock_bh(&xfrm_state_lock);
	x = __xfrm_find_acq_byseq(seq);
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
EXPORT_SYMBOL(xfrm_find_acq_byseq);
u32 xfrm_get_acqseq(void)
{
	u32 res;
	static u32 acqseq;
	static DEFINE_SPINLOCK(acqseq_lock);

	spin_lock_bh(&acqseq_lock);
	res = (++acqseq ? : ++acqseq);
	spin_unlock_bh(&acqseq_lock);
	return res;
}
EXPORT_SYMBOL(xfrm_get_acqseq);
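
/* The (++acqseq ? : ++acqseq) idiom increments and skips zero on
 * wraparound: when ++acqseq overflows to 0, the second increment makes
 * it 1, so 0 stays reserved for "no sequence number".
 */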
void
xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
{
	unsigned int h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		minspi = ntohl(minspi);
		maxspi = ntohl(maxspi);
		for (h = 0; h < maxspi-minspi+1; h++) {
			spi = minspi + net_random()%(maxspi-minspi+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		xfrm_state_hold(x);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x;
	struct hlist_node *entry;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto))
				count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}

	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_id_proto_match(x->id.proto, proto))
				continue;
			err = func(x, --count, data);
			if (err)
				goto out;
		}
	}
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
	struct km_event c;
	/* we send notify messages in case
	 *  1. we updated one of the sequence numbers, and the seqno difference
	 *     is at least x->replay_maxdiff, in this case we also update the
	 *     timeout of our timer function
	 *  2. if x->replay_maxage has elapsed since last update,
	 *     and there were changes
	 *
	 *  The state structure must be locked!
	 */

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		if (x->replay_maxdiff &&
		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
			if (x->xflags & XFRM_TIME_DEFER)
				event = XFRM_REPLAY_TIMEOUT;
			else
				return;
		}

		break;

	case XFRM_REPLAY_TIMEOUT:
		if ((x->replay.seq == x->preplay.seq) &&
		    (x->replay.bitmap == x->preplay.bitmap) &&
		    (x->replay.oseq == x->preplay.oseq)) {
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}

		break;
	}

	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
		xfrm_state_hold(x);
		x->xflags &= ~XFRM_TIME_DEFER;
	}
}
EXPORT_SYMBOL(xfrm_replay_notify);
static void xfrm_replay_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;

	spin_lock(&x->lock);

	if (x->km.state == XFRM_STATE_VALID) {
		if (xfrm_aevent_is_on())
			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
		else
			x->xflags |= XFRM_TIME_DEFER;
	}

	spin_unlock(&x->lock);
	xfrm_state_put(x);
}
int xfrm_replay_check(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	if (unlikely(seq == 0))
		return -EINVAL;

	if (likely(seq > x->replay.seq))
		return 0;

	diff = x->replay.seq - seq;
	if (diff >= x->props.replay_window) {
		x->stats.replay_window++;
		return -EINVAL;
	}

	if (x->replay.bitmap & (1U << diff)) {
		x->stats.replay++;
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(xfrm_replay_check);
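
/* Worked example with replay_window == 32: after seq 100 is accepted,
 * replay.seq == 100 and bit 0 is set in the bitmap.  Seq 101 passes
 * (newer); seq 95 passes iff bit 5 is clear (late but inside the
 * window); seq 100 again fails as a replay; seq 60 fails because
 * diff >= replay_window.
 */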
void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
{
	u32 diff;

	seq = ntohl(seq);

	if (seq > x->replay.seq) {
		diff = seq - x->replay.seq;
		if (diff < x->props.replay_window)
			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
		else
			x->replay.bitmap = 1;
		x->replay.seq = seq;
	} else {
		diff = x->replay.seq - seq;
		x->replay.bitmap |= (1U << diff);
	}

	if (xfrm_aevent_is_on())
		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
EXPORT_SYMBOL(xfrm_replay_advance);
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
{
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		if (km->notify_policy)
			km->notify_policy(xp, dir, c);
	read_unlock(&xfrm_km_lock);
}
void km_state_notify(struct xfrm_state *x, struct km_event *c)
{
	struct xfrm_mgr *km;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list)
		if (km->notify)
			km->notify(x, c);
	read_unlock(&xfrm_km_lock);
}

EXPORT_SYMBOL(km_policy_notify);
EXPORT_SYMBOL(km_state_notify);
void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
{
	struct km_event c;

	c.data.hard = hard;
	c.pid = pid;
	c.event = XFRM_MSG_EXPIRE;
	km_state_notify(x, &c);

	if (hard)
		wake_up(&km_waitq);
}
EXPORT_SYMBOL(km_state_expired);
/*
 * We send to all registered managers regardless of failure;
 * we are happy with one success.
 */
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
{
	int err = -EINVAL, acqret;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
		if (!acqret)
			err = acqret;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_query);
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
{
	int err = -EINVAL;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		if (km->new_mapping)
			err = km->new_mapping(x, ipaddr, sport);
		if (!err)
			break;
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_new_mapping);
void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
{
	struct km_event c;

	c.data.hard = hard;
	c.pid = pid;
	c.event = XFRM_MSG_POLEXPIRE;
	km_policy_notify(pol, dir, &c);

	if (hard)
		wake_up(&km_waitq);
}
EXPORT_SYMBOL(km_policy_expired);
int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
{
	int err = -EINVAL;
	int ret;
	struct xfrm_mgr *km;

	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		if (km->report) {
			ret = km->report(proto, sel, addr);
			if (!ret)
				err = ret;
		}
	}
	read_unlock(&xfrm_km_lock);
	return err;
}
EXPORT_SYMBOL(km_report);
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
int xfrm_register_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_add_tail(&km->list, &xfrm_km_list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_register_km);

int xfrm_unregister_km(struct xfrm_mgr *km)
{
	write_lock_bh(&xfrm_km_lock);
	list_del(&km->list);
	write_unlock_bh(&xfrm_km_lock);
	return 0;
}
EXPORT_SYMBOL(xfrm_unregister_km);
int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_state_afinfo_lock);
	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;
	else
		xfrm_state_afinfo[afinfo->family] = afinfo;
	write_unlock_bh(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_register_afinfo);
int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	write_lock_bh(&xfrm_state_afinfo_lock);
	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			xfrm_state_afinfo[afinfo->family] = NULL;
	}
	write_unlock_bh(&xfrm_state_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}

static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	read_unlock(&xfrm_state_afinfo_lock);
}
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
	if (x->tunnel) {
		struct xfrm_state *t = x->tunnel;

		if (atomic_read(&t->tunnel_users) == 2)
			xfrm_state_delete(t);
		atomic_dec(&t->tunnel_users);
		xfrm_state_put(t);
		x->tunnel = NULL;
	}
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
/*
 * This function is NOT optimal.  For example, with ESP it will give an
 * MTU that's usually two bytes short of being optimal.  However, it will
 * usually give an answer that's a multiple of 4 provided the input is
 * also a multiple of 4.
 */
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
	int res = mtu;

	res -= x->props.header_len;

	for (;;) {
		int m = res;

		if (m < 68)
			return 68;

		spin_lock_bh(&x->lock);
		if (x->km.state == XFRM_STATE_VALID &&
		    x->type && x->type->get_max_size)
			m = x->type->get_max_size(x, m);
		else
			m += x->props.header_len;
		spin_unlock_bh(&x->lock);

		if (m <= mtu)
			break;
		res -= (m - mtu);
	}

	return res;
}
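
/* Example: xfrm_state_mtu(x, 1500) for an ESP state repeatedly shrinks
 * res until x->type->get_max_size() reports an on-wire size (payload
 * plus padding, IV and trailer) that fits in 1500 bytes; the result is
 * what callers report as the path MTU over this SA.
 */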
int xfrm_init_state(struct xfrm_state *x)
{
	struct xfrm_state_afinfo *afinfo;
	int family = x->props.family;
	int err;

	err = -EAFNOSUPPORT;
	afinfo = xfrm_state_get_afinfo(family);
	if (!afinfo)
		goto error;

	err = 0;
	if (afinfo->init_flags)
		err = afinfo->init_flags(x);

	xfrm_state_put_afinfo(afinfo);

	if (err)
		goto error;

	err = -EPROTONOSUPPORT;
	x->type = xfrm_get_type(x->id.proto, family);
	if (x->type == NULL)
		goto error;

	err = x->type->init_state(x);
	if (err)
		goto error;

	x->mode = xfrm_get_mode(x->props.mode, family);
	if (x->mode == NULL)
		goto error;

	x->km.state = XFRM_STATE_VALID;

error:
	return err;
}
EXPORT_SYMBOL(xfrm_init_state);
void __init xfrm_state_init(void)
{
	unsigned int sz;

	sz = sizeof(struct hlist_head) * 8;

	xfrm_state_bydst = xfrm_state_hash_alloc(sz);
	xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
	xfrm_state_byspi = xfrm_state_hash_alloc(sz);
	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
}