 *	Linux INET6 implementation
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/netdma.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
#include <asm/uaccess.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static void __tcp_v6_send_check(struct sk_buff *skb,
				const struct in6_addr *saddr,
				const struct in6_addr *daddr);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   const struct in6_addr *addr)
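
/*
 * Cache the received dst on the socket for the established fast path,
 * remembering the incoming interface and the FIB node serial number so
 * the cached route can be revalidated later.
 */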
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
	struct dst_entry *dst = skb_dst(skb);
	const struct rt6_info *rt = (const struct rt6_info *)dst;
	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
	inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
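
/* Hash the socket into the TCP lookup tables; v4-mapped sockets are
 * hashed through the IPv4 path, native ones via __inet6_hash().
 */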
static void tcp_v6_hash(struct sock *sk)
	if (sk->sk_state != TCP_CLOSE) {
		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
		__inet6_hash(sk, NULL);
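
/* TCP checksum over the IPv6 pseudo-header (RFC 2460, section 8.1). */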
static __inline__ __sum16 tcp_v6_check(int len,
				       const struct in6_addr *saddr,
				       const struct in6_addr *daddr,
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
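
/* Pick a hard-to-predict initial sequence number for a new connection. */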
static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
					    ipv6_hdr(skb)->saddr.s6_addr32,
					    tcp_hdr(skb)->source);
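
/*
 * Active open: resolve the destination (handling flow labels, scope ids
 * and v4-mapped addresses), choose a source address and route, bind a
 * local port, and send the initial SYN via tcp_connect().
 */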
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct dst_entry *dst;
	if (addr_len < SIN6_LEN_RFC2133)
	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;
	memset(&fl6, 0, sizeof(fl6));
		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (flowlabel == NULL)
			usin->sin6_addr = flowlabel->dst;
			fl6_sock_release(flowlabel);
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;
	addr_type = ipv6_addr_type(&usin->sin6_addr);
	if (addr_type & IPV6_ADDR_MULTICAST)
	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
			sk->sk_bound_dev_if = usin->sin6_scope_id;
		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
	np->daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;
	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;
		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
		if (__ipv6_only_sock(sk))
		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
		ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
		ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;
	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = np->daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	final_p = fl6_update_dst(&fl6, np->opt, &final);
	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
		np->rcv_saddr = *saddr;
	/* set the source address */
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
	sk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(sk, dst, NULL, NULL);
	rt = (struct rt6_info *) dst;
	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp &&
	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
		tcp_fetch_timewait_stamp(sk, dst);
	icsk->icsk_ext_hdr_len = 0;
		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	inet->inet_dport = usin->sin6_port;
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(&tcp_death_row, sk);
	tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
	err = tcp_connect(sk);
	tcp_set_state(sk, TCP_CLOSE);
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
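
/*
 * Called when an ICMPv6 "packet too big" indication has been received
 * (directly or via the deferred TCP_MTU_REDUCED_DEFERRED flag): update
 * the cached path MTU and retransmit if the current MSS is now too large.
 */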
static void tcp_v6_mtu_reduced(struct sock *sk)
	struct dst_entry *dst;
	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
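
/*
 * ICMPv6 error handler for TCP: invoked for errors reported against
 * TCPv6 segments (destination unreachable, packet too big, NDISC
 * redirects, ...) after looking up the owning socket.
 */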
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       u8 type, u8 code, int offset, __be32 info)
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct net *net = dev_net(skb->dev);
	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
			  th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
		ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
	if (sk->sk_state == TCP_CLOSE)
	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
	if (type == NDISC_REDIRECT) {
		struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
			dst->ops->redirect(dst, sk, skb);
	if (type == ICMPV6_PKT_TOOBIG) {
		tp->mtu_info = ntohl(info);
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
	icmpv6_err_convert(type, code, &err);
	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
		if (sock_owned_by_user(sk))
		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		WARN_ON(req->sk != NULL);
		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		inet_csk_reqsk_queue_drop(sk, req, prev);
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */
			sk->sk_err_soft = err;
	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_error_report(sk);
		sk->sk_err_soft = err;
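
/*
 * Build and transmit a SYN-ACK for a queued connection request: grab a
 * route if the caller did not supply one, construct the segment with
 * tcp_make_synack(), checksum it and send it via ip6_xmit().
 */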
static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp,
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
	skb = tcp_make_synack(sk, dst, req, rvp, NULL);
		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
		fl6->daddr = treq->rmt_addr;
		skb_set_queue_mapping(skb, queue_mapping);
		err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
		err = net_xmit_eval(err);

static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
	res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);

static void tcp_v6_reqsk_destructor(struct request_sock *req)
	kfree_skb(inet6_rsk(req)->pktopts);

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   const struct in6_addr *addr)
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);

static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
						struct sock *addr_sk)
	return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);

static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
	return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);

static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	if (optlen < sizeof(cmd))
	if (copy_from_user(&cmd, optval, sizeof(cmd)))
	if (sin6->sin6_family != AF_INET6)
	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);

static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					const struct in6_addr *daddr,
					const struct in6_addr *saddr, int nbytes)
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	bp = &hp->md5_blk.ip6;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);
	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));

static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	hp = tcp_get_md5sig_pool();
		goto clear_hash_noput;
	desc = &hp->md5_desc;
	if (crypto_hash_init(desc))
	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
	if (tcp_md5_hash_header(hp, th))
	if (tcp_md5_hash_key(hp, key))
	if (crypto_hash_final(desc, md5_hash))
	tcp_put_md5sig_pool();
	tcp_put_md5sig_pool();
	memset(md5_hash, 0, 16);
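
/*
 * Compute the TCP-MD5 signature (RFC 2385) over a whole skb: the IPv6
 * pseudo-header, the TCP header (checksum zeroed, options excluded),
 * the payload and finally the key itself.
 */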
static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct request_sock *req,
			       const struct sk_buff *skb)
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
		saddr = &inet6_sk(sk)->saddr;
		daddr = &inet6_sk(sk)->daddr;
		saddr = &inet6_rsk(req)->loc_addr;
		daddr = &inet6_rsk(req)->rmt_addr;
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	hp = tcp_get_md5sig_pool();
		goto clear_hash_noput;
	desc = &hp->md5_desc;
	if (crypto_hash_init(desc))
	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
	if (tcp_md5_hash_header(hp, th))
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
	if (tcp_md5_hash_key(hp, key))
	if (crypto_hash_final(desc, md5_hash))
	tcp_put_md5sig_pool();
	tcp_put_md5sig_pool();
	memset(md5_hash, 0, 16);
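
/*
 * Verify the MD5 signature of an incoming segment against the key (if
 * any) configured for the peer address. The segment is dropped when a
 * key is expected but absent, present but unexpected, or fails to verify.
 */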
static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);
	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.md5_lookup	=	tcp_v6_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,

static void __tcp_v6_send_check(struct sk_buff *skb,
				const struct in6_addr *saddr, const struct in6_addr *daddr)
	struct tcphdr *th = tcp_hdr(skb);
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
		th->check = tcp_v6_check(skb->len, saddr, daddr,
					 csum_partial(th, th->doff << 2,

static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
	struct ipv6_pinfo *np = inet6_sk(sk);
	__tcp_v6_send_check(skb, &np->saddr, &np->daddr);

static int tcp_v6_gso_send_check(struct sk_buff *skb)
	const struct ipv6hdr *ipv6h;
	if (!pskb_may_pull(skb, sizeof(*th)))
	ipv6h = ipv6_hdr(skb);
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);

static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
	const struct ipv6hdr *iph = skb_gro_network_header(skb);
	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
			skb->ip_summed = CHECKSUM_UNNECESSARY;
		NAPI_GRO_CB(skb)->flush = 1;
		wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
		sum = csum_fold(skb_checksum(skb,
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	return tcp_gro_receive(head, skb);

static int tcp6_gro_complete(struct sk_buff *skb)
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);
	th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
				  &iph->saddr, &iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
	return tcp_gro_complete(skb);
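
/*
 * Build and send an unattached control segment (RST or ACK) in reply to
 * an incoming skb, using the per-netns TCPv6 control socket. Optional
 * timestamp and MD5 signature options are appended when requested.
 */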
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
				 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
	const struct tcphdr *th = tcp_hdr(skb);
	struct sk_buff *buff;
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
	t1 = (struct tcphdr *) skb_push(buff, tot_len);
	skb_reset_transport_header(buff);
	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->window = htons(win);
	topt = (__be32 *)(t1 + 1);
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tcp_time_stamp);
#ifdef CONFIG_TCP_MD5SIG
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	buff->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
	fl6.flowi6_proto = IPPROTO_TCP;
	if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
		fl6.flowi6_oif = inet6_iif(skb);
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
	/* Pass a socket to ip6_dst_lookup, whether it is for an RST or an ACK.
	 * The underlying function will use it to retrieve the network
	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
		TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
			TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
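
/*
 * Send a RST in reply to a segment that has no owning socket (or whose
 * socket rejects it). When MD5 is enabled and no socket is known, the
 * listener is looked up so the RST can be signed with the right key.
 */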
static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
	const struct tcphdr *th = tcp_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	unsigned char newhash[16];
	struct sock *sk1 = NULL;
	if (!ipv6_unicast_destination(skb))
#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * We do not lose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
					    &tcp_hashinfo, &ipv6h->daddr,
					    ntohs(th->source), inet6_iif(skb));
		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
		key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
		seq = ntohl(th->ack_seq);
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
#ifdef CONFIG_TCP_MD5SIG

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
			    struct tcp_md5sig_key *key, u8 tclass)
	tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),

static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
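
/*
 * For a segment arriving on a listening socket, look for a matching
 * pending request or an already established child socket; fall back to
 * SYN-cookie validation when enabled.
 */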
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
	struct request_sock *req, **prev;
	const struct tcphdr *th = tcp_hdr(skb);
	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &ipv6_hdr(skb)->saddr,
				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
		return tcp_check_req(sk, skb, req, prev, false);
	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
					 &ipv6_hdr(skb)->saddr, th->source,
					 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
		if (nsk->sk_state != TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(nsk));
#ifdef CONFIG_SYN_COOKIES
		sk = cookie_v6_check(sk, skb);

/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	const u8 *hash_location;
	struct request_sock *req;
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
	bool want_cookie = false;
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);
	if (!ipv6_unicast_destination(skb))
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
		/* Secret recipe starts with IP addresses */
		d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
		d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
		/* plus variable length Initiator Cookie */
			*c++ ^= *hash_location++;
		want_cookie = false;	/* not our kind of cookie */
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);
	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);
	treq = inet6_rsk(req);
	treq->rmt_addr = ipv6_hdr(skb)->saddr;
	treq->loc_addr = ipv6_hdr(skb)->daddr;
	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, skb);
	treq->iif = sk->sk_bound_dev_if;
	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);
		if (ipv6_opt_accepted(sk, skb) ||
		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
			atomic_inc(&skb->users);
			treq->pktopts = skb;
		isn = cookie_v6_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
			if (!tcp_peer_is_proven(req, dst, true)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 !tcp_peer_is_proven(req, dst, false)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
				       &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		isn = tcp_v6_init_sequence(skb);
	tcp_rsk(req)->snt_isn = isn;
	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_release;
	if (tcp_v6_send_synack(sk, dst, &fl6, req,
			       (struct request_values *)&tmp_ext,
			       skb_get_queue_mapping(skb)) ||
	tcp_rsk(req)->snt_synack = tcp_time_stamp;
	tcp_rsk(req)->listener = NULL;
	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0; /* don't send reset */
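
/*
 * Create the child socket for a completed handshake. v4-mapped
 * destinations are delegated to tcp_v4_syn_recv_sock() and then patched
 * to use the v6-mapped operations; native IPv6 children get their
 * addresses, options and (optionally) MD5 key copied from the request
 * and the listener.
 */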
static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst)
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	if (skb->protocol == htons(ETH_P_IP)) {
		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);
		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
		newnp->rcv_saddr = newnp->saddr;
		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions = NULL;
		newnp->mcast_oif  = inet6_iif(skb);
		newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
		newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
	treq = inet6_rsk(req);
	if (sk_acceptq_is_full(sk))
	dst = inet6_csk_route_req(sk, &fl6, req);
	newsk = tcp_create_openreq_child(sk, req, skb);
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	newsk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);
	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);
	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
	newnp->daddr = treq->rmt_addr;
	newnp->saddr = treq->loc_addr;
	newnp->rcv_saddr = treq->loc_addr;
	newsk->sk_bound_dev_if = treq->iif;
	/* Now IPv6 options...
	   First: no IPv4 options.
	newinet->inet_opt = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;
	newnp->rxopt.all = np->rxopt.all;
	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts,
					      sk_gfp_atomic(sk, GFP_ATOMIC));
		consume_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
	/* Clone native IPv6 options from listening socket (if any)
	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	newnp->opt = ipv6_dup_options(newsk, np->opt);
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
						     newnp->opt->opt_flen);
	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
	tcp_initialize_rcv_mss(newsk);
	tcp_synack_rtt_meas(newsk, req);
	newtp->total_retrans = req->num_retrans;
	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
			       AF_INET6, key->key, key->keylen,
			       sk_gfp_atomic(sk, GFP_ATOMIC));
	if (__inet_inherit_port(sk, newsk) < 0) {
	__inet6_hash(newsk, NULL);
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
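
/*
 * Validate the checksum of an incoming segment, relying on
 * hardware-verified checksums when available and doing an immediate
 * software check only for short packets.
 */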
static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
				  &ipv6_hdr(skb)->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
					      &ipv6_hdr(skb)->saddr,
					      &ipv6_hdr(skb)->daddr, 0));
	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
/* The socket must have its spinlock held when we get
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;
	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);
#ifdef CONFIG_TCP_MD5SIG
	if (tcp_v6_inbound_md5_hash(sk, skb))
	if (sk_filter(sk, skb))
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	/* Do Stevens' IPV6_PKTOPTIONS.
	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.
	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
		opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;
		sock_rps_save_rxhash(sk, skb);
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				sk->sk_rx_dst = NULL;
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
			goto ipv6_pktoptions;
	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb))
				__kfree_skb(opt_skb);
		sock_rps_save_rxhash(sk, skb);
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
		goto ipv6_pktoptions;
	tcp_v6_send_reset(sk, skb);
	__kfree_skb(opt_skb);
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	/* Do you ask, what is it?
	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxtclass)
			np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
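
/*
 * Main receive entry point for TCPv6: sanity-check and checksum the
 * segment, look up the owning socket, then either process it directly,
 * queue it to the prequeue, or add it to the socket backlog.
 */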
static int tcp_v6_rcv(struct sk_buff *skb)
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct net *net = dev_net(skb->dev);
	if (skb->pkt_type != PACKET_HOST)
	 * Count it even if it's bad.
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
	if (th->doff < sizeof(struct tcphdr)/4)
	if (!pskb_may_pull(skb, th->doff*4))
	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
	hdr = ipv6_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (sk->sk_state == TCP_TIME_WAIT)
	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	if (sk_filter(sk, skb))
		goto discard_and_relse;
	bh_lock_sock_nested(sk);
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = net_dma_find_channel();
		if (tp->ucopy.dma_chan)
			ret = tcp_v6_do_rcv(sk, skb);
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else if (unlikely(sk_add_backlog(sk, skb,
					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	return ret ? -1 : 0;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		tcp_v6_send_reset(NULL, skb);
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest), inet6_iif(skb));
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
		/* Fall through to ACK */
		tcp_v6_timewait_ack(sk, skb);
	case TCP_TW_SUCCESS:;
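
/*
 * Early demux: before routing, try to match the packet to an established
 * socket so its cached dst can be attached to the skb and the full route
 * lookup skipped.
 */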
static void tcp_v6_early_demux(struct sk_buff *skb)
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	if (skb->pkt_type != PACKET_HOST)
	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
	hdr = ipv6_hdr(skb);
	if (th->doff < sizeof(struct tcphdr) / 4)
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
		skb->destructor = sock_edemux;
		if (sk->sk_state != TCP_TIME_WAIT) {
			struct dst_entry *dst = sk->sk_rx_dst;
				dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,

static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,

 *	TCP over IPv4 via INET6 API
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
static int tcp_v6_init_sock(struct sock *sk)
	struct inet_connection_sock *icsk = inet_csk(sk);
	icsk->icsk_af_ops = &ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;

static void tcp_v6_destroy_sock(struct sock *sk)
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
	int ttd = req->expires - jiffies;
	const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_rsk(req)->loc_port),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   from_kuid_munged(seq_user_ns(seq), uid),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
	const struct in6_addr *dest, *src;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct ipv6_pinfo *np = inet6_sk(sp);
	src = &np->rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);
	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_expires	= sp->sk_timer.expires;
		timer_expires = jiffies;
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tp->write_seq-tp->snd_una,
		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   atomic_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
	const struct in6_addr *dest, *src;
	const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
	long delta = tw->tw_ttd - jiffies;
	dest = &tw6->tw_v6_daddr;
	src  = &tw6->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);

static int tcp6_seq_show(struct seq_file *seq, void *v)
	struct tcp_iter_state *st;
	if (v == SEQ_START_TOKEN) {
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 " uid  timeout inode\n");
	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);

static const struct file_operations tcp6_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.llseek  = seq_lseek,
	.release = seq_release_net

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.seq_fops	= &tcp6_afinfo_seq_fops,
	.show		= tcp6_seq_show,

int __net_init tcp6_proc_init(struct net *net)
	return tcp_proc_register(net, &tcp6_seq_afinfo);

void tcp6_proc_exit(struct net *net)
	tcp_proc_unregister(net, &tcp6_seq_afinfo);
struct proto tcpv6_prot = {
	.owner			= THIS_MODULE,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.mtu_reduced		= tcp_v6_mtu_reduced,
	.hash			= tcp_v6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#ifdef CONFIG_MEMCG_KMEM
	.proto_cgroup		= tcp_proto_cgroup,

static const struct inet6_protocol tcpv6_protocol = {
	.early_demux	=	tcp_v6_early_demux,
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,

static const struct net_offload tcpv6_offload = {
	.gso_send_check	=	tcp_v6_gso_send_check,
	.gso_segment	=	tcp_tso_segment,
	.gro_receive	=	tcp6_gro_receive,
	.gro_complete	=	tcp6_gro_complete,

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |

static int __net_init tcpv6_net_init(struct net *net)
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);

static void __net_exit tcpv6_net_exit(struct net *net)
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,

int __init tcpv6_init(void)
	ret = inet6_add_offload(&tcpv6_offload, IPPROTO_TCP);
	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
		goto out_tcpv6_protocol;
	ret = register_pernet_subsys(&tcpv6_net_ops);
		goto out_tcpv6_protosw;
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	inet6_del_offload(&tcpv6_offload, IPPROTO_TCP);

void tcpv6_exit(void)
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	inet6_del_offload(&tcpv6_offload, IPPROTO_TCP);