/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/tcp_memcontrol.h>
67 #include <asm/uaccess.h>
69 #include <linux/proc_fs.h>
70 #include <linux/seq_file.h>
72 #include <linux/crypto.h>
73 #include <linux/scatterlist.h>
74 #include "ip6_offload.h"
76 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
77 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
78 struct request_sock *req);
80 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
82 static const struct inet_connection_sock_af_ops ipv6_mapped;
83 static const struct inet_connection_sock_af_ops ipv6_specific;
84 #ifdef CONFIG_TCP_MD5SIG
85 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
86 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
88 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
89 const struct in6_addr *addr)
95 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
97 struct dst_entry *dst = skb_dst(skb);
98 const struct rt6_info *rt = (const struct rt6_info *)dst;
102 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
104 inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
107 static void tcp_v6_hash(struct sock *sk)
109 if (sk->sk_state != TCP_CLOSE) {
110 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
115 __inet6_hash(sk, NULL);
120 static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
122 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
123 ipv6_hdr(skb)->saddr.s6_addr32,
125 tcp_hdr(skb)->source);
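
/* Roughly speaking, secure_tcpv6_sequence_number() follows the RFC 6528
 * recipe (the exact hash and clock granularity live in net/core/secure_seq.c):
 *
 *	ISN = fine-grained clock + HASH(saddr, daddr, sport:dport, boot-time secret)
 *
 * so a reused 4-tuple still starts from an unpredictable, monotonically
 * advancing sequence number.
 */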
128 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
131 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
132 struct inet_sock *inet = inet_sk(sk);
133 struct inet_connection_sock *icsk = inet_csk(sk);
134 struct ipv6_pinfo *np = inet6_sk(sk);
135 struct tcp_sock *tp = tcp_sk(sk);
136 struct in6_addr *saddr = NULL, *final_p, final;
139 struct dst_entry *dst;
143 if (addr_len < SIN6_LEN_RFC2133)
146 if (usin->sin6_family != AF_INET6)
147 return -EAFNOSUPPORT;
149 memset(&fl6, 0, sizeof(fl6));
152 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
153 IP6_ECN_flow_init(fl6.flowlabel);
154 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
155 struct ip6_flowlabel *flowlabel;
156 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
157 if (flowlabel == NULL)
159 usin->sin6_addr = flowlabel->dst;
160 fl6_sock_release(flowlabel);
165 * connect() to INADDR_ANY means loopback (BSD'ism).
168 if(ipv6_addr_any(&usin->sin6_addr))
169 usin->sin6_addr.s6_addr[15] = 0x1;
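
/* The unspecified and loopback addresses differ only in the last byte,
 * which is why patching s6_addr[15] is enough:
 *
 *	::	00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 *	::1	00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01
 */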
171 addr_type = ipv6_addr_type(&usin->sin6_addr);
173 if(addr_type & IPV6_ADDR_MULTICAST)
176 if (addr_type&IPV6_ADDR_LINKLOCAL) {
177 if (addr_len >= sizeof(struct sockaddr_in6) &&
178 usin->sin6_scope_id) {
			/* If an interface was set while binding, the
			 * indices must coincide.
			 */
182 if (sk->sk_bound_dev_if &&
183 sk->sk_bound_dev_if != usin->sin6_scope_id)
186 sk->sk_bound_dev_if = usin->sin6_scope_id;
189 /* Connect to link-local address requires an interface */
190 if (!sk->sk_bound_dev_if)
194 if (tp->rx_opt.ts_recent_stamp &&
195 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
196 tp->rx_opt.ts_recent = 0;
197 tp->rx_opt.ts_recent_stamp = 0;
201 np->daddr = usin->sin6_addr;
202 np->flow_label = fl6.flowlabel;
208 if (addr_type == IPV6_ADDR_MAPPED) {
209 u32 exthdrlen = icsk->icsk_ext_hdr_len;
210 struct sockaddr_in sin;
212 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
214 if (__ipv6_only_sock(sk))
217 sin.sin_family = AF_INET;
218 sin.sin_port = usin->sin6_port;
219 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
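
/* An IPv4-mapped IPv6 address keeps the IPv4 address in its last 32 bits,
 * e.g. ::ffff:192.0.2.1 is laid out as
 *
 *	s6_addr32[0] = 0x00000000
 *	s6_addr32[1] = 0x00000000
 *	s6_addr32[2] = htonl(0x0000ffff)
 *	s6_addr32[3] = the IPv4 address in network byte order
 *
 * so s6_addr32[3] can be copied straight into sin_addr above.
 */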
221 icsk->icsk_af_ops = &ipv6_mapped;
222 sk->sk_backlog_rcv = tcp_v4_do_rcv;
223 #ifdef CONFIG_TCP_MD5SIG
224 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
227 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
230 icsk->icsk_ext_hdr_len = exthdrlen;
231 icsk->icsk_af_ops = &ipv6_specific;
232 sk->sk_backlog_rcv = tcp_v6_do_rcv;
233 #ifdef CONFIG_TCP_MD5SIG
234 tp->af_specific = &tcp_sock_ipv6_specific;
238 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
239 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
246 if (!ipv6_addr_any(&np->rcv_saddr))
247 saddr = &np->rcv_saddr;
249 fl6.flowi6_proto = IPPROTO_TCP;
250 fl6.daddr = np->daddr;
251 fl6.saddr = saddr ? *saddr : np->saddr;
252 fl6.flowi6_oif = sk->sk_bound_dev_if;
253 fl6.flowi6_mark = sk->sk_mark;
254 fl6.fl6_dport = usin->sin6_port;
255 fl6.fl6_sport = inet->inet_sport;
257 final_p = fl6_update_dst(&fl6, np->opt, &final);
259 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
261 dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
269 np->rcv_saddr = *saddr;
272 /* set the source address */
274 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
276 sk->sk_gso_type = SKB_GSO_TCPV6;
277 __ip6_dst_store(sk, dst, NULL, NULL);
279 rt = (struct rt6_info *) dst;
280 if (tcp_death_row.sysctl_tw_recycle &&
281 !tp->rx_opt.ts_recent_stamp &&
282 ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr))
283 tcp_fetch_timewait_stamp(sk, dst);
285 icsk->icsk_ext_hdr_len = 0;
287 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
290 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
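
/* IPV6_MIN_MTU is 1280, so this clamp works out to
 * 1280 - 20 (TCP header) - 40 (IPv6 header) = 1220 bytes of MSS,
 * the largest value guaranteed to fit any IPv6 path without fragmentation.
 */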
292 inet->inet_dport = usin->sin6_port;
294 tcp_set_state(sk, TCP_SYN_SENT);
295 err = inet6_hash_connect(&tcp_death_row, sk);
300 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
305 err = tcp_connect(sk);
312 tcp_set_state(sk, TCP_CLOSE);
315 inet->inet_dport = 0;
316 sk->sk_route_caps = 0;
320 static void tcp_v6_mtu_reduced(struct sock *sk)
322 struct dst_entry *dst;
324 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
327 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
331 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
332 tcp_sync_mss(sk, dst_mtu(dst));
333 tcp_simple_retransmit(sk);
337 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
338 u8 type, u8 code, int offset, __be32 info)
340 const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data;
341 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
342 struct ipv6_pinfo *np;
347 struct net *net = dev_net(skb->dev);
349 sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
350 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
353 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
358 if (sk->sk_state == TCP_TIME_WAIT) {
359 inet_twsk_put(inet_twsk(sk));
364 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
365 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
367 if (sk->sk_state == TCP_CLOSE)
370 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
371 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
376 seq = ntohl(th->seq);
377 if (sk->sk_state != TCP_LISTEN &&
378 !between(seq, tp->snd_una, tp->snd_nxt)) {
379 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
385 if (type == NDISC_REDIRECT) {
386 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
389 dst->ops->redirect(dst, sk, skb);
392 if (type == ICMPV6_PKT_TOOBIG) {
393 tp->mtu_info = ntohl(info);
394 if (!sock_owned_by_user(sk))
395 tcp_v6_mtu_reduced(sk);
396 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
402 icmpv6_err_convert(type, code, &err);
	/* Might be for a request_sock */
405 switch (sk->sk_state) {
406 struct request_sock *req, **prev;
408 if (sock_owned_by_user(sk))
411 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
412 &hdr->saddr, inet6_iif(skb));
416 /* ICMPs are not backlogged, hence we cannot get
417 * an established socket here.
419 WARN_ON(req->sk != NULL);
421 if (seq != tcp_rsk(req)->snt_isn) {
422 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
426 inet_csk_reqsk_queue_drop(sk, req, prev);
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
432 if (!sock_owned_by_user(sk)) {
434 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
438 sk->sk_err_soft = err;
442 if (!sock_owned_by_user(sk) && np->recverr) {
444 sk->sk_error_report(sk);
446 sk->sk_err_soft = err;
454 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
456 struct request_sock *req,
457 struct request_values *rvp,
460 struct inet6_request_sock *treq = inet6_rsk(req);
461 struct ipv6_pinfo *np = inet6_sk(sk);
462 struct sk_buff * skb;
465 /* First, grab a route. */
466 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
469 skb = tcp_make_synack(sk, dst, req, rvp, NULL);
472 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
474 fl6->daddr = treq->rmt_addr;
475 skb_set_queue_mapping(skb, queue_mapping);
476 err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
477 err = net_xmit_eval(err);
484 static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
485 struct request_values *rvp)
490 res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
492 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
496 static void tcp_v6_reqsk_destructor(struct request_sock *req)
498 kfree_skb(inet6_rsk(req)->pktopts);
501 #ifdef CONFIG_TCP_MD5SIG
502 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
503 const struct in6_addr *addr)
505 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
508 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
509 struct sock *addr_sk)
511 return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
514 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
515 struct request_sock *req)
517 return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
520 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
523 struct tcp_md5sig cmd;
524 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
526 if (optlen < sizeof(cmd))
529 if (copy_from_user(&cmd, optval, sizeof(cmd)))
532 if (sin6->sin6_family != AF_INET6)
535 if (!cmd.tcpm_keylen) {
536 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
537 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
539 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
543 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
546 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
547 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
548 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
550 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
551 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
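
/* For reference, a minimal user-space sketch of how a key arrives here via
 * setsockopt() (illustrative only; error handling omitted, the peer address
 * is a documentation placeholder, and struct tcp_md5sig comes from the TCP
 * uapi headers):
 *
 *	struct tcp_md5sig md5;
 *	struct sockaddr_in6 *a = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	memset(&md5, 0, sizeof(md5));
 *	a->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::1", &a->sin6_addr);
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * Passing tcpm_keylen == 0 for the same address deletes the key, matching
 * the tcp_md5_do_del() branches above.
 */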
554 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
555 const struct in6_addr *daddr,
556 const struct in6_addr *saddr, int nbytes)
558 struct tcp6_pseudohdr *bp;
559 struct scatterlist sg;
561 bp = &hp->md5_blk.ip6;
562 /* 1. TCP pseudo-header (RFC2460) */
565 bp->protocol = cpu_to_be32(IPPROTO_TCP);
566 bp->len = cpu_to_be32(nbytes);
568 sg_init_one(&sg, bp, sizeof(*bp));
569 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
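
/* The block hashed above is the RFC 2460 checksum pseudo-header, stored in
 * hp->md5_blk.ip6 roughly as:
 *
 *	struct tcp6_pseudohdr {
 *		struct in6_addr	saddr;
 *		struct in6_addr	daddr;
 *		__be32		len;		(TCP segment length)
 *		__be32		protocol;	(three zero bytes, then IPPROTO_TCP)
 *	};
 *
 * which is why len and protocol are converted with cpu_to_be32() before the
 * block is fed to the MD5 transform.
 */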
572 static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
573 const struct in6_addr *daddr, struct in6_addr *saddr,
574 const struct tcphdr *th)
576 struct tcp_md5sig_pool *hp;
577 struct hash_desc *desc;
579 hp = tcp_get_md5sig_pool();
581 goto clear_hash_noput;
582 desc = &hp->md5_desc;
584 if (crypto_hash_init(desc))
586 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
588 if (tcp_md5_hash_header(hp, th))
590 if (tcp_md5_hash_key(hp, key))
592 if (crypto_hash_final(desc, md5_hash))
595 tcp_put_md5sig_pool();
599 tcp_put_md5sig_pool();
601 memset(md5_hash, 0, 16);
605 static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
606 const struct sock *sk,
607 const struct request_sock *req,
608 const struct sk_buff *skb)
610 const struct in6_addr *saddr, *daddr;
611 struct tcp_md5sig_pool *hp;
612 struct hash_desc *desc;
613 const struct tcphdr *th = tcp_hdr(skb);
616 saddr = &inet6_sk(sk)->saddr;
617 daddr = &inet6_sk(sk)->daddr;
619 saddr = &inet6_rsk(req)->loc_addr;
620 daddr = &inet6_rsk(req)->rmt_addr;
622 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
623 saddr = &ip6h->saddr;
624 daddr = &ip6h->daddr;
627 hp = tcp_get_md5sig_pool();
629 goto clear_hash_noput;
630 desc = &hp->md5_desc;
632 if (crypto_hash_init(desc))
635 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
637 if (tcp_md5_hash_header(hp, th))
639 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
641 if (tcp_md5_hash_key(hp, key))
643 if (crypto_hash_final(desc, md5_hash))
646 tcp_put_md5sig_pool();
650 tcp_put_md5sig_pool();
652 memset(md5_hash, 0, 16);
656 static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
658 const __u8 *hash_location = NULL;
659 struct tcp_md5sig_key *hash_expected;
660 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
661 const struct tcphdr *th = tcp_hdr(skb);
665 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
666 hash_location = tcp_parse_md5sig_option(th);
668 /* We've parsed the options - do we have a hash? */
669 if (!hash_expected && !hash_location)
672 if (hash_expected && !hash_location) {
673 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
677 if (!hash_expected && hash_location) {
678 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
682 /* check the signature */
683 genhash = tcp_v6_md5_hash_skb(newhash,
687 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
688 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
689 genhash ? "failed" : "mismatch",
690 &ip6h->saddr, ntohs(th->source),
691 &ip6h->daddr, ntohs(th->dest));
698 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
700 .obj_size = sizeof(struct tcp6_request_sock),
701 .rtx_syn_ack = tcp_v6_rtx_synack,
702 .send_ack = tcp_v6_reqsk_send_ack,
703 .destructor = tcp_v6_reqsk_destructor,
704 .send_reset = tcp_v6_send_reset,
705 .syn_ack_timeout = tcp_syn_ack_timeout,
708 #ifdef CONFIG_TCP_MD5SIG
709 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
710 .md5_lookup = tcp_v6_reqsk_md5_lookup,
711 .calc_md5_hash = tcp_v6_md5_hash_skb,
715 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
716 u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass)
718 const struct tcphdr *th = tcp_hdr(skb);
720 struct sk_buff *buff;
722 struct net *net = dev_net(skb_dst(skb)->dev);
723 struct sock *ctl_sk = net->ipv6.tcp_sk;
724 unsigned int tot_len = sizeof(struct tcphdr);
725 struct dst_entry *dst;
729 tot_len += TCPOLEN_TSTAMP_ALIGNED;
730 #ifdef CONFIG_TCP_MD5SIG
732 tot_len += TCPOLEN_MD5SIG_ALIGNED;
735 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
740 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
742 t1 = (struct tcphdr *) skb_push(buff, tot_len);
743 skb_reset_transport_header(buff);
745 /* Swap the send and the receive. */
746 memset(t1, 0, sizeof(*t1));
747 t1->dest = th->source;
748 t1->source = th->dest;
749 t1->doff = tot_len / 4;
750 t1->seq = htonl(seq);
751 t1->ack_seq = htonl(ack);
752 t1->ack = !rst || !th->ack;
754 t1->window = htons(win);
756 topt = (__be32 *)(t1 + 1);
759 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
760 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
761 *topt++ = htonl(tcp_time_stamp);
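
/* The first option word written above packs the bytes
 * NOP, NOP, TIMESTAMP, length:
 *
 *	(1 << 24) | (1 << 16) | (8 << 8) | 10  ==  0x0101080a
 *
 * and is followed by the 32-bit TSval (tcp_time_stamp) and the echoed TSecr.
 */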
765 #ifdef CONFIG_TCP_MD5SIG
767 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
768 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
769 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
770 &ipv6_hdr(skb)->saddr,
771 &ipv6_hdr(skb)->daddr, t1);
775 memset(&fl6, 0, sizeof(fl6));
776 fl6.daddr = ipv6_hdr(skb)->saddr;
777 fl6.saddr = ipv6_hdr(skb)->daddr;
779 buff->ip_summed = CHECKSUM_PARTIAL;
782 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
784 fl6.flowi6_proto = IPPROTO_TCP;
785 if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
786 fl6.flowi6_oif = inet6_iif(skb);
787 fl6.fl6_dport = t1->dest;
788 fl6.fl6_sport = t1->source;
789 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
	/* Pass a socket to ip6_dst_lookup even when it is for an RST.
	 * The underlying function will use it to retrieve the network
	 * namespace.
	 */
795 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
797 skb_dst_set(buff, dst);
798 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
799 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
801 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
808 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
810 const struct tcphdr *th = tcp_hdr(skb);
811 u32 seq = 0, ack_seq = 0;
812 struct tcp_md5sig_key *key = NULL;
813 #ifdef CONFIG_TCP_MD5SIG
814 const __u8 *hash_location = NULL;
815 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
816 unsigned char newhash[16];
818 struct sock *sk1 = NULL;
824 if (!ipv6_unicast_destination(skb))
827 #ifdef CONFIG_TCP_MD5SIG
828 hash_location = tcp_parse_md5sig_option(th);
829 if (!sk && hash_location) {
	 * active side is lost. Try to find the listening socket through
	 * the source port, and then find the md5 key through the listening
	 * socket. We do not lose any security here:
	 * the incoming packet is checked against the md5 hash of the found
	 * key, and no RST is generated if the hash does not match.
	 */
837 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
838 &tcp_hashinfo, &ipv6h->daddr,
839 ntohs(th->source), inet6_iif(skb));
844 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
848 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
849 if (genhash || memcmp(hash_location, newhash, 16) != 0)
852 key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
857 seq = ntohl(th->ack_seq);
859 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
862 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0);
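
/* The seq/ack choice above follows RFC 793 reset generation: if the
 * offending segment carried an ACK, the RST is sent with seq equal to that
 * ACK value and no ACK bit of its own; otherwise seq is 0 and ack_seq covers
 * everything the segment occupied (SYN + FIN + payload), so the peer can
 * match the RST against what it sent.
 */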
864 #ifdef CONFIG_TCP_MD5SIG
873 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
874 struct tcp_md5sig_key *key, u8 tclass)
876 tcp_v6_send_response(skb, seq, ack, win, ts, key, 0, tclass);
879 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
881 struct inet_timewait_sock *tw = inet_twsk(sk);
882 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
884 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
885 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
886 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
892 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
893 struct request_sock *req)
895 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
896 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
900 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
902 struct request_sock *req, **prev;
903 const struct tcphdr *th = tcp_hdr(skb);
906 /* Find possible connection requests. */
907 req = inet6_csk_search_req(sk, &prev, th->source,
908 &ipv6_hdr(skb)->saddr,
909 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
911 return tcp_check_req(sk, skb, req, prev, false);
913 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
914 &ipv6_hdr(skb)->saddr, th->source,
915 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
918 if (nsk->sk_state != TCP_TIME_WAIT) {
922 inet_twsk_put(inet_twsk(nsk));
926 #ifdef CONFIG_SYN_COOKIES
928 sk = cookie_v6_check(sk, skb);
933 /* FIXME: this is substantially similar to the ipv4 code.
934 * Can some kind of merge be done? -- erics
936 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
938 struct tcp_extend_values tmp_ext;
939 struct tcp_options_received tmp_opt;
940 const u8 *hash_location;
941 struct request_sock *req;
942 struct inet6_request_sock *treq;
943 struct ipv6_pinfo *np = inet6_sk(sk);
944 struct tcp_sock *tp = tcp_sk(sk);
945 __u32 isn = TCP_SKB_CB(skb)->when;
946 struct dst_entry *dst = NULL;
948 bool want_cookie = false;
950 if (skb->protocol == htons(ETH_P_IP))
951 return tcp_v4_conn_request(sk, skb);
953 if (!ipv6_unicast_destination(skb))
956 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
957 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
962 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
965 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
969 #ifdef CONFIG_TCP_MD5SIG
970 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
973 tcp_clear_options(&tmp_opt);
974 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
975 tmp_opt.user_mss = tp->rx_opt.user_mss;
976 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
978 if (tmp_opt.cookie_plus > 0 &&
979 tmp_opt.saw_tstamp &&
980 !tp->rx_opt.cookie_out_never &&
981 (sysctl_tcp_cookie_size > 0 ||
982 (tp->cookie_values != NULL &&
983 tp->cookie_values->cookie_desired > 0))) {
986 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
987 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
989 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
992 /* Secret recipe starts with IP addresses */
993 d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
998 d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
1004 /* plus variable length Initiator Cookie */
1007 *c++ ^= *hash_location++;
1009 want_cookie = false; /* not our kind of cookie */
1010 tmp_ext.cookie_out_never = 0; /* false */
1011 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1012 } else if (!tp->rx_opt.cookie_in_always) {
1013 /* redundant indications, but ensure initialization. */
1014 tmp_ext.cookie_out_never = 1; /* true */
1015 tmp_ext.cookie_plus = 0;
1019 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1021 if (want_cookie && !tmp_opt.saw_tstamp)
1022 tcp_clear_options(&tmp_opt);
1024 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1025 tcp_openreq_init(req, &tmp_opt, skb);
1027 treq = inet6_rsk(req);
1028 treq->rmt_addr = ipv6_hdr(skb)->saddr;
1029 treq->loc_addr = ipv6_hdr(skb)->daddr;
1030 if (!want_cookie || tmp_opt.tstamp_ok)
1031 TCP_ECN_create_request(req, skb);
1033 treq->iif = sk->sk_bound_dev_if;
1035 /* So that link locals have meaning */
1036 if (!sk->sk_bound_dev_if &&
1037 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1038 treq->iif = inet6_iif(skb);
1041 if (ipv6_opt_accepted(sk, skb) ||
1042 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1043 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1044 atomic_inc(&skb->users);
1045 treq->pktopts = skb;
1049 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1050 req->cookie_ts = tmp_opt.tstamp_ok;
	/* VJ's idea. We save the last timestamp seen
	 * from the destination in the peer table, when entering
	 * TIME-WAIT state, and check against it before
	 * accepting a new connection request.
	 *
	 * If "isn" is not zero, this request hit a live
	 * timewait bucket, so all the necessary checks
	 * are made in the function processing the timewait state.
	 */
1063 if (tmp_opt.saw_tstamp &&
1064 tcp_death_row.sysctl_tw_recycle &&
1065 (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
1066 if (!tcp_peer_is_proven(req, dst, true)) {
1067 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1068 goto drop_and_release;
1071 /* Kill the following clause, if you dislike this way. */
1072 else if (!sysctl_tcp_syncookies &&
1073 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1074 (sysctl_max_syn_backlog >> 2)) &&
1075 !tcp_peer_is_proven(req, dst, false)) {
			/* Without syncookies the last quarter of the
			 * backlog is filled with destinations proven
			 * to be alive. It means that we continue to
			 * communicate with destinations already
			 * remembered at the moment of the synflood.
			 */
1083 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
1084 &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
1085 goto drop_and_release;
1088 isn = tcp_v6_init_sequence(skb);
1091 tcp_rsk(req)->snt_isn = isn;
1093 if (security_inet_conn_request(sk, skb, req))
1094 goto drop_and_release;
1096 if (tcp_v6_send_synack(sk, dst, &fl6, req,
1097 (struct request_values *)&tmp_ext,
1098 skb_get_queue_mapping(skb)) ||
1102 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1103 tcp_rsk(req)->listener = NULL;
1104 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1112 return 0; /* don't send reset */
1115 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1116 struct request_sock *req,
1117 struct dst_entry *dst)
1119 struct inet6_request_sock *treq;
1120 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1121 struct tcp6_sock *newtcp6sk;
1122 struct inet_sock *newinet;
1123 struct tcp_sock *newtp;
1125 #ifdef CONFIG_TCP_MD5SIG
1126 struct tcp_md5sig_key *key;
1130 if (skb->protocol == htons(ETH_P_IP)) {
1135 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1140 newtcp6sk = (struct tcp6_sock *)newsk;
1141 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1143 newinet = inet_sk(newsk);
1144 newnp = inet6_sk(newsk);
1145 newtp = tcp_sk(newsk);
1147 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1149 ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
1151 ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
1153 newnp->rcv_saddr = newnp->saddr;
1155 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1156 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1157 #ifdef CONFIG_TCP_MD5SIG
1158 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1161 newnp->ipv6_ac_list = NULL;
1162 newnp->ipv6_fl_list = NULL;
1163 newnp->pktoptions = NULL;
1165 newnp->mcast_oif = inet6_iif(skb);
1166 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1167 newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1170 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1171 * here, tcp_create_openreq_child now does this for us, see the comment in
1172 * that function for the gory details. -acme
		/* This is a tricky place. Until this moment the IPv4 code
		   has been working with the IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
1179 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1184 treq = inet6_rsk(req);
1186 if (sk_acceptq_is_full(sk))
1190 dst = inet6_csk_route_req(sk, &fl6, req);
1195 newsk = tcp_create_openreq_child(sk, req, skb);
1200 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1201 * count here, tcp_create_openreq_child now does this for us, see the
1202 * comment in that function for the gory details. -acme
1205 newsk->sk_gso_type = SKB_GSO_TCPV6;
1206 __ip6_dst_store(newsk, dst, NULL, NULL);
1207 inet6_sk_rx_dst_set(newsk, skb);
1209 newtcp6sk = (struct tcp6_sock *)newsk;
1210 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1212 newtp = tcp_sk(newsk);
1213 newinet = inet_sk(newsk);
1214 newnp = inet6_sk(newsk);
1216 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1218 newnp->daddr = treq->rmt_addr;
1219 newnp->saddr = treq->loc_addr;
1220 newnp->rcv_saddr = treq->loc_addr;
1221 newsk->sk_bound_dev_if = treq->iif;
1223 /* Now IPv6 options...
1225 First: no IPv4 options.
1227 newinet->inet_opt = NULL;
1228 newnp->ipv6_ac_list = NULL;
1229 newnp->ipv6_fl_list = NULL;
1232 newnp->rxopt.all = np->rxopt.all;
1234 /* Clone pktoptions received with SYN */
1235 newnp->pktoptions = NULL;
1236 if (treq->pktopts != NULL) {
1237 newnp->pktoptions = skb_clone(treq->pktopts,
1238 sk_gfp_atomic(sk, GFP_ATOMIC));
1239 consume_skb(treq->pktopts);
1240 treq->pktopts = NULL;
1241 if (newnp->pktoptions)
1242 skb_set_owner_r(newnp->pktoptions, newsk);
1245 newnp->mcast_oif = inet6_iif(skb);
1246 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1247 newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to the new socket.
	 */
1256 newnp->opt = ipv6_dup_options(newsk, np->opt);
1258 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1260 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1261 newnp->opt->opt_flen);
1263 tcp_mtup_init(newsk);
1264 tcp_sync_mss(newsk, dst_mtu(dst));
1265 newtp->advmss = dst_metric_advmss(dst);
1266 if (tcp_sk(sk)->rx_opt.user_mss &&
1267 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1268 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1270 tcp_initialize_rcv_mss(newsk);
1271 tcp_synack_rtt_meas(newsk, req);
1272 newtp->total_retrans = req->num_retrans;
1274 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1275 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1277 #ifdef CONFIG_TCP_MD5SIG
1278 /* Copy over the MD5 key from the original socket */
1279 if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across.
		 */
1285 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
1286 AF_INET6, key->key, key->keylen,
1287 sk_gfp_atomic(sk, GFP_ATOMIC));
1291 if (__inet_inherit_port(sk, newsk) < 0) {
1295 __inet6_hash(newsk, NULL);
1300 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1304 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1308 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
1310 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1311 if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
1312 &ipv6_hdr(skb)->daddr, skb->csum)) {
1313 skb->ip_summed = CHECKSUM_UNNECESSARY;
1318 skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
1319 &ipv6_hdr(skb)->saddr,
1320 &ipv6_hdr(skb)->daddr, 0));
1322 if (skb->len <= 76) {
1323 return __skb_checksum_complete(skb);
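
/* Three outcomes above: if the NIC supplied a full checksum
 * (CHECKSUM_COMPLETE) and it verifies against the pseudo-header, the packet
 * is accepted as-is; otherwise the pseudo-header sum is seeded into
 * skb->csum so the check can be completed lazily later, except for short
 * segments (<= 76 bytes) which are cheap enough to verify immediately.
 */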
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
1336 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1338 struct ipv6_pinfo *np = inet6_sk(sk);
1339 struct tcp_sock *tp;
1340 struct sk_buff *opt_skb = NULL;
	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to the IPv4 receive handler and is backlogged.
	   From the backlog it always comes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but that is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */
1350 if (skb->protocol == htons(ETH_P_IP))
1351 return tcp_v4_do_rcv(sk, skb);
1353 #ifdef CONFIG_TCP_MD5SIG
1354 if (tcp_v6_inbound_md5_hash (sk, skb))
1358 if (sk_filter(sk, skb))
1362 * socket locking is here for SMP purposes as backlog rcv
1363 * is currently called with bh processing disabled.
	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, this is the only place in our code where we
	   can do it without affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS
	   does not look very well thought out. For now we latch
	   the options received in the last packet enqueued
	   by tcp. Feel free to propose a better solution.
	 */
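
/* The options latched into np->pktoptions below are what user space later
 * reads back, typically with something along the lines of (a sketch, not
 * taken from this file; the 2292-style option name is assumed here):
 *
 *	char cbuf[256];
 *	socklen_t len = sizeof(cbuf);
 *	getsockopt(fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, cbuf, &len);
 *
 * which returns the ancillary data (packet info, hop limit, ...) of the most
 * recently queued segment, per the Stevens-style API referenced above.
 */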
1380 opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
1382 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1383 struct dst_entry *dst = sk->sk_rx_dst;
1385 sock_rps_save_rxhash(sk, skb);
1387 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1388 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1390 sk->sk_rx_dst = NULL;
1394 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
1397 goto ipv6_pktoptions;
1401 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1404 if (sk->sk_state == TCP_LISTEN) {
1405 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1410 * Queue it on the new socket if the new socket is active,
1411 * otherwise we just shortcircuit this and continue with
1415 sock_rps_save_rxhash(nsk, skb);
1416 if (tcp_child_process(sk, nsk, skb))
1419 __kfree_skb(opt_skb);
1423 sock_rps_save_rxhash(sk, skb);
1425 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1428 goto ipv6_pktoptions;
1432 tcp_v6_send_reset(sk, skb);
1435 __kfree_skb(opt_skb);
1439 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	/* You may ask, what is all this for?

	   1. The skb was enqueued by tcp.
	   2. The skb was added to the tail of the read queue, not out of order.
	   3. The socket is not in a passive state.
	   4. Finally, it really contains options the user wants to receive.
	 */
1452 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1453 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1454 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1455 np->mcast_oif = inet6_iif(opt_skb);
1456 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1457 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1458 if (np->rxopt.bits.rxtclass)
1459 np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb));
1460 if (ipv6_opt_accepted(sk, opt_skb)) {
1461 skb_set_owner_r(opt_skb, sk);
1462 opt_skb = xchg(&np->pktoptions, opt_skb);
1464 __kfree_skb(opt_skb);
1465 opt_skb = xchg(&np->pktoptions, NULL);
1473 static int tcp_v6_rcv(struct sk_buff *skb)
1475 const struct tcphdr *th;
1476 const struct ipv6hdr *hdr;
1479 struct net *net = dev_net(skb->dev);
1481 if (skb->pkt_type != PACKET_HOST)
1485 * Count it even if it's bad.
1487 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1489 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1494 if (th->doff < sizeof(struct tcphdr)/4)
1496 if (!pskb_may_pull(skb, th->doff*4))
1499 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
1503 hdr = ipv6_hdr(skb);
1504 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1505 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1506 skb->len - th->doff*4);
1507 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1508 TCP_SKB_CB(skb)->when = 0;
1509 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1510 TCP_SKB_CB(skb)->sacked = 0;
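
/* end_seq counts every sequence-number-consuming unit in the segment:
 * a bare SYN advances it by 1, a FIN carrying 100 bytes of payload by 101,
 * since SYN and FIN each occupy one sequence number in addition to the
 * payload bytes (skb->len - th->doff*4).
 */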
1512 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1517 if (sk->sk_state == TCP_TIME_WAIT)
1520 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1521 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1522 goto discard_and_relse;
1525 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1526 goto discard_and_relse;
1528 if (sk_filter(sk, skb))
1529 goto discard_and_relse;
1533 bh_lock_sock_nested(sk);
1535 if (!sock_owned_by_user(sk)) {
1536 #ifdef CONFIG_NET_DMA
1537 struct tcp_sock *tp = tcp_sk(sk);
1538 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1539 tp->ucopy.dma_chan = net_dma_find_channel();
1540 if (tp->ucopy.dma_chan)
1541 ret = tcp_v6_do_rcv(sk, skb);
1545 if (!tcp_prequeue(sk, skb))
1546 ret = tcp_v6_do_rcv(sk, skb);
1548 } else if (unlikely(sk_add_backlog(sk, skb,
1549 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1551 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1552 goto discard_and_relse;
1557 return ret ? -1 : 0;
1560 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1563 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1565 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1567 tcp_v6_send_reset(NULL, skb);
1584 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1585 inet_twsk_put(inet_twsk(sk));
1589 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1590 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1591 inet_twsk_put(inet_twsk(sk));
1595 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1600 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1601 &ipv6_hdr(skb)->daddr,
1602 ntohs(th->dest), inet6_iif(skb));
1604 struct inet_timewait_sock *tw = inet_twsk(sk);
1605 inet_twsk_deschedule(tw, &tcp_death_row);
1610 /* Fall through to ACK */
1613 tcp_v6_timewait_ack(sk, skb);
1617 case TCP_TW_SUCCESS:;
1622 static void tcp_v6_early_demux(struct sk_buff *skb)
1624 const struct ipv6hdr *hdr;
1625 const struct tcphdr *th;
1628 if (skb->pkt_type != PACKET_HOST)
1631 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1634 hdr = ipv6_hdr(skb);
1637 if (th->doff < sizeof(struct tcphdr) / 4)
1640 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1641 &hdr->saddr, th->source,
1642 &hdr->daddr, ntohs(th->dest),
1646 skb->destructor = sock_edemux;
1647 if (sk->sk_state != TCP_TIME_WAIT) {
1648 struct dst_entry *dst = sk->sk_rx_dst;
1651 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1653 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1654 skb_dst_set_noref(skb, dst);
1659 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1660 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1661 .twsk_unique = tcp_twsk_unique,
1662 .twsk_destructor= tcp_twsk_destructor,
1665 static const struct inet_connection_sock_af_ops ipv6_specific = {
1666 .queue_xmit = inet6_csk_xmit,
1667 .send_check = tcp_v6_send_check,
1668 .rebuild_header = inet6_sk_rebuild_header,
1669 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1670 .conn_request = tcp_v6_conn_request,
1671 .syn_recv_sock = tcp_v6_syn_recv_sock,
1672 .net_header_len = sizeof(struct ipv6hdr),
1673 .net_frag_header_len = sizeof(struct frag_hdr),
1674 .setsockopt = ipv6_setsockopt,
1675 .getsockopt = ipv6_getsockopt,
1676 .addr2sockaddr = inet6_csk_addr2sockaddr,
1677 .sockaddr_len = sizeof(struct sockaddr_in6),
1678 .bind_conflict = inet6_csk_bind_conflict,
1679 #ifdef CONFIG_COMPAT
1680 .compat_setsockopt = compat_ipv6_setsockopt,
1681 .compat_getsockopt = compat_ipv6_getsockopt,
1685 #ifdef CONFIG_TCP_MD5SIG
1686 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1687 .md5_lookup = tcp_v6_md5_lookup,
1688 .calc_md5_hash = tcp_v6_md5_hash_skb,
1689 .md5_parse = tcp_v6_parse_md5_keys,
1694 * TCP over IPv4 via INET6 API
1697 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1698 .queue_xmit = ip_queue_xmit,
1699 .send_check = tcp_v4_send_check,
1700 .rebuild_header = inet_sk_rebuild_header,
1701 .sk_rx_dst_set = inet_sk_rx_dst_set,
1702 .conn_request = tcp_v6_conn_request,
1703 .syn_recv_sock = tcp_v6_syn_recv_sock,
1704 .net_header_len = sizeof(struct iphdr),
1705 .setsockopt = ipv6_setsockopt,
1706 .getsockopt = ipv6_getsockopt,
1707 .addr2sockaddr = inet6_csk_addr2sockaddr,
1708 .sockaddr_len = sizeof(struct sockaddr_in6),
1709 .bind_conflict = inet6_csk_bind_conflict,
1710 #ifdef CONFIG_COMPAT
1711 .compat_setsockopt = compat_ipv6_setsockopt,
1712 .compat_getsockopt = compat_ipv6_getsockopt,
1716 #ifdef CONFIG_TCP_MD5SIG
1717 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1718 .md5_lookup = tcp_v4_md5_lookup,
1719 .calc_md5_hash = tcp_v4_md5_hash_skb,
1720 .md5_parse = tcp_v6_parse_md5_keys,
/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
1727 static int tcp_v6_init_sock(struct sock *sk)
1729 struct inet_connection_sock *icsk = inet_csk(sk);
1733 icsk->icsk_af_ops = &ipv6_specific;
1735 #ifdef CONFIG_TCP_MD5SIG
1736 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1742 static void tcp_v6_destroy_sock(struct sock *sk)
1744 tcp_v4_destroy_sock(sk);
1745 inet6_destroy_sock(sk);
1748 #ifdef CONFIG_PROC_FS
1749 /* Proc filesystem TCPv6 sock list dumping. */
1750 static void get_openreq6(struct seq_file *seq,
1751 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
1753 int ttd = req->expires - jiffies;
1754 const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1755 const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1761 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1762 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1764 src->s6_addr32[0], src->s6_addr32[1],
1765 src->s6_addr32[2], src->s6_addr32[3],
1766 ntohs(inet_rsk(req)->loc_port),
1767 dest->s6_addr32[0], dest->s6_addr32[1],
1768 dest->s6_addr32[2], dest->s6_addr32[3],
1769 ntohs(inet_rsk(req)->rmt_port),
1771 0,0, /* could print option size, but that is af dependent. */
1772 1, /* timers active (only the expire timer) */
1773 jiffies_to_clock_t(ttd),
1775 from_kuid_munged(seq_user_ns(seq), uid),
1776 0, /* non standard timer */
1777 0, /* open_requests have no inode */
1781 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1783 const struct in6_addr *dest, *src;
1786 unsigned long timer_expires;
1787 const struct inet_sock *inet = inet_sk(sp);
1788 const struct tcp_sock *tp = tcp_sk(sp);
1789 const struct inet_connection_sock *icsk = inet_csk(sp);
1790 const struct ipv6_pinfo *np = inet6_sk(sp);
1793 src = &np->rcv_saddr;
1794 destp = ntohs(inet->inet_dport);
1795 srcp = ntohs(inet->inet_sport);
1797 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1799 timer_expires = icsk->icsk_timeout;
1800 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1802 timer_expires = icsk->icsk_timeout;
1803 } else if (timer_pending(&sp->sk_timer)) {
1805 timer_expires = sp->sk_timer.expires;
1808 timer_expires = jiffies;
1812 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1813 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
1815 src->s6_addr32[0], src->s6_addr32[1],
1816 src->s6_addr32[2], src->s6_addr32[3], srcp,
1817 dest->s6_addr32[0], dest->s6_addr32[1],
1818 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1820 tp->write_seq-tp->snd_una,
1821 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1823 jiffies_delta_to_clock_t(timer_expires - jiffies),
1824 icsk->icsk_retransmits,
1825 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1826 icsk->icsk_probes_out,
1828 atomic_read(&sp->sk_refcnt), sp,
1829 jiffies_to_clock_t(icsk->icsk_rto),
1830 jiffies_to_clock_t(icsk->icsk_ack.ato),
1831 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1833 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
1837 static void get_timewait6_sock(struct seq_file *seq,
1838 struct inet_timewait_sock *tw, int i)
1840 const struct in6_addr *dest, *src;
1842 const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1843 long delta = tw->tw_ttd - jiffies;
1845 dest = &tw6->tw_v6_daddr;
1846 src = &tw6->tw_v6_rcv_saddr;
1847 destp = ntohs(tw->tw_dport);
1848 srcp = ntohs(tw->tw_sport);
1851 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1852 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1854 src->s6_addr32[0], src->s6_addr32[1],
1855 src->s6_addr32[2], src->s6_addr32[3], srcp,
1856 dest->s6_addr32[0], dest->s6_addr32[1],
1857 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1858 tw->tw_substate, 0, 0,
1859 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1860 atomic_read(&tw->tw_refcnt), tw);
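
/* The three helpers above each emit one line of /proc/net/tcp6. As a rough
 * example, a socket listening on [::1]:8080 appears on a little-endian
 * machine as
 *
 *	0: 00000000000000000000000001000000:1F90 00000000000000000000000000000000:0000 0A ...
 *
 * i.e. each address is printed as four native-endian 32-bit words (hence the
 * byte swap within each group), ports are hex (0x1F90 == 8080), and the
 * state is hex (0A == TCP_LISTEN).
 */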
1863 static int tcp6_seq_show(struct seq_file *seq, void *v)
1865 struct tcp_iter_state *st;
1867 if (v == SEQ_START_TOKEN) {
1872 "st tx_queue rx_queue tr tm->when retrnsmt"
1873 " uid timeout inode\n");
1878 switch (st->state) {
1879 case TCP_SEQ_STATE_LISTENING:
1880 case TCP_SEQ_STATE_ESTABLISHED:
1881 get_tcp6_sock(seq, v, st->num);
1883 case TCP_SEQ_STATE_OPENREQ:
1884 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1886 case TCP_SEQ_STATE_TIME_WAIT:
1887 get_timewait6_sock(seq, v, st->num);
1894 static const struct file_operations tcp6_afinfo_seq_fops = {
1895 .owner = THIS_MODULE,
1896 .open = tcp_seq_open,
1898 .llseek = seq_lseek,
1899 .release = seq_release_net
1902 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1905 .seq_fops = &tcp6_afinfo_seq_fops,
1907 .show = tcp6_seq_show,
1911 int __net_init tcp6_proc_init(struct net *net)
1913 return tcp_proc_register(net, &tcp6_seq_afinfo);
1916 void tcp6_proc_exit(struct net *net)
1918 tcp_proc_unregister(net, &tcp6_seq_afinfo);
1922 struct proto tcpv6_prot = {
1924 .owner = THIS_MODULE,
1926 .connect = tcp_v6_connect,
1927 .disconnect = tcp_disconnect,
1928 .accept = inet_csk_accept,
1930 .init = tcp_v6_init_sock,
1931 .destroy = tcp_v6_destroy_sock,
1932 .shutdown = tcp_shutdown,
1933 .setsockopt = tcp_setsockopt,
1934 .getsockopt = tcp_getsockopt,
1935 .recvmsg = tcp_recvmsg,
1936 .sendmsg = tcp_sendmsg,
1937 .sendpage = tcp_sendpage,
1938 .backlog_rcv = tcp_v6_do_rcv,
1939 .release_cb = tcp_release_cb,
1940 .mtu_reduced = tcp_v6_mtu_reduced,
1941 .hash = tcp_v6_hash,
1942 .unhash = inet_unhash,
1943 .get_port = inet_csk_get_port,
1944 .enter_memory_pressure = tcp_enter_memory_pressure,
1945 .sockets_allocated = &tcp_sockets_allocated,
1946 .memory_allocated = &tcp_memory_allocated,
1947 .memory_pressure = &tcp_memory_pressure,
1948 .orphan_count = &tcp_orphan_count,
1949 .sysctl_wmem = sysctl_tcp_wmem,
1950 .sysctl_rmem = sysctl_tcp_rmem,
1951 .max_header = MAX_TCP_HEADER,
1952 .obj_size = sizeof(struct tcp6_sock),
1953 .slab_flags = SLAB_DESTROY_BY_RCU,
1954 .twsk_prot = &tcp6_timewait_sock_ops,
1955 .rsk_prot = &tcp6_request_sock_ops,
1956 .h.hashinfo = &tcp_hashinfo,
1957 .no_autobind = true,
1958 #ifdef CONFIG_COMPAT
1959 .compat_setsockopt = compat_tcp_setsockopt,
1960 .compat_getsockopt = compat_tcp_getsockopt,
1962 #ifdef CONFIG_MEMCG_KMEM
1963 .proto_cgroup = tcp_proto_cgroup,
1967 static const struct inet6_protocol tcpv6_protocol = {
1968 .early_demux = tcp_v6_early_demux,
1969 .handler = tcp_v6_rcv,
1970 .err_handler = tcp_v6_err,
1971 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1974 static struct inet_protosw tcpv6_protosw = {
1975 .type = SOCK_STREAM,
1976 .protocol = IPPROTO_TCP,
1977 .prot = &tcpv6_prot,
1978 .ops = &inet6_stream_ops,
1980 .flags = INET_PROTOSW_PERMANENT |
1984 static int __net_init tcpv6_net_init(struct net *net)
1986 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
1987 SOCK_RAW, IPPROTO_TCP, net);
1990 static void __net_exit tcpv6_net_exit(struct net *net)
1992 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
1995 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
1997 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
2000 static struct pernet_operations tcpv6_net_ops = {
2001 .init = tcpv6_net_init,
2002 .exit = tcpv6_net_exit,
2003 .exit_batch = tcpv6_net_exit_batch,
2006 int __init tcpv6_init(void)
2010 ret = tcpv6_offload_init();
2014 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2018 /* register inet6 protocol */
2019 ret = inet6_register_protosw(&tcpv6_protosw);
2021 goto out_tcpv6_protocol;
2023 ret = register_pernet_subsys(&tcpv6_net_ops);
2025 goto out_tcpv6_protosw;
2030 inet6_unregister_protosw(&tcpv6_protosw);
2032 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2034 tcpv6_offload_cleanup();
2038 void tcpv6_exit(void)
2040 unregister_pernet_subsys(&tcpv6_net_ops);
2041 inet6_unregister_protosw(&tcpv6_protosw);
2042 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2043 tcpv6_offload_cleanup();