3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/tcp_memcontrol.h>
66 #include <net/busy_poll.h>
68 #include <asm/uaccess.h>
70 #include <linux/proc_fs.h>
71 #include <linux/seq_file.h>
73 #include <linux/crypto.h>
74 #include <linux/scatterlist.h>
/* Forward declarations for functions and per-family operation tables
 * defined later in this file.
 */
76 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
77 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
78 struct request_sock *req);
80 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
/* af_ops tables: ipv6_mapped handles v4-mapped-v6 sockets, ipv6_specific
 * handles native IPv6 sockets.
 */
82 static const struct inet_connection_sock_af_ops ipv6_mapped;
83 static const struct inet_connection_sock_af_ops ipv6_specific;
84 #ifdef CONFIG_TCP_MD5SIG
85 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
86 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
/* Prototype for the MD5 key lookup helper (full definition below).
 * NOTE(review): the matching #else stub for !CONFIG_TCP_MD5SIG is not
 * visible in this chunk — confirm against the full file.
 */
88 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
89 const struct in6_addr *addr)
/* Cache the inbound route on the socket for the established fast path:
 * remember the skb's dst, the ingress ifindex, and a routing-table
 * serial number (fn_sernum) used later to detect a stale cached route.
 */
95 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
97 struct dst_entry *dst = skb_dst(skb);
98 const struct rt6_info *rt = (const struct rt6_info *)dst;
102 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
/* Cookie is validated against the fib node's sernum in tcp_v6_do_rcv(). */
104 inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
/* Insert the socket into the TCP hash tables. A socket whose af_ops is
 * ipv6_mapped is really carrying IPv4 traffic, so (in the elided branch)
 * it is presumably handed to the v4 hash path; otherwise hash as IPv6.
 */
107 static void tcp_v6_hash(struct sock *sk)
109 if (sk->sk_state != TCP_CLOSE) {
110 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
115 __inet6_hash(sk, NULL);
/* Derive the initial sequence number for an incoming SYN from the
 * address/port 4-tuple (keyed hash; see secure_tcpv6_sequence_number).
 * The dest-port argument line is elided in this extract.
 */
120 static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
122 return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
123 ipv6_hdr(skb)->saddr.s6_addr32,
125 tcp_hdr(skb)->source);
/* Active open (connect()) for an AF_INET6 TCP socket.
 *
 * Validates the destination sockaddr, resolves flow label / scope id,
 * falls back to tcp_v4_connect() for v4-mapped destinations, routes the
 * flow, picks a source address, hashes the socket, chooses an ISN and
 * sends the SYN. Returns 0 or a negative errno (error paths partially
 * elided in this extract).
 */
128 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
131 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
132 struct inet_sock *inet = inet_sk(sk);
133 struct inet_connection_sock *icsk = inet_csk(sk);
134 struct ipv6_pinfo *np = inet6_sk(sk);
135 struct tcp_sock *tp = tcp_sk(sk);
136 struct in6_addr *saddr = NULL, *final_p, final;
139 struct dst_entry *dst;
/* Basic sockaddr validation. */
143 if (addr_len < SIN6_LEN_RFC2133)
146 if (usin->sin6_family != AF_INET6)
147 return -EAFNOSUPPORT;
149 memset(&fl6, 0, sizeof(fl6));
/* If the caller supplied a flow label it must refer to a label this
 * socket actually owns (fl6_sock_lookup), otherwise fail.
 */
152 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
153 IP6_ECN_flow_init(fl6.flowlabel);
154 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
155 struct ip6_flowlabel *flowlabel;
156 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
157 if (flowlabel == NULL)
159 fl6_sock_release(flowlabel);
164 * connect() to INADDR_ANY means loopback (BSD'ism).
/* Rewrite :: to ::1 per the comment above. */
167 if (ipv6_addr_any(&usin->sin6_addr))
168 usin->sin6_addr.s6_addr[15] = 0x1;
170 addr_type = ipv6_addr_type(&usin->sin6_addr);
/* TCP cannot connect to a multicast address. */
172 if (addr_type & IPV6_ADDR_MULTICAST)
/* Link-local destinations need a well-defined interface: the scope id
 * must agree with any already-bound device, and one of them must exist.
 */
175 if (addr_type&IPV6_ADDR_LINKLOCAL) {
176 if (addr_len >= sizeof(struct sockaddr_in6) &&
177 usin->sin6_scope_id) {
178 /* If interface is set while binding, indices
181 if (sk->sk_bound_dev_if &&
182 sk->sk_bound_dev_if != usin->sin6_scope_id)
185 sk->sk_bound_dev_if = usin->sin6_scope_id;
188 /* Connect to link-local address requires an interface */
189 if (!sk->sk_bound_dev_if)
/* Reset stale TIME-WAIT timestamp state when reconnecting to a
 * different peer (matters for PAWS / tw_recycle below).
 */
193 if (tp->rx_opt.ts_recent_stamp &&
194 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
195 tp->rx_opt.ts_recent = 0;
196 tp->rx_opt.ts_recent_stamp = 0;
200 sk->sk_v6_daddr = usin->sin6_addr;
201 np->flow_label = fl6.flowlabel;
/* V4-mapped destination: delegate to the IPv4 connect path, swapping in
 * the v4 af_ops/backlog handlers first; on failure (elided) the IPv6
 * handlers are restored.
 */
207 if (addr_type == IPV6_ADDR_MAPPED) {
208 u32 exthdrlen = icsk->icsk_ext_hdr_len;
209 struct sockaddr_in sin;
211 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
/* IPV6_V6ONLY sockets must not talk IPv4. */
213 if (__ipv6_only_sock(sk))
216 sin.sin_family = AF_INET;
217 sin.sin_port = usin->sin6_port;
218 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
220 icsk->icsk_af_ops = &ipv6_mapped;
221 sk->sk_backlog_rcv = tcp_v4_do_rcv;
222 #ifdef CONFIG_TCP_MD5SIG
223 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
226 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* Failure path: restore the native-IPv6 operations. */
229 icsk->icsk_ext_hdr_len = exthdrlen;
230 icsk->icsk_af_ops = &ipv6_specific;
231 sk->sk_backlog_rcv = tcp_v6_do_rcv;
232 #ifdef CONFIG_TCP_MD5SIG
233 tp->af_specific = &tcp_sock_ipv6_specific;
/* Success path: mirror the chosen v4 addresses as v4-mapped v6. */
237 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
238 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
239 &sk->sk_v6_rcv_saddr);
/* Native IPv6 path: build the flow and route it. */
245 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
246 saddr = &sk->sk_v6_rcv_saddr;
248 fl6.flowi6_proto = IPPROTO_TCP;
249 fl6.daddr = sk->sk_v6_daddr;
250 fl6.saddr = saddr ? *saddr : np->saddr;
251 fl6.flowi6_oif = sk->sk_bound_dev_if;
252 fl6.flowi6_mark = sk->sk_mark;
253 fl6.fl6_dport = usin->sin6_port;
254 fl6.fl6_sport = inet->inet_sport;
/* Routing-header option may override the immediate next hop. */
256 final_p = fl6_update_dst(&fl6, np->opt, &final);
258 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
260 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
/* Source-address selection result (saddr assignment elided above). */
268 sk->sk_v6_rcv_saddr = *saddr;
271 /* set the source address */
273 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
275 sk->sk_gso_type = SKB_GSO_TCPV6;
276 __ip6_dst_store(sk, dst, NULL, NULL);
/* tw_recycle: fetch cached per-destination timestamp for PAWS checks. */
278 rt = (struct rt6_info *) dst;
279 if (tcp_death_row.sysctl_tw_recycle &&
280 !tp->rx_opt.ts_recent_stamp &&
281 ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr))
282 tcp_fetch_timewait_stamp(sk, dst);
284 icsk->icsk_ext_hdr_len = 0;
286 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
/* Clamp MSS so a minimum-MTU IPv6 path never fragments. */
289 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
291 inet->inet_dport = usin->sin6_port;
293 tcp_set_state(sk, TCP_SYN_SENT);
/* Pick a local port and insert into the established hash. */
294 err = inet6_hash_connect(&tcp_death_row, sk);
/* Choose the ISN unless TCP_REPAIR pre-seeded write_seq. */
298 if (!tp->write_seq && likely(!tp->repair))
299 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
300 sk->sk_v6_daddr.s6_addr32,
304 err = tcp_connect(sk);
/* Common failure unwind: back to CLOSE, clear the port binding. */
311 tcp_set_state(sk, TCP_CLOSE);
314 inet->inet_dport = 0;
315 sk->sk_route_caps = 0;
/* React to a Packet-Too-Big notification: update the cached route's PMTU
 * and, if our cached pmtu_cookie is now larger than the route MTU,
 * re-sync the MSS and retransmit the outstanding segments.
 */
319 static void tcp_v6_mtu_reduced(struct sock *sk)
321 struct dst_entry *dst;
/* Nothing to do for sockets with no path (LISTEN/CLOSE). */
323 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
326 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
330 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
331 tcp_sync_mss(sk, dst_mtu(dst));
332 tcp_simple_retransmit(sk);
/* ICMPv6 error handler for TCP.
 *
 * Locates the socket the quoted header belongs to, validates the error
 * (sequence window, min hopcount), then dispatches on type: redirect,
 * packet-too-big (PMTU), or a converted protocol error delivered to the
 * socket / matching request_sock. Several unlock/put lines are elided.
 */
336 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
337 u8 type, u8 code, int offset, __be32 info)
339 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
340 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
341 struct ipv6_pinfo *np;
346 struct net *net = dev_net(skb->dev);
348 sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
349 th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
/* No matching socket: count the orphaned ICMP error. */
352 ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
357 if (sk->sk_state == TCP_TIME_WAIT) {
358 inet_twsk_put(inet_twsk(sk));
/* Socket owned by user context: defer everything except PMTU, which is
 * latched via mtu_info below.
 */
363 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
364 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS)
366 if (sk->sk_state == TCP_CLOSE)
/* IP_MINTTL-style guard against spoofed errors with low hop limit. */
369 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
370 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
/* Ignore errors quoting a sequence outside our send window. */
375 seq = ntohl(th->seq);
376 if (sk->sk_state != TCP_LISTEN &&
377 !between(seq, tp->snd_una, tp->snd_nxt)) {
378 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
384 if (type == NDISC_REDIRECT) {
385 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
388 dst->ops->redirect(dst, sk, skb);
392 if (type == ICMPV6_PKT_TOOBIG) {
393 /* We are not interested in TCP_LISTEN and open_requests
394 * (SYN-ACKs send out by Linux are always <576bytes so
395 * they should go through unfragmented).
397 if (sk->sk_state == TCP_LISTEN)
400 if (!ip6_sk_accept_pmtu(sk))
/* Latch the new MTU; handle now or defer to release_sock(). */
403 tp->mtu_info = ntohl(info);
404 if (!sock_owned_by_user(sk))
405 tcp_v6_mtu_reduced(sk);
406 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
/* Map ICMPv6 type/code to an errno value in 'err'. */
412 icmpv6_err_convert(type, code, &err);
414 /* Might be for an request_sock */
415 switch (sk->sk_state) {
416 struct request_sock *req, **prev;
418 if (sock_owned_by_user(sk))
421 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
422 &hdr->saddr, inet6_iif(skb));
426 /* ICMPs are not backlogged, hence we cannot get
427 * an established socket here.
429 WARN_ON(req->sk != NULL);
/* Error must quote exactly our SYN-ACK's ISN to be trusted. */
431 if (seq != tcp_rsk(req)->snt_isn) {
432 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
436 inet_csk_reqsk_queue_drop(sk, req, prev);
437 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
441 case TCP_SYN_RECV: /* Cannot happen.
442 It can, it SYNs are crossed. --ANK */
/* Connecting socket: deliver a hard error if we hold the lock,
 * otherwise record it as a soft error.
 */
443 if (!sock_owned_by_user(sk)) {
445 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
449 sk->sk_err_soft = err;
453 if (!sock_owned_by_user(sk) && np->recverr) {
455 sk->sk_error_report(sk);
457 sk->sk_err_soft = err;
/* Build and transmit a SYN-ACK for a pending connection request.
 * Grabs a route if the caller did not supply one, constructs the
 * segment, checksums it, and emits it via ip6_xmit() with the
 * listener's IPv6 options and traffic class.
 */
465 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
467 struct request_sock *req,
470 struct inet_request_sock *ireq = inet_rsk(req);
471 struct ipv6_pinfo *np = inet6_sk(sk);
475 /* First, grab a route. */
476 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
479 skb = tcp_make_synack(sk, dst, req, NULL);
482 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
483 &ireq->ir_v6_rmt_addr);
485 fl6->daddr = ireq->ir_v6_rmt_addr;
486 skb_set_queue_mapping(skb, queue_mapping);
487 err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
/* Treat congestion-notification "errors" as success. */
488 err = net_xmit_eval(err);
/* Retransmit a SYN-ACK for 'req' (rtx_syn_ack callback); counts the
 * retransmission in TCP_MIB_RETRANSSEGS on success.
 */
495 static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
500 res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
502 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
/* request_sock destructor: drop the cached SYN packet options (if any). */
506 static void tcp_v6_reqsk_destructor(struct request_sock *req)
508 kfree_skb(inet_rsk(req)->pktopts);
511 #ifdef CONFIG_TCP_MD5SIG
/* Look up the configured TCP-MD5 key for a given IPv6 peer address. */
512 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
513 const struct in6_addr *addr)
515 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
/* tcp_sock_af_ops hook: MD5 key for the peer of 'addr_sk'. */
518 static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
519 struct sock *addr_sk)
521 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
/* request_sock variant: MD5 key for the remote address of 'req'. */
524 static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
525 struct request_sock *req)
527 return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);
/* setsockopt(TCP_MD5SIG) handler: add or delete a per-peer MD5 key.
 * A zero key length means delete. V4-mapped addresses are stored under
 * AF_INET using the embedded IPv4 address so the v4 path finds them.
 */
530 static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
533 struct tcp_md5sig cmd;
534 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
536 if (optlen < sizeof(cmd))
539 if (copy_from_user(&cmd, optval, sizeof(cmd)))
542 if (sin6->sin6_family != AF_INET6)
/* Zero key length == delete the key for this address. */
545 if (!cmd.tcpm_keylen) {
546 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
547 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
549 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
553 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
556 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
557 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
558 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
560 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
561 AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
/* Feed the IPv6 pseudo-header (saddr/daddr assignments elided here,
 * protocol, length — per RFC 2460) into the per-CPU MD5 hash state.
 * Returns the crypto layer's status (0 on success).
 */
564 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
565 const struct in6_addr *daddr,
566 const struct in6_addr *saddr, int nbytes)
568 struct tcp6_pseudohdr *bp;
569 struct scatterlist sg;
571 bp = &hp->md5_blk.ip6;
572 /* 1. TCP pseudo-header (RFC2460) */
575 bp->protocol = cpu_to_be32(IPPROTO_TCP);
576 bp->len = cpu_to_be32(nbytes);
578 sg_init_one(&sg, bp, sizeof(*bp));
579 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
/* Compute the RFC 2385 MD5 signature over pseudo-header + TCP header +
 * key, writing the 16-byte digest to md5_hash. On any crypto failure
 * the digest is zeroed and an error is returned (labels elided).
 */
582 static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
583 const struct in6_addr *daddr, struct in6_addr *saddr,
584 const struct tcphdr *th)
586 struct tcp_md5sig_pool *hp;
587 struct hash_desc *desc;
589 hp = tcp_get_md5sig_pool();
591 goto clear_hash_noput;
592 desc = &hp->md5_desc;
594 if (crypto_hash_init(desc))
596 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
598 if (tcp_md5_hash_header(hp, th))
600 if (tcp_md5_hash_key(hp, key))
602 if (crypto_hash_final(desc, md5_hash))
605 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the output digest. */
609 tcp_put_md5sig_pool();
611 memset(md5_hash, 0, 16);
/* Compute the MD5 signature over a whole segment (pseudo-header, TCP
 * header, payload, key). Addresses come from the socket, the
 * request_sock, or the skb's IPv6 header — whichever is available
 * (the selection conditionals are elided in this extract).
 */
615 static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
616 const struct sock *sk,
617 const struct request_sock *req,
618 const struct sk_buff *skb)
620 const struct in6_addr *saddr, *daddr;
621 struct tcp_md5sig_pool *hp;
622 struct hash_desc *desc;
623 const struct tcphdr *th = tcp_hdr(skb);
/* Addresses from an established socket... */
626 saddr = &inet6_sk(sk)->saddr;
627 daddr = &sk->sk_v6_daddr;
/* ...or from a pending request... */
629 saddr = &inet_rsk(req)->ir_v6_loc_addr;
630 daddr = &inet_rsk(req)->ir_v6_rmt_addr;
/* ...or straight from the packet headers. */
632 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
633 saddr = &ip6h->saddr;
634 daddr = &ip6h->daddr;
637 hp = tcp_get_md5sig_pool();
639 goto clear_hash_noput;
640 desc = &hp->md5_desc;
642 if (crypto_hash_init(desc))
645 if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
647 if (tcp_md5_hash_header(hp, th))
649 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
651 if (tcp_md5_hash_key(hp, key))
653 if (crypto_hash_final(desc, md5_hash))
656 tcp_put_md5sig_pool();
/* Error path: release the pool and zero the output digest. */
660 tcp_put_md5sig_pool();
662 memset(md5_hash, 0, 16);
/* Verify the MD5 signature option on an inbound segment.
 * Returns nonzero (drop) when a key is configured but the option is
 * missing, when an unexpected option appears, or when the computed
 * digest does not match; zero when neither side uses MD5 or it matches.
 */
666 static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
668 const __u8 *hash_location = NULL;
669 struct tcp_md5sig_key *hash_expected;
670 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
671 const struct tcphdr *th = tcp_hdr(skb);
675 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
676 hash_location = tcp_parse_md5sig_option(th);
678 /* We've parsed the options - do we have a hash? */
679 if (!hash_expected && !hash_location)
682 if (hash_expected && !hash_location) {
683 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
687 if (!hash_expected && hash_location) {
688 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
692 /* check the signature */
693 genhash = tcp_v6_md5_hash_skb(newhash,
697 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
698 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
699 genhash ? "failed" : "mismatch",
700 &ip6h->saddr, ntohs(th->source),
701 &ip6h->daddr, ntohs(th->dest));
/* request_sock operations for IPv6 TCP: how to (re)send SYN-ACKs, ACKs
 * and RSTs for embryonic connections, and how to tear a request down.
 */
708 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
710 .obj_size = sizeof(struct tcp6_request_sock),
711 .rtx_syn_ack = tcp_v6_rtx_synack,
712 .send_ack = tcp_v6_reqsk_send_ack,
713 .destructor = tcp_v6_reqsk_destructor,
714 .send_reset = tcp_v6_send_reset,
715 .syn_ack_timeout = tcp_syn_ack_timeout,
718 #ifdef CONFIG_TCP_MD5SIG
/* MD5 hooks used while a connection is still a request_sock. */
719 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
720 .md5_lookup = tcp_v6_reqsk_md5_lookup,
721 .calc_md5_hash = tcp_v6_md5_hash_skb,
/* Build and send a bare control segment (RST when rst!=0, otherwise an
 * ACK) in response to 'skb', with optional timestamp and MD5 options.
 * Sent from the per-netns control socket, addresses swapped from the
 * incoming packet. Failure paths (alloc/route) are elided here.
 */
725 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
726 u32 tsval, u32 tsecr,
727 struct tcp_md5sig_key *key, int rst, u8 tclass)
729 const struct tcphdr *th = tcp_hdr(skb);
731 struct sk_buff *buff;
733 struct net *net = dev_net(skb_dst(skb)->dev);
734 struct sock *ctl_sk = net->ipv6.tcp_sk;
735 unsigned int tot_len = sizeof(struct tcphdr);
736 struct dst_entry *dst;
/* Account for optional timestamp and MD5 option space. */
740 tot_len += TCPOLEN_TSTAMP_ALIGNED;
741 #ifdef CONFIG_TCP_MD5SIG
743 tot_len += TCPOLEN_MD5SIG_ALIGNED;
746 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
751 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
753 t1 = (struct tcphdr *) skb_push(buff, tot_len);
754 skb_reset_transport_header(buff);
756 /* Swap the send and the receive. */
757 memset(t1, 0, sizeof(*t1));
758 t1->dest = th->source;
759 t1->source = th->dest;
760 t1->doff = tot_len / 4;
761 t1->seq = htonl(seq);
762 t1->ack_seq = htonl(ack);
/* A RST answering a non-ACK segment must itself carry ACK. */
763 t1->ack = !rst || !th->ack;
765 t1->window = htons(win);
/* Lay out TCP options directly after the fixed header. */
767 topt = (__be32 *)(t1 + 1);
770 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
771 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
772 *topt++ = htonl(tsval);
773 *topt++ = htonl(tsecr);
776 #ifdef CONFIG_TCP_MD5SIG
778 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
779 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
/* Sign with saddr/daddr deliberately un-swapped: they describe the
 * packet we are SENDING (our src = their dst).
 */
780 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
781 &ipv6_hdr(skb)->saddr,
782 &ipv6_hdr(skb)->daddr, t1);
/* Route the reply back to the sender. */
786 memset(&fl6, 0, sizeof(fl6));
787 fl6.daddr = ipv6_hdr(skb)->saddr;
788 fl6.saddr = ipv6_hdr(skb)->daddr;
790 buff->ip_summed = CHECKSUM_PARTIAL;
793 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
795 fl6.flowi6_proto = IPPROTO_TCP;
796 if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
797 fl6.flowi6_oif = inet6_iif(skb);
798 fl6.fl6_dport = t1->dest;
799 fl6.fl6_sport = t1->source;
800 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
802 /* Pass a socket to ip6_dst_lookup either it is for RST
803 * Underlying function will use this to retrieve the network
806 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
808 skb_dst_set(buff, dst);
809 ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
810 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
812 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
/* Send a RST in response to 'skb'. When there is no socket (sk == NULL)
 * but the offending segment carries an MD5 option, look up a listener
 * by the segment's SOURCE port so the RST can be properly signed — an
 * unsigned RST would be ignored by an MD5-protected peer (RFC 2385).
 */
819 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
821 const struct tcphdr *th = tcp_hdr(skb);
822 u32 seq = 0, ack_seq = 0;
823 struct tcp_md5sig_key *key = NULL;
824 #ifdef CONFIG_TCP_MD5SIG
825 const __u8 *hash_location = NULL;
826 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
827 unsigned char newhash[16];
829 struct sock *sk1 = NULL;
/* Never RST a multicast/broadcast destination. */
835 if (!ipv6_unicast_destination(skb))
838 #ifdef CONFIG_TCP_MD5SIG
839 hash_location = tcp_parse_md5sig_option(th);
840 if (!sk && hash_location) {
842 * active side is lost. Try to find listening socket through
843 * source port, and then find md5 key through listening socket.
844 * we are not loose security here:
845 * Incoming packet is checked with md5 hash with finding key,
846 * no RST generated if md5 hash doesn't match.
848 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
849 &tcp_hashinfo, &ipv6h->saddr,
850 th->source, &ipv6h->daddr,
851 ntohs(th->source), inet6_iif(skb));
856 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
/* Only RST if the incoming segment's own signature verifies. */
860 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb);
861 if (genhash || memcmp(hash_location, newhash, 16) != 0)
864 key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL;
/* RFC 793: if the segment had ACK, RST takes its ack as seq;
 * otherwise seq=0 and we ACK everything the segment occupied.
 */
869 seq = ntohl(th->ack_seq);
871 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
874 tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0);
876 #ifdef CONFIG_TCP_MD5SIG
/* Thin wrapper: send a pure ACK (rst=0) via tcp_v6_send_response(). */
885 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
886 u32 win, u32 tsval, u32 tsecr,
887 struct tcp_md5sig_key *key, u8 tclass)
889 tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass);
/* ACK on behalf of a TIME-WAIT socket, using the state preserved in the
 * timewait sock (snd_nxt/rcv_nxt, scaled window, timestamps, MD5 key).
 */
892 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
894 struct inet_timewait_sock *tw = inet_twsk(sk);
895 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
897 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
898 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
899 tcp_time_stamp + tcptw->tw_ts_offset,
900 tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw),
/* ACK on behalf of an embryonic (request_sock) connection: sequence
 * numbers derived from the stored ISNs, MD5 key looked up by the
 * packet's destination address.
 */
906 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
907 struct request_sock *req)
909 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
910 req->rcv_wnd, tcp_time_stamp, req->ts_recent,
911 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0);
/* For a segment hitting a LISTEN socket, find what it really belongs
 * to: a pending request_sock (-> tcp_check_req), an established or
 * TIME-WAIT socket on the same 4-tuple, or — failing both — the SYN
 * cookie path. Returns the socket to process the segment on.
 */
915 static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
917 struct request_sock *req, **prev;
918 const struct tcphdr *th = tcp_hdr(skb);
921 /* Find possible connection requests. */
922 req = inet6_csk_search_req(sk, &prev, th->source,
923 &ipv6_hdr(skb)->saddr,
924 &ipv6_hdr(skb)->daddr, inet6_iif(skb));
926 return tcp_check_req(sk, skb, req, prev, false);
928 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
929 &ipv6_hdr(skb)->saddr, th->source,
930 &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));
933 if (nsk->sk_state != TCP_TIME_WAIT) {
/* TIME-WAIT match: drop the ref and fall through (to cookies). */
937 inet_twsk_put(inet_twsk(nsk));
941 #ifdef CONFIG_SYN_COOKIES
943 sk = cookie_v6_check(sk, skb);
948 /* FIXME: this is substantially similar to the ipv4 code.
949 * Can some kind of merge be done? -- erics
/* Handle an incoming SYN on a LISTEN socket: allocate a request_sock,
 * parse options, decide on SYN cookies vs. a real ISN (with optional
 * tw_recycle PAWS check), send the SYN-ACK and queue the request.
 * Returns 0 in all visible paths (never sends a reset from here).
 */
951 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
953 struct tcp_options_received tmp_opt;
954 struct request_sock *req;
955 struct inet_request_sock *ireq;
956 struct ipv6_pinfo *np = inet6_sk(sk);
957 struct tcp_sock *tp = tcp_sk(sk);
958 __u32 isn = TCP_SKB_CB(skb)->when;
959 struct dst_entry *dst = NULL;
961 bool want_cookie = false;
/* v4-mapped socket received an IPv4 SYN: hand to the v4 path. */
963 if (skb->protocol == htons(ETH_P_IP))
964 return tcp_v4_conn_request(sk, skb);
966 if (!ipv6_unicast_destination(skb))
/* SYN queue full (or cookies forced on): consider SYN cookies. */
969 if ((sysctl_tcp_syncookies == 2 ||
970 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
971 want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
/* Accept queue full with young requests pending: drop. */
976 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
977 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
981 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
985 #ifdef CONFIG_TCP_MD5SIG
986 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
/* Parse the SYN's TCP options with the IPv6 minimum-MTU MSS clamp. */
989 tcp_clear_options(&tmp_opt);
990 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
991 tmp_opt.user_mss = tp->rx_opt.user_mss;
992 tcp_parse_options(skb, &tmp_opt, 0, NULL);
/* Cookies can only encode options when a timestamp is present. */
994 if (want_cookie && !tmp_opt.saw_tstamp)
995 tcp_clear_options(&tmp_opt);
997 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
998 tcp_openreq_init(req, &tmp_opt, skb);
1000 ireq = inet_rsk(req);
1001 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
1002 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
1003 if (!want_cookie || tmp_opt.tstamp_ok)
1004 TCP_ECN_create_request(req, skb, sock_net(sk));
1006 ireq->ir_iif = sk->sk_bound_dev_if;
1008 /* So that link locals have meaning */
1009 if (!sk->sk_bound_dev_if &&
1010 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
1011 ireq->ir_iif = inet6_iif(skb);
/* Keep the SYN skb if the user asked for packet info options. */
1014 if (ipv6_opt_accepted(sk, skb) ||
1015 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1016 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1017 atomic_inc(&skb->users);
1018 ireq->pktopts = skb;
/* SYN-cookie path: encode state into the sequence number. */
1022 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1023 req->cookie_ts = tmp_opt.tstamp_ok;
1027 /* VJ's idea. We save last timestamp seen
1028 * from the destination in peer table, when entering
1029 * state TIME-WAIT, and check against it before
1030 * accepting new connection request.
1032 * If "isn" is not zero, this request hit alive
1033 * timewait bucket, so that all the necessary checks
1034 * are made in the function processing timewait state.
1036 if (tmp_opt.saw_tstamp &&
1037 tcp_death_row.sysctl_tw_recycle &&
1038 (dst = inet6_csk_route_req(sk, &fl6, req)) != NULL) {
1039 if (!tcp_peer_is_proven(req, dst, true)) {
1040 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
1041 goto drop_and_release;
1044 /* Kill the following clause, if you dislike this way. */
1045 else if (!sysctl_tcp_syncookies &&
1046 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1047 (sysctl_max_syn_backlog >> 2)) &&
1048 !tcp_peer_is_proven(req, dst, false)) {
1049 /* Without syncookies last quarter of
1050 * backlog is filled with destinations,
1051 * proven to be alive.
1052 * It means that we continue to communicate
1053 * to destinations, already remembered
1054 * to the moment of synflood.
1056 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
1057 &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));
1058 goto drop_and_release;
1061 isn = tcp_v6_init_sequence(skb);
1064 tcp_rsk(req)->snt_isn = isn;
1066 if (security_inet_conn_request(sk, skb, req))
1067 goto drop_and_release;
1069 if (tcp_v6_send_synack(sk, dst, &fl6, req,
1070 skb_get_queue_mapping(skb)) ||
/* Non-cookie path: remember the SYN-ACK time and queue the request. */
1074 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1075 tcp_rsk(req)->listener = NULL;
1076 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
1084 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1085 return 0; /* don't send reset */
/* Create the child socket once the 3-way handshake completes.
 *
 * Two major paths: (1) v4-mapped — delegate to tcp_v4_syn_recv_sock()
 * and then graft IPv6 bookkeeping onto the child; (2) native IPv6 —
 * tcp_create_openreq_child() plus address/option/MD5 setup and hashing.
 * Several branch/error lines are elided in this extract.
 */
1088 static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1089 struct request_sock *req,
1090 struct dst_entry *dst)
1092 struct inet_request_sock *ireq;
1093 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1094 struct tcp6_sock *newtcp6sk;
1095 struct inet_sock *newinet;
1096 struct tcp_sock *newtp;
1098 #ifdef CONFIG_TCP_MD5SIG
1099 struct tcp_md5sig_key *key;
/* --- Path 1: the SYN arrived over IPv4 (v4-mapped socket). --- */
1103 if (skb->protocol == htons(ETH_P_IP)) {
1108 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1113 newtcp6sk = (struct tcp6_sock *)newsk;
1114 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1116 newinet = inet_sk(newsk);
1117 newnp = inet6_sk(newsk);
1118 newtp = tcp_sk(newsk);
1120 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
/* Mirror the v4 addresses as v4-mapped IPv6 addresses. */
1122 ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
1124 ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
1126 newsk->sk_v6_rcv_saddr = newnp->saddr;
1128 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1129 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1130 #ifdef CONFIG_TCP_MD5SIG
1131 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1134 newnp->ipv6_ac_list = NULL;
1135 newnp->ipv6_fl_list = NULL;
1136 newnp->pktoptions = NULL;
1138 newnp->mcast_oif = inet6_iif(skb);
1139 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1140 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1143 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1144 * here, tcp_create_openreq_child now does this for us, see the comment in
1145 * that function for the gory details. -acme
1148 /* It is tricky place. Until this moment IPv4 tcp
1149 worked with IPv6 icsk.icsk_af_ops.
1152 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
/* --- Path 2: native IPv6 handshake. --- */
1157 ireq = inet_rsk(req);
1159 if (sk_acceptq_is_full(sk))
1163 dst = inet6_csk_route_req(sk, &fl6, req);
1168 newsk = tcp_create_openreq_child(sk, req, skb);
1173 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1174 * count here, tcp_create_openreq_child now does this for us, see the
1175 * comment in that function for the gory details. -acme
1178 newsk->sk_gso_type = SKB_GSO_TCPV6;
1179 __ip6_dst_store(newsk, dst, NULL, NULL);
1180 inet6_sk_rx_dst_set(newsk, skb);
1182 newtcp6sk = (struct tcp6_sock *)newsk;
1183 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1185 newtp = tcp_sk(newsk);
1186 newinet = inet_sk(newsk);
1187 newnp = inet6_sk(newsk);
1189 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1191 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1192 newnp->saddr = ireq->ir_v6_loc_addr;
1193 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1194 newsk->sk_bound_dev_if = ireq->ir_iif;
1196 /* Now IPv6 options...
1198 First: no IPv4 options.
1200 newinet->inet_opt = NULL;
1201 newnp->ipv6_ac_list = NULL;
1202 newnp->ipv6_fl_list = NULL;
1205 newnp->rxopt.all = np->rxopt.all;
1207 /* Clone pktoptions received with SYN */
1208 newnp->pktoptions = NULL;
1209 if (ireq->pktopts != NULL) {
1210 newnp->pktoptions = skb_clone(ireq->pktopts,
1211 sk_gfp_atomic(sk, GFP_ATOMIC));
1212 consume_skb(ireq->pktopts);
1213 ireq->pktopts = NULL;
1214 if (newnp->pktoptions)
1215 skb_set_owner_r(newnp->pktoptions, newsk);
1218 newnp->mcast_oif = inet6_iif(skb);
1219 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1220 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1222 /* Clone native IPv6 options from listening socket (if any)
1224 Yes, keeping reference count would be much more clever,
1225 but we make one more one thing there: reattach optmem
1229 newnp->opt = ipv6_dup_options(newsk, np->opt);
1231 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1233 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1234 newnp->opt->opt_flen);
/* MSS/advmss setup honoring the route and any user clamp. */
1236 tcp_mtup_init(newsk);
1237 tcp_sync_mss(newsk, dst_mtu(dst));
1238 newtp->advmss = dst_metric_advmss(dst);
1239 if (tcp_sk(sk)->rx_opt.user_mss &&
1240 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1241 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1243 tcp_initialize_rcv_mss(newsk);
/* v4 fields are meaningless on a pure-IPv6 child; mark them. */
1245 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1246 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1248 #ifdef CONFIG_TCP_MD5SIG
1249 /* Copy over the MD5 key from the original socket */
1250 if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) {
1251 /* We're using one, so create a matching key
1252 * on the newsk structure. If we fail to get
1253 * memory, then we end up not copying the key
1256 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1257 AF_INET6, key->key, key->keylen,
1258 sk_gfp_atomic(sk, GFP_ATOMIC));
1262 if (__inet_inherit_port(sk, newsk) < 0) {
1263 inet_csk_prepare_forced_close(newsk);
1267 __inet6_hash(newsk, NULL);
/* Error labels (overflow/drop) with their stats counters. */
1272 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1276 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
/* Validate/prepare the TCP checksum on receive: accept hardware
 * CHECKSUM_COMPLETE if it verifies, otherwise seed skb->csum with the
 * pseudo-header and fully verify short (<= 76 byte) packets in software.
 */
1280 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
1282 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1283 if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
1284 &ipv6_hdr(skb)->daddr, skb->csum)) {
1285 skb->ip_summed = CHECKSUM_UNNECESSARY;
1290 skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
1291 &ipv6_hdr(skb)->saddr,
1292 &ipv6_hdr(skb)->daddr, 0));
1294 if (skb->len <= 76) {
1295 return __skb_checksum_complete(skb);
1300 /* The socket must have it's spinlock held when we get
1303 * We have a potential double-lock case here, so even when
1304 * doing backlog processing we use the BH locking scheme.
1305 * This is because we cannot sleep with the original spinlock
/* Per-socket receive path (called with the socket spinlock held).
 * Dispatches on state: established fast path, LISTEN (new connection
 * handling via tcp_v6_hnd_req), or the generic state machine; also
 * implements Stevens' IPV6_PKTOPTIONS latching at the end.
 */
1308 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1310 struct ipv6_pinfo *np = inet6_sk(sk);
1311 struct tcp_sock *tp;
1312 struct sk_buff *opt_skb = NULL;
1314 /* Imagine: socket is IPv6. IPv4 packet arrives,
1315 goes to IPv4 receive handler and backlogged.
1316 From backlog it always goes here. Kerboom...
1317 Fortunately, tcp_rcv_established and rcv_established
1318 handle them correctly, but it is not case with
1319 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1322 if (skb->protocol == htons(ETH_P_IP))
1323 return tcp_v4_do_rcv(sk, skb);
1325 #ifdef CONFIG_TCP_MD5SIG
1326 if (tcp_v6_inbound_md5_hash (sk, skb))
1330 if (sk_filter(sk, skb))
1334 * socket locking is here for SMP purposes as backlog rcv
1335 * is currently called with bh processing disabled.
1338 /* Do Stevens' IPV6_PKTOPTIONS.
1340 Yes, guys, it is the only place in our code, where we
1341 may make it not affecting IPv4.
1342 The rest of code is protocol independent,
1343 and I do not like idea to uglify IPv4.
1345 Actually, all the idea behind IPV6_PKTOPTIONS
1346 looks not very well thought. For now we latch
1347 options, received in the last packet, enqueued
1348 by tcp. Feel free to propose better solution.
/* Clone the skb up-front (presumably conditional on rxopt in the
 * elided line) so options survive tcp_rcv_established consuming it.
 */
1352 opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
1354 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1355 struct dst_entry *dst = sk->sk_rx_dst;
1357 sock_rps_save_rxhash(sk, skb);
/* Drop the cached rx route if the ifindex changed or the route
 * cookie no longer validates (see inet6_sk_rx_dst_set).
 */
1359 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1360 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1362 sk->sk_rx_dst = NULL;
1366 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1368 goto ipv6_pktoptions;
/* Slow path: verify header length and checksum first. */
1372 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1375 if (sk->sk_state == TCP_LISTEN) {
1376 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1381 * Queue it on the new socket if the new socket is active,
1382 * otherwise we just shortcircuit this and continue with
1386 sock_rps_save_rxhash(nsk, skb);
1387 if (tcp_child_process(sk, nsk, skb))
1390 __kfree_skb(opt_skb);
1394 sock_rps_save_rxhash(sk, skb);
1396 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
1399 goto ipv6_pktoptions;
/* reset label: answer bogus segments with a RST. */
1403 tcp_v6_send_reset(sk, skb);
1406 __kfree_skb(opt_skb);
/* csum_err label: account the checksum failure. */
1410 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
1411 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1416 /* Do you ask, what is it?
1418 1. skb was enqueued by tcp.
1419 2. skb is added to tail of read queue, rather than out of order.
1420 3. socket is not in passive state.
1421 4. Finally, it really contains options, which user wants to receive.
1424 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1425 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1426 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1427 np->mcast_oif = inet6_iif(opt_skb);
1428 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1429 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1430 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1431 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1432 if (ipv6_opt_accepted(sk, opt_skb)) {
1433 skb_set_owner_r(opt_skb, sk);
/* Latch the new options, freeing whatever was there before. */
1434 opt_skb = xchg(&np->pktoptions, opt_skb);
1436 __kfree_skb(opt_skb);
1437 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * tcp_v6_rcv - main IPv6 TCP receive entry point (softirq context).
 *
 * Validates the TCP header and checksum, fills in TCP_SKB_CB(), looks
 * up the owning socket, and either processes the segment directly,
 * prequeues it, or appends it to the socket backlog.  Unclaimed
 * segments get a RST; TIME_WAIT sockets are handled at the end.
 */
1445 static int tcp_v6_rcv(struct sk_buff *skb)
1447 const struct tcphdr *th;
1448 const struct ipv6hdr *hdr;
1451 struct net *net = dev_net(skb->dev);
/* Only packets addressed to this host are processed. */
1453 if (skb->pkt_type != PACKET_HOST)
1457 * Count it even if it's bad.
1459 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
/* Make sure the basic TCP header is in the linear area. */
1461 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1466 if (th->doff < sizeof(struct tcphdr)/4)
1468 if (!pskb_may_pull(skb, th->doff*4))
1470 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
1475 hdr = ipv6_hdr(skb);
/* Cache sequence/ack numbers and flags in the skb control block. */
1476 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1477 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1478 skb->len - th->doff*4);
1479 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1480 TCP_SKB_CB(skb)->when = 0;
1481 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1482 TCP_SKB_CB(skb)->sacked = 0;
1484 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1489 if (sk->sk_state == TCP_TIME_WAIT)
/* Enforce the IPV6_MINHOPCOUNT (generalized TTL security) check. */
1492 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1493 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1494 goto discard_and_relse;
1497 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1498 goto discard_and_relse;
1500 if (sk_filter(sk, skb))
1501 goto discard_and_relse;
1503 sk_mark_napi_id(sk, skb);
1506 bh_lock_sock_nested(sk);
1508 if (!sock_owned_by_user(sk)) {
1509 #ifdef CONFIG_NET_DMA
1510 struct tcp_sock *tp = tcp_sk(sk);
1511 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1512 tp->ucopy.dma_chan = net_dma_find_channel();
1513 if (tp->ucopy.dma_chan)
1514 ret = tcp_v6_do_rcv(sk, skb);
1518 if (!tcp_prequeue(sk, skb))
1519 ret = tcp_v6_do_rcv(sk, skb);
/* Socket owned by user context: queue to the backlog, bounded by
 * the combined receive + send buffer size.
 */
1521 } else if (unlikely(sk_add_backlog(sk, skb,
1522 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1524 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
1525 goto discard_and_relse;
1530 return ret ? -1 : 0;
/* No matching socket: policy-check, then answer with a RST. */
1533 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1536 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1538 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1540 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1542 tcp_v6_send_reset(NULL, skb);
/* TIME_WAIT handling: validate, then let the timewait state machine
 * decide between SYN re-use, ACK, RST, or silent success.
 */
1554 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1555 inet_twsk_put(inet_twsk(sk));
1559 if (skb->len < (th->doff<<2)) {
1560 inet_twsk_put(inet_twsk(sk));
1563 if (tcp_checksum_complete(skb)) {
1564 inet_twsk_put(inet_twsk(sk));
1568 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
/* Acceptable new SYN: look for a listener to take it over. */
1573 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1574 &ipv6_hdr(skb)->saddr, th->source,
1575 &ipv6_hdr(skb)->daddr,
1576 ntohs(th->dest), inet6_iif(skb));
1578 struct inet_timewait_sock *tw = inet_twsk(sk);
1579 inet_twsk_deschedule(tw, &tcp_death_row);
1584 /* Fall through to ACK */
1587 tcp_v6_timewait_ack(sk, skb);
1591 case TCP_TW_SUCCESS:;
/*
 * tcp_v6_early_demux - early socket demultiplex for IPv6 TCP.
 *
 * Looks up an established socket before routing so a cached rx dst can
 * be attached to the skb, avoiding a second route lookup later.
 */
1596 static void tcp_v6_early_demux(struct sk_buff *skb)
1598 const struct ipv6hdr *hdr;
1599 const struct tcphdr *th;
1602 if (skb->pkt_type != PACKET_HOST)
1605 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1608 hdr = ipv6_hdr(skb);
1611 if (th->doff < sizeof(struct tcphdr) / 4)
/* Only established sockets are eligible for early demux. */
1614 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1615 &hdr->saddr, th->source,
1616 &hdr->daddr, ntohs(th->dest),
1620 skb->destructor = sock_edemux;
1621 if (sk->sk_state != TCP_TIME_WAIT) {
1622 struct dst_entry *dst = sk->sk_rx_dst;
/* Attach the cached dst only if it is still valid and the packet
 * arrived on the same interface it was cached for.
 */
1625 dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
1627 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1628 skb_dst_set_noref(skb, dst);
/* TIME_WAIT socket operations for IPv6 TCP. */
1633 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1634 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1635 .twsk_unique = tcp_twsk_unique,
1636 .twsk_destructor= tcp_twsk_destructor,
/* Address-family operations for native IPv6 TCP sockets. */
1639 static const struct inet_connection_sock_af_ops ipv6_specific = {
1640 .queue_xmit = inet6_csk_xmit,
1641 .send_check = tcp_v6_send_check,
1642 .rebuild_header = inet6_sk_rebuild_header,
1643 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1644 .conn_request = tcp_v6_conn_request,
1645 .syn_recv_sock = tcp_v6_syn_recv_sock,
1646 .net_header_len = sizeof(struct ipv6hdr),
1647 .net_frag_header_len = sizeof(struct frag_hdr),
1648 .setsockopt = ipv6_setsockopt,
1649 .getsockopt = ipv6_getsockopt,
1650 .addr2sockaddr = inet6_csk_addr2sockaddr,
1651 .sockaddr_len = sizeof(struct sockaddr_in6),
1652 .bind_conflict = inet6_csk_bind_conflict,
1653 #ifdef CONFIG_COMPAT
1654 .compat_setsockopt = compat_ipv6_setsockopt,
1655 .compat_getsockopt = compat_ipv6_getsockopt,
1659 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 (RFC 2385) operations for native IPv6 sockets. */
1660 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1661 .md5_lookup = tcp_v6_md5_lookup,
1662 .calc_md5_hash = tcp_v6_md5_hash_skb,
1663 .md5_parse = tcp_v6_parse_md5_keys,
1668 * TCP over IPv4 via INET6 API
/* Address-family operations for v4-mapped sockets: IPv4 transport
 * callbacks with IPv6 socket-option and sockaddr handling.
 */
1671 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1672 .queue_xmit = ip_queue_xmit,
1673 .send_check = tcp_v4_send_check,
1674 .rebuild_header = inet_sk_rebuild_header,
1675 .sk_rx_dst_set = inet_sk_rx_dst_set,
1676 .conn_request = tcp_v6_conn_request,
1677 .syn_recv_sock = tcp_v6_syn_recv_sock,
1678 .net_header_len = sizeof(struct iphdr),
1679 .setsockopt = ipv6_setsockopt,
1680 .getsockopt = ipv6_getsockopt,
1681 .addr2sockaddr = inet6_csk_addr2sockaddr,
1682 .sockaddr_len = sizeof(struct sockaddr_in6),
1683 .bind_conflict = inet6_csk_bind_conflict,
1684 #ifdef CONFIG_COMPAT
1685 .compat_setsockopt = compat_ipv6_setsockopt,
1686 .compat_getsockopt = compat_ipv6_getsockopt,
1690 #ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 operations for v4-mapped sockets (IPv4 hashing, IPv6 key parse). */
1691 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1692 .md5_lookup = tcp_v4_md5_lookup,
1693 .calc_md5_hash = tcp_v4_md5_hash_skb,
1694 .md5_parse = tcp_v6_parse_md5_keys,
1698 /* NOTE: A lot of things set to zero explicitly by call to
1699 * sk_alloc() so need not be done here.
/* Per-socket init: install the IPv6 af_ops (and MD5 ops if enabled). */
1701 static int tcp_v6_init_sock(struct sock *sk)
1703 struct inet_connection_sock *icsk = inet_csk(sk);
1707 icsk->icsk_af_ops = &ipv6_specific;
1709 #ifdef CONFIG_TCP_MD5SIG
1710 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
/* Socket teardown: common TCP destroy, then IPv6-specific cleanup. */
1716 static void tcp_v6_destroy_sock(struct sock *sk)
1718 tcp_v4_destroy_sock(sk);
1719 inet6_destroy_sock(sk);
1722 #ifdef CONFIG_PROC_FS
1723 /* Proc filesystem TCPv6 sock list dumping. */
/* Format one open request (SYN_RECV) entry for /proc/net/tcp6. */
1724 static void get_openreq6(struct seq_file *seq,
1725 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
/* Remaining time to expiry, in jiffies (may go negative). */
1727 int ttd = req->expires - jiffies;
1728 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1729 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1735 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1736 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1738 src->s6_addr32[0], src->s6_addr32[1],
1739 src->s6_addr32[2], src->s6_addr32[3],
1740 inet_rsk(req)->ir_num,
1741 dest->s6_addr32[0], dest->s6_addr32[1],
1742 dest->s6_addr32[2], dest->s6_addr32[3],
1743 ntohs(inet_rsk(req)->ir_rmt_port),
1745 0, 0, /* could print option size, but that is af dependent. */
1746 1, /* timers active (only the expire timer) */
1747 jiffies_to_clock_t(ttd),
1749 from_kuid_munged(seq_user_ns(seq), uid),
1750 0, /* non standard timer */
1751 0, /* open_requests have no inode */
/* Format one full socket entry for /proc/net/tcp6. */
1755 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1757 const struct in6_addr *dest, *src;
1760 unsigned long timer_expires;
1761 const struct inet_sock *inet = inet_sk(sp);
1762 const struct tcp_sock *tp = tcp_sk(sp);
1763 const struct inet_connection_sock *icsk = inet_csk(sp);
1765 dest = &sp->sk_v6_daddr;
1766 src = &sp->sk_v6_rcv_saddr;
1767 destp = ntohs(inet->inet_dport);
1768 srcp = ntohs(inet->inet_sport);
/* Report whichever timer is pending: retransmit, zero-window probe,
 * or the keepalive/sk_timer; otherwise expiry defaults to "now".
 */
1770 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1772 timer_expires = icsk->icsk_timeout;
1773 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1775 timer_expires = icsk->icsk_timeout;
1776 } else if (timer_pending(&sp->sk_timer)) {
1778 timer_expires = sp->sk_timer.expires;
1781 timer_expires = jiffies;
1785 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1786 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1788 src->s6_addr32[0], src->s6_addr32[1],
1789 src->s6_addr32[2], src->s6_addr32[3], srcp,
1790 dest->s6_addr32[0], dest->s6_addr32[1],
1791 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1793 tp->write_seq-tp->snd_una,
/* For listeners the rx queue column shows the accept backlog. */
1794 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1796 jiffies_delta_to_clock_t(timer_expires - jiffies),
1797 icsk->icsk_retransmits,
1798 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1799 icsk->icsk_probes_out,
1801 atomic_read(&sp->sk_refcnt), sp,
1802 jiffies_to_clock_t(icsk->icsk_rto),
1803 jiffies_to_clock_t(icsk->icsk_ack.ato),
1804 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
/* -1 flags "still in initial slow start" to userspace. */
1806 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
/* Format one TIME_WAIT socket entry for /proc/net/tcp6. */
1810 static void get_timewait6_sock(struct seq_file *seq,
1811 struct inet_timewait_sock *tw, int i)
1813 const struct in6_addr *dest, *src;
/* Time remaining until the timewait death-row timer fires. */
1815 s32 delta = tw->tw_ttd - inet_tw_time_stamp();
1817 dest = &tw->tw_v6_daddr;
1818 src = &tw->tw_v6_rcv_saddr;
1819 destp = ntohs(tw->tw_dport);
1820 srcp = ntohs(tw->tw_sport);
1823 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1824 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1826 src->s6_addr32[0], src->s6_addr32[1],
1827 src->s6_addr32[2], src->s6_addr32[3], srcp,
1828 dest->s6_addr32[0], dest->s6_addr32[1],
1829 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1830 tw->tw_substate, 0, 0,
1831 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1832 atomic_read(&tw->tw_refcnt), tw);
/* seq_file show callback: dispatch to the right formatter depending on
 * whether v is the header token, a timewait, full, or request socket.
 */
1835 static int tcp6_seq_show(struct seq_file *seq, void *v)
1837 struct tcp_iter_state *st;
1838 struct sock *sk = v;
1840 if (v == SEQ_START_TOKEN) {
1845 "st tx_queue rx_queue tr tm->when retrnsmt"
1846 " uid timeout inode\n");
1851 switch (st->state) {
1852 case TCP_SEQ_STATE_LISTENING:
1853 case TCP_SEQ_STATE_ESTABLISHED:
1854 if (sk->sk_state == TCP_TIME_WAIT)
1855 get_timewait6_sock(seq, v, st->num);
1857 get_tcp6_sock(seq, v, st->num);
1859 case TCP_SEQ_STATE_OPENREQ:
1860 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
/* File operations backing /proc/net/tcp6. */
1867 static const struct file_operations tcp6_afinfo_seq_fops = {
1868 .owner = THIS_MODULE,
1869 .open = tcp_seq_open,
1871 .llseek = seq_lseek,
1872 .release = seq_release_net
/* /proc/net/tcp6 registration descriptor. */
1875 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1878 .seq_fops = &tcp6_afinfo_seq_fops,
1880 .show = tcp6_seq_show,
/* Register /proc/net/tcp6 for a network namespace. */
1884 int __net_init tcp6_proc_init(struct net *net)
1886 return tcp_proc_register(net, &tcp6_seq_afinfo);
/* Unregister /proc/net/tcp6 for a network namespace. */
1889 void tcp6_proc_exit(struct net *net)
1891 tcp_proc_unregister(net, &tcp6_seq_afinfo);
/* Zero the socket for reuse, skipping the pinet6 pointer so concurrent
 * RCU lookups never see a cleared pinet6 field.
 */
1895 static void tcp_v6_clear_sk(struct sock *sk, int size)
1897 struct inet_sock *inet = inet_sk(sk);
1899 /* we do not want to clear pinet6 field, because of RCU lookups */
1900 sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));
/* Clear everything after pinet6 up to the end of the object. */
1902 size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
1903 memset(&inet->pinet6 + 1, 0, size);
/* Protocol descriptor wiring IPv6 TCP into the socket layer. */
1906 struct proto tcpv6_prot = {
1908 .owner = THIS_MODULE,
1910 .connect = tcp_v6_connect,
1911 .disconnect = tcp_disconnect,
1912 .accept = inet_csk_accept,
1914 .init = tcp_v6_init_sock,
1915 .destroy = tcp_v6_destroy_sock,
1916 .shutdown = tcp_shutdown,
1917 .setsockopt = tcp_setsockopt,
1918 .getsockopt = tcp_getsockopt,
1919 .recvmsg = tcp_recvmsg,
1920 .sendmsg = tcp_sendmsg,
1921 .sendpage = tcp_sendpage,
1922 .backlog_rcv = tcp_v6_do_rcv,
1923 .release_cb = tcp_release_cb,
1924 .mtu_reduced = tcp_v6_mtu_reduced,
1925 .hash = tcp_v6_hash,
1926 .unhash = inet_unhash,
1927 .get_port = inet_csk_get_port,
1928 .enter_memory_pressure = tcp_enter_memory_pressure,
1929 .stream_memory_free = tcp_stream_memory_free,
1930 .sockets_allocated = &tcp_sockets_allocated,
1931 .memory_allocated = &tcp_memory_allocated,
1932 .memory_pressure = &tcp_memory_pressure,
1933 .orphan_count = &tcp_orphan_count,
1934 .sysctl_mem = sysctl_tcp_mem,
1935 .sysctl_wmem = sysctl_tcp_wmem,
1936 .sysctl_rmem = sysctl_tcp_rmem,
1937 .max_header = MAX_TCP_HEADER,
1938 .obj_size = sizeof(struct tcp6_sock),
/* RCU-safe slab: freed objects may still be read by lockless lookups. */
1939 .slab_flags = SLAB_DESTROY_BY_RCU,
1940 .twsk_prot = &tcp6_timewait_sock_ops,
1941 .rsk_prot = &tcp6_request_sock_ops,
1942 .h.hashinfo = &tcp_hashinfo,
1943 .no_autobind = true,
1944 #ifdef CONFIG_COMPAT
1945 .compat_setsockopt = compat_tcp_setsockopt,
1946 .compat_getsockopt = compat_tcp_getsockopt,
1948 #ifdef CONFIG_MEMCG_KMEM
1949 .proto_cgroup = tcp_proto_cgroup,
1951 .clear_sk = tcp_v6_clear_sk,
/* inet6 protocol hooks for IPPROTO_TCP. */
1954 static const struct inet6_protocol tcpv6_protocol = {
1955 .early_demux = tcp_v6_early_demux,
1956 .handler = tcp_v6_rcv,
1957 .err_handler = tcp_v6_err,
1958 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
/* Socket-layer registration: SOCK_STREAM/IPPROTO_TCP over AF_INET6. */
1961 static struct inet_protosw tcpv6_protosw = {
1962 .type = SOCK_STREAM,
1963 .protocol = IPPROTO_TCP,
1964 .prot = &tcpv6_prot,
1965 .ops = &inet6_stream_ops,
1967 .flags = INET_PROTOSW_PERMANENT |
/* Per-netns init: create the control socket used to send resets/ACKs. */
1971 static int __net_init tcpv6_net_init(struct net *net)
1973 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
1974 SOCK_RAW, IPPROTO_TCP, net);
/* Per-netns exit: destroy the control socket. */
1977 static void __net_exit tcpv6_net_exit(struct net *net)
1979 inet_ctl_sock_destroy(net->ipv6.tcp_sk)
/* Batched netns exit: purge all IPv6 timewait sockets at once. */
1982 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
1984 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
/* Per-network-namespace lifecycle callbacks for IPv6 TCP. */
1987 static struct pernet_operations tcpv6_net_ops = {
1988 .init = tcpv6_net_init,
1989 .exit = tcpv6_net_exit,
1990 .exit_batch = tcpv6_net_exit_batch,
/* Module init: register the inet6 protocol handler, the protosw entry,
 * and the pernet operations, unwinding in reverse order on failure.
 */
1993 int __init tcpv6_init(void)
1997 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2001 /* register inet6 protocol */
2002 ret = inet6_register_protosw(&tcpv6_protosw);
2004 goto out_tcpv6_protocol;
2006 ret = register_pernet_subsys(&tcpv6_net_ops);
2008 goto out_tcpv6_protosw;
/* Error unwind: undo registrations in reverse order. */
2013 inet6_unregister_protosw(&tcpv6_protosw);
2015 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2019 void tcpv6_exit(void)
2021 unregister_pernet_subsys(&tcpv6_net_ops);
2022 inet6_unregister_protosw(&tcpv6_protosw);
2023 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);