tcp: md5: remove spinlock usage in fast path
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e22020790709dbde58759c771f47a69e883892e4..53d9c120fbb80df45ebdf68ca6b8857e52bdd8d5 100644
@@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk)
 
        icsk->icsk_sync_mss = tcp_sync_mss;
 
-       /* TCP Cookie Transactions */
-       if (sysctl_tcp_cookie_size > 0) {
-               /* Default, cookies without s_data_payload. */
-               tp->cookie_values =
-                       kzalloc(sizeof(*tp->cookie_values),
-                               sk->sk_allocation);
-               if (tp->cookie_values != NULL)
-                       kref_init(&tp->cookie_values->kref);
-       }
        /* Presumed zeroed, in order of appearance:
         *      cookie_in_always, cookie_out_never,
         *      s_data_constant, s_data_in, s_data_out
@@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                release_sock(sk);
                return err;
        }
-       case TCP_COOKIE_TRANSACTIONS: {
-               struct tcp_cookie_transactions ctd;
-               struct tcp_cookie_values *cvp = NULL;
-
-               if (sizeof(ctd) > optlen)
-                       return -EINVAL;
-               if (copy_from_user(&ctd, optval, sizeof(ctd)))
-                       return -EFAULT;
-
-               if (ctd.tcpct_used > sizeof(ctd.tcpct_value) ||
-                   ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
-                       return -EINVAL;
-
-               if (ctd.tcpct_cookie_desired == 0) {
-                       /* default to global value */
-               } else if ((0x1 & ctd.tcpct_cookie_desired) ||
-                          ctd.tcpct_cookie_desired > TCP_COOKIE_MAX ||
-                          ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
-                       return -EINVAL;
-               }
-
-               if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
-                       /* Supersedes all other values */
-                       lock_sock(sk);
-                       if (tp->cookie_values != NULL) {
-                               kref_put(&tp->cookie_values->kref,
-                                        tcp_cookie_values_release);
-                               tp->cookie_values = NULL;
-                       }
-                       tp->rx_opt.cookie_in_always = 0; /* false */
-                       tp->rx_opt.cookie_out_never = 1; /* true */
-                       release_sock(sk);
-                       return err;
-               }
-
-               /* Allocate ancillary memory before locking.
-                */
-               if (ctd.tcpct_used > 0 ||
-                   (tp->cookie_values == NULL &&
-                    (sysctl_tcp_cookie_size > 0 ||
-                     ctd.tcpct_cookie_desired > 0 ||
-                     ctd.tcpct_s_data_desired > 0))) {
-                       cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
-                                     GFP_KERNEL);
-                       if (cvp == NULL)
-                               return -ENOMEM;
-
-                       kref_init(&cvp->kref);
-               }
-               lock_sock(sk);
-               tp->rx_opt.cookie_in_always =
-                       (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
-               tp->rx_opt.cookie_out_never = 0; /* false */
-
-               if (tp->cookie_values != NULL) {
-                       if (cvp != NULL) {
-                               /* Changed values are recorded by a changed
-                                * pointer, ensuring the cookie will differ,
-                                * without separately hashing each value later.
-                                */
-                               kref_put(&tp->cookie_values->kref,
-                                        tcp_cookie_values_release);
-                       } else {
-                               cvp = tp->cookie_values;
-                       }
-               }
-
-               if (cvp != NULL) {
-                       cvp->cookie_desired = ctd.tcpct_cookie_desired;
-
-                       if (ctd.tcpct_used > 0) {
-                               memcpy(cvp->s_data_payload, ctd.tcpct_value,
-                                      ctd.tcpct_used);
-                               cvp->s_data_desired = ctd.tcpct_used;
-                               cvp->s_data_constant = 1; /* true */
-                       } else {
-                               /* No constant payload data. */
-                               cvp->s_data_desired = ctd.tcpct_s_data_desired;
-                               cvp->s_data_constant = 0; /* false */
-                       }
-
-                       tp->cookie_values = cvp;
-               }
-               release_sock(sk);
-               return err;
-       }
        default:
                /* fallthru */
                break;
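
The removed setsockopt path above follows a pattern worth noting: the ancillary buffer is allocated with GFP_KERNEL before lock_sock(), because the allocation may sleep and the socket lock must not be held across a sleeping call. A minimal userspace sketch of the same shape, using pthreads and calloc as stand-ins for the kernel primitives (all names here are illustrative, not kernel API):

    #include <stdlib.h>
    #include <pthread.h>

    struct blob { int payload; };
    struct state { struct blob *cur; };

    static pthread_mutex_t sk_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Allocate outside the lock (allocation may block), then swap the
     * pointer in under the lock; freeing the old blob stands in for
     * kref_put() on the previous cookie_values.
     */
    static int replace_blob(struct state *st, int need_new)
    {
        struct blob *nb = NULL;

        if (need_new) {
            nb = calloc(1, sizeof(*nb));
            if (!nb)
                return -1;              /* -ENOMEM analogue */
        }
        pthread_mutex_lock(&sk_lock);
        if (nb) {
            free(st->cur);              /* drop the old value */
            st->cur = nb;
        }
        pthread_mutex_unlock(&sk_lock);
        return 0;
    }
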
@@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                        return -EFAULT;
                return 0;
 
-       case TCP_COOKIE_TRANSACTIONS: {
-               struct tcp_cookie_transactions ctd;
-               struct tcp_cookie_values *cvp = tp->cookie_values;
-
-               if (get_user(len, optlen))
-                       return -EFAULT;
-               if (len < sizeof(ctd))
-                       return -EINVAL;
-
-               memset(&ctd, 0, sizeof(ctd));
-               ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ?
-                                  TCP_COOKIE_IN_ALWAYS : 0)
-                               | (tp->rx_opt.cookie_out_never ?
-                                  TCP_COOKIE_OUT_NEVER : 0);
-
-               if (cvp != NULL) {
-                       ctd.tcpct_flags |= (cvp->s_data_in ?
-                                           TCP_S_DATA_IN : 0)
-                                        | (cvp->s_data_out ?
-                                           TCP_S_DATA_OUT : 0);
-
-                       ctd.tcpct_cookie_desired = cvp->cookie_desired;
-                       ctd.tcpct_s_data_desired = cvp->s_data_desired;
-
-                       memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
-                              cvp->cookie_pair_size);
-                       ctd.tcpct_used = cvp->cookie_pair_size;
-               }
-
-               if (put_user(sizeof(ctd), optlen))
-                       return -EFAULT;
-               if (copy_to_user(optval, &ctd, sizeof(ctd)))
-                       return -EFAULT;
-               return 0;
-       }
        case TCP_THIN_LINEAR_TIMEOUTS:
                val = tp->thin_lto;
                break;
@@ -3015,6 +2885,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
        __be32 delta;
        unsigned int oldlen;
        unsigned int mss;
+       struct sk_buff *gso_skb = skb;
+       __sum16 newcheck;
 
        if (!pskb_may_pull(skb, sizeof(*th)))
                goto out;
@@ -3044,6 +2916,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
                               SKB_GSO_TCP_ECN |
                               SKB_GSO_TCPV6 |
                               SKB_GSO_GRE |
+                              SKB_GSO_UDP_TUNNEL |
                               0) ||
                             !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
                        goto out;
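
The hunk above extends the set of acceptable gso_type bits with SKB_GSO_UDP_TUNNEL. The check rejects any segment carrying a feature bit outside the allowed mask, then requires at least one of the two TCP flavours. A hedged sketch of that validation shape, with illustrative flag values rather than the kernel's:

    #include <stdbool.h>

    #define GSO_TCPV4      0x01u   /* illustrative values, not the kernel's */
    #define GSO_DODGY      0x02u
    #define GSO_TCP_ECN    0x04u
    #define GSO_TCPV6      0x08u
    #define GSO_GRE        0x10u
    #define GSO_UDP_TUNNEL 0x20u

    static bool gso_type_ok(unsigned int type)
    {
        const unsigned int allowed = GSO_TCPV4 | GSO_DODGY | GSO_TCP_ECN |
                                     GSO_TCPV6 | GSO_GRE | GSO_UDP_TUNNEL;

        if (type & ~allowed)                    /* unknown offload bit: refuse */
            return false;
        return type & (GSO_TCPV4 | GSO_TCPV6);  /* must be a TCP segment */
    }
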
@@ -3064,11 +2937,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
        th = tcp_hdr(skb);
        seq = ntohl(th->seq);
 
+       newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
+                                              (__force u32)delta));
+
        do {
                th->fin = th->psh = 0;
+               th->check = newcheck;
 
-               th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
-                                      (__force u32)delta));
                if (skb->ip_summed != CHECKSUM_PARTIAL)
                        th->check =
                             csum_fold(csum_partial(skb_transport_header(skb),
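
This hunk hoists the per-segment checksum out of the loop: every segment starts from the same inherited th->check and the same length delta, so the folded result (newcheck) can be computed once. A small sketch of the one's-complement fold involved, using plain integer types instead of the kernel's __wsum/__sum16 annotations:

    #include <stdint.h>

    /* Fold a 32-bit one's-complement accumulator to 16 bits and invert,
     * mirroring the shape of the kernel's csum_fold().
     */
    static uint16_t csum_fold32(uint32_t sum)
    {
        sum = (sum & 0xffff) + (sum >> 16);     /* fold the carries once */
        sum = (sum & 0xffff) + (sum >> 16);     /* and any new carry */
        return (uint16_t)~sum;
    }

    /* th->check = ~csum_fold(check + delta): adjust an existing checksum
     * for a fixed delta without re-summing the payload.
     */
    static uint16_t checksum_adjust(uint16_t check, uint32_t delta)
    {
        return (uint16_t)~csum_fold32((uint32_t)check + delta);
    }
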
@@ -3082,6 +2957,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
                th->cwr = 0;
        } while (skb->next);
 
+       /* The following permits TCP Small Queues to work well with GSO:
+        * the callback into the TCP stack runs when the last frag is
+        * freed at TX completion, not right now when gso_skb is freed
+        * by the GSO engine.
+        */
+       if (gso_skb->destructor == tcp_wfree) {
+               swap(gso_skb->sk, skb->sk);
+               swap(gso_skb->destructor, skb->destructor);
+               swap(gso_skb->truesize, skb->truesize);
+       }
+
        delta = htonl(oldlen + (skb->tail - skb->transport_header) +
                      skb->data_len);
        th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
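
The destructor swap above exists because gso_skb is freed by the GSO engine as soon as segmentation finishes, while TCP Small Queues needs its tcp_wfree callback to fire only when the data has actually left the NIC. Swapping sk, destructor and truesize onto the last segment, rather than copying them, keeps the socket's write-memory accounting balanced when each buffer is eventually freed. A toy model of the transfer (the struct here is a stand-in, not struct sk_buff):

    /* Toy model of the relevant sk_buff fields. */
    struct skb_model {
        void *sk;                       /* owning socket */
        void (*destructor)(void *sk);   /* runs when the buffer is freed */
        unsigned int truesize;          /* memory charged to the socket */
    };

    #define SWAP(a, b) \
        do { __typeof__(a) _t = (a); (a) = (b); (b) = _t; } while (0)

    /* Move TX-completion accounting from the original GSO skb to the
     * last segment, so the callback fires when the wire actually drains.
     */
    static void transfer_wmem_owner(struct skb_model *gso_skb,
                                    struct skb_model *last_seg)
    {
        SWAP(gso_skb->sk, last_seg->sk);
        SWAP(gso_skb->destructor, last_seg->destructor);
        SWAP(gso_skb->truesize, last_seg->truesize);
    }
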
@@ -3209,9 +3095,8 @@ int tcp_gro_complete(struct sk_buff *skb)
 EXPORT_SYMBOL(tcp_gro_complete);
 
 #ifdef CONFIG_TCP_MD5SIG
-static unsigned long tcp_md5sig_users;
-static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool;
-static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
+static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
+static DEFINE_MUTEX(tcp_md5sig_mutex);
 
 static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
 {
@@ -3226,30 +3111,14 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
        free_percpu(pool);
 }
 
-void tcp_free_md5sig_pool(void)
-{
-       struct tcp_md5sig_pool __percpu *pool = NULL;
-
-       spin_lock_bh(&tcp_md5sig_pool_lock);
-       if (--tcp_md5sig_users == 0) {
-               pool = tcp_md5sig_pool;
-               tcp_md5sig_pool = NULL;
-       }
-       spin_unlock_bh(&tcp_md5sig_pool_lock);
-       if (pool)
-               __tcp_free_md5sig_pool(pool);
-}
-EXPORT_SYMBOL(tcp_free_md5sig_pool);
-
-static struct tcp_md5sig_pool __percpu *
-__tcp_alloc_md5sig_pool(struct sock *sk)
+static void __tcp_alloc_md5sig_pool(void)
 {
        int cpu;
        struct tcp_md5sig_pool __percpu *pool;
 
        pool = alloc_percpu(struct tcp_md5sig_pool);
        if (!pool)
-               return NULL;
+               return;
 
        for_each_possible_cpu(cpu) {
                struct crypto_hash *hash;
@@ -3260,53 +3129,27 @@ __tcp_alloc_md5sig_pool(struct sock *sk)
 
                per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
        }
-       return pool;
+       /* before setting tcp_md5sig_pool, we must commit all writes
+        * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
+        */
+       smp_wmb();
+       tcp_md5sig_pool = pool;
+       return;
 out_free:
        __tcp_free_md5sig_pool(pool);
-       return NULL;
 }
 
-struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
+bool tcp_alloc_md5sig_pool(void)
 {
-       struct tcp_md5sig_pool __percpu *pool;
-       bool alloc = false;
-
-retry:
-       spin_lock_bh(&tcp_md5sig_pool_lock);
-       pool = tcp_md5sig_pool;
-       if (tcp_md5sig_users++ == 0) {
-               alloc = true;
-               spin_unlock_bh(&tcp_md5sig_pool_lock);
-       } else if (!pool) {
-               tcp_md5sig_users--;
-               spin_unlock_bh(&tcp_md5sig_pool_lock);
-               cpu_relax();
-               goto retry;
-       } else
-               spin_unlock_bh(&tcp_md5sig_pool_lock);
-
-       if (alloc) {
-               /* we cannot hold spinlock here because this may sleep. */
-               struct tcp_md5sig_pool __percpu *p;
-
-               p = __tcp_alloc_md5sig_pool(sk);
-               spin_lock_bh(&tcp_md5sig_pool_lock);
-               if (!p) {
-                       tcp_md5sig_users--;
-                       spin_unlock_bh(&tcp_md5sig_pool_lock);
-                       return NULL;
-               }
-               pool = tcp_md5sig_pool;
-               if (pool) {
-                       /* oops, it has already been assigned. */
-                       spin_unlock_bh(&tcp_md5sig_pool_lock);
-                       __tcp_free_md5sig_pool(p);
-               } else {
-                       tcp_md5sig_pool = pool = p;
-                       spin_unlock_bh(&tcp_md5sig_pool_lock);
-               }
+       if (unlikely(!tcp_md5sig_pool)) {
+               mutex_lock(&tcp_md5sig_mutex);
+
+               if (!tcp_md5sig_pool)
+                       __tcp_alloc_md5sig_pool();
+
+               mutex_unlock(&tcp_md5sig_mutex);
        }
-       return pool;
+       return tcp_md5sig_pool != NULL;
 }
 EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
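
The rewritten allocator above is classic double-checked locking: a lock-free read of tcp_md5sig_pool on the fast path, a mutex plus re-check on the slow path, and an smp_wmb() so the fully initialized per-cpu contents are committed to memory before the pointer becomes visible. Once allocated, the pool is never freed, which is what lets readers drop the refcount entirely. A hedged userspace analogue using C11 atomics in place of smp_wmb()/ACCESS_ONCE():

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdlib.h>

    struct pool { int ready; /* stand-in for the per-cpu hash state */ };

    static _Atomic(struct pool *) the_pool;
    static pthread_mutex_t pool_mutex = PTHREAD_MUTEX_INITIALIZER;

    static struct pool *get_or_alloc_pool(void)
    {
        struct pool *p = atomic_load_explicit(&the_pool,
                                              memory_order_acquire);
        if (p)
            return p;                   /* fast path: no lock taken */

        pthread_mutex_lock(&pool_mutex);
        p = atomic_load_explicit(&the_pool, memory_order_relaxed);
        if (!p) {                       /* re-check under the lock */
            p = calloc(1, sizeof(*p));
            if (p) {
                p->ready = 1;           /* initialize fully, then... */
                atomic_store_explicit(&the_pool, p,
                                      memory_order_release);
            }                           /* ...publish with release */
        }
        pthread_mutex_unlock(&pool_mutex);
        return p;
    }
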
 
@@ -3323,28 +3166,15 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
        struct tcp_md5sig_pool __percpu *p;
 
        local_bh_disable();
-
-       spin_lock(&tcp_md5sig_pool_lock);
-       p = tcp_md5sig_pool;
-       if (p)
-               tcp_md5sig_users++;
-       spin_unlock(&tcp_md5sig_pool_lock);
-
+       p = ACCESS_ONCE(tcp_md5sig_pool);
        if (p)
-               return this_cpu_ptr(p);
+               return __this_cpu_ptr(p);
 
        local_bh_enable();
        return NULL;
 }
 EXPORT_SYMBOL(tcp_get_md5sig_pool);
 
-void tcp_put_md5sig_pool(void)
-{
-       local_bh_enable();
-       tcp_free_md5sig_pool();
-}
-EXPORT_SYMBOL(tcp_put_md5sig_pool);
-
 int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
                        const struct tcphdr *th)
 {
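
On the read side, the spinlock and user count collapse into a single ACCESS_ONCE() load; the address dependency from the pointer to its contents, paired with the writer's smp_wmb(), guarantees that a reader seeing a non-NULL pool also sees it initialized. The switch from this_cpu_ptr() to __this_cpu_ptr() is safe because local_bh_disable() already disables preemption. The reader-side counterpart of the earlier analogue, reusing its declarations:

    /* One acquire load, no lock: NULL means the pool was never allocated.
     * memory_order_consume would model the kernel's dependency ordering
     * more precisely, but implementations promote it to acquire anyway.
     */
    static struct pool *pool_fast_get(void)
    {
        return atomic_load_explicit(&the_pool, memory_order_acquire);
    }
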
@@ -3408,134 +3238,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
 
 #endif
 
-/* Each Responder maintains up to two secret values concurrently for
- * efficient secret rollover.  Each secret value has 4 states:
- *
- * Generating.  (tcp_secret_generating != tcp_secret_primary)
- *    Generates new Responder-Cookies, but not yet used for primary
- *    verification.  This is a short-term state, typically lasting only
- *    one round trip time (RTT).
- *
- * Primary.  (tcp_secret_generating == tcp_secret_primary)
- *    Used both for generation and primary verification.
- *
- * Retiring.  (tcp_secret_retiring != tcp_secret_secondary)
- *    Used for verification, until the first failure that can be
- *    verified by the newer Generating secret.  At that time, this
- *    cookie's state is changed to Secondary, and the Generating
- *    cookie's state is changed to Primary.  This is a short-term state,
- *    typically lasting only one round trip time (RTT).
- *
- * Secondary.  (tcp_secret_retiring == tcp_secret_secondary)
- *    Used for secondary verification, after primary verification
- *    failures.  This state lasts no more than twice the Maximum Segment
- *    Lifetime (2MSL).  Then, the secret is discarded.
- */
-struct tcp_cookie_secret {
-       /* The secret is divided into two parts.  The digest part is the
-        * equivalent of previously hashing a secret and saving the state,
-        * and serves as an initialization vector (IV).  The message part
-        * serves as the trailing secret.
-        */
-       u32                             secrets[COOKIE_WORKSPACE_WORDS];
-       unsigned long                   expires;
-};
-
-#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
-#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
-#define TCP_SECRET_LIFE (HZ * 600)
-
-static struct tcp_cookie_secret tcp_secret_one;
-static struct tcp_cookie_secret tcp_secret_two;
-
-/* Essentially a circular list, without dynamic allocation. */
-static struct tcp_cookie_secret *tcp_secret_generating;
-static struct tcp_cookie_secret *tcp_secret_primary;
-static struct tcp_cookie_secret *tcp_secret_retiring;
-static struct tcp_cookie_secret *tcp_secret_secondary;
-
-static DEFINE_SPINLOCK(tcp_secret_locker);
-
-/* Select a pseudo-random word in the cookie workspace.
- */
-static inline u32 tcp_cookie_work(const u32 *ws, const int n)
-{
-       return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
-}
-
-/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed.
- * Called in softirq context.
- * Returns: 0 for success.
- */
-int tcp_cookie_generator(u32 *bakery)
-{
-       unsigned long jiffy = jiffies;
-
-       if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
-               spin_lock_bh(&tcp_secret_locker);
-               if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
-                       /* refreshed by another */
-                       memcpy(bakery,
-                              &tcp_secret_generating->secrets[0],
-                              COOKIE_WORKSPACE_WORDS);
-               } else {
-                       /* still needs refreshing */
-                       get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);
-
-                       /* The first time, paranoia assumes that the
-                        * randomization function isn't as strong.  But,
-                        * this secret initialization is delayed until
-                        * the last possible moment (packet arrival).
-                        * Although that time is observable, it is
-                        * unpredictably variable.  Mash in the most
-                        * volatile clock bits available, and expire the
-                        * secret extra quickly.
-                        */
-                       if (unlikely(tcp_secret_primary->expires ==
-                                    tcp_secret_secondary->expires)) {
-                               struct timespec tv;
-
-                               getnstimeofday(&tv);
-                               bakery[COOKIE_DIGEST_WORDS+0] ^=
-                                       (u32)tv.tv_nsec;
-
-                               tcp_secret_secondary->expires = jiffy
-                                       + TCP_SECRET_1MSL
-                                       + (0x0f & tcp_cookie_work(bakery, 0));
-                       } else {
-                               tcp_secret_secondary->expires = jiffy
-                                       + TCP_SECRET_LIFE
-                                       + (0xff & tcp_cookie_work(bakery, 1));
-                               tcp_secret_primary->expires = jiffy
-                                       + TCP_SECRET_2MSL
-                                       + (0x1f & tcp_cookie_work(bakery, 2));
-                       }
-                       memcpy(&tcp_secret_secondary->secrets[0],
-                              bakery, COOKIE_WORKSPACE_WORDS);
-
-                       rcu_assign_pointer(tcp_secret_generating,
-                                          tcp_secret_secondary);
-                       rcu_assign_pointer(tcp_secret_retiring,
-                                          tcp_secret_primary);
-                       /*
-                        * Neither call_rcu() nor synchronize_rcu() needed.
-                        * Retiring data is not freed.  It is replaced after
-                        * further (locked) pointer updates, and a quiet time
-                        * (minimum 1MSL, maximum LIFE - 2MSL).
-                        */
-               }
-               spin_unlock_bh(&tcp_secret_locker);
-       } else {
-               rcu_read_lock_bh();
-               memcpy(bakery,
-                      &rcu_dereference(tcp_secret_generating)->secrets[0],
-                      COOKIE_WORKSPACE_WORDS);
-               rcu_read_unlock_bh();
-       }
-       return 0;
-}
-EXPORT_SYMBOL(tcp_cookie_generator);
-
 void tcp_done(struct sock *sk)
 {
        struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
@@ -3590,7 +3292,6 @@ void __init tcp_init(void)
        unsigned long limit;
        int max_rshare, max_wshare, cnt;
        unsigned int i;
-       unsigned long jiffy = jiffies;
 
        BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
@@ -3666,13 +3367,5 @@ void __init tcp_init(void)
 
        tcp_register_congestion_control(&tcp_reno);
 
-       memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
-       memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
-       tcp_secret_one.expires = jiffy; /* past due */
-       tcp_secret_two.expires = jiffy; /* past due */
-       tcp_secret_generating = &tcp_secret_one;
-       tcp_secret_primary = &tcp_secret_one;
-       tcp_secret_retiring = &tcp_secret_two;
-       tcp_secret_secondary = &tcp_secret_two;
        tcp_tasklet_init();
 }