]> Pileus Git - ~andy/linux/blobdiff - net/netfilter/ipvs/ip_vs_core.c
ipvs: implement passive PMTUD for IPIP packets
[~andy/linux] / net / netfilter / ipvs / ip_vs_core.c
index a54b018c6eea72fc122aa47c6f66de76676c1fa1..58918e20f9d5b038c2181b893b0e0458dbd3a8f2 100644 (file)
@@ -1303,7 +1303,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
        struct ip_vs_conn *cp;
        struct ip_vs_protocol *pp;
        struct ip_vs_proto_data *pd;
-       unsigned int offset, ihl, verdict;
+       unsigned int offset, offset2, ihl, verdict;
+       bool ipip;
 
        *related = 1;
 
@@ -1345,6 +1346,21 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 
        net = skb_net(skb);
 
+       /* Special case for errors for IPIP packets */
+       ipip = false;
+       if (cih->protocol == IPPROTO_IPIP) {
+               if (unlikely(cih->frag_off & htons(IP_OFFSET)))
+                       return NF_ACCEPT;
+               /* Error for our IPIP must arrive at LOCAL_IN */
+               if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
+                       return NF_ACCEPT;
+               offset += cih->ihl * 4;
+               cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+               if (cih == NULL)
+                       return NF_ACCEPT; /* The packet looks wrong, ignore */
+               ipip = true;
+       }
+
        pd = ip_vs_proto_data_get(net, cih->protocol);
        if (!pd)
                return NF_ACCEPT;
@@ -1358,11 +1374,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
        IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
                      "Checking incoming ICMP for");
 
+       offset2 = offset;
        offset += cih->ihl * 4;
 
        ip_vs_fill_iphdr(AF_INET, cih, &ciph);
-       /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
+       /* The embedded headers contain source and dest in reverse order.
+        * For IPIP this is error for request, not for reply.
+        */
+       cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1);
        if (!cp)
                return NF_ACCEPT;
 
@@ -1376,6 +1395,57 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
                goto out;
        }
 
+       if (ipip) {
+               __be32 info = ic->un.gateway;
+
+               /* Update the MTU */
+               if (ic->type == ICMP_DEST_UNREACH &&
+                   ic->code == ICMP_FRAG_NEEDED) {
+                       struct ip_vs_dest *dest = cp->dest;
+                       u32 mtu = ntohs(ic->un.frag.mtu);
+
+                       /* Strip outer IP and ICMP, go to IPIP header */
+                       __skb_pull(skb, ihl + sizeof(_icmph));
+                       offset2 -= ihl + sizeof(_icmph);
+                       skb_reset_network_header(skb);
+                       IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
+                               &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
+                       rcu_read_lock();
+                       ipv4_update_pmtu(skb, dev_net(skb->dev),
+                                        mtu, 0, 0, 0, 0);
+                       rcu_read_unlock();
+                       /* Client uses PMTUD? */
+                       if (!(cih->frag_off & htons(IP_DF)))
+                               goto ignore_ipip;
+                       /* Prefer the resulting PMTU */
+                       if (dest) {
+                               spin_lock(&dest->dst_lock);
+                               if (dest->dst_cache)
+                                       mtu = dst_mtu(dest->dst_cache);
+                               spin_unlock(&dest->dst_lock);
+                       }
+                       if (mtu > 68 + sizeof(struct iphdr))
+                               mtu -= sizeof(struct iphdr);
+                       info = htonl(mtu);
+               }
+               /* Strip outer IP, ICMP and IPIP, go to IP header of
+                * original request.
+                */
+               __skb_pull(skb, offset2);
+               skb_reset_network_header(skb);
+               IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
+                       &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
+                       ic->type, ic->code, ntohl(info));
+               icmp_send(skb, ic->type, ic->code, info);
+               /* ICMP can be shorter but anyways, account it */
+               ip_vs_out_stats(cp, skb);
+
+ignore_ipip:
+               consume_skb(skb);
+               verdict = NF_STOLEN;
+               goto out;
+       }
+
        /* do the statistics and put it back */
        ip_vs_in_stats(cp, skb);
        if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
@@ -1742,7 +1812,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        {
                .hook           = ip_vs_reply4,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET,
+               .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP_PRI_NAT_SRC - 2,
        },
@@ -1752,7 +1822,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        {
                .hook           = ip_vs_remote_request4,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET,
+               .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP_PRI_NAT_SRC - 1,
        },
@@ -1760,7 +1830,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        {
                .hook           = ip_vs_local_reply4,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET,
+               .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_LOCAL_OUT,
                .priority       = NF_IP_PRI_NAT_DST + 1,
        },
@@ -1768,7 +1838,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        {
                .hook           = ip_vs_local_request4,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET,
+               .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_LOCAL_OUT,
                .priority       = NF_IP_PRI_NAT_DST + 2,
        },
@@ -1777,7 +1847,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        {
                .hook           = ip_vs_forward_icmp,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET,
+               .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_FORWARD,
                .priority       = 99,
        },
@@ -1785,7 +1855,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        {
                .hook           = ip_vs_reply4,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET,
+               .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_FORWARD,
                .priority       = 100,
        },
@@ -1794,7 +1864,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        {
                .hook           = ip_vs_reply6,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET6,
+               .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP6_PRI_NAT_SRC - 2,
        },
@@ -1804,7 +1874,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        {
                .hook           = ip_vs_remote_request6,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET6,
+               .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP6_PRI_NAT_SRC - 1,
        },
@@ -1812,7 +1882,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        {
                .hook           = ip_vs_local_reply6,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET,
+               .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_LOCAL_OUT,
                .priority       = NF_IP6_PRI_NAT_DST + 1,
        },
@@ -1820,7 +1890,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        {
                .hook           = ip_vs_local_request6,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET6,
+               .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_LOCAL_OUT,
                .priority       = NF_IP6_PRI_NAT_DST + 2,
        },
@@ -1829,7 +1899,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        {
                .hook           = ip_vs_forward_icmp_v6,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET6,
+               .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_FORWARD,
                .priority       = 99,
        },
@@ -1837,7 +1907,7 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        {
                .hook           = ip_vs_reply6,
                .owner          = THIS_MODULE,
-               .pf             = PF_INET6,
+               .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_FORWARD,
                .priority       = 100,
        },