All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHv2 net-next 0/3] Add UDP tunnel support for ICMP errors in IPVS
@ 2019-05-05 12:14 Julian Anastasov
  2019-05-05 12:14 ` [PATCHv2 net-next 1/3] ipvs: allow rs_table to contain different real server types Julian Anastasov
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Julian Anastasov @ 2019-05-05 12:14 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, Pablo Neira Ayuso, netfilter-devel, Jacky Hu,
	jacky.hu, jason.niesz

This patchset is a followup to the commit that adds UDP/GUE tunnel:
"ipvs: allow tunneling with gue encapsulation".

What we do is put tunnel real servers in the hash table (patch 1),
add a function to look up tunnels (patch 2) and use it to strip the
embedded tunnel headers from ICMP errors (patch 3).

v1->v2:
patch 1: remove extra parentheses
patch 2: remove extra parentheses
patch 3: parse UDP header into ipvs_udp_decap
patch 3: v1 ignores forwarded ICMP errors for UDP, do not do that
patch 3: add comment for fragment check

Julian Anastasov (3):
  ipvs: allow rs_table to contain different real server types
  ipvs: add function to find tunnels
  ipvs: strip udp tunnel headers from icmp errors

 include/net/ip_vs.h             |  6 +++
 net/netfilter/ipvs/ip_vs_core.c | 68 +++++++++++++++++++++++++++++++
 net/netfilter/ipvs/ip_vs_ctl.c  | 72 +++++++++++++++++++++++++++++----
 3 files changed, 138 insertions(+), 8 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCHv2 net-next 1/3] ipvs: allow rs_table to contain different real server types
  2019-05-05 12:14 [PATCHv2 net-next 0/3] Add UDP tunnel support for ICMP errors in IPVS Julian Anastasov
@ 2019-05-05 12:14 ` Julian Anastasov
  2019-05-05 12:14 ` [PATCHv2 net-next 2/3] ipvs: add function to find tunnels Julian Anastasov
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Julian Anastasov @ 2019-05-05 12:14 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, Pablo Neira Ayuso, netfilter-devel, Jacky Hu,
	jacky.hu, jason.niesz

Until now rs_table was used only for NAT real servers.
Change it to also allow TUN real servers of different types,
possibly hashed with a different port key.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
---
 include/net/ip_vs.h            |  3 +++
 net/netfilter/ipvs/ip_vs_ctl.c | 43 +++++++++++++++++++++++++++-------
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 2ac40135b576..9a8ac8997e34 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1497,6 +1497,9 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
 static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; }
 #endif
 
+#define IP_VS_DFWD_METHOD(dest) (atomic_read(&(dest)->conn_flags) & \
+				 IP_VS_CONN_F_FWD_MASK)
+
 /* ip_vs_fwd_tag returns the forwarding tag of the connection */
 #define IP_VS_FWD_METHOD(cp)  (cp->flags & IP_VS_CONN_F_FWD_MASK)
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 328683452229..7f624f4c402b 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -515,15 +515,36 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
 static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
 {
 	unsigned int hash;
+	__be16 port;
 
 	if (dest->in_rs_table)
 		return;
 
+	switch (IP_VS_DFWD_METHOD(dest)) {
+	case IP_VS_CONN_F_MASQ:
+		port = dest->port;
+		break;
+	case IP_VS_CONN_F_TUNNEL:
+		switch (dest->tun_type) {
+		case IP_VS_CONN_F_TUNNEL_TYPE_GUE:
+			port = dest->tun_port;
+			break;
+		case IP_VS_CONN_F_TUNNEL_TYPE_IPIP:
+			port = 0;
+			break;
+		default:
+			return;
+		}
+		break;
+	default:
+		return;
+	}
+
 	/*
 	 *	Hash by proto,addr,port,
 	 *	which are the parameters of the real service.
 	 */
-	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
+	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port);
 
 	hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
 	dest->in_rs_table = 1;
@@ -555,7 +576,8 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
 		if (dest->port == dport &&
 		    dest->af == af &&
 		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
-		    (dest->protocol == protocol || dest->vfwmark)) {
+		    (dest->protocol == protocol || dest->vfwmark) &&
+		    IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
 			/* HIT */
 			return true;
 		}
@@ -585,7 +607,8 @@ struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
 		if (dest->port == dport &&
 		    dest->af == af &&
 		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
-			(dest->protocol == protocol || dest->vfwmark)) {
+		    (dest->protocol == protocol || dest->vfwmark) &&
+		    IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
 			/* HIT */
 			return dest;
 		}
@@ -831,6 +854,13 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 	conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
 	conn_flags |= IP_VS_CONN_F_INACTIVE;
 
+	/* Need to rehash? */
+	if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) !=
+	    IP_VS_DFWD_METHOD(dest) ||
+	    udest->tun_type != dest->tun_type ||
+	    udest->tun_port != dest->tun_port)
+		ip_vs_rs_unhash(dest);
+
 	/* set the tunnel info */
 	dest->tun_type = udest->tun_type;
 	dest->tun_port = udest->tun_port;
@@ -839,16 +869,13 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
 		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
 	} else {
-		/*
-		 *    Put the real service in rs_table if not present.
-		 *    For now only for NAT!
-		 */
-		ip_vs_rs_hash(ipvs, dest);
 		/* FTP-NAT requires conntrack for mangling */
 		if (svc->port == FTPPORT)
 			ip_vs_register_conntrack(svc);
 	}
 	atomic_set(&dest->conn_flags, conn_flags);
+	/* Put the real service in rs_table if not present. */
+	ip_vs_rs_hash(ipvs, dest);
 
 	/* bind the service */
 	old_svc = rcu_dereference_protected(dest->svc, 1);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCHv2 net-next 2/3] ipvs: add function to find tunnels
  2019-05-05 12:14 [PATCHv2 net-next 0/3] Add UDP tunnel support for ICMP errors in IPVS Julian Anastasov
  2019-05-05 12:14 ` [PATCHv2 net-next 1/3] ipvs: allow rs_table to contain different real server types Julian Anastasov
@ 2019-05-05 12:14 ` Julian Anastasov
  2019-05-05 12:14 ` [PATCHv2 net-next 3/3] ipvs: strip udp tunnel headers from icmp errors Julian Anastasov
  2019-05-07 13:47 ` [PATCHv2 net-next 0/3] Add UDP tunnel support for ICMP errors in IPVS Simon Horman
  3 siblings, 0 replies; 6+ messages in thread
From: Julian Anastasov @ 2019-05-05 12:14 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, Pablo Neira Ayuso, netfilter-devel, Jacky Hu,
	jacky.hu, jason.niesz

Add ip_vs_find_tunnel() to match tunnel headers
by family, address and optional port. Use it to
properly find the tunnel real server used in
received ICMP errors.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
---
 include/net/ip_vs.h             |  3 +++
 net/netfilter/ipvs/ip_vs_core.c |  8 ++++++++
 net/netfilter/ipvs/ip_vs_ctl.c  | 29 +++++++++++++++++++++++++++++
 3 files changed, 40 insertions(+)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 9a8ac8997e34..b01a94ebfc0e 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1404,6 +1404,9 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
 struct ip_vs_dest *
 ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
 			const union nf_inet_addr *daddr, __be16 dport);
+struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
+				     const union nf_inet_addr *daddr,
+				     __be16 tun_port);
 
 int ip_vs_use_count_inc(void);
 void ip_vs_use_count_dec(void);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 14457551bcb4..4447ee512b88 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1598,6 +1598,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
 	struct ip_vs_proto_data *pd;
 	unsigned int offset, offset2, ihl, verdict;
 	bool ipip, new_cp = false;
+	union nf_inet_addr *raddr;
 
 	*related = 1;
 
@@ -1636,15 +1637,22 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
 	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
 	if (cih == NULL)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */
+	raddr = (union nf_inet_addr *)&cih->daddr;
 
 	/* Special case for errors for IPIP packets */
 	ipip = false;
 	if (cih->protocol == IPPROTO_IPIP) {
+		struct ip_vs_dest *dest;
+
 		if (unlikely(cih->frag_off & htons(IP_OFFSET)))
 			return NF_ACCEPT;
 		/* Error for our IPIP must arrive at LOCAL_IN */
 		if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
 			return NF_ACCEPT;
+		dest = ip_vs_find_tunnel(ipvs, AF_INET, raddr, 0);
+		/* Only for known tunnel */
+		if (!dest || dest->tun_type != IP_VS_CONN_F_TUNNEL_TYPE_IPIP)
+			return NF_ACCEPT;
 		offset += cih->ihl * 4;
 		cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
 		if (cih == NULL)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 7f624f4c402b..e504aa45fcb9 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -617,6 +617,35 @@ struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
 	return NULL;
 }
 
+/* Find real service record by <af,addr,tun_port>.
+ * In case of multiple records with the same <af,addr,tun_port>, only
+ * the first found record is returned.
+ *
+ * To be called under RCU lock.
+ */
+struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
+				     const union nf_inet_addr *daddr,
+				     __be16 tun_port)
+{
+	struct ip_vs_dest *dest;
+	unsigned int hash;
+
+	/* Check for "full" addressed entries */
+	hash = ip_vs_rs_hashkey(af, daddr, tun_port);
+
+	hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
+		if (dest->tun_port == tun_port &&
+		    dest->af == af &&
+		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
+		    IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) {
+			/* HIT */
+			return dest;
+		}
+	}
+
+	return NULL;
+}
+
 /* Lookup destination by {addr,port} in the given service
  * Called under RCU lock.
  */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCHv2 net-next 3/3] ipvs: strip udp tunnel headers from icmp errors
  2019-05-05 12:14 [PATCHv2 net-next 0/3] Add UDP tunnel support for ICMP errors in IPVS Julian Anastasov
  2019-05-05 12:14 ` [PATCHv2 net-next 1/3] ipvs: allow rs_table to contain different real server types Julian Anastasov
  2019-05-05 12:14 ` [PATCHv2 net-next 2/3] ipvs: add function to find tunnels Julian Anastasov
@ 2019-05-05 12:14 ` Julian Anastasov
  2019-05-07 13:47 ` [PATCHv2 net-next 0/3] Add UDP tunnel support for ICMP errors in IPVS Simon Horman
  3 siblings, 0 replies; 6+ messages in thread
From: Julian Anastasov @ 2019-05-05 12:14 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, Pablo Neira Ayuso, netfilter-devel, Jacky Hu,
	jacky.hu, jason.niesz

Recognize UDP tunnels in received ICMP errors and
properly strip the tunnel headers. GUE is what we
have for now.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
---
 net/netfilter/ipvs/ip_vs_core.c | 60 +++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4447ee512b88..d1d7b2483fd7 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -39,6 +39,7 @@
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <net/icmp.h>                   /* for icmp_send */
+#include <net/gue.h>
 #include <net/route.h>
 #include <net/ip6_checksum.h>
 #include <net/netns/generic.h>		/* net_generic() */
@@ -1579,6 +1580,41 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 	return 1;
 }
 
+/* Check the UDP tunnel and return its header length */
+static int ipvs_udp_decap(struct netns_ipvs *ipvs, struct sk_buff *skb,
+			  unsigned int offset, __u16 af,
+			  const union nf_inet_addr *daddr, __u8 *proto)
+{
+	struct udphdr _udph, *udph;
+	struct ip_vs_dest *dest;
+
+	udph = skb_header_pointer(skb, offset, sizeof(_udph), &_udph);
+	if (!udph)
+		goto unk;
+	offset += sizeof(struct udphdr);
+	dest = ip_vs_find_tunnel(ipvs, af, daddr, udph->dest);
+	if (!dest)
+		goto unk;
+	if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
+		struct guehdr _gueh, *gueh;
+
+		gueh = skb_header_pointer(skb, offset, sizeof(_gueh), &_gueh);
+		if (!gueh)
+			goto unk;
+		if (gueh->control != 0 || gueh->version != 0)
+			goto unk;
+		/* Later we can support also IPPROTO_IPV6 */
+		if (gueh->proto_ctype != IPPROTO_IPIP)
+			goto unk;
+		*proto = gueh->proto_ctype;
+		return sizeof(struct udphdr) + sizeof(struct guehdr) +
+		       (gueh->hlen << 2);
+	}
+
+unk:
+	return 0;
+}
+
 /*
  *	Handle ICMP messages in the outside-to-inside direction (incoming).
  *	Find any that might be relevant, check against existing connections,
@@ -1658,6 +1694,30 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
 		if (cih == NULL)
 			return NF_ACCEPT; /* The packet looks wrong, ignore */
 		ipip = true;
+	} else if (cih->protocol == IPPROTO_UDP &&	/* Can be UDP encap */
+		   /* Error for our tunnel must arrive at LOCAL_IN */
+		   (skb_rtable(skb)->rt_flags & RTCF_LOCAL)) {
+		__u8 iproto;
+		int ulen;
+
+		/* Non-first fragment has no UDP header */
+		if (unlikely(cih->frag_off & htons(IP_OFFSET)))
+			return NF_ACCEPT;
+		offset2 = offset + cih->ihl * 4;
+		ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET, raddr,
+				      &iproto);
+		if (ulen > 0) {
+			/* Skip IP and UDP tunnel headers */
+			offset = offset2 + ulen;
+			/* Now we should be at the original IP header */
+			cih = skb_header_pointer(skb, offset, sizeof(_ciph),
+						 &_ciph);
+			if (cih && cih->version == 4 && cih->ihl >= 5 &&
+			    iproto == IPPROTO_IPIP)
+				ipip = true;
+			else
+				return NF_ACCEPT;
+		}
 	}
 
 	pd = ip_vs_proto_data_get(ipvs, cih->protocol);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCHv2 net-next 0/3] Add UDP tunnel support for ICMP errors in IPVS
  2019-05-05 12:14 [PATCHv2 net-next 0/3] Add UDP tunnel support for ICMP errors in IPVS Julian Anastasov
                   ` (2 preceding siblings ...)
  2019-05-05 12:14 ` [PATCHv2 net-next 3/3] ipvs: strip udp tunnel headers from icmp errors Julian Anastasov
@ 2019-05-07 13:47 ` Simon Horman
  2019-05-31 15:48   ` Pablo Neira Ayuso
  3 siblings, 1 reply; 6+ messages in thread
From: Simon Horman @ 2019-05-07 13:47 UTC (permalink / raw)
  To: Julian Anastasov
  Cc: lvs-devel, Pablo Neira Ayuso, netfilter-devel, Jacky Hu,
	jacky.hu, jason.niesz

On Sun, May 05, 2019 at 03:14:37PM +0300, Julian Anastasov wrote:
> This patchset is a followup to the commit that adds UDP/GUE tunnel:
> "ipvs: allow tunneling with gue encapsulation".
> 
> What we do is to put tunnel real servers in hash table (patch 1),
> add function to lookup tunnels (patch 2) and use it to strip the
> embedded tunnel headers from ICMP errors (patch 3).
> 
> v1->v2:
> patch 1: remove extra parentheses
> patch 2: remove extra parentheses
> patch 3: parse UDP header into ipvs_udp_decap
> patch 3: v1 ignores forwarded ICMP errors for UDP, do not do that
> patch 3: add comment for fragment check
> 
> Julian Anastasov (3):
>   ipvs: allow rs_table to contain different real server types
>   ipvs: add function to find tunnels
>   ipvs: strip udp tunnel headers from icmp errors

Thanks Julian,

this looks good for me.
For all patches:

Signed-off-by: Simon Horman <horms@verge.net.au>

Pablo, could you consider applying these to nf-next when appropriate?


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCHv2 net-next 0/3] Add UDP tunnel support for ICMP errors in IPVS
  2019-05-07 13:47 ` [PATCHv2 net-next 0/3] Add UDP tunnel support for ICMP errors in IPVS Simon Horman
@ 2019-05-31 15:48   ` Pablo Neira Ayuso
  0 siblings, 0 replies; 6+ messages in thread
From: Pablo Neira Ayuso @ 2019-05-31 15:48 UTC (permalink / raw)
  To: Simon Horman
  Cc: Julian Anastasov, lvs-devel, netfilter-devel, Jacky Hu, jacky.hu,
	jason.niesz

On Tue, May 07, 2019 at 03:47:45PM +0200, Simon Horman wrote:
> On Sun, May 05, 2019 at 03:14:37PM +0300, Julian Anastasov wrote:
> > This patchset is a followup to the commit that adds UDP/GUE tunnel:
> > "ipvs: allow tunneling with gue encapsulation".
> > 
> > What we do is to put tunnel real servers in hash table (patch 1),
> > add function to lookup tunnels (patch 2) and use it to strip the
> > embedded tunnel headers from ICMP errors (patch 3).
> > 
> > v1->v2:
> > patch 1: remove extra parentheses
> > patch 2: remove extra parentheses
> > patch 3: parse UDP header into ipvs_udp_decap
> > patch 3: v1 ignores forwarded ICMP errors for UDP, do not do that
> > patch 3: add comment for fragment check
> > 
> > Julian Anastasov (3):
> >   ipvs: allow rs_table to contain different real server types
> >   ipvs: add function to find tunnels
> >   ipvs: strip udp tunnel headers from icmp errors
> 
> Thanks Julian,
> 
> this looks good for me.
> For all patches:
> 
> Signed-off-by: Simon Horman <horms@verge.net.au>
> 
> Pablo, could you consider applying these to nf-next when appropriate?

Series applied, thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-05-31 15:48 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-05 12:14 [PATCHv2 net-next 0/3] Add UDP tunnel support for ICMP errors in IPVS Julian Anastasov
2019-05-05 12:14 ` [PATCHv2 net-next 1/3] ipvs: allow rs_table to contain different real server types Julian Anastasov
2019-05-05 12:14 ` [PATCHv2 net-next 2/3] ipvs: add function to find tunnels Julian Anastasov
2019-05-05 12:14 ` [PATCHv2 net-next 3/3] ipvs: strip udp tunnel headers from icmp errors Julian Anastasov
2019-05-07 13:47 ` [PATCHv2 net-next 0/3] Add UDP tunnel support for ICMP errors in IPVS Simon Horman
2019-05-31 15:48   ` Pablo Neira Ayuso

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.