All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexander Frolkin <avf@eldamar.org.uk>
To: Julian Anastasov <ja@ssi.bg>
Cc: lvs-devel@vger.kernel.org
Subject: Re: [PATCH] Sloppy TCP, SH rebalancing, SHP scheduling
Date: Wed, 12 Jun 2013 15:10:18 +0100	[thread overview]
Message-ID: <20130612141018.GC29327@eldamar.org.uk> (raw)
In-Reply-To: <alpine.LFD.2.00.1306112240280.1709@ja.ssi.bg>

Hi,

> > I just don't understand why rejecting a client connection when there are
> > servers available is desirable behaviour.
> 	The problem is that every move leads to problems:
> - add/remove destination => mapping is changed for all dests
> - set weight to 0 and allow fallback => mapping is changed for
> 	two connections from same IP

Fair enough, although I would guess two connections from the same IP
going to different servers wouldn't be an issue in many cases.

> - persistence implemented with SH => fallback is risky. Usually,
> we use expire_quiescent_template for such cases when persistence
> is used.

Can you elaborate on what you mean by "risky" here?

> - same mapping for many directors => fallback is desired when
> config is same on all directors and persistence behaviour is
> not desired.

Indeed.

> Not sure if we can apply the expire_quiescent_template flag to
> the SH scheduler to control fallback.

But then it's controlled by a sysctl, not per virtual server, which is
something we didn't want, I believe.

Are you happy for me to go ahead and add the per-service scheduler
flags (IP_VS_SVC_F_SCHED1, etc.), like you suggested previously?

At the moment, the patch looks like this (pending a decision on how to
enable the features):

diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 0df269d..abd8ed6 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -48,6 +48,10 @@
 
 #include <net/ip_vs.h>
 
+#include <net/tcp.h>
+#include <linux/udp.h>
+#include <linux/sctp.h>
+
 
 /*
  *      IPVS SH bucket
@@ -74,7 +78,9 @@ struct ip_vs_sh_state {
 /*
  *	Returns hash value for IPVS SH entry
  */
-static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr)
+static inline unsigned int ip_vs_sh_hashkey(int af,
+	const union nf_inet_addr *addr, __be16 port,
+	unsigned int offset)
 {
 	__be32 addr_fold = addr->ip;
 
@@ -83,7 +89,8 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
 		addr_fold = addr->ip6[0]^addr->ip6[1]^
 			    addr->ip6[2]^addr->ip6[3];
 #endif
-	return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK;
+	return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) &
+		IP_VS_SH_TAB_MASK;
 }
 
 
@@ -91,9 +98,11 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
  *      Get ip_vs_dest associated with supplied parameters.
  */
 static inline struct ip_vs_dest *
-ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr)
+ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr,
+	__be16 port, unsigned int offset)
 {
-	return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest);
+	return rcu_dereference(
+		s->buckets[ip_vs_sh_hashkey(af, addr, port, offset)].dest);
 }
 
 
@@ -232,17 +241,43 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	struct ip_vs_dest *dest;
 	struct ip_vs_sh_state *s;
 	struct ip_vs_iphdr iph;
+	unsigned int offset;
+	unsigned int found;
 
 	ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
 
 	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
 
+	/* XXX if L4 hash */
+	if (0)
+		port = ip_vs_sh_get_port(svc, skb, iph);
+	else
+		port = 0;
+
 	s = (struct ip_vs_sh_state *) svc->sched_data;
-	dest = ip_vs_sh_get(svc->af, s, &iph.saddr);
-	if (!dest
-	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
-	    || atomic_read(&dest->weight) <= 0
-	    || is_overloaded(dest)) {
+	/* XXX if fallback */
+	if (0) {
+		found = 0;
+		for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
+			dest = ip_vs_sh_get(svc->af, s, &iph.saddr,
+				port, offset);
+			if (!is_available(dest)) {
+				IP_VS_DBG_BUF(6, "SH: selected unavailable"
+					"server %s:%d, retrying with offset"
+					"%d\n",
+					IP_VS_DBG_ADDR(svc->af, &dest->addr),
+					ntohs(dest->port),
+					offset);
+			} else {
+				found = 1;
+				break;
+			}
+		}
+	} else {
+		dest = ip_vs_sh_get(svc->af, s, &iph.saddr, port, 0);
+		found = 1;
+	}
+	if (!found || !is_available(dest)) {
 		ip_vs_scheduler_err(svc, "no destination available");
 		return NULL;
 	}
@@ -255,6 +290,50 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	return dest;
 }
 
+/*
+ *	Helper function to determine if server is available.
+ *	Returns non-zero when dest is non-NULL, has IP_VS_DEST_F_AVAILABLE
+ *	set, has a positive weight and is not overloaded; 0 otherwise.
+ */
+static inline int
+is_available(struct ip_vs_dest *dest)
+{
+	return dest && (dest->flags & IP_VS_DEST_F_AVAILABLE) &&
+		atomic_read(&dest->weight) > 0 && !is_overloaded(dest);
+}
+
+/*
+ *	Helper function to get port number
+ *
+ *	Returns the packet's source port in network byte order, or 0 when
+ *	svc->protocol is not TCP/UDP/SCTP or the transport header cannot be
+ *	read (skb_header_pointer() returned NULL).
+ */
+static inline __be16
+ip_vs_sh_get_port(struct ip_vs_service *svc, const struct sk_buff *skb,
+	struct ip_vs_iphdr iph)
+{
+	struct tcphdr _tcph, *th;
+	struct udphdr _udph, *uh;
+	sctp_sctphdr_t _sctph, *sh;
+
+	switch (svc->protocol) {
+	case IPPROTO_TCP:
+		th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph);
+		return th ? th->source : 0;
+	case IPPROTO_UDP:
+		uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph);
+		return uh ? uh->source : 0;
+	case IPPROTO_SCTP:
+		sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph);
+		return sh ? sh->source : 0;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 
 /*
  *      IPVS SH Scheduler structure


Alex


  reply	other threads:[~2013-06-12 14:10 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-05-24 12:09 [PATCH] Sloppy TCP, SH rebalancing, SHP scheduling Alexander Frolkin
2013-05-24 15:05 ` Julian Anastasov
2013-05-24 15:14   ` Alexander Frolkin
2013-05-24 16:18     ` Aleksey Chudov
2013-05-27 21:31       ` Julian Anastasov
2013-05-28 13:41         ` Aleksey Chudov
2013-05-30  6:37           ` Julian Anastasov
2013-06-07  7:53             ` Alexander Frolkin
2013-06-19  9:03           ` Julian Anastasov
2013-06-19 19:25             ` Julian Anastasov
2013-06-20 17:02               ` Aleksey Chudov
2013-06-20 20:09                 ` Julian Anastasov
2013-06-19 20:44             ` Aleksey Chudov
2013-06-22 11:20             ` [PATCH] ipvs: add sync_persist_mode flag Aleksey Chudov
2013-06-22 12:43               ` Julian Anastasov
2013-06-22 21:11                 ` Aleksey Chudov
2013-06-23  8:34                   ` Julian Anastasov
2013-06-24 14:37                     ` Aleksey Chudov
2013-06-24 19:57                       ` Julian Anastasov
2013-05-27 21:11     ` [PATCH] Sloppy TCP, SH rebalancing, SHP scheduling Julian Anastasov
2013-06-07  8:12       ` Alexander Frolkin
2013-06-10 19:31         ` Julian Anastasov
2013-06-11  8:38           ` Alexander Frolkin
2013-06-11 19:57             ` Julian Anastasov
2013-06-12 14:10               ` Alexander Frolkin [this message]
2013-06-12 20:47                 ` Julian Anastasov
2013-06-13  8:38                   ` Alexander Frolkin
2013-06-13 12:56                   ` Alexander Frolkin
2013-06-13 19:50                     ` Julian Anastasov
2013-06-13 14:18                   ` Alexander Frolkin
2013-06-13 20:31                     ` Julian Anastasov
2013-06-14 10:22                       ` Alexander Frolkin
2013-06-16  6:52                         ` Julian Anastasov
2013-06-17  8:32                           ` Alexander Frolkin
2013-06-17  9:00                             ` Julian Anastasov
2013-06-17  9:04                             ` Julian Anastasov
2013-06-17 11:11                               ` Alexander Frolkin
2013-06-17 20:05                                 ` Julian Anastasov
2013-06-18  9:30                                   ` Alexander Frolkin
2013-06-18 20:52                                     ` Julian Anastasov
2013-06-14 11:47                       ` Alexander Frolkin
2013-06-16  8:30                         ` Julian Anastasov
2013-06-17 10:35                           ` Alexander Frolkin
2013-06-17 19:48                             ` Julian Anastasov
2013-06-18  9:08                               ` Alexander Frolkin
2013-06-18 20:41                                 ` Julian Anastasov
2013-06-10 15:12       ` Alexander Frolkin
2013-06-10 16:03         ` Alexander Frolkin
2013-06-10 20:52         ` Julian Anastasov
2013-06-11 12:38           ` Alexander Frolkin
2013-06-11 20:13             ` Julian Anastasov
2013-06-12 10:49               ` Alexander Frolkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130612141018.GC29327@eldamar.org.uk \
    --to=avf@eldamar.org.uk \
    --cc=ja@ssi.bg \
    --cc=lvs-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.