All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexander Frolkin <avf@eldamar.org.uk>
To: Julian Anastasov <ja@ssi.bg>
Cc: lvs-devel@vger.kernel.org
Subject: Re: [PATCH] Sloppy TCP, SH rebalancing, SHP scheduling
Date: Tue, 18 Jun 2013 10:30:08 +0100	[thread overview]
Message-ID: <20130618093008.GA17536@eldamar.org.uk> (raw)
In-Reply-To: <alpine.LFD.2.00.1306172257060.1564@ja.ssi.bg>

Hi,

Latest version of SH patch:

diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h
index a245377..2945822 100644
--- a/include/uapi/linux/ip_vs.h
+++ b/include/uapi/linux/ip_vs.h
@@ -20,6 +20,12 @@
 #define IP_VS_SVC_F_PERSISTENT	0x0001		/* persistent port */
 #define IP_VS_SVC_F_HASHED	0x0002		/* hashed entry */
 #define IP_VS_SVC_F_ONEPACKET	0x0004		/* one-packet scheduling */
+#define IP_VS_SVC_F_SCHED1	0x0008		/* scheduler flag 1 */
+#define IP_VS_SVC_F_SCHED2	0x0010		/* scheduler flag 2 */
+#define IP_VS_SVC_F_SCHED3	0x0020		/* scheduler flag 3 */
+
+#define IP_VS_SVC_F_SCHED_SH_FALLBACK	IP_VS_SVC_F_SCHED1 /* SH fallback */
+#define IP_VS_SVC_F_SCHED_SH_PORT	IP_VS_SVC_F_SCHED2 /* SH use port */
 
 /*
  *      Destination Server Flags
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index e0130f8..caa3eee 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -48,6 +48,10 @@
 
 #include <net/ip_vs.h>
 
+#include <net/tcp.h>
+#include <linux/udp.h>
+#include <linux/sctp.h>
+
 
 /*
  *      IPVS SH bucket
@@ -71,10 +75,19 @@ struct ip_vs_sh_state {
 	struct rcu_head			rcu_head;
 };
 
+/* Helper function to determine if server is unavailable */
+static inline bool is_unavailable(struct ip_vs_dest *dest)
+{
+	return atomic_read(&dest->weight) <= 0 ||
+	       dest->flags & IP_VS_DEST_F_OVERLOAD;
+}
+
 /*
  *	Returns hash value for IPVS SH entry
  */
-static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr)
+static inline unsigned int
+ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
+		 __be16 port, unsigned int offset)
 {
 	__be32 addr_fold = addr->ip;
 
@@ -83,7 +96,8 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
 		addr_fold = addr->ip6[0]^addr->ip6[1]^
 			    addr->ip6[2]^addr->ip6[3];
 #endif
-	return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK;
+	return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) &
+		IP_VS_SH_TAB_MASK;
 }
 
 
@@ -91,12 +105,43 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
  *      Get ip_vs_dest associated with supplied parameters.
  */
 static inline struct ip_vs_dest *
-ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr)
+ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
+	     const union nf_inet_addr *addr, __be16 port)
 {
-	return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest);
+	unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
+	struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest);
+
+	return (!dest || is_unavailable(dest)) ? NULL : dest;
 }
 
 
+/* As ip_vs_sh_get, but with fallback if selected server is unavailable
+ */
+static inline struct ip_vs_dest *
+ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
+		      const union nf_inet_addr *addr, __be16 port)
+{
+	unsigned int offset;
+	unsigned int hash;
+	struct ip_vs_dest *dest;
+
+	for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
+		hash = ip_vs_sh_hashkey(svc->af, addr, port, offset);
+		dest = rcu_dereference(s->buckets[hash].dest);
+		if (!dest)
+			break;
+		if (is_unavailable(dest))
+			IP_VS_DBG_BUF(6, "SH: selected unavailable server "
+				      "%s:%d (offset %d)",
+				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
+				      ntohs(dest->port), offset);
+		else
+			return dest;
+	}
+
+	return NULL;
+}
+
 /*
  *      Assign all the hash buckets of the specified table with the service.
  */
@@ -213,13 +258,33 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
 }
 
 
-/*
- *      If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
- *      consider that the server is overloaded here.
- */
-static inline int is_overloaded(struct ip_vs_dest *dest)
+/* Helper function to get port number */
+static inline __be16
+ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
 {
-	return dest->flags & IP_VS_DEST_F_OVERLOAD;
+	__be16 port;
+	struct tcphdr _tcph, *th;
+	struct udphdr _udph, *uh;
+	sctp_sctphdr_t _sctph, *sh;
+
+	switch (iph->protocol) {
+	case IPPROTO_TCP:
+		th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+		port = th->source;
+		break;
+	case IPPROTO_UDP:
+		uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
+		port = uh->source;
+		break;
+	case IPPROTO_SCTP:
+		sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
+		port = sh->source;
+		break;
+	default:
+		port = 0;
+	}
+
+	return port;
 }
 
 
@@ -232,15 +297,21 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_sh_state *s;
+	__be16 port = 0;
 
 	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
 
+	if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT)
+		port = ip_vs_sh_get_port(skb, iph);
+
 	s = (struct ip_vs_sh_state *) svc->sched_data;
-	dest = ip_vs_sh_get(svc->af, s, &iph->saddr);
-	if (!dest
-	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
-	    || atomic_read(&dest->weight) <= 0
-	    || is_overloaded(dest)) {
+
+	if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
+		dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port);
+	else
+		dest = ip_vs_sh_get(svc, s, &iph->saddr, port);
+
+	if (!dest) {
 		ip_vs_scheduler_err(svc, "no destination available");
 		return NULL;
 	}


Alex


  reply	other threads:[~2013-06-18  9:30 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-05-24 12:09 [PATCH] Sloppy TCP, SH rebalancing, SHP scheduling Alexander Frolkin
2013-05-24 15:05 ` Julian Anastasov
2013-05-24 15:14   ` Alexander Frolkin
2013-05-24 16:18     ` Aleksey Chudov
2013-05-27 21:31       ` Julian Anastasov
2013-05-28 13:41         ` Aleksey Chudov
2013-05-30  6:37           ` Julian Anastasov
2013-06-07  7:53             ` Alexander Frolkin
2013-06-19  9:03           ` Julian Anastasov
2013-06-19 19:25             ` Julian Anastasov
2013-06-20 17:02               ` Aleksey Chudov
2013-06-20 20:09                 ` Julian Anastasov
2013-06-19 20:44             ` Aleksey Chudov
2013-06-22 11:20             ` [PATCH] ipvs: add sync_persist_mode flag Aleksey Chudov
2013-06-22 12:43               ` Julian Anastasov
2013-06-22 21:11                 ` Aleksey Chudov
2013-06-23  8:34                   ` Julian Anastasov
2013-06-24 14:37                     ` Aleksey Chudov
2013-06-24 19:57                       ` Julian Anastasov
2013-05-27 21:11     ` [PATCH] Sloppy TCP, SH rebalancing, SHP scheduling Julian Anastasov
2013-06-07  8:12       ` Alexander Frolkin
2013-06-10 19:31         ` Julian Anastasov
2013-06-11  8:38           ` Alexander Frolkin
2013-06-11 19:57             ` Julian Anastasov
2013-06-12 14:10               ` Alexander Frolkin
2013-06-12 20:47                 ` Julian Anastasov
2013-06-13  8:38                   ` Alexander Frolkin
2013-06-13 12:56                   ` Alexander Frolkin
2013-06-13 19:50                     ` Julian Anastasov
2013-06-13 14:18                   ` Alexander Frolkin
2013-06-13 20:31                     ` Julian Anastasov
2013-06-14 10:22                       ` Alexander Frolkin
2013-06-16  6:52                         ` Julian Anastasov
2013-06-17  8:32                           ` Alexander Frolkin
2013-06-17  9:00                             ` Julian Anastasov
2013-06-17  9:04                             ` Julian Anastasov
2013-06-17 11:11                               ` Alexander Frolkin
2013-06-17 20:05                                 ` Julian Anastasov
2013-06-18  9:30                                   ` Alexander Frolkin [this message]
2013-06-18 20:52                                     ` Julian Anastasov
2013-06-14 11:47                       ` Alexander Frolkin
2013-06-16  8:30                         ` Julian Anastasov
2013-06-17 10:35                           ` Alexander Frolkin
2013-06-17 19:48                             ` Julian Anastasov
2013-06-18  9:08                               ` Alexander Frolkin
2013-06-18 20:41                                 ` Julian Anastasov
2013-06-10 15:12       ` Alexander Frolkin
2013-06-10 16:03         ` Alexander Frolkin
2013-06-10 20:52         ` Julian Anastasov
2013-06-11 12:38           ` Alexander Frolkin
2013-06-11 20:13             ` Julian Anastasov
2013-06-12 10:49               ` Alexander Frolkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130618093008.GA17536@eldamar.org.uk \
    --to=avf@eldamar.org.uk \
    --cc=ja@ssi.bg \
    --cc=lvs-devel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.