From: Alexander Frolkin <avf@eldamar.org.uk>
To: Julian Anastasov <ja@ssi.bg>
Cc: lvs-devel@vger.kernel.org
Subject: Re: [PATCH] Sloppy TCP, SH rebalancing, SHP scheduling
Date: Mon, 17 Jun 2013 12:11:13 +0100 [thread overview]
Message-ID: <20130617111113.GC13101@eldamar.org.uk> (raw)
In-Reply-To: <alpine.LFD.2.00.1306171202330.10428@ja.ssi.bg>
Hi,
> Wait, it seems is_unavailable() can succeed for other
> reasons, so the 'if (!dest) ... break;' check should come first,
> maybe outside of is_unavailable().
I've moved things around a bit:
diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h
index a245377..2945822 100644
--- a/include/uapi/linux/ip_vs.h
+++ b/include/uapi/linux/ip_vs.h
@@ -20,6 +20,12 @@
#define IP_VS_SVC_F_PERSISTENT 0x0001 /* persistent port */
#define IP_VS_SVC_F_HASHED 0x0002 /* hashed entry */
#define IP_VS_SVC_F_ONEPACKET 0x0004 /* one-packet scheduling */
+#define IP_VS_SVC_F_SCHED1 0x0008 /* scheduler flag 1 */
+#define IP_VS_SVC_F_SCHED2 0x0010 /* scheduler flag 2 */
+#define IP_VS_SVC_F_SCHED3 0x0020 /* scheduler flag 3 */
+
+#define IP_VS_SVC_F_SCHED_SH_FALLBACK IP_VS_SVC_F_SCHED1 /* SH fallback */
+#define IP_VS_SVC_F_SCHED_SH_PORT IP_VS_SVC_F_SCHED2 /* SH use port */
/*
* Destination Server Flags
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index e0130f8..2f52129 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -48,6 +48,10 @@
#include <net/ip_vs.h>
+#include <net/tcp.h>
+#include <linux/udp.h>
+#include <linux/sctp.h>
+
/*
* IPVS SH bucket
@@ -71,10 +75,20 @@ struct ip_vs_sh_state {
struct rcu_head rcu_head;
};
+/* Report whether a real server has to be skipped by the scheduler:
+ * true when its weight is zero or negative, or when the
+ * IP_VS_DEST_F_OVERLOAD flag is set on it.
+ */
+static inline bool is_unavailable(struct ip_vs_dest *dest)
+{
+	if (atomic_read(&dest->weight) <= 0)
+		return true;
+
+	return (dest->flags & IP_VS_DEST_F_OVERLOAD) != 0;
+}
+
/*
* Returns hash value for IPVS SH entry
*/
-static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr)
+static inline unsigned int
+ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
+ __be16 port, unsigned int offset)
{
__be32 addr_fold = addr->ip;
@@ -83,7 +97,8 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
#endif
- return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK;
+ return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) &
+ IP_VS_SH_TAB_MASK;
}
@@ -91,12 +106,43 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
* Get ip_vs_dest associated with supplied parameters.
*/
static inline struct ip_vs_dest *
-ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr)
+ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
+	     const union nf_inet_addr *addr, __be16 port)
{
- return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest);
+	unsigned int bucket = ip_vs_sh_hashkey(svc->af, addr, port, 0);
+	struct ip_vs_dest *dest;
+
+	/* Only the bucket for offset 0 is consulted; an empty bucket or
+	 * an unavailable server both yield NULL.
+	 */
+	dest = rcu_dereference(s->buckets[bucket].dest);
+	if (dest && !is_unavailable(dest))
+		return dest;
+
+	return NULL;
}
+/* As ip_vs_sh_get, but with fallback if selected server is unavailable
+ *
+ * Feeds offsets 0 .. IP_VS_SH_TAB_SIZE - 1 into ip_vs_sh_hashkey() and
+ * returns the first available server found.  Returns NULL as soon as an
+ * empty bucket is hit, or when every probe lands on an unavailable
+ * server.
+ */
+static inline struct ip_vs_dest *
+ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
+		      const union nf_inet_addr *addr, __be16 port)
+{
+	unsigned int probe;
+
+	for (probe = 0; probe < IP_VS_SH_TAB_SIZE; probe++) {
+		unsigned int bucket = ip_vs_sh_hashkey(svc->af, addr, port,
+						       probe);
+		struct ip_vs_dest *dest =
+			rcu_dereference(s->buckets[bucket].dest);
+
+		/* An empty bucket ends the search */
+		if (!dest)
+			return NULL;
+
+		if (!is_unavailable(dest))
+			return dest;
+
+		IP_VS_DBG_BUF(6, "SH: selected unavailable server "
+			      "%s:%d (offset %d)",
+			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
+			      ntohs(dest->port), probe);
+	}
+
+	return NULL;
+}
+
/*
* Assign all the hash buckets of the specified table with the service.
*/
@@ -213,13 +259,34 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
}
-/*
- * If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
- * consider that the server is overloaded here.
+/* Helper function to get port number
+ *
+ * Returns the source port (network byte order) read from the TCP, UDP
+ * or SCTP header located iph->len bytes into the skb.  Returns 0 for
+ * any other protocol, or when the transport header cannot be read.
*/
-static inline int is_overloaded(struct ip_vs_dest *dest)
+static inline __be16
+ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
{
- return dest->flags & IP_VS_DEST_F_OVERLOAD;
+	__be16 port;
+	struct tcphdr _tcph, *th;
+	struct udphdr _udph, *uh;
+	sctp_sctphdr_t _sctph, *sh;
+
+	switch (iph->protocol) {
+	case IPPROTO_TCP:
+		th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+		/* skb_header_pointer() yields NULL when the header is not
+		 * fully present (e.g. truncated packet); do not dereference.
+		 */
+		if (!th)
+			return 0;
+		port = th->source;
+		break;
+	case IPPROTO_UDP:
+		uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
+		if (!uh)
+			return 0;
+		port = uh->source;
+		break;
+	case IPPROTO_SCTP:
+		sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
+		if (!sh)
+			return 0;
+		port = sh->source;
+		break;
+	default:
+		port = 0;
+	}
+
+	return port;
}
@@ -232,15 +299,21 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
{
struct ip_vs_dest *dest;
struct ip_vs_sh_state *s;
+ __be16 port = 0;
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
+ if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT)
+ port = ip_vs_sh_get_port(skb, &iph);
+
s = (struct ip_vs_sh_state *) svc->sched_data;
- dest = ip_vs_sh_get(svc->af, s, &iph->saddr);
- if (!dest
- || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
- || atomic_read(&dest->weight) <= 0
- || is_overloaded(dest)) {
+
+ if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
+ dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port);
+ else
+ dest = ip_vs_sh_get(svc, s, &iph->saddr, port);
+
+ if (!dest) {
ip_vs_scheduler_err(svc, "no destination available");
return NULL;
}
Alex
next prev parent reply other threads:[~2013-06-17 11:11 UTC|newest]
Thread overview: 52+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-05-24 12:09 [PATCH] Sloppy TCP, SH rebalancing, SHP scheduling Alexander Frolkin
2013-05-24 15:05 ` Julian Anastasov
2013-05-24 15:14 ` Alexander Frolkin
2013-05-24 16:18 ` Aleksey Chudov
2013-05-27 21:31 ` Julian Anastasov
2013-05-28 13:41 ` Aleksey Chudov
2013-05-30 6:37 ` Julian Anastasov
2013-06-07 7:53 ` Alexander Frolkin
2013-06-19 9:03 ` Julian Anastasov
2013-06-19 19:25 ` Julian Anastasov
2013-06-20 17:02 ` Aleksey Chudov
2013-06-20 20:09 ` Julian Anastasov
2013-06-19 20:44 ` Aleksey Chudov
2013-06-22 11:20 ` [PATCH] ipvs: add sync_persist_mode flag Aleksey Chudov
2013-06-22 12:43 ` Julian Anastasov
2013-06-22 21:11 ` Aleksey Chudov
2013-06-23 8:34 ` Julian Anastasov
2013-06-24 14:37 ` Aleksey Chudov
2013-06-24 19:57 ` Julian Anastasov
2013-05-27 21:11 ` [PATCH] Sloppy TCP, SH rebalancing, SHP scheduling Julian Anastasov
2013-06-07 8:12 ` Alexander Frolkin
2013-06-10 19:31 ` Julian Anastasov
2013-06-11 8:38 ` Alexander Frolkin
2013-06-11 19:57 ` Julian Anastasov
2013-06-12 14:10 ` Alexander Frolkin
2013-06-12 20:47 ` Julian Anastasov
2013-06-13 8:38 ` Alexander Frolkin
2013-06-13 12:56 ` Alexander Frolkin
2013-06-13 19:50 ` Julian Anastasov
2013-06-13 14:18 ` Alexander Frolkin
2013-06-13 20:31 ` Julian Anastasov
2013-06-14 10:22 ` Alexander Frolkin
2013-06-16 6:52 ` Julian Anastasov
2013-06-17 8:32 ` Alexander Frolkin
2013-06-17 9:00 ` Julian Anastasov
2013-06-17 9:04 ` Julian Anastasov
2013-06-17 11:11 ` Alexander Frolkin [this message]
2013-06-17 20:05 ` Julian Anastasov
2013-06-18 9:30 ` Alexander Frolkin
2013-06-18 20:52 ` Julian Anastasov
2013-06-14 11:47 ` Alexander Frolkin
2013-06-16 8:30 ` Julian Anastasov
2013-06-17 10:35 ` Alexander Frolkin
2013-06-17 19:48 ` Julian Anastasov
2013-06-18 9:08 ` Alexander Frolkin
2013-06-18 20:41 ` Julian Anastasov
2013-06-10 15:12 ` Alexander Frolkin
2013-06-10 16:03 ` Alexander Frolkin
2013-06-10 20:52 ` Julian Anastasov
2013-06-11 12:38 ` Alexander Frolkin
2013-06-11 20:13 ` Julian Anastasov
2013-06-12 10:49 ` Alexander Frolkin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130617111113.GC13101@eldamar.org.uk \
--to=avf@eldamar.org.uk \
--cc=ja@ssi.bg \
--cc=lvs-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.