From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alexander Frolkin Subject: Re: [PATCH] Sloppy TCP, SH rebalancing, SHP scheduling Date: Tue, 18 Jun 2013 10:08:10 +0100 Message-ID: <20130618090810.GB13596@eldamar.org.uk> References: <20130611083806.GA25531@eldamar.org.uk> <20130612141018.GC29327@eldamar.org.uk> <20130613141804.GB31356@eldamar.org.uk> <20130614114711.GB31800@eldamar.org.uk> <20130617103532.GB13101@eldamar.org.uk> Mime-Version: 1.0 Return-path: Content-Disposition: inline In-Reply-To: Sender: lvs-devel-owner@vger.kernel.org List-ID: Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit To: Julian Anastasov Cc: lvs-devel@vger.kernel.org Hi, Latest version of ipvsadm patch: diff --git a/ipvsadm.8 b/ipvsadm.8 index 001ae74..9a9e9b3 100644 --- a/ipvsadm.8 +++ b/ipvsadm.8 @@ -37,7 +37,7 @@ ipvsadm \- Linux Virtual Server administration .SH SYNOPSIS .B ipvsadm -A|E -t|u|f \fIservice-address\fP [-s \fIscheduler\fP] .ti 15 -.B [-p [\fItimeout\fP]] [-M \fInetmask\fP] +.B [-p [\fItimeout\fP]] [-M \fInetmask\fP] [-b \fIsched-flags\fP] .br .B ipvsadm -D -t|u|f \fIservice-address\fP .br @@ -248,6 +248,9 @@ addresses. .sp \fBsh\fR - Source Hashing: assigns jobs to servers through looking up a statically assigned hash table by their source IP addresses. +This scheduler has two flags: sh-fallback, which enables fallback to a +different server if the selected server was unavailable, and sh-port, +which adds the source port number to the hash computation. .sp \fBsed\fR - Shortest Expected Delay: assigns an incoming job to the server with the shortest expected delay. The expected delay that the @@ -286,6 +289,11 @@ resolve problems with non-persistent cache clusters on the client side. IPv6 netmasks should be specified as a prefix length between 1 and 128. The default prefix length is 128. .TP +.B -b, --sched-flags \fIsched-flags\fP +Set scheduler flags for this virtual server. \fIsched-flags\fP is a +comma-separated list of flags. See the scheduler descriptions for +valid scheduler flags. +.TP .B -r, --real-server \fIserver-address\fP Real server that an associated request for service may be assigned to. The \fIserver-address\fP is the \fIhost\fP address of a real server, diff --git a/ipvsadm.c b/ipvsadm.c index 0197515..5b8c036 100644 --- a/ipvsadm.c +++ b/ipvsadm.c @@ -182,7 +182,8 @@ static const char* cmdnames[] = { #define OPT_EXACT 0x100000 #define OPT_ONEPACKET 0x200000 #define OPT_PERSISTENCE_ENGINE 0x400000 -#define NUMBER_OF_OPT 23 +#define OPT_SCHED_FLAGS 0x800000 +#define NUMBER_OF_OPT 24 static const char* optnames[] = { "numeric", @@ -208,6 +209,7 @@ static const char* optnames[] = { "exact", "ops", "pe", + "sched-flags", }; /* @@ -220,21 +222,21 @@ static const char* optnames[] = { */ static const char commands_v_options[NUMBER_OF_CMD][NUMBER_OF_OPT] = { - /* -n -c svc -s -p -M -r fwd -w -x -y -mc tot dmn -st -rt thr -pc srt sid -ex ops -pe */ -/*ADD*/ {'x', 'x', '+', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', ' '}, -/*EDIT*/ {'x', 'x', '+', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', ' '}, -/*DEL*/ {'x', 'x', '+', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, -/*FLUSH*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, -/*LIST*/ {' ', '1', '1', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', '1', '1', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'x', 'x'}, -/*ADDSRV*/ {'x', 'x', '+', 'x', 'x', 'x', '+', ' ', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, -/*DELSRV*/ {'x', 'x', '+', 'x', 'x', 'x', '+', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, -/*EDITSRV*/ {'x', 'x', '+', 'x', 'x', 'x', '+', ' ', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, -/*TIMEOUT*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, -/*STARTD*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', 'x', 'x', 'x'}, -/*STOPD*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', 'x', 'x', 'x'}, -/*RESTORE*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, -/*SAVE*/ {' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, -/*ZERO*/ {'x', 'x', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, + /* -n -c svc -s -p -M -r fwd -w -x -y -mc tot dmn -st -rt thr -pc srt sid -ex ops -pe -b */ +/*ADD*/ {'x', 'x', '+', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', ' ', ' '}, +/*EDIT*/ {'x', 'x', '+', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', ' ', ' '}, +/*DEL*/ {'x', 'x', '+', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, +/*FLUSH*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, +/*LIST*/ {' ', '1', '1', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', '1', '1', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'x', 'x', 'x'}, +/*ADDSRV*/ {'x', 'x', '+', 'x', 'x', 'x', '+', ' ', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, +/*DELSRV*/ {'x', 'x', '+', 'x', 'x', 'x', '+', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, +/*EDITSRV*/ {'x', 'x', '+', 'x', 'x', 'x', '+', ' ', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, +/*TIMEOUT*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, +/*STARTD*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', 'x', 'x', 'x', 'x'}, +/*STOPD*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', 'x', 'x', 'x', 'x'}, +/*RESTORE*/ {'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, +/*SAVE*/ {' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, +/*ZERO*/ {'x', 'x', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'}, }; /* printing format flags */ @@ -302,6 +304,7 @@ static int parse_service(char *buf, ipvs_service_t *svc); static int parse_netmask(char *buf, u_int32_t *addr); static int parse_timeout(char *buf, int min, int max); static unsigned int parse_fwmark(char *buf); +static unsigned int parse_sched_flags(const char *sched, char *optarg); /* check the options based on the commands_v_options table */ static void generic_opt_check(int command, int options); @@ -363,7 +366,7 @@ parse_options(int argc, char **argv, struct ipvs_command_entry *ce, { int c, parse; poptContext context; - char *optarg=NULL; + char *optarg=NULL, sched_flags_arg[128]; struct poptOption options_table[] = { { "add-service", 'A', POPT_ARG_NONE, NULL, 'A', NULL, NULL }, { "edit-service", 'E', POPT_ARG_NONE, NULL, 'E', NULL, NULL }, @@ -426,9 +429,12 @@ parse_options(int argc, char **argv, struct ipvs_command_entry *ce, { "ops", 'o', POPT_ARG_NONE, NULL, 'o', NULL, NULL }, { "pe", '\0', POPT_ARG_STRING, &optarg, TAG_PERSISTENCE_ENGINE, NULL, NULL }, + { "sched-flags", 'b', POPT_ARG_STRING, &optarg, 'b', NULL, NULL }, { NULL, 0, 0, NULL, 0, NULL, NULL } }; + sched_flags_arg[0] = '\0'; + context = poptGetContext("ipvsadm", argc, (const char **)argv, options_table, 0); @@ -656,6 +662,10 @@ parse_options(int argc, char **argv, struct ipvs_command_entry *ce, set_option(options, OPT_PERSISTENCE_ENGINE); strncpy(ce->svc.pe_name, optarg, IP_VS_PENAME_MAXLEN); break; + case 'b': + set_option(options, OPT_SCHED_FLAGS); + snprintf(sched_flags_arg, sizeof(sched_flags_arg), "%s", optarg); + break; default: fail(2, "invalid option `%s'", poptBadOption(context, POPT_BADOPTION_NOALIAS)); @@ -690,6 +700,14 @@ parse_options(int argc, char **argv, struct ipvs_command_entry *ce, if ((optarg=(char *)poptGetArg(context))) fail(2, "unexpected argument %s", optarg); + if (sched_flags_arg[0]) { + ce->svc.flags &= ~(IP_VS_SVC_F_SCHED1 | + IP_VS_SVC_F_SCHED2 | + IP_VS_SVC_F_SCHED3); + ce->svc.flags |= parse_sched_flags(ce->svc.sched_name, + sched_flags_arg); + } + poptFreeContext(context); return 0; @@ -989,6 +1007,38 @@ parse_service(char *buf, ipvs_service_t *svc) return result; } +static unsigned int parse_sched_flags(const char *sched, char *optarg) +{ + unsigned int flags = 0; + char *flag; + + sched = (sched && *sched) ? sched : DEF_SCHED; + + flag = strtok(optarg, ","); + do { + if (!strcmp(flag, "flag-1")) { + flags |= IP_VS_SVC_F_SCHED1; + } else if (!strcmp(flag, "flag-2")) { + flags |= IP_VS_SVC_F_SCHED2; + } else if (!strcmp(flag, "flag-3")) { + flags |= IP_VS_SVC_F_SCHED3; + } else if (!strcmp(flag, "sh-fallback")) { + flags |= IP_VS_SVC_F_SCHED_SH_FALLBACK; + if (strcmp(sched, "sh")) + fail(2, "incompatible scheduler flag `%s'", + flag); + } else if (!strcmp(flag, "sh-port")) { + flags |= IP_VS_SVC_F_SCHED_SH_PORT; + if (strcmp(sched, "sh")) + fail(2, "incompatible scheduler flag `%s'", + flag); + } else { + fail(2, "invalid scheduler flag `%s'", flag); + } + } while ((flag = strtok(NULL, ",")) != NULL); + + return flags; +} static void generic_opt_check(int command, int options) @@ -1070,7 +1120,7 @@ static void usage_exit(const char *program, const int exit_status) version(stream); fprintf(stream, "Usage:\n" - " %s -A|E -t|u|f service-address [-s scheduler] [-p [timeout]] [-M netmask] [--pe persistence_engine]\n" + " %s -A|E -t|u|f service-address [-s scheduler] [-p [timeout]] [-M netmask] [--pe persistence_engine] [-b sched-flags]\n" " %s -D -t|u|f service-address\n" " %s -C\n" " %s -R\n" @@ -1139,7 +1189,8 @@ static void usage_exit(const char *program, const int exit_status) " --nosort disable sorting output of service/server entries\n" " --sort does nothing, for backwards compatibility\n" " --ops -o one-packet scheduling\n" - " --numeric -n numeric output of addresses and ports\n", + " --numeric -n numeric output of addresses and ports\n" + " --sched-flags -b flags scheduler flags (comma-separated)\n", DEF_SCHED); exit(exit_status); @@ -1396,6 +1447,32 @@ static void print_largenum(unsigned long long i, unsigned int format) printf("%8lluT", i / 1000000000000ULL); } +static void print_sched_flags(ipvs_service_entry_t *se) { + char flags[64]; + + flags[0] = '\0'; + + if (!strcmp(se->sched_name, "sh")) { + if (se->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) + strcat(flags, "sh-fallback,"); + if (se->flags & IP_VS_SVC_F_SCHED_SH_PORT) + strcat(flags, "sh-port,"); + if (se->flags & IP_VS_SVC_F_SCHED3) + strcat(flags, "flag-3,"); + } else { + if (se->flags & IP_VS_SVC_F_SCHED1) + strcat(flags, "flag-1,"); + if (se->flags & IP_VS_SVC_F_SCHED2) + strcat(flags, "flag-2,"); + if (se->flags & IP_VS_SVC_F_SCHED3) + strcat(flags, "flag-3,"); + } + + if (flags[0]) { + flags[strlen(flags)-1] = '\0'; + printf("%s", flags); + } +} static void print_title(unsigned int format) { @@ -1488,6 +1565,12 @@ print_service_entry(ipvs_service_entry_t *se, unsigned int format) printf(" pe %s", se->pe_name); if (se->flags & IP_VS_SVC_F_ONEPACKET) printf(" -o"); + if (se->flags & (IP_VS_SVC_F_SCHED1 | + IP_VS_SVC_F_SCHED2 | + IP_VS_SVC_F_SCHED3)) { + printf(" -b "); + print_sched_flags(se); + } } else if (format & FMT_STATS) { printf("%-33s", svc_name); print_largenum(se->stats.conns, format); @@ -1504,6 +1587,13 @@ print_service_entry(ipvs_service_entry_t *se, unsigned int format) print_largenum(se->stats.outbps, format); } else { printf("%s %s", svc_name, se->sched_name); + if (se->flags & (IP_VS_SVC_F_SCHED1 | + IP_VS_SVC_F_SCHED2 | + IP_VS_SVC_F_SCHED3)) { + printf(" ("); + print_sched_flags(se); + printf(")"); + } if (se->flags & IP_VS_SVC_F_PERSISTENT) { printf(" persistent %u", se->timeout); if (se->af == AF_INET) diff --git a/libipvs/ip_vs.h b/libipvs/ip_vs.h index 5e1d544..4db14ff 100644 --- a/libipvs/ip_vs.h +++ b/libipvs/ip_vs.h @@ -29,6 +29,13 @@ #define IP_VS_SVC_F_PERSISTENT 0x0001 /* persistent port */ #define IP_VS_SVC_F_HASHED 0x0002 /* hashed entry */ #define IP_VS_SVC_F_ONEPACKET 0x0004 /* one-packet scheduling */ +#define IP_VS_SVC_F_SCHED1 0x0008 /* scheduler flag 1 */ +#define IP_VS_SVC_F_SCHED2 0x0010 /* scheduler flag 2 */ +#define IP_VS_SVC_F_SCHED3 0x0020 /* scheduler flag 3 */ + +#define IP_VS_SVC_F_SCHED_SH_FALLBACK IP_VS_SVC_F_SCHED1 /* SH fallback */ +#define IP_VS_SVC_F_SCHED_SH_PORT IP_VS_SVC_F_SCHED2 /* SH use port */ + /* * IPVS sync daemon states Alex