* [net-next PATCH v3 0/3] net: reserve ports for applications using fixed port numbers
@ 2010-02-11 2:09 Octavian Purdila
2010-02-11 2:09 ` [net-next PATCH v3 1/3] sysctl: refactor integer handling proc code Octavian Purdila
` (4 more replies)
0 siblings, 5 replies; 11+ messages in thread
From: Octavian Purdila @ 2010-02-11 2:09 UTC (permalink / raw)
To: David Miller
Cc: Octavian Purdila, netdev, linux-kernel, WANG Cong, Neil Horman,
Eric Dumazet
This patch series is based on Amerigo's v2 but it now uses a bitmap
for port reservation.
I've run a while (1) { bind(0) } test (with ip_local_port_range
1024 65000) to see if there is any performance difference between the
two approaches (ranges vs bitmap). I could not detect any significant
difference, both cases scored in 2.76s +/- 0.01 on my setup.
I've based this patch series on current net-next, but it contains a
significant non networking part. Please let me know if I should handle
this differently.
Octavian Purdila (3):
sysctl: refactor integer handling proc code
sysctl: add proc_dobitmap
net: reserve ports for applications using fixed port numbers
Documentation/networking/ip-sysctl.txt | 12 +
drivers/infiniband/core/cma.c | 7 +-
include/linux/sysctl.h | 2 +
include/net/ip.h | 6 +
kernel/sysctl.c | 374 +++++++++++++++++++-------------
net/ipv4/inet_connection_sock.c | 5 +
net/ipv4/inet_hashtables.c | 2 +
net/ipv4/sysctl_net_ipv4.c | 7 +
net/ipv4/udp.c | 3 +-
net/sctp/socket.c | 2 +
10 files changed, 264 insertions(+), 156 deletions(-)
^ permalink raw reply [flat|nested] 11+ messages in thread
* [net-next PATCH v3 1/3] sysctl: refactor integer handling proc code
2010-02-11 2:09 [net-next PATCH v3 0/3] net: reserve ports for applications using fixed port numbers Octavian Purdila
@ 2010-02-11 2:09 ` Octavian Purdila
2010-02-11 2:09 ` [net-next PATCH v3 2/3] sysctl: add proc_dobitmap Octavian Purdila
` (3 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Octavian Purdila @ 2010-02-11 2:09 UTC (permalink / raw)
To: David Miller
Cc: Octavian Purdila, netdev, linux-kernel, WANG Cong, Neil Horman,
Eric Dumazet
As we are about to add another integer handling proc function a little
bit of cleanup is in order: add a few helper functions to improve code
readability and decrease code duplication.
In the process a bug is fixed as well: if the user specifies a number
with more than 20 digits it will be interpreted as two integers
(e.g. 10000...13 will be interpreted as 100.... and 13).
Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Cc: WANG Cong <amwang@redhat.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
---
kernel/sysctl.c | 298 +++++++++++++++++++++++++++----------------------------
1 files changed, 144 insertions(+), 154 deletions(-)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a68b24..b0f9618 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2039,8 +2039,98 @@ int proc_dostring(struct ctl_table *table, int write,
buffer, lenp, ppos);
}
+static int proc_skip_wspace(char __user **buf, size_t *size)
+{
+ char c;
+
+ while (*size) {
+ if (get_user(c, *buf))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ (*size)--; (*buf)++;
+ }
+
+ return 0;
+}
+
+#define TMPBUFLEN 22
+static int proc_get_next_ulong(char __user **buf, size_t *size,
+ unsigned long *val, bool *neg)
+{
+ int len;
+ char *p, tmp[TMPBUFLEN];
+ int err;
+
+ err = proc_skip_wspace(buf, size);
+ if (err)
+ return err;
+ if (!*size)
+ return -EINVAL;
+
+ len = *size;
+ if (len > TMPBUFLEN-1)
+ len = TMPBUFLEN-1;
+
+ if (copy_from_user(tmp, *buf, len))
+ return -EFAULT;
+
+ tmp[len] = 0;
+ p = tmp;
+ if (*p == '-' && *size > 1) {
+ *neg = 1;
+ p++;
+ } else
+ *neg = 0;
+ if (*p < '0' || *p > '9')
+ return -EINVAL;
+
+ *val = simple_strtoul(p, &p, 0);
+
+ len = p - tmp;
+ if (((len < *size) && *p && !isspace(*p)) ||
+ /* We don't know if the next char is whitespace thus we may accept
+ * invalid integers (e.g. 1234...a) or two integers instead of one
+ * (e.g. 123...1). So lets not allow such large numbers. */
+ len == TMPBUFLEN - 1)
+ return -EINVAL;
-static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
+ *buf += len; *size -= len;
+
+ return 0;
+}
+
+static int proc_put_ulong(char __user **buf, size_t *size, unsigned long val,
+ bool neg, bool first)
+{
+ int len;
+ char tmp[TMPBUFLEN], *p = tmp;
+
+ if (!first)
+ *p++ = '\t';
+ sprintf(p, "%s%lu", neg ? "-" : "", val);
+ len = strlen(tmp);
+ if (len > *size)
+ len = *size;
+ if (copy_to_user(*buf, tmp, len))
+ return -EFAULT;
+ *size -= len;
+ *buf += len;
+ return 0;
+}
+#undef TMPBUFLEN
+
+static int proc_put_newline(char __user **buf, size_t *size)
+{
+ if (*size) {
+ if (put_user('\n', *buf))
+ return -EFAULT;
+ (*size)--, (*buf)++;
+ }
+ return 0;
+}
+
+static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
{
@@ -2049,7 +2139,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
} else {
int val = *valp;
if (val < 0) {
- *negp = -1;
+ *negp = 1;
*lvalp = (unsigned long)-val;
} else {
*negp = 0;
@@ -2060,19 +2150,15 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
}
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
- int write, void __user *buffer,
+ int write, void __user *_buffer,
size_t *lenp, loff_t *ppos,
- int (*conv)(int *negp, unsigned long *lvalp, int *valp,
+ int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
int write, void *data),
void *data)
{
-#define TMPBUFLEN 21
- int *i, vleft, first = 1, neg;
- unsigned long lval;
- size_t left, len;
-
- char buf[TMPBUFLEN], *p;
- char __user *s = buffer;
+ int *i, vleft, first = 1, err = 0;
+ size_t left;
+ char __user *buffer = (char __user *) _buffer;
if (!tbl_data || !table->maxlen || !*lenp ||
(*ppos && !write)) {
@@ -2088,88 +2174,39 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
conv = do_proc_dointvec_conv;
for (; left && vleft--; i++, first=0) {
- if (write) {
- while (left) {
- char c;
- if (get_user(c, s))
- return -EFAULT;
- if (!isspace(c))
- break;
- left--;
- s++;
- }
- if (!left)
- break;
- neg = 0;
- len = left;
- if (len > sizeof(buf) - 1)
- len = sizeof(buf) - 1;
- if (copy_from_user(buf, s, len))
- return -EFAULT;
- buf[len] = 0;
- p = buf;
- if (*p == '-' && left > 1) {
- neg = 1;
- p++;
- }
- if (*p < '0' || *p > '9')
- break;
-
- lval = simple_strtoul(p, &p, 0);
+ unsigned long lval;
+ bool neg;
- len = p-buf;
- if ((len < left) && *p && !isspace(*p))
+ if (write) {
+ err = proc_get_next_ulong(&buffer, &left, &lval, &neg);
+ if (err)
break;
- s += len;
- left -= len;
-
if (conv(&neg, &lval, i, 1, data))
break;
} else {
- p = buf;
- if (!first)
- *p++ = '\t';
-
if (conv(&neg, &lval, i, 0, data))
break;
-
- sprintf(p, "%s%lu", neg ? "-" : "", lval);
- len = strlen(buf);
- if (len > left)
- len = left;
- if(copy_to_user(s, buf, len))
- return -EFAULT;
- left -= len;
- s += len;
- }
- }
-
- if (!write && !first && left) {
- if(put_user('\n', s))
- return -EFAULT;
- left--, s++;
- }
- if (write) {
- while (left) {
- char c;
- if (get_user(c, s++))
- return -EFAULT;
- if (!isspace(c))
+ err = proc_put_ulong(&buffer, &left, lval, neg, first);
+ if (err)
break;
- left--;
}
}
- if (write && first)
- return -EINVAL;
+
+ if (!write && !first && left && !err)
+ err = proc_put_newline(&buffer, &left);
+ if (write && !err)
+ err = proc_skip_wspace(&buffer, &left);
+ if (err == -EFAULT /* do we really need to check for -EFAULT? */ ||
+ (write && first))
+ return err ? : -EINVAL;
*lenp -= left;
*ppos += *lenp;
return 0;
-#undef TMPBUFLEN
}
static int do_proc_dointvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos,
- int (*conv)(int *negp, unsigned long *lvalp, int *valp,
+ int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
int write, void *data),
void *data)
{
@@ -2237,8 +2274,8 @@ struct do_proc_dointvec_minmax_conv_param {
int *max;
};
-static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
- int *valp,
+static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
+ int *valp,
int write, void *data)
{
struct do_proc_dointvec_minmax_conv_param *param = data;
@@ -2251,7 +2288,7 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
} else {
int val = *valp;
if (val < 0) {
- *negp = -1;
+ *negp = 1;
*lvalp = (unsigned long)-val;
} else {
*negp = 0;
@@ -2289,17 +2326,15 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
}
static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
- void __user *buffer,
+ void __user *_buffer,
size_t *lenp, loff_t *ppos,
unsigned long convmul,
unsigned long convdiv)
{
-#define TMPBUFLEN 21
- unsigned long *i, *min, *max, val;
- int vleft, first=1, neg;
- size_t len, left;
- char buf[TMPBUFLEN], *p;
- char __user *s = buffer;
+ unsigned long *i, *min, *max;
+ int vleft, first = 1, err = 0;
+ size_t left;
+ char __user *buffer = (char __user *) _buffer;
if (!data || !table->maxlen || !*lenp ||
(*ppos && !write)) {
@@ -2314,82 +2349,37 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
left = *lenp;
for (; left && vleft--; i++, min++, max++, first=0) {
+ unsigned long val;
+
if (write) {
- while (left) {
- char c;
- if (get_user(c, s))
- return -EFAULT;
- if (!isspace(c))
- break;
- left--;
- s++;
- }
- if (!left)
- break;
- neg = 0;
- len = left;
- if (len > TMPBUFLEN-1)
- len = TMPBUFLEN-1;
- if (copy_from_user(buf, s, len))
- return -EFAULT;
- buf[len] = 0;
- p = buf;
- if (*p == '-' && left > 1) {
- neg = 1;
- p++;
- }
- if (*p < '0' || *p > '9')
- break;
- val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
- len = p-buf;
- if ((len < left) && *p && !isspace(*p))
+ bool neg;
+
+ err = proc_get_next_ulong(&buffer, &left, &val, &neg);
+ if (err)
break;
if (neg)
- val = -val;
- s += len;
- left -= len;
-
- if(neg)
continue;
if ((min && val < *min) || (max && val > *max))
continue;
*i = val;
} else {
- p = buf;
- if (!first)
- *p++ = '\t';
- sprintf(p, "%lu", convdiv * (*i) / convmul);
- len = strlen(buf);
- if (len > left)
- len = left;
- if(copy_to_user(s, buf, len))
- return -EFAULT;
- left -= len;
- s += len;
- }
- }
-
- if (!write && !first && left) {
- if(put_user('\n', s))
- return -EFAULT;
- left--, s++;
- }
- if (write) {
- while (left) {
- char c;
- if (get_user(c, s++))
- return -EFAULT;
- if (!isspace(c))
+ val = convdiv * (*i) / convmul;
+ err = proc_put_ulong(&buffer, &left, val, 0, first);
+ if (err)
break;
- left--;
}
}
- if (write && first)
- return -EINVAL;
+
+ if (!write && !first && left && !err)
+ err = proc_put_newline(&buffer, &left);
+ if (write && !err)
+ err = proc_skip_wspace(&buffer, &left);
+ if (err == -EFAULT /* do we really need to check for -EFAULT? */ ||
+ (write && first))
+ return err ? : -EINVAL;
*lenp -= left;
*ppos += *lenp;
return 0;
-#undef TMPBUFLEN
}
static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
@@ -2450,7 +2440,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
}
-static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
+static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
{
@@ -2462,7 +2452,7 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
int val = *valp;
unsigned long lval;
if (val < 0) {
- *negp = -1;
+ *negp = 1;
lval = (unsigned long)-val;
} else {
*negp = 0;
@@ -2473,7 +2463,7 @@ static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
return 0;
}
-static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
+static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
{
@@ -2485,7 +2475,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
int val = *valp;
unsigned long lval;
if (val < 0) {
- *negp = -1;
+ *negp = 1;
lval = (unsigned long)-val;
} else {
*negp = 0;
@@ -2496,7 +2486,7 @@ static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
return 0;
}
-static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
+static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
{
@@ -2506,7 +2496,7 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
int val = *valp;
unsigned long lval;
if (val < 0) {
- *negp = -1;
+ *negp = 1;
lval = (unsigned long)-val;
} else {
*negp = 0;
--
1.5.6.5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [net-next PATCH v3 2/3] sysctl: add proc_dobitmap
2010-02-11 2:09 [net-next PATCH v3 0/3] net: reserve ports for applications using fixed port numbers Octavian Purdila
2010-02-11 2:09 ` [net-next PATCH v3 1/3] sysctl: refactor integer handling proc code Octavian Purdila
@ 2010-02-11 2:09 ` Octavian Purdila
2010-02-11 2:09 ` [net-next PATCH v3 3/3] net: reserve ports for applications using fixed port numbers Octavian Purdila
` (2 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Octavian Purdila @ 2010-02-11 2:09 UTC (permalink / raw)
To: David Miller
Cc: Octavian Purdila, netdev, linux-kernel, WANG Cong, Neil Horman,
Eric Dumazet
The new function can be used to update bitmaps via /proc. Bits can be
set by writing positive values to the file and cleared by writing
negative values (e.g. "0 2" sets bits 0 and 2, i.e. the first and third
bits, and "-0 -2" clears them). Reading will show only the set bits.
Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Cc: WANG Cong <amwang@redhat.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
---
include/linux/sysctl.h | 2 +
kernel/sysctl.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 78 insertions(+), 0 deletions(-)
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 9f236cd..ba89bf2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -985,6 +985,8 @@ extern int proc_doulongvec_minmax(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
void __user *, size_t *, loff_t *);
+extern int proc_dobitmap(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
/*
* Register a set of sysctl names by calling register_sysctl_table
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b0f9618..b8959f4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2596,6 +2596,82 @@ static int proc_do_cad_pid(struct ctl_table *table, int write,
return 0;
}
+/**
+ * proc_dobitmap - read/write from/to a bitmap
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ * @ppos: the current position in the file
+ *
+ * The bitmap is stored at table->data and the bitmap length (in bits)
+ * in table->maxlen. Reading from the proc file will show the set bits.
+ * Writing positive values sets the bits, negative values clears them
+ * (e.g. 0 2 sets the first and 3rd bit, -0 -2 clears them).
+ *
+ * Returns 0 on success.
+ */
+int proc_dobitmap(struct ctl_table *table, int write,
+ void __user *_buffer, size_t *lenp, loff_t *ppos)
+{
+ bool first = 1;
+ unsigned long *bitmap = (unsigned long *) table->data;
+ unsigned long bitmap_len = table->maxlen;
+ int left = *lenp, err = 0;
+ char __user *buffer = (char __user *) _buffer;
+
+ if (!bitmap_len || !left || (*ppos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ if (write) {
+ while (left) {
+ unsigned long val;
+ bool neg;
+
+ err = proc_get_next_ulong(&buffer, &left, &val, &neg);
+ if (err)
+ break;
+ if (val >= bitmap_len) {
+ err = -EINVAL;
+ break;
+ }
+ if (neg)
+ clear_bit(val, bitmap);
+ else
+ set_bit(val, bitmap);
+ first = 0;
+ }
+ if (!err)
+ err = proc_skip_wspace(&buffer, &left);
+ } else {
+ unsigned long bit = 0;
+
+ while (left) {
+ bit = find_next_bit(bitmap, bitmap_len, bit);
+ if (bit >= bitmap_len)
+ break;
+ err = proc_put_ulong(&buffer, &left, bit, 0, first);
+ if (err)
+ break;
+ first = 0; bit++;
+ }
+ if (!err)
+ err = proc_put_newline(&buffer, &left);
+ }
+
+ if (first && write && !err)
+ err = -EINVAL;
+ if (err == -EFAULT /* do we really need to check for -EFAULT? */ ||
+ (write && first))
+ return err ? : -EINVAL;
+ *lenp -= left;
+ *ppos += *lenp;
+ return 0;
+}
+
#else /* CONFIG_PROC_FS */
int proc_dostring(struct ctl_table *table, int write,
--
1.5.6.5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [net-next PATCH v3 3/3] net: reserve ports for applications using fixed port numbers
2010-02-11 2:09 [net-next PATCH v3 0/3] net: reserve ports for applications using fixed port numbers Octavian Purdila
2010-02-11 2:09 ` [net-next PATCH v3 1/3] sysctl: refactor integer handling proc code Octavian Purdila
2010-02-11 2:09 ` [net-next PATCH v3 2/3] sysctl: add proc_dobitmap Octavian Purdila
@ 2010-02-11 2:09 ` Octavian Purdila
2010-02-15 12:36 ` [net-next PATCH v3 0/3] " Cong Wang
2010-02-15 12:39 ` Cong Wang
4 siblings, 0 replies; 11+ messages in thread
From: Octavian Purdila @ 2010-02-11 2:09 UTC (permalink / raw)
To: David Miller
Cc: Octavian Purdila, netdev, linux-kernel, WANG Cong, Neil Horman,
Eric Dumazet
This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports
(bitmap type) which allows users to reserve ports for third-party
applications.
The reserved ports will not be used by automatic port assignments
(e.g. when calling connect() or bind() with port number 0). Explicit
port allocation behavior is unchanged.
Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
---
Documentation/networking/ip-sysctl.txt | 12 ++++++++++++
drivers/infiniband/core/cma.c | 7 ++++++-
include/net/ip.h | 6 ++++++
net/ipv4/inet_connection_sock.c | 5 +++++
net/ipv4/inet_hashtables.c | 2 ++
net/ipv4/sysctl_net_ipv4.c | 7 +++++++
net/ipv4/udp.c | 3 ++-
net/sctp/socket.c | 2 ++
8 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 2dc7a1d..23be7a4 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -564,6 +564,18 @@ ip_local_port_range - 2 INTEGERS
(i.e. by default) range 1024-4999 is enough to issue up to
2000 connections per second to systems supporting timestamps.
+ip_local_reserved_ports - BITMAP of 65536 ports
+ Specify the ports which are reserved for known third-party
+ applications. These ports will not be used by automatic port assignments
+ (e.g. when calling connect() or bind() with port number 0). Explicit
+ port allocation behavior is unchanged.
+
+ Reserving ports is done by writing positive numbers to this proc entry;
+ clearing them is done by writing negative numbers (e.g. 8080 reserves
+ port 8080, -8080 makes it available for automatic assignment again).
+
+ Default: Empty
+
ip_nonlocal_bind - BOOLEAN
If set, allows processes to bind() to non-local IP addresses,
which can be quite useful - but may break some applications.
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index cc9b594..8248fc6 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1979,6 +1979,8 @@ retry:
/* FIXME: add proper port randomization per like inet_csk_get_port */
do {
ret = idr_get_new_above(ps, bind_list, next_port, &port);
+ if (inet_is_reserved_local_port(port))
+ ret = -EAGAIN;
} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
if (ret)
@@ -2997,10 +2999,13 @@ static int __init cma_init(void)
{
int ret, low, high, remaining;
- get_random_bytes(&next_port, sizeof next_port);
inet_get_local_port_range(&low, &high);
+again:
+ get_random_bytes(&next_port, sizeof next_port);
remaining = (high - low) + 1;
next_port = ((unsigned int) next_port % remaining) + low;
+ if (inet_is_reserved_local_port(next_port))
+ goto again;
cma_wq = create_singlethread_workqueue("rdma_cm");
if (!cma_wq)
diff --git a/include/net/ip.h b/include/net/ip.h
index fb63371..ada8589 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -184,6 +184,12 @@ extern struct local_ports {
} sysctl_local_ports;
extern void inet_get_local_port_range(int *low, int *high);
+extern unsigned long sysctl_local_reserved_ports[65536/8/sizeof(unsigned long)];
+static inline int inet_is_reserved_local_port(int port)
+{
+ return test_bit(port, sysctl_local_reserved_ports);
+}
+
extern int sysctl_ip_default_ttl;
extern int sysctl_ip_nonlocal_bind;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8da6429..febfc6c 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,6 +37,8 @@ struct local_ports sysctl_local_ports __read_mostly = {
.range = { 32768, 61000 },
};
+unsigned long sysctl_local_reserved_ports[65536/BITS_PER_LONG];
+
void inet_get_local_port_range(int *low, int *high)
{
unsigned seq;
@@ -108,6 +110,8 @@ again:
smallest_size = -1;
do {
+ if (inet_is_reserved_local_port(rover))
+ goto next_nolock;
head = &hashinfo->bhash[inet_bhashfn(net, rover,
hashinfo->bhash_size)];
spin_lock(&head->lock);
@@ -130,6 +134,7 @@ again:
break;
next:
spin_unlock(&head->lock);
+ next_nolock:
if (++rover > high)
rover = low;
} while (--remaining > 0);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2b79377..d3e160a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
local_bh_disable();
for (i = 1; i <= remaining; i++) {
port = low + (i + offset) % remaining;
+ if (inet_is_reserved_local_port(port))
+ continue;
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock(&head->lock);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e3712c..48ca149 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -298,6 +298,13 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = ipv4_local_port_range,
},
+ {
+ .procname = "ip_local_reserved_ports",
+ .data = sysctl_local_reserved_ports,
+ .maxlen = 65536,
+ .mode = 0644,
+ .proc_handler = proc_dobitmap,
+ },
#ifdef CONFIG_IP_MULTICAST
{
.procname = "igmp_max_memberships",
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4f7d212..705e032 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -232,7 +232,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
*/
do {
if (low <= snum && snum <= high &&
- !test_bit(snum >> udptable->log, bitmap))
+ !test_bit(snum >> udptable->log, bitmap) &&
+ !inet_is_reserved_local_port(snum))
goto found;
snum += rand;
} while (snum != first);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f6d1e59..1f839d0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5432,6 +5432,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
rover++;
if ((rover < low) || (rover > high))
rover = low;
+ if (inet_is_reserved_local_port(rover))
+ continue;
index = sctp_phashfn(rover);
head = &sctp_port_hashtable[index];
sctp_spin_lock(&head->lock);
--
1.5.6.5
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [net-next PATCH v3 0/3] net: reserve ports for applications using fixed port numbers
2010-02-11 2:09 [net-next PATCH v3 0/3] net: reserve ports for applications using fixed port numbers Octavian Purdila
` (2 preceding siblings ...)
2010-02-11 2:09 ` [net-next PATCH v3 3/3] net: reserve ports for applications using fixed port numbers Octavian Purdila
@ 2010-02-15 12:36 ` Cong Wang
2010-02-15 12:39 ` Cong Wang
4 siblings, 0 replies; 11+ messages in thread
From: Cong Wang @ 2010-02-15 12:36 UTC (permalink / raw)
To: Octavian Purdila
Cc: David Miller, netdev, linux-kernel, Neil Horman, Eric Dumazet
Octavian Purdila wrote:
> This patch series is based on Amerigo's v2 but it now uses a bitmap
> for port reservation.
>
> I've ran a while (1) { bind(0) } test (with ip_local_port_range
> 1024 65000) to see if there is any performance difference between the
> two approaches (ranges vs bitmap). I could not detect any significant
> difference, both cases scored in 2.76s +/- 0.01 on my setup.
>
> I've based this patch series on current net-next, but it contains a
> significant non networking part. Please let me know if I should handle
> this differently.
>
> Octavian Purdila (3):
> sysctl: refactor integer handling proc code
> sysctl: add proc_dobitmap
> net: reserve ports for applications using fixed port numbers
>
(Sorry for the delay, we are having Chinese new year here.)
Thanks for your work, Octavian!
Your patches look nice, but I don't have much time to review them today,
I will have a detailed look tomorrow.
Thanks.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [net-next PATCH v3 0/3] net: reserve ports for applications using fixed port numbers
2010-02-15 12:39 ` Cong Wang
@ 2010-02-15 12:37 ` Octavian Purdila
2010-02-15 12:48 ` Cong Wang
0 siblings, 1 reply; 11+ messages in thread
From: Octavian Purdila @ 2010-02-15 12:37 UTC (permalink / raw)
To: Cong Wang; +Cc: netdev
On Monday 15 February 2010 14:39:30 you wrote:
> Octavian Purdila wrote:
> > This patch series is based on Amerigo's v2 but it now uses a bitmap
> > for port reservation.
> >
> > I've ran a while (1) { bind(0) } test (with ip_local_port_range
> > 1024 65000) to see if there is any performance difference between the
> > two approaches (ranges vs bitmap). I could not detect any significant
> > difference, both cases scored in 2.76s +/- 0.01 on my setup.
> >
> > I've based this patch series on current net-next, but it contains a
> > significant non networking part. Please let me know if I should handle
> > this differently.
> >
> > Octavian Purdila (3):
> > sysctl: refactor integer handling proc code
> > sysctl: add proc_dobitmap
> > net: reserve ports for applications using fixed port numbers
> >
> > Documentation/networking/ip-sysctl.txt | 12 +
> > drivers/infiniband/core/cma.c | 7 +-
> > include/linux/sysctl.h | 2 +
> > include/net/ip.h | 6 +
> > kernel/sysctl.c | 374
> > +++++++++++++++++++------------- net/ipv4/inet_connection_sock.c |
> > 5 +
> > net/ipv4/inet_hashtables.c | 2 +
> > net/ipv4/sysctl_net_ipv4.c | 7 +
> > net/ipv4/udp.c | 3 +-
> > net/sctp/socket.c | 2 +
> > 10 files changed, 264 insertions(+), 156 deletions(-)
>
> Hey, Octavian, typo in netdev list name...
>
> Could you please fix it and resend? So that this will get more reviews.
>
Sorry for that. I've already spotted it and resent it. I plan to send a new
take by the end of today (GMT+2), which will also address Eric's comments by
allocating the bitmap at init time.
Thanks,
tavi
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [net-next PATCH v3 0/3] net: reserve ports for applications using fixed port numbers
2010-02-11 2:09 [net-next PATCH v3 0/3] net: reserve ports for applications using fixed port numbers Octavian Purdila
` (3 preceding siblings ...)
2010-02-15 12:36 ` [net-next PATCH v3 0/3] " Cong Wang
@ 2010-02-15 12:39 ` Cong Wang
2010-02-15 12:37 ` Octavian Purdila
4 siblings, 1 reply; 11+ messages in thread
From: Cong Wang @ 2010-02-15 12:39 UTC (permalink / raw)
To: Octavian Purdila
Cc: David Miller, netdev, linux-kernel, Neil Horman, Eric Dumazet
Octavian Purdila wrote:
> This patch series is based on Amerigo's v2 but it now uses a bitmap
> for port reservation.
>
> I've ran a while (1) { bind(0) } test (with ip_local_port_range
> 1024 65000) to see if there is any performance difference between the
> two approaches (ranges vs bitmap). I could not detect any significant
> difference, both cases scored in 2.76s +/- 0.01 on my setup.
>
> I've based this patch series on current net-next, but it contains a
> significant non networking part. Please let me know if I should handle
> this differently.
>
> Octavian Purdila (3):
> sysctl: refactor integer handling proc code
> sysctl: add proc_dobitmap
> net: reserve ports for applications using fixed port numbers
>
> Documentation/networking/ip-sysctl.txt | 12 +
> drivers/infiniband/core/cma.c | 7 +-
> include/linux/sysctl.h | 2 +
> include/net/ip.h | 6 +
> kernel/sysctl.c | 374 +++++++++++++++++++-------------
> net/ipv4/inet_connection_sock.c | 5 +
> net/ipv4/inet_hashtables.c | 2 +
> net/ipv4/sysctl_net_ipv4.c | 7 +
> net/ipv4/udp.c | 3 +-
> net/sctp/socket.c | 2 +
> 10 files changed, 264 insertions(+), 156 deletions(-)
>
Hey, Octavian, typo in netdev list name...
Could you please fix it and resend? So that this will get more reviews.
Thanks!
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [net-next PATCH v3 0/3] net: reserve ports for applications using fixed port numbers
2010-02-15 12:37 ` Octavian Purdila
@ 2010-02-15 12:48 ` Cong Wang
0 siblings, 0 replies; 11+ messages in thread
From: Cong Wang @ 2010-02-15 12:48 UTC (permalink / raw)
To: Octavian Purdila; +Cc: netdev
Octavian Purdila wrote:
> On Monday 15 February 2010 14:39:30 you wrote:
>> Octavian Purdila wrote:
>>> This patch series is based on Amerigo's v2 but it now uses a bitmap
>>> for port reservation.
>>>
>>> I've ran a while (1) { bind(0) } test (with ip_local_port_range
>>> 1024 65000) to see if there is any performance difference between the
>>> two approaches (ranges vs bitmap). I could not detect any significant
>>> difference, both cases scored in 2.76s +/- 0.01 on my setup.
>>>
>>> I've based this patch series on current net-next, but it contains a
>>> significant non networking part. Please let me know if I should handle
>>> this differently.
>>>
>>> Octavian Purdila (3):
>>> sysctl: refactor integer handling proc code
>>> sysctl: add proc_dobitmap
>>> net: reserve ports for applications using fixed port numbers
>>>
>>> Documentation/networking/ip-sysctl.txt | 12 +
>>> drivers/infiniband/core/cma.c | 7 +-
>>> include/linux/sysctl.h | 2 +
>>> include/net/ip.h | 6 +
>>> kernel/sysctl.c | 374
>>> +++++++++++++++++++------------- net/ipv4/inet_connection_sock.c |
>>> 5 +
>>> net/ipv4/inet_hashtables.c | 2 +
>>> net/ipv4/sysctl_net_ipv4.c | 7 +
>>> net/ipv4/udp.c | 3 +-
>>> net/sctp/socket.c | 2 +
>>> 10 files changed, 264 insertions(+), 156 deletions(-)
>> Hey, Octavian, typo in netdev list name...
>>
>> Could you please fix it and resend? So that this will get more reviews.
>>
>
> Sorry for that. I've already spotted it and resent it. I plan to send a new
> take end of this day (GMT + 2) which will also address Eric's comments by
> allocating the bitmap at init time.
>
Ok, but I was not Cc'ed. :) Please keep me in Cc.
Thanks!
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [net-next PATCH v3 3/3] net: reserve ports for applications using fixed port numbers
2010-02-11 6:12 ` Eric Dumazet
@ 2010-02-11 6:14 ` Eric Dumazet
0 siblings, 0 replies; 11+ messages in thread
From: Eric Dumazet @ 2010-02-11 6:14 UTC (permalink / raw)
To: Octavian Purdila; +Cc: netdev
Le jeudi 11 février 2010 à 07:12 +0100, Eric Dumazet a écrit :
> Octavian, please resubmit all patches to lkml, netdev, David, because
> patches 1 & 2 are changing kernel core services.
>
Ooops, I just saw your second submission, please ignore my comment :)
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [net-next PATCH v3 3/3] net: reserve ports for applications using fixed port numbers
2010-02-11 2:53 [net-next PATCH v3 3/3] " Octavian Purdila
@ 2010-02-11 6:12 ` Eric Dumazet
2010-02-11 6:14 ` Eric Dumazet
0 siblings, 1 reply; 11+ messages in thread
From: Eric Dumazet @ 2010-02-11 6:12 UTC (permalink / raw)
To: Octavian Purdila; +Cc: netdev
Octavian, please resubmit all patches to lkml, netdev, David, because
patches 1 & 2 are changing kernel core services.
However, I'll take some time in a couple of hours to review them.
Le jeudi 11 février 2010 à 04:53 +0200, Octavian Purdila a écrit :
> This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports
> (bitmap type) which allows users to reserve ports for third-party
> applications.
>
> The reserved ports will not be used by automatic port assignments
> (e.g. when calling connect() or bind() with port number 0). Explicit
> port allocation behavior is unchanged.
>
> +extern unsigned long sysctl_local_reserved_ports[65536/8/sizeof(unsigned long)];
I am sure we have a special macro for this.
extern DECLARE_BITMAP(reserved_ports, 65536);
> +unsigned long sysctl_local_reserved_ports[65536/BITS_PER_LONG];
> +
Same point here, plus I am not sure adding 8192 bytes in BSS zone is a
problem nowadays. (It was ten years ago for some arches)
^ permalink raw reply [flat|nested] 11+ messages in thread
* [net-next PATCH v3 3/3] net: reserve ports for applications using fixed port numbers
@ 2010-02-11 2:53 Octavian Purdila
2010-02-11 6:12 ` Eric Dumazet
0 siblings, 1 reply; 11+ messages in thread
From: Octavian Purdila @ 2010-02-11 2:53 UTC (permalink / raw)
To: netdev
This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports
(bitmap type) which allows users to reserve ports for third-party
applications.
The reserved ports will not be used by automatic port assignments
(e.g. when calling connect() or bind() with port number 0). Explicit
port allocation behavior is unchanged.
Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
---
Documentation/networking/ip-sysctl.txt | 12 ++++++++++++
drivers/infiniband/core/cma.c | 7 ++++++-
include/net/ip.h | 6 ++++++
net/ipv4/inet_connection_sock.c | 5 +++++
net/ipv4/inet_hashtables.c | 2 ++
net/ipv4/sysctl_net_ipv4.c | 7 +++++++
net/ipv4/udp.c | 3 ++-
net/sctp/socket.c | 2 ++
8 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 2dc7a1d..23be7a4 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -564,6 +564,18 @@ ip_local_port_range - 2 INTEGERS
(i.e. by default) range 1024-4999 is enough to issue up to
2000 connections per second to systems supporting timestamps.
+ip_local_reserved_ports - BITMAP of 65536 ports
+ Specify the ports which are reserved for known third-party
+ applications. These ports will not be used by automatic port assignments
+ (e.g. when calling connect() or bind() with port number 0). Explicit
+ port allocation behavior is unchanged.
+
+ Ports are reserved by writing positive numbers to this proc entry and
+ released by writing negative numbers (e.g. writing 8080 reserves port
+ 8080; writing -8080 makes it available for automatic assignment again).
+
+ Default: Empty
+
ip_nonlocal_bind - BOOLEAN
If set, allows processes to bind() to non-local IP addresses,
which can be quite useful - but may break some applications.
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index cc9b594..8248fc6 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1979,6 +1979,8 @@ retry:
/* FIXME: add proper port randomization per like inet_csk_get_port */
do {
ret = idr_get_new_above(ps, bind_list, next_port, &port);
+ if (inet_is_reserved_local_port(port))
+ ret = -EAGAIN;
} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
if (ret)
@@ -2997,10 +2999,13 @@ static int __init cma_init(void)
{
int ret, low, high, remaining;
- get_random_bytes(&next_port, sizeof next_port);
inet_get_local_port_range(&low, &high);
+again:
+ get_random_bytes(&next_port, sizeof next_port);
remaining = (high - low) + 1;
next_port = ((unsigned int) next_port % remaining) + low;
+ if (inet_is_reserved_local_port(next_port))
+ goto again;
cma_wq = create_singlethread_workqueue("rdma_cm");
if (!cma_wq)
diff --git a/include/net/ip.h b/include/net/ip.h
index fb63371..ada8589 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -184,6 +184,12 @@ extern struct local_ports {
} sysctl_local_ports;
extern void inet_get_local_port_range(int *low, int *high);
+extern unsigned long sysctl_local_reserved_ports[65536/8/sizeof(unsigned long)];
+static inline int inet_is_reserved_local_port(int port)
+{
+ return test_bit(port, sysctl_local_reserved_ports);
+}
+
extern int sysctl_ip_default_ttl;
extern int sysctl_ip_nonlocal_bind;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8da6429..febfc6c 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,6 +37,8 @@ struct local_ports sysctl_local_ports __read_mostly = {
.range = { 32768, 61000 },
};
+unsigned long sysctl_local_reserved_ports[65536/BITS_PER_LONG];
+
void inet_get_local_port_range(int *low, int *high)
{
unsigned seq;
@@ -108,6 +110,8 @@ again:
smallest_size = -1;
do {
+ if (inet_is_reserved_local_port(rover))
+ goto next_nolock;
head = &hashinfo->bhash[inet_bhashfn(net, rover,
hashinfo->bhash_size)];
spin_lock(&head->lock);
@@ -130,6 +134,7 @@ again:
break;
next:
spin_unlock(&head->lock);
+ next_nolock:
if (++rover > high)
rover = low;
} while (--remaining > 0);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2b79377..d3e160a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
local_bh_disable();
for (i = 1; i <= remaining; i++) {
port = low + (i + offset) % remaining;
+ if (inet_is_reserved_local_port(port))
+ continue;
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock(&head->lock);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e3712c..48ca149 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -298,6 +298,13 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644,
.proc_handler = ipv4_local_port_range,
},
+ {
+ .procname = "ip_local_reserved_ports",
+ .data = sysctl_local_reserved_ports,
+ .maxlen = 65536,
+ .mode = 0644,
+ .proc_handler = proc_dobitmap,
+ },
#ifdef CONFIG_IP_MULTICAST
{
.procname = "igmp_max_memberships",
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4f7d212..705e032 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -232,7 +232,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
*/
do {
if (low <= snum && snum <= high &&
- !test_bit(snum >> udptable->log, bitmap))
+ !test_bit(snum >> udptable->log, bitmap) &&
+ !inet_is_reserved_local_port(snum))
goto found;
snum += rand;
} while (snum != first);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f6d1e59..1f839d0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5432,6 +5432,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
rover++;
if ((rover < low) || (rover > high))
rover = low;
+ if (inet_is_reserved_local_port(rover))
+ continue;
index = sctp_phashfn(rover);
head = &sctp_port_hashtable[index];
sctp_spin_lock(&head->lock);
--
1.5.6.5
^ permalink raw reply related [flat|nested] 11+ messages in thread
end of thread, other threads:[~2010-02-15 12:44 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-11 2:09 [net-next PATCH v3 0/3] net: reserve ports for applications using fixed port numbers Octavian Purdila
2010-02-11 2:09 ` [net-next PATCH v3 1/3] sysctl: refactor integer handling proc code Octavian Purdila
2010-02-11 2:09 ` [net-next PATCH v3 2/3] sysctl: add proc_dobitmap Octavian Purdila
2010-02-11 2:09 ` [net-next PATCH v3 3/3] net: reserve ports for applications using fixed port numbers Octavian Purdila
2010-02-15 12:36 ` [net-next PATCH v3 0/3] " Cong Wang
2010-02-15 12:39 ` Cong Wang
2010-02-15 12:37 ` Octavian Purdila
2010-02-15 12:48 ` Cong Wang
2010-02-11 2:53 [net-next PATCH v3 3/3] " Octavian Purdila
2010-02-11 6:12 ` Eric Dumazet
2010-02-11 6:14 ` Eric Dumazet
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.