All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next v4 0/2] RDS: TCP: tunable socket buffer parameters
@ 2016-03-16 18:38 Sowmini Varadhan
  2016-03-16 18:38 ` [PATCH net-next v4 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket Sowmini Varadhan
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Sowmini Varadhan @ 2016-03-16 18:38 UTC (permalink / raw)
  To: netdev; +Cc: davem, sowmini.varadhan, santosh.shilimkar, hannes, eric.dumazet

Patch 1 uses sysctl to create tunable socket buffer size parameters.

Patch 2 removes an unused constant.

v2: use sysctl
v3: review comments from Santosh Shilimkar, Eric Dumazet
v4: review comments from Hannes Sowa

Sowmini Varadhan (2):
  RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
  RDS: TCP: Remove unused constant

 net/rds/tcp.c |  145 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 134 insertions(+), 11 deletions(-)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH net-next v4 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
  2016-03-16 18:38 [PATCH net-next v4 0/2] RDS: TCP: tunable socket buffer parameters Sowmini Varadhan
@ 2016-03-16 18:38 ` Sowmini Varadhan
  2016-03-16 18:38 ` [PATCH net-next v4 2/2] RDS: TCP: Remove unused constant Sowmini Varadhan
  2016-03-19  2:26 ` [PATCH net-next v4 0/2] RDS: TCP: tunable socket buffer parameters David Miller
  2 siblings, 0 replies; 4+ messages in thread
From: Sowmini Varadhan @ 2016-03-16 18:38 UTC (permalink / raw)
  To: netdev; +Cc: davem, sowmini.varadhan, santosh.shilimkar, hannes, eric.dumazet

Add per-net sysctl tunables to set the size of sndbuf and
rcvbuf on the kernel tcp socket.

The tunables are added at /proc/sys/net/rds/tcp/rds_tcp_sndbuf
and /proc/sys/net/rds/tcp/rds_tcp_rcvbuf.

These values must be set before accept() or connect(),
and there may be an arbitrary number of existing rds-tcp
sockets when the tunable is modified. To make sure that all
connections in the netns pick up the same value for the tunable,
we reset existing rds-tcp connections in the netns, so that
they can reconnect with the new parameters.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
v2: use sysctl instead of module param. Tunables are now per netns,
    and can be dynamically modified without restarting all namespaces.
v3: review comments from Santosh Shilimkar. Sockbuf size comments from
    Eric Dumazet
v4: review comments from Hannes Sowa

 net/rds/tcp.c |  145 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 135 insertions(+), 10 deletions(-)

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index ad60299..3802785 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -54,6 +54,35 @@ static struct kmem_cache *rds_tcp_conn_slab;
 
 #define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024)
 
+static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *fpos);
+
+int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF;
+int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF;
+
+static struct ctl_table rds_tcp_sysctl_table[] = {
+#define	RDS_TCP_SNDBUF	0
+	{
+		.procname       = "rds_tcp_sndbuf",
+		/* data is per-net pointer */
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = rds_tcp_skbuf_handler,
+		.extra1		= &rds_tcp_min_sndbuf,
+	},
+#define	RDS_TCP_RCVBUF	1
+	{
+		.procname       = "rds_tcp_rcvbuf",
+		/* data is per-net pointer */
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = rds_tcp_skbuf_handler,
+		.extra1		= &rds_tcp_min_rcvbuf,
+	},
+	{ }
+};
+
 /* doing it this way avoids calling tcp_sk() */
 void rds_tcp_nonagle(struct socket *sock)
 {
@@ -66,15 +95,6 @@ void rds_tcp_nonagle(struct socket *sock)
 	set_fs(oldfs);
 }
 
-/* All module specific customizations to the RDS-TCP socket should be done in
- * rds_tcp_tune() and applied after socket creation. In general these
- * customizations should be tunable via module_param()
- */
-void rds_tcp_tune(struct socket *sock)
-{
-	rds_tcp_nonagle(sock);
-}
-
 u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
 {
 	return tcp_sk(tc->t_sock->sk)->snd_nxt;
@@ -272,8 +292,34 @@ static int rds_tcp_netid;
 struct rds_tcp_net {
 	struct socket *rds_tcp_listen_sock;
 	struct work_struct rds_tcp_accept_w;
+	struct ctl_table_header *rds_tcp_sysctl;
+	struct ctl_table *ctl_table;
+	int sndbuf_size;
+	int rcvbuf_size;
 };
 
+/* Module-specific RDS-TCP socket customizations live here and are applied
+ * after socket creation; per-net {snd,rcv}buf sizes > 0 are set and locked.
+ */
+void rds_tcp_tune(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct net *net = sock_net(sk);
+	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+	rds_tcp_nonagle(sock);
+	lock_sock(sk);
+	if (rtn->sndbuf_size > 0) {
+		sk->sk_sndbuf = rtn->sndbuf_size;
+		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+	}
+	if (rtn->rcvbuf_size > 0) {
+		sk->sk_rcvbuf = rtn->rcvbuf_size;
+		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+	}
+	release_sock(sk);
+}
+
 static void rds_tcp_accept_worker(struct work_struct *work)
 {
 	struct rds_tcp_net *rtn = container_of(work,
@@ -295,20 +341,60 @@ void rds_tcp_accept_work(struct sock *sk)
 static __net_init int rds_tcp_init_net(struct net *net)
 {
 	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+	struct ctl_table *tbl;
+	int err = 0;
 
+	memset(rtn, 0, sizeof(*rtn));
+
+	/* {snd, rcv}buf_size default to 0, which implies we let the
+	 * stack pick the value, and permit auto-tuning of buffer size.
+	 */
+	if (net == &init_net) {
+		tbl = rds_tcp_sysctl_table;
+	} else {
+		tbl = kmemdup(rds_tcp_sysctl_table,
+			      sizeof(rds_tcp_sysctl_table), GFP_KERNEL);
+		if (!tbl) {
+			pr_warn("could not allocate sysctl table\n");
+			return -ENOMEM;
+		}
+		rtn->ctl_table = tbl;
+	}
+	tbl[RDS_TCP_SNDBUF].data = &rtn->sndbuf_size;
+	tbl[RDS_TCP_RCVBUF].data = &rtn->rcvbuf_size;
+	rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp", tbl);
+	if (!rtn->rds_tcp_sysctl) {
+		pr_warn("could not register sysctl\n");
+		err = -ENOMEM;
+		goto fail;
+	}
 	rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
 	if (!rtn->rds_tcp_listen_sock) {
 		pr_warn("could not set up listen sock\n");
-		return -EAFNOSUPPORT;
+		unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
+		rtn->rds_tcp_sysctl = NULL;
+		err = -EAFNOSUPPORT;
+		goto fail;
 	}
 	INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
 	return 0;
+
+fail:
+	if (net != &init_net)
+		kfree(tbl);
+	return err;
 }
 
 static void __net_exit rds_tcp_exit_net(struct net *net)
 {
 	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
 
+	if (rtn->rds_tcp_sysctl)
+		unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
+
+	if (net != &init_net && rtn->ctl_table)
+		kfree(rtn->ctl_table);
+
 	/* If rds_tcp_exit_net() is called as a result of netns deletion,
 	 * the rds_tcp_kill_sock() device notifier would already have cleaned
 	 * up the listen socket, thus there is no work to do in this function.
@@ -383,6 +469,45 @@ static struct notifier_block rds_tcp_dev_notifier = {
 	.priority = -10, /* must be called after other network notifiers */
 };
 
+/* When sysctl is used to modify some kernel socket parameters, this
+ * function resets the RDS connections in that netns so that we can
+ * restart with new parameters.  The assumption is that such reset
+ * events are few and far-between.
+ */
+static void rds_tcp_sysctl_reset(struct net *net)
+{
+	struct rds_tcp_connection *tc, *_tc;
+
+	spin_lock_irq(&rds_tcp_conn_lock);
+	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
+		struct net *c_net = read_pnet(&tc->conn->c_net);
+
+		if (net != c_net || !tc->t_sock)
+			continue;
+
+		rds_conn_drop(tc->conn); /* reconnect with new parameters */
+	}
+	spin_unlock_irq(&rds_tcp_conn_lock);
+}
+
+static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *fpos)
+{
+	struct net *net = current->nsproxy->net_ns;
+	int err;
+
+	err = proc_dointvec_minmax(ctl, write, buffer, lenp, fpos);
+	if (err < 0) {
+		pr_warn("Invalid input. Must be >= %d\n",
+			*(int *)(ctl->extra1));
+		return err;
+	}
+	if (write)
+		rds_tcp_sysctl_reset(net);
+	return 0;
+}
+
 static void rds_tcp_exit(void)
 {
 	rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH net-next v4 2/2] RDS: TCP: Remove unused constant
  2016-03-16 18:38 [PATCH net-next v4 0/2] RDS: TCP: tunable socket buffer parameters Sowmini Varadhan
  2016-03-16 18:38 ` [PATCH net-next v4 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket Sowmini Varadhan
@ 2016-03-16 18:38 ` Sowmini Varadhan
  2016-03-19  2:26 ` [PATCH net-next v4 0/2] RDS: TCP: tunable socket buffer parameters David Miller
  2 siblings, 0 replies; 4+ messages in thread
From: Sowmini Varadhan @ 2016-03-16 18:38 UTC (permalink / raw)
  To: netdev; +Cc: davem, sowmini.varadhan, santosh.shilimkar, hannes, eric.dumazet

RDS_TCP_DEFAULT_BUFSIZE has been unused since commit 1edd6a14d24f
("RDS-TCP: Do not bloat sndbuf/rcvbuf in rds_tcp_tune").

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
---
v3: review comments from Santosh Shilimkar

 net/rds/tcp.c |    2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 3802785..61ed2a8 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -52,8 +52,6 @@ static LIST_HEAD(rds_tcp_conn_list);
 
 static struct kmem_cache *rds_tcp_conn_slab;
 
-#define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024)
-
 static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
 				 void __user *buffer, size_t *lenp,
 				 loff_t *fpos);
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next v4 0/2] RDS: TCP: tunable socket buffer parameters
  2016-03-16 18:38 [PATCH net-next v4 0/2] RDS: TCP: tunable socket buffer parameters Sowmini Varadhan
  2016-03-16 18:38 ` [PATCH net-next v4 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket Sowmini Varadhan
  2016-03-16 18:38 ` [PATCH net-next v4 2/2] RDS: TCP: Remove unused constant Sowmini Varadhan
@ 2016-03-19  2:26 ` David Miller
  2 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2016-03-19  2:26 UTC (permalink / raw)
  To: sowmini.varadhan; +Cc: netdev, santosh.shilimkar, hannes, eric.dumazet

From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Wed, 16 Mar 2016 11:38:11 -0700

> Patch 1 uses sysctl to create tunable socket buffer size parameters.
> 
> Patch 2 removes an unused constant.
> 
> v2: use sysctl
> v3: review comments from Santosh Shilimkar, Eric Dumazet
> v4: review comments from Hannes Sowa

Series applied, thanks.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-03-19  2:26 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-16 18:38 [PATCH net-next v4 0/2] RDS: TCP: tunable socket buffer parameters Sowmini Varadhan
2016-03-16 18:38 ` [PATCH net-next v4 1/2] RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket Sowmini Varadhan
2016-03-16 18:38 ` [PATCH net-next v4 2/2] RDS: TCP: Remove unused constant Sowmini Varadhan
2016-03-19  2:26 ` [PATCH net-next v4 0/2] RDS: TCP: tunable socket buffer parameters David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.