All of lore.kernel.org
 help / color / mirror / Atom feed
* Add a SOCK_DESTROY operation to close sockets from userspace
@ 2015-11-18  1:43 Lorenzo Colitti
  2015-11-18  1:43 ` [PATCH 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
                   ` (6 more replies)
  0 siblings, 7 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-18  1:43 UTC (permalink / raw)
  To: netdev; +Cc: edumazet, ek, maze, dtor

This patch series adds the ability for a privileged process to
destroy sockets belonging to other userspace processes via the
sock_diag interface, and implements that for TCP sockets.

This functionality is needed on laptops and mobile hosts to
ensure that network switches / disconnects do not result in
applications being blocked for long periods of time (minutes) in
read or connect calls on TCP sockets that will never succeed
because the IP address they are bound to is gone. Closing the
sockets in the protocol layer causes these calls to fail fast and
allows applications to reconnect on another network.

For many years Android kernels have done this via an out-of-tree
SIOCKILLADDR ioctl that is called when networks disconnect, but
this solution is cleaner, more robust and more flexible. The
system can iterate over all connections on the deleted IP address
and close all of them. But it can also close all sockets opened
by a given process on a given network, for example if the user
has restricted that process from using that network, or if a
secure network such as a VPN is now being applied to the
application and thus previously-established connections are
blackholed.

The patch series only implements SOCK_DESTROY for TCP sockets,
but the mechanism can be extended to any protocol family that
supports the sock_diag interface.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* [PATCH 1/4] net: diag: split inet_diag_dump_one_icsk into two
  2015-11-18  1:43 Add a SOCK_DESTROY operation to close sockets from userspace Lorenzo Colitti
@ 2015-11-18  1:43 ` Lorenzo Colitti
  2015-11-18  1:43 ` [PATCH 2/4] net: diag: Add the ability to destroy a socket from userspace Lorenzo Colitti
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-18  1:43 UTC (permalink / raw)
  To: netdev; +Cc: edumazet, ek, maze, dtor, Lorenzo Colitti

Currently, inet_diag_dump_one_icsk finds a socket and then dumps
its information to userspace. Split it into a part that finds the
socket and a part that dumps the information.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/linux/inet_diag.h |  5 +++++
 net/ipv4/inet_diag.c      | 41 ++++++++++++++++++++++++++---------------
 2 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 0e707f0..e7032f04 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -3,6 +3,7 @@
 
 #include <uapi/linux/inet_diag.h>
 
+struct net;
 struct sock;
 struct inet_hashinfo;
 struct nlattr;
@@ -41,6 +42,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 			    struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 			    const struct inet_diag_req_v2 *req);
 
+struct sock *inet_diag_find_one_icsk(struct net *net,
+				     struct inet_hashinfo *hashinfo,
+				     const struct inet_diag_req_v2 *req);
+
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ab9f8a6..a4c7ef4 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 				  nlmsg_flags, unlh);
 }
 
-int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
-			    struct sk_buff *in_skb,
-			    const struct nlmsghdr *nlh,
-			    const struct inet_diag_req_v2 *req)
+struct sock *inet_diag_find_one_icsk(struct net *net,
+				     struct inet_hashinfo *hashinfo,
+				     const struct inet_diag_req_v2 *req)
 {
-	struct net *net = sock_net(in_skb->sk);
-	struct sk_buff *rep;
 	struct sock *sk;
-	int err;
 
-	err = -EINVAL;
 	if (req->sdiag_family == AF_INET)
 		sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
 				 req->id.idiag_dport, req->id.idiag_src[0],
@@ -375,15 +370,32 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 				  req->id.idiag_if);
 #endif
 	else
-		goto out_nosk;
+		return ERR_PTR(-EINVAL);
 
-	err = -ENOENT;
 	if (!sk)
-		goto out_nosk;
+		return ERR_PTR(-ENOENT);
 
-	err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
-	if (err)
-		goto out;
+	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
+		sock_gen_put(sk);
+		return ERR_PTR(-ENOENT);
+	}
+
+	return sk;
+}
+
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
+			    struct sk_buff *in_skb,
+			    const struct nlmsghdr *nlh,
+			    const struct inet_diag_req_v2 *req)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sk_buff *rep;
+	struct sock *sk;
+	int err;
+
+	sk = inet_diag_find_one_icsk(net, hashinfo, req);
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
 
 	rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
 	if (!rep) {
@@ -409,7 +421,6 @@ out:
 	if (sk)
 		sock_gen_put(sk);
 
-out_nosk:
 	return err;
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH 2/4] net: diag: Add the ability to destroy a socket from userspace.
  2015-11-18  1:43 Add a SOCK_DESTROY operation to close sockets from userspace Lorenzo Colitti
  2015-11-18  1:43 ` [PATCH 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
@ 2015-11-18  1:43 ` Lorenzo Colitti
  2015-11-18  1:43 ` [PATCH 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-18  1:43 UTC (permalink / raw)
  To: netdev; +Cc: edumazet, ek, maze, dtor, Lorenzo Colitti

This allows a privileged userspace process, such as a connection
manager or system administration tool, to close sockets belonging
to other apps when the network they were established on has
disconnected. It is needed on laptops and mobile hosts to ensure
that network switches / disconnects do not result in applications
being blocked for long periods of time (minutes) in read or
connect calls on TCP sockets that will never succeed because the
IP address they are bound to is no longer on the system. Closing
the sockets causes these calls to fail fast and allows the apps
to reconnect on another network.

For many years Android kernels have supported this via an
out-of-tree SIOCKILLADDR ioctl that is called on every
RTM_DELADDR event, but this solution is cleaner, more robust and
more flexible: the connection manager can iterate over all
connections on the deleted IP address and close all of them. It
can also be used to close all sockets opened by a given app
process, for example if the user has restricted that app from
using the network.

This patch adds a SOCK_DESTROY operation and a destroy function
pointer to sock_diag_handler. It does not include any
implementation code.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/linux/sock_diag.h      |  1 +
 include/uapi/linux/sock_diag.h |  1 +
 net/core/sock_diag.c           | 11 ++++++++---
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index fddebc6..e15e8e2 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -15,6 +15,7 @@ struct sock_diag_handler {
 	__u8 family;
 	int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
 	int (*get_info)(struct sk_buff *skb, struct sock *sk);
+	int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh);
 };
 
 int sock_diag_register(const struct sock_diag_handler *h);
diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
index 49230d3..bae2d80 100644
--- a/include/uapi/linux/sock_diag.h
+++ b/include/uapi/linux/sock_diag.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 
 #define SOCK_DIAG_BY_FAMILY 20
+#define SOCK_DESTROY 21
 
 struct sock_diag_req {
 	__u8	sdiag_family;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 0c1d58d..64e3d81 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld)
 }
 EXPORT_SYMBOL_GPL(sock_diag_unregister);
 
-static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	int err;
 	struct sock_diag_req *req = nlmsg_data(nlh);
@@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	hndl = sock_diag_handlers[req->sdiag_family];
 	if (hndl == NULL)
 		err = -ENOENT;
-	else
+	else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
 		err = hndl->dump(skb, nlh);
+	else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
+		err = hndl->destroy(skb, nlh);
+	else
+		err = -EOPNOTSUPP;
 	mutex_unlock(&sock_diag_table_mutex);
 
 	return err;
@@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 		return ret;
 	case SOCK_DIAG_BY_FAMILY:
-		return __sock_diag_rcv_msg(skb, nlh);
+	case SOCK_DESTROY:
+		return __sock_diag_cmd(skb, nlh);
 	default:
 		return -EINVAL;
 	}
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH 3/4] net: diag: Support SOCK_DESTROY for inet sockets.
  2015-11-18  1:43 Add a SOCK_DESTROY operation to close sockets from userspace Lorenzo Colitti
  2015-11-18  1:43 ` [PATCH 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
  2015-11-18  1:43 ` [PATCH 2/4] net: diag: Add the ability to destroy a socket from userspace Lorenzo Colitti
@ 2015-11-18  1:43 ` Lorenzo Colitti
  2015-11-18  1:43 ` [PATCH 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-18  1:43 UTC (permalink / raw)
  To: netdev; +Cc: edumazet, ek, maze, dtor, Lorenzo Colitti

This passes the SOCK_DESTROY operation to the underlying
protocol diag handler, or returns -EINVAL if that handler does
not define a destroy operation.

Most of this patch is just renaming functions. This is not
strictly necessary, but it would be fairly counterintuitive to
have the code to destroy inet sockets be in a function whose name
starts with inet_diag_get.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/linux/inet_diag.h |  4 ++++
 net/ipv4/inet_diag.c      | 23 +++++++++++++++--------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index e7032f04..7c27fa1 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -24,6 +24,10 @@ struct inet_diag_handler {
 	void		(*idiag_get_info)(struct sock *sk,
 					  struct inet_diag_msg *r,
 					  void *info);
+
+	int		(*destroy)(struct sk_buff *in_skb,
+				   const struct inet_diag_req_v2 *req);
+
 	__u16		idiag_type;
 	__u16		idiag_info_size;
 };
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index a4c7ef4..cacbd8b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -425,7 +425,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 
-static int inet_diag_get_exact(struct sk_buff *in_skb,
+static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
 			       const struct inet_diag_req_v2 *req)
 {
@@ -435,8 +435,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 	handler = inet_diag_lock_handler(req->sdiag_protocol);
 	if (IS_ERR(handler))
 		err = PTR_ERR(handler);
-	else
+	else if (cmd == SOCK_DIAG_BY_FAMILY)
 		err = handler->dump_one(in_skb, nlh, req);
+	else if (cmd == SOCK_DESTROY && handler->destroy)
+		err = handler->destroy(in_skb, req);
+	else
+		err = -EOPNOTSUPP;
 	inet_diag_unlock_handler(handler);
 
 	return err;
@@ -949,7 +953,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
 	req.idiag_states = rc->idiag_states;
 	req.id = rc->id;
 
-	return inet_diag_get_exact(in_skb, nlh, &req);
+	return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
 }
 
 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -983,7 +987,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 	return inet_diag_get_exact_compat(skb, nlh);
 }
 
-static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
 {
 	int hdrlen = sizeof(struct inet_diag_req_v2);
 	struct net *net = sock_net(skb->sk);
@@ -991,7 +995,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 	if (nlmsg_len(h) < hdrlen)
 		return -EINVAL;
 
-	if (h->nlmsg_flags & NLM_F_DUMP) {
+	if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
+	    h->nlmsg_flags & NLM_F_DUMP) {
 		if (nlmsg_attrlen(h, hdrlen)) {
 			struct nlattr *attr;
 
@@ -1010,7 +1015,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 		}
 	}
 
-	return inet_diag_get_exact(skb, h, nlmsg_data(h));
+	return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
 }
 
 static
@@ -1061,14 +1066,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
 
 static const struct sock_diag_handler inet_diag_handler = {
 	.family = AF_INET,
-	.dump = inet_diag_handler_dump,
+	.dump = inet_diag_handler_cmd,
 	.get_info = inet_diag_handler_get_info,
+	.destroy = inet_diag_handler_cmd,
 };
 
 static const struct sock_diag_handler inet6_diag_handler = {
 	.family = AF_INET6,
-	.dump = inet_diag_handler_dump,
+	.dump = inet_diag_handler_cmd,
 	.get_info = inet_diag_handler_get_info,
+	.destroy = inet_diag_handler_cmd,
 };
 
 int inet_diag_register(const struct inet_diag_handler *h)
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH 4/4] net: diag: Support destroying TCP sockets.
  2015-11-18  1:43 Add a SOCK_DESTROY operation to close sockets from userspace Lorenzo Colitti
                   ` (2 preceding siblings ...)
  2015-11-18  1:43 ` [PATCH 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
@ 2015-11-18  1:43 ` Lorenzo Colitti
  2015-11-18  3:43   ` kbuild test robot
  2015-11-18  4:25   ` kbuild test robot
  2015-11-18  3:27 ` Add a SOCK_DESTROY operation to close sockets from userspace Stephen Hemminger
                   ` (2 subsequent siblings)
  6 siblings, 2 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-18  1:43 UTC (permalink / raw)
  To: netdev; +Cc: edumazet, ek, maze, dtor, Lorenzo Colitti

This implements SOCK_DESTROY for TCP sockets. It causes all
blocking calls on the socket to fail fast with ETIMEDOUT, which
is the same thing that would eventually happen if the socket was
left stuck on an IP address that the host no longer has.

Change-Id: Icce9db8b832e84c9b6477e5a901c927b942f2bb9
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 net/ipv4/tcp_diag.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index b316040..867159c 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/net.h>
 #include <linux/inet_diag.h>
 
 #include <linux/tcp.h>
@@ -46,12 +47,52 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 	return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
 }
 
+static int tcp_diag_destroy(struct sk_buff *in_skb,
+			    const struct inet_diag_req_v2 *req)
+{
+	struct sock *sk;
+	struct net *net = sock_net(in_skb->sk);
+
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
+	sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
+
+	if (!sk_fullsock(sk)) {
+		sock_gen_put(sk);
+		return -EOPNOTSUPP;
+	}
+
+	/* Don't race with userspace socket closes such as tcp_close. */
+	lock_sock(sk);
+
+	/* Don't race with BH socket closes such as inet_csk_listen_stop. */
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		smp_wmb();  /* Be consistent with tcp_reset */
+		sk->sk_err = ETIMEDOUT;
+		sk->sk_error_report(sk);
+		tcp_done(sk);
+	}
+
+	bh_unlock_sock(sk);
+	local_bh_enable();
+	release_sock(sk);
+	sock_put(sk);
+	return 0;
+}
+
 static const struct inet_diag_handler tcp_diag_handler = {
 	.dump		 = tcp_diag_dump,
 	.dump_one	 = tcp_diag_dump_one,
 	.idiag_get_info	 = tcp_diag_get_info,
 	.idiag_type	 = IPPROTO_TCP,
 	.idiag_info_size = sizeof(struct tcp_info),
+	.destroy	 = tcp_diag_destroy,
 };
 
 static int __init tcp_diag_init(void)
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18  1:43 Add a SOCK_DESTROY operation to close sockets from userspace Lorenzo Colitti
                   ` (3 preceding siblings ...)
  2015-11-18  1:43 ` [PATCH 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
@ 2015-11-18  3:27 ` Stephen Hemminger
       [not found]   ` <CAAedzxqiXnKzCyevNipNnXEc_+TEjnVphLfseTo4ykZ8SAVt_w@mail.gmail.com>
                     ` (2 more replies)
  2015-11-18  3:56 ` Tom Herbert
  2015-11-18 10:12 ` Hannes Frederic Sowa
  6 siblings, 3 replies; 110+ messages in thread
From: Stephen Hemminger @ 2015-11-18  3:27 UTC (permalink / raw)
  To: Lorenzo Colitti; +Cc: netdev, edumazet, ek, maze, dtor

On Wed, 18 Nov 2015 10:43:40 +0900
Lorenzo Colitti <lorenzo@google.com> wrote:

> This patch series adds the ability for a privileged process to
> destroy sockets belonging to other userspace processes via the
> sock_diag interface, and implements that for TCP sockets.
> 
> This functionality is needed on laptops and mobile hosts to
> ensure that network switches / disconnects do not result in
> applications being blocked for long periods of time (minutes) in
> read or connect calls on TCP sockets that will never succeed
> because the IP address they are bound to is gone. Closing the
> sockets in the protocol layer causes these calls to fail fast and
> allows applications to reconnect on another network.
> 
> For many years Android kernels have done this via an out-of-tree
> SIOCKILLADDR ioctl that is called when networks disconnect, but
> this solution is cleaner, more robust and more flexible. The
> system can iterate over all connections on the deleted IP address
> and close all of them. But it can also close all sockets opened
> by a given process on a given network, for example if the user
> has restricted that process from using that network, or if a
> secure network such as a VPN is now being applied to the
> application and thus previously-established connections are
> blackholed.
> 
> The patch series only implements SOCK_DESTROY for TCP sockets,
> but the mechanism can be extended to any protocol family that
> supports the sock_diag interface.
> 

I understand why you might want this, but it smells like the same
kind of problems that the "forced unmount" patch had which eventually
led to it not being accepted in mainline.  Lots of corner
cases and race conditions waiting to blow up.

Look at the issues that the multi-thread socket close has.
This looks worse.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
       [not found]   ` <CAAedzxqiXnKzCyevNipNnXEc_+TEjnVphLfseTo4ykZ8SAVt_w@mail.gmail.com>
@ 2015-11-18  3:36     ` Erik Kline
  0 siblings, 0 replies; 110+ messages in thread
From: Erik Kline @ 2015-11-18  3:36 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Lorenzo Colitti, netdev, Eric Dumazet, Maciej Żenczykowski, dtor

On 18 November 2015 at 12:34, Erik Kline <ek@google.com> wrote:
>
>
> On 18 November 2015 at 12:27, Stephen Hemminger <stephen@networkplumber.org>
> wrote:
>>
>> On Wed, 18 Nov 2015 10:43:40 +0900
>> Lorenzo Colitti <lorenzo@google.com> wrote:
>>
>> > This patch series adds the ability for a privileged process to
>> > destroy sockets belonging to other userspace processes via the
>> > sock_diag interface, and implements that for TCP sockets.
>> >
>> > This functionality is needed on laptops and mobile hosts to
>> > ensure that network switches / disconnects do not result in
>> > applications being blocked for long periods of time (minutes) in
>> > read or connect calls on TCP sockets that will never succeed
>> > because the IP address they are bound to is gone. Closing the
>> > sockets in the protocol layer causes these calls to fail fast and
>> > allows applications to reconnect on another network.
>> >
>> > For many years Android kernels have done this via an out-of-tree
>> > SIOCKILLADDR ioctl that is called when networks disconnect, but
>> > this solution is cleaner, more robust and more flexible. The
>> > system can iterate over all connections on the deleted IP address
>> > and close all of them. But it can also close all sockets opened
>> > by a given process on a given network, for example if the user
>> > has restricted that process from using that network, or if a
>> > secure network such as a VPN is now being applied to the
>> > application and thus previously-established connections are
>> > blackholed.
>> >
>> > The patch series only implements SOCK_DESTROY for TCP sockets,
>> > but the mechanism can be extended to any protocol family that
>> > supports the sock_diag interface.
>> >
>>
>> I understand why you might want this, but it smells like the same
>> kind of problems that the "forced unmount" patch had which eventually
>> led to it not being accepted in mainline.  Lots of corner
>> cases and race conditions waiting to blow up.
>>
>> Look at the issues that the multi-thread socket close has.
>> This looks worse.
>
>
> I'm unsure of the specific issue to which you refer with "multi-thread
> socket close".  This is basically just a user-space forced tcp_close(),
> leaving the file descriptor still valid in the user context for the
> application to manage (alternatively: it aims to be the same as if a
> correctly crafted TCP RST had arrived).

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH 4/4] net: diag: Support destroying TCP sockets.
  2015-11-18  1:43 ` [PATCH 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
@ 2015-11-18  3:43   ` kbuild test robot
  2015-11-18  4:46     ` Lorenzo Colitti
  2015-11-18  4:25   ` kbuild test robot
  1 sibling, 1 reply; 110+ messages in thread
From: kbuild test robot @ 2015-11-18  3:43 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: kbuild-all, netdev, edumazet, ek, maze, dtor, Lorenzo Colitti

[-- Attachment #1: Type: text/plain, Size: 637 bytes --]

Hi Lorenzo,

[auto build test ERROR on net/master]
[also build test ERROR on v4.4-rc1 next-20151117]

url:    https://github.com/0day-ci/linux/commits/Lorenzo-Colitti/net-diag-split-inet_diag_dump_one_icsk-into-two/20151118-094638
config: x86_64-rhel (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

>> ERROR: "inet_diag_find_one_icsk" [net/ipv4/tcp_diag.ko] undefined!

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 35596 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18  1:43 Add a SOCK_DESTROY operation to close sockets from userspace Lorenzo Colitti
                   ` (4 preceding siblings ...)
  2015-11-18  3:27 ` Add a SOCK_DESTROY operation to close sockets from userspace Stephen Hemminger
@ 2015-11-18  3:56 ` Tom Herbert
  2015-11-18  4:23   ` Lorenzo Colitti
  2015-11-18 10:12 ` Hannes Frederic Sowa
  6 siblings, 1 reply; 110+ messages in thread
From: Tom Herbert @ 2015-11-18  3:56 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: Linux Kernel Network Developers, Eric Dumazet, Erik Kline, maze, dtor

On Tue, Nov 17, 2015 at 5:43 PM, Lorenzo Colitti <lorenzo@google.com> wrote:
> This patch series adds the ability for a privileged process to
> destroy sockets belonging to other userspace processes via the
> sock_diag interface, and implements that for TCP sockets.
>
> This functionality is needed on laptops and mobile hosts to
> ensure that network switches / disconnects do not result in
> applications being blocked for long periods of time (minutes) in
> read or connect calls on TCP sockets that will never succeed
> because the IP address they are bound to is gone. Closing the
> sockets in the protocol layer causes these calls to fail fast and
> allows applications to reconnect on another network.
>
> For many years Android kernels have done this via an out-of-tree
> SIOCKILLADDR ioctl that is called when networks disconnect, but
> this solution is cleaner, more robust and more flexible. The
> system can iterate over all connections on the deleted IP address
> and close all of them. But it can also close all sockets opened
> by a given process on a given network, for example if the user
> has restricted that process from using that network, or if a
> secure network such as a VPN is now being applied to the
> application and thus previously-established connections are
> blackholed.
>
> The patch series only implements SOCK_DESTROY for TCP sockets,
> but the mechanism can be extended to any protocol family that
> supports the sock_diag interface.
>
I assume that SIOCKILLADDR was restricted to only closing connections
related to add addresses going away, but SOCK_DESTROY seems to allow
arbitrarily killing connections without publicized cause. This
interface, even though it is for a privileged user, should be no more
powerful than it needs to be. Minimally, the application should get at
least get a clear error that the local host administratively killed
the connection, ETIMEDOUT does not provide that.

Tom

>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18  3:27 ` Add a SOCK_DESTROY operation to close sockets from userspace Stephen Hemminger
       [not found]   ` <CAAedzxqiXnKzCyevNipNnXEc_+TEjnVphLfseTo4ykZ8SAVt_w@mail.gmail.com>
@ 2015-11-18  3:57   ` Maciej Żenczykowski
  2015-11-18 11:56     ` David Laight
  2015-11-18  4:04   ` Eric Dumazet
  2 siblings, 1 reply; 110+ messages in thread
From: Maciej Żenczykowski @ 2015-11-18  3:57 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Lorenzo Colitti, Linux NetDev, Eric Dumazet, ek, dtor

I don't know what the right fix is...

However, speaking as an end user with laptops on wifi and/or home
gateways on dialup connections where the IP address occasionally (or
constantly) changes, I find it very frustrating that by default as IP
addresses get removed from interfaces all the related state (whether
conntrack or open connections) doesn't get cleaned up.

[side note: I realize there is tooling to do this manually from
userspace for conntrack and that there are even some gateways that
correctly make use of it.]

Sure this might not be desirable on servers (where configuration is
usually static and complex) but on most end user devices (CPE, cell
phone, laptop) - that are prone to roaming in todays world - this (or
something like this) would be super useful.

I would almost argue it should be the default (or controlled by
sysctl) - hung connections are super frustrating, and they often
prevent normal retry logic (ie. establishing a new connection) from
functioning correctly, because the kernel is just waiting for some
enormous tcp (retransmit) timeouts that only make sense if we still
own the IP, and userspace thinks everything is still ok...  If we
don't even own the IP any more often retransmits just get blackholed
so you don't even get notifications from the network of packet
delivery problems.

Something like this either needs to be implemented in kernel, or APIs
need to be provided so that network manager (or your favourite
userspace network management utility) can act on behalf of the user to
clean stuff up.

In general I'm not in favour of embedding logic like this in the
kernel since you usually get more configurability if you leave it in
userspace.

Sure you can hack something together via firewall hacks and routing
hacks or injecting tcp resets via raw sockets, but that requires a lot
of work, and still doesn't cover everything (firewall and routing
hacks won't fix idle sockets, in particular those waiting for a
message from the other side of the connection, ie. push notifications
for a cell phone).

- Maciej

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18  3:27 ` Add a SOCK_DESTROY operation to close sockets from userspace Stephen Hemminger
       [not found]   ` <CAAedzxqiXnKzCyevNipNnXEc_+TEjnVphLfseTo4ykZ8SAVt_w@mail.gmail.com>
  2015-11-18  3:57   ` Maciej Żenczykowski
@ 2015-11-18  4:04   ` Eric Dumazet
  2015-11-18 10:19     ` Hannes Frederic Sowa
  2 siblings, 1 reply; 110+ messages in thread
From: Eric Dumazet @ 2015-11-18  4:04 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Lorenzo Colitti, netdev, edumazet, ek, maze, dtor

On Tue, 2015-11-17 at 19:27 -0800, Stephen Hemminger wrote:

> I understand why you might want this, but it smells like the same
> kind of problems that the "forced unmount" patch had which eventually
> led to it not being accepted in mainline.  Lots of corner
> cases and race conditions waiting to blow up.

Well, disconnecting a TCP socket seems straightforward, once you get a
sk pointer.

Code looks good.

> 
> Look at the issues that the multi-thread socket close has.
> This looks worse.

I do not see a problem here. A RST packet has roughly same effect, and
we do process them.

Cookies are 64bits and uniquely identify a socket.

Once you make sure the request comes from a privileged user, we are
good.

This user could easily install some iptables rules to generate RST
packets anyway.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18  3:56 ` Tom Herbert
@ 2015-11-18  4:23   ` Lorenzo Colitti
  2015-11-18  4:31     ` Tom Herbert
  0 siblings, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-18  4:23 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Maciej Żenczykowski, Dmitry Torokhov

On Wed, Nov 18, 2015 at 12:56 PM, Tom Herbert <tom@herbertland.com> wrote:
>> The patch series only implements SOCK_DESTROY for TCP sockets,
>> but the mechanism can be extended to any protocol family that
>> supports the sock_diag interface.
>>
> I assume that SIOCKILLADDR was restricted to only closing connections
> related to add addresses going away, but SOCK_DESTROY seems to allow
> arbitrarily killing connections without publicized cause.

Closing connections on deleted IP addresses is the main use of
SIOCKILLADDR, but the ioctl also supports closing all connections on a
userspace-provided IP address.

However, those semantics are not enough in some cases. For example:
when a VPN client connects, for security reasons (e.g., to make it
harder for on-link attackers to attack cleartext apps), traffic from
unprivileged applications is no longer permitted on non-VPN
interfaces. All unprivileged traffic is routed over the VPN.
Currently, existing TCP connections just hang, which typically annoys
the user. It would be better if we had the ability to close those
connections with SOCK_DESTROY.

SIOCKILLADDR cannot do that because it is inappropriate to
indiscriminately kill TCP connections that are not on the VPN - if
nothing else, one of those TCP connections could be the one that is
providing the VPN itself.

> Minimally, the application should get at
> least get a clear error that the local host administratively killed
> the connection, ETIMEDOUT does not provide that.

I don't know why ETIMEDOUT was chosen (we've been running this since
2008), but I think it's because:

1. All Internet applications have to be prepared to receive ETIMEDOUT
on TCP sockets.
2. ETIMEDOUT is the error that those applications would have gotten
anyway if they had waited a few minutes.

It's certainly possible to use something other than ETIMEDOUT, though
I don't know what error code would be appropriate. Perhaps we can make
it configurable by the caller.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH 4/4] net: diag: Support destroying TCP sockets.
  2015-11-18  1:43 ` [PATCH 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
  2015-11-18  3:43   ` kbuild test robot
@ 2015-11-18  4:25   ` kbuild test robot
  1 sibling, 0 replies; 110+ messages in thread
From: kbuild test robot @ 2015-11-18  4:25 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: kbuild-all, netdev, edumazet, ek, maze, dtor, Lorenzo Colitti

[-- Attachment #1: Type: text/plain, Size: 618 bytes --]

Hi Lorenzo,

[auto build test ERROR on net/master]
[also build test ERROR on v4.4-rc1 next-20151117]

url:    https://github.com/0day-ci/linux/commits/Lorenzo-Colitti/net-diag-split-inet_diag_dump_one_icsk-into-two/20151118-094638
config: i386-allmodconfig (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

>> ERROR: "inet_diag_find_one_icsk" undefined!

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 52602 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18  4:23   ` Lorenzo Colitti
@ 2015-11-18  4:31     ` Tom Herbert
  0 siblings, 0 replies; 110+ messages in thread
From: Tom Herbert @ 2015-11-18  4:31 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Maciej Żenczykowski, Dmitry Torokhov

On Tue, Nov 17, 2015 at 8:23 PM, Lorenzo Colitti <lorenzo@google.com> wrote:
> On Wed, Nov 18, 2015 at 12:56 PM, Tom Herbert <tom@herbertland.com> wrote:
>>> The patch series only implements SOCK_DESTROY for TCP sockets,
>>> but the mechanism can be extended to any protocol family that
>>> supports the sock_diag interface.
>>>
>> I assume that SIOCKILLADDR was restricted to only closing connections
>> related to add addresses going away, but SOCK_DESTROY seems to allow
>> arbitrarily killing connections without publicized cause.
>
> Closing connections on deleted IP addresses is the main use of
> SIOCKILLADDR, but the ioctl also supports closing all connections on a
> userspace-provided IP address.
>
> However, those semantics are not enough in some cases. For example:
> when a VPN client connects, for security reasons (e.g., to make it
> harder for on-link attackers to attack cleartext apps), traffic from
> unprivileged applications is no longer permitted on non-VPN
> interfaces. All unprivileged traffic is routed over the VPN.
> Currently, existing TCP connections just hang, which typically annoys
> the user. It would be better if we had the ability to close those
> connections with SOCK_DESTROY.
>
> SIOCKILLADDR cannot do that because it is inappropriate to
> indiscriminately kill TCP connections that are not on the VPN - if
> nothing else, one of those TCP connections could be the one that is
> providing the VPN itself.
>
>> Minimally, the application should get at
>> least get a clear error that the local host administratively killed
>> the connection, ETIMEDOUT does not provide that.
>
> I don't know why ETIMEDOUT was chosen (we've been running this since
> 2008), but I think it's because:
>
> 1. All Internet applications have to be prepared to receive ETIMEDOUT
> on TCP sockets.
> 2. ETIMEDOUT is the error that those applications would have gotten
> anyway if they had waited a few minutes.
>
> It's certainly possible to use something other than ETIMEDOUT, though
> I don't know what error code would be appropriate. Perhaps we can make
> it configurable by the caller.

How about ENETRESET?

Also, it would be nice if the part about doing the close on the
protocol socket were abstracted out as protoop in itself, maybe
something like admin_close op. We'll have other use cases where the
stack wants to close a socket outside of user request or normal
protocol operations (KCM does this for instance).

Tom

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH 4/4] net: diag: Support destroying TCP sockets.
  2015-11-18  3:43   ` kbuild test robot
@ 2015-11-18  4:46     ` Lorenzo Colitti
  0 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-18  4:46 UTC (permalink / raw)
  To: kbuild test robot
  Cc: kbuild-all, netdev, Eric Dumazet, Erik Kline,
	Maciej Żenczykowski, Dmitry Torokhov

On Wed, Nov 18, 2015 at 12:43 PM, kbuild test robot <lkp@intel.com> wrote:
> All errors (new ones prefixed by >>):
>
>>> ERROR: "inet_diag_find_one_icsk" [net/ipv4/tcp_diag.ko] undefined!

Oops. Missing export. Should be fixed in v2.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18  1:43 Add a SOCK_DESTROY operation to close sockets from userspace Lorenzo Colitti
                   ` (5 preceding siblings ...)
  2015-11-18  3:56 ` Tom Herbert
@ 2015-11-18 10:12 ` Hannes Frederic Sowa
  6 siblings, 0 replies; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-18 10:12 UTC (permalink / raw)
  To: Lorenzo Colitti, netdev; +Cc: edumazet, ek, maze, dtor

Hello,

On Wed, Nov 18, 2015, at 02:43, Lorenzo Colitti wrote:
> This patch series adds the ability for a privileged process to
> destroy sockets belonging to other userspace processes via the
> sock_diag interface, and implements that for TCP sockets.
> 
> This functionality is needed on laptops and mobile hosts to
> ensure that network switches / disconnects do not result in
> applications being blocked for long periods of time (minutes) in
> read or connect calls on TCP sockets that will never succeed
> because the IP address they are bound to is gone. Closing the
> sockets in the protocol layer causes these calls to fail fast and
> allows applications to reconnect on another network.

I regularly do this with gdb, connecting to the process, looking up the
filedescriptors in /proc/pid/fd and closing the socket. Actually it will
also be removed from the poll tables and thus unlocks the system. I
think a user space approach would be preferred to do so, or some out of
band signalling (which we actually already have in terms of netlink
monitor).

Bye,
Hannes

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18  4:04   ` Eric Dumazet
@ 2015-11-18 10:19     ` Hannes Frederic Sowa
  2015-11-18 10:47       ` Lorenzo Colitti
  0 siblings, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-18 10:19 UTC (permalink / raw)
  To: Eric Dumazet, Stephen Hemminger
  Cc: Lorenzo Colitti, netdev, edumazet, ek, maze, dtor

Hi,

On Wed, Nov 18, 2015, at 05:04, Eric Dumazet wrote:
> On Tue, 2015-11-17 at 19:27 -0800, Stephen Hemminger wrote:
> 
> > I understand why you might want this, but it smells like the same
> > kind of problems that the "forced unmount" patch had which eventually
> > led to it not being accepted in mainline.  Lots of corner
> > cases and race conditions waiting to blow up.
> 
> Well, disconnecting a TCP socket seems straightforward, once you get a
> sk pointer.
> 
> Code looks good.
> 
> > 
> > Look at the issues that the multi-thread socket close has.
> > This looks worse.
> 
> I do not see a problem here. A RST packet has roughly same effect, and
> we do process them.
> 
> Cookies are 64bits and uniquely identify a socket.
> 
> Once you make sure the request comes from a privileged user, we are
> good.
> 
> This user could easily install some iptables rules to generate RST
> packets anyway.

I bet there will soon be a timewaitd which handles the not configurable
(David has rejected all those patches so far) timeout of TIME_WAIT
sockets. And I bet it will be used. :/

Bye,
Hannes

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 10:19     ` Hannes Frederic Sowa
@ 2015-11-18 10:47       ` Lorenzo Colitti
  2015-11-18 11:19         ` Hannes Frederic Sowa
  2015-11-18 20:35         ` David Miller
  0 siblings, 2 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-18 10:47 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Eric Dumazet, Stephen Hemminger, netdev, Eric Dumazet,
	Erik Kline, Maciej Żenczykowski, Dmitry Torokhov

On Wed, Nov 18, 2015 at 7:19 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> I bet there will soon be a timewaitd which handles the not configurable
> (David has rejected all those patches so far) timeout of TIME_WAIT
> sockets. And I bet it will be used. :/

No, SOCK_DESTROY has no effect on TCP_TIME_WAIT sockets or any other
non-full socket.

When called on any socket where sk_fullsock(sk) is false, it returns
EOPNOTSUPP because there is nothing to do. Its purpose is to interrupt
blocked userspace socket calls, not to release resources.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 10:47       ` Lorenzo Colitti
@ 2015-11-18 11:19         ` Hannes Frederic Sowa
  2015-11-18 12:54           ` Eric Dumazet
  2015-11-18 13:04           ` Lorenzo Colitti
  2015-11-18 20:35         ` David Miller
  1 sibling, 2 replies; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-18 11:19 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: Eric Dumazet, Stephen Hemminger, netdev, Eric Dumazet,
	Erik Kline, Maciej Żenczykowski, Dmitry Torokhov

On Wed, Nov 18, 2015, at 11:47, Lorenzo Colitti wrote:
> On Wed, Nov 18, 2015 at 7:19 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
> > I bet there will soon be a timewaitd which handles the not configurable
> > (David has rejected all those patches so far) timeout of TIME_WAIT
> > sockets. And I bet it will be used. :/
> 
> No, SOCK_DESTROY has no effect on TCP_TIME_WAIT sockets or any other
> non-full socket.
> 
> When called on any socket where sk_fullsock(sk) is false, it returns
> EOPNOTSUPP because there is nothing to do. Its purpose is to interrupt
> blocked userspace socket calls, not to release resources.

Okay, thanks for clarification! Still I don't see how you enter
TIME_WAIT in case you kill an active socket. At least my start-up idea
timewaitd seems to be not implementable. ;)

I was wondering why you didn't use tcp_close function, because still we
could have the address and we would like to do a proper shutdown of the
connection. While this patchset wants to tear down sockets for addresses
no longer alive, it still can be used with full sockets.

Thanks,
Hannes

^ permalink raw reply	[flat|nested] 110+ messages in thread

* RE: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18  3:57   ` Maciej Żenczykowski
@ 2015-11-18 11:56     ` David Laight
  0 siblings, 0 replies; 110+ messages in thread
From: David Laight @ 2015-11-18 11:56 UTC (permalink / raw)
  To: 'Maciej Zenczykowski', Stephen Hemminger
  Cc: Lorenzo Colitti, Linux NetDev, Eric Dumazet, ek, dtor

From: Maciej Zenczykowski
> Sent: 18 November 2015 03:57
> I don't know what the right fix is...
> 
> However, speaking as an end user with laptops on wifi and/or home
> gateways on dialup connections where the IP address occasionally (or
> constantly) changes, I find it very frustrating that by default as IP
> addresses get removed from interfaces all the related state (whether
> conntrack or open connections) doesn't get cleaned up.

Right, but I don't want it happening when an interface temporarily
goes down for some reason.
Trip over the cable to a USB network interface and you want the connections
to still be alive when you plug it back in.
We've had to put USB interfaces into a 'bond' to get a stable address.

Now, if you have a DHCP assigned address you may want to force a
lease renewal after a network 'glitch'. If you get back a different
address then it might be reasonable for the stack to error any
connections using the old address - after all another system
could be using the old IP address.

I suspect that the only way to avoid horrid timing windows is
to simulate the receipt of RST packets.

	David


^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 11:19         ` Hannes Frederic Sowa
@ 2015-11-18 12:54           ` Eric Dumazet
  2015-11-18 13:04           ` Lorenzo Colitti
  1 sibling, 0 replies; 110+ messages in thread
From: Eric Dumazet @ 2015-11-18 12:54 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Lorenzo Colitti, Stephen Hemminger, netdev, Eric Dumazet,
	Erik Kline, Maciej Żenczykowski, Dmitry Torokhov

On Wed, 2015-11-18 at 12:19 +0100, Hannes Frederic Sowa wrote:
> On Wed, Nov 18, 2015, at 11:47, Lorenzo Colitti wrote:
> > On Wed, Nov 18, 2015 at 7:19 PM, Hannes Frederic Sowa
> > <hannes@stressinduktion.org> wrote:
> > > I bet there will soon be a timewaitd which handles the not configurable
> > > (David has rejected all those patches so far) timeout of TIME_WAIT
> > > sockets. And I bet it will be used. :/
> > 
> > No, SOCK_DESTROY has no effect on TCP_TIME_WAIT sockets or any other
> > non-full socket.
> > 
> > When called on any socket where sk_fullsock(sk) is false, it returns
> > EOPNOTSUPP because there is nothing to do. Its purpose is to interrupt
> > blocked userspace socket calls, not to release resources.
> 
> Okay, thanks for clarification! Still I don't see how you enter
> TIME_WAIT in case you kill an active socket. At least my start-up idea
> timewaitd seems to be not implementable. ;)
> 
> I was wondering why you didn't use tcp_close function, because still we
> could have the address and we would like to do a proper shutdown of the
> connection. While this patchset wants to tear down sockets for addresses
> no longer alive, it still can be used with full sockets.

I guess this could work, but we would have to also propose a mechanism
where no FIN/RST message is sent.

This might be the time for upstreaming our TCP_SILENT_CLOSE
implementation.

/* on close(): free sock, no FIN/RST */

It seems there is a lot of confusion on this thread about these patches.

Proposal is _not_ changing TCP stack behavior, as David Laight seems to
fear.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 11:19         ` Hannes Frederic Sowa
  2015-11-18 12:54           ` Eric Dumazet
@ 2015-11-18 13:04           ` Lorenzo Colitti
  2015-11-18 13:31             ` Hannes Frederic Sowa
  1 sibling, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-18 13:04 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Eric Dumazet, Stephen Hemminger, netdev, Eric Dumazet,
	Erik Kline, Maciej Żenczykowski, Dmitry Torokhov

On Wed, Nov 18, 2015 at 8:19 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> I was wondering why you didn't use tcp_close function, because still we
> could have the address and we would like to do a proper shutdown of the
> connection. While this patchset wants to tear down sockets for addresses
> no longer alive, it still can be used with full sockets.

>From the perspective of the TCP state machine, there's not much
difference. In most TCP states, tcp_close takes the socket straight to
TCP_CLOSE (not into TCP_TIME_WAIT).

There is a difference in that tcp_close() sends a RST by calling
tcp_send_active_reset. We could make tcp_diag_destroy do that too. Not
sure it's worth it because in most of the the cases where you'd want
to use SOCK_DESTROY (e.g., you've lost a network connection, a VPN
connected, etc.), tcp_send_active_reset is either not going to send a
RST at all or it's going send on the wrong network. Even if we're
still connected to the same network (e.g., in the case where you're
running "ss --kill" to close a socket instead of the bad old days
where you had to load your process in gdb and call close() from there
:-)), not sending a RST is not the end of the world, because as soon
as the peer sends us a packet we'll send a RST anyway.

In any case calling tcp_close itself won't work - that's intended for
userspace closes. It calls sock_orphan, which nulls out the
backpointer to the userspace socket structure, and assumes that there
are no userspace references to the protocol socket. If we make
SOCK_DESTROY call tcp_close without releasing the userspace
components, things blow up as soon as the app calls close().

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 13:04           ` Lorenzo Colitti
@ 2015-11-18 13:31             ` Hannes Frederic Sowa
  2015-11-18 14:45               ` Lorenzo Colitti
  0 siblings, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-18 13:31 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: Eric Dumazet, Stephen Hemminger, netdev, Eric Dumazet,
	Erik Kline, Maciej Żenczykowski, Dmitry Torokhov

Hello,

On Wed, Nov 18, 2015, at 14:04, Lorenzo Colitti wrote:
> On Wed, Nov 18, 2015 at 8:19 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
> > I was wondering why you didn't use tcp_close function, because still we
> > could have the address and we would like to do a proper shutdown of the
> > connection. While this patchset wants to tear down sockets for addresses
> > no longer alive, it still can be used with full sockets.
> 
> From the perspective of the TCP state machine, there's not much
> difference. In most TCP states, tcp_close takes the socket straight to
> TCP_CLOSE (not into TCP_TIME_WAIT).

Active close will end up in time wait in the end anyway (with some
exceptions).

> There is a difference in that tcp_close() sends a RST by calling
> tcp_send_active_reset. We could make tcp_diag_destroy do that too. Not
> sure it's worth it because in most of the the cases where you'd want
> to use SOCK_DESTROY (e.g., you've lost a network connection, a VPN
> connected, etc.), tcp_send_active_reset is either not going to send a
> RST at all or it's going send on the wrong network. Even if we're
> still connected to the same network (e.g., in the case where you're
> running "ss --kill" to close a socket instead of the bad old days
> where you had to load your process in gdb and call close() from there
> :-)), not sending a RST is not the end of the world, because as soon
> as the peer sends us a packet we'll send a RST anyway.

Ack. I don't think it makes sense to provide a FIN/RST less way of
closing a socket, just invoke a shutdown() from an interface might be
okayish IMHO.

> In any case calling tcp_close itself won't work - that's intended for
> userspace closes. It calls sock_orphan, which nulls out the
> backpointer to the userspace socket structure, and assumes that there
> are no userspace references to the protocol socket. If we make
> SOCK_DESTROY call tcp_close without releasing the userspace
> components, things blow up as soon as the app calls close().

I was not saying using tcp_close literally, sorry for not making that
clear, but just model the state transitions after tcp_close. At least it
seems like a normal close to me.

Thanks,
Hannes

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 13:31             ` Hannes Frederic Sowa
@ 2015-11-18 14:45               ` Lorenzo Colitti
  2015-11-18 14:56                 ` Hannes Frederic Sowa
  0 siblings, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-18 14:45 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Eric Dumazet, Stephen Hemminger, netdev, Eric Dumazet,
	Erik Kline, Maciej Żenczykowski, Dmitry Torokhov

On Wed, Nov 18, 2015 at 10:31 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> I was not saying using tcp_close literally, sorry for not making that
> clear, but just model the state transitions after tcp_close. At least it
> seems like a normal close to me.

But it shouldn't be a normal close. Consider calling SOCK_DESTROY on a
socket that is streaming data to a peer. If SOCK_DESTROY results in
the kernel sending a FIN, the remote side might think that the sender
closed the connection gracefully, even though the local side aborted
the connection.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 14:45               ` Lorenzo Colitti
@ 2015-11-18 14:56                 ` Hannes Frederic Sowa
  2015-11-18 15:16                   ` Eric Dumazet
  0 siblings, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-18 14:56 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: Eric Dumazet, Stephen Hemminger, netdev, Eric Dumazet,
	Erik Kline, Maciej Żenczykowski, Dmitry Torokhov

On Wed, Nov 18, 2015, at 15:45, Lorenzo Colitti wrote:
> On Wed, Nov 18, 2015 at 10:31 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
> > I was not saying using tcp_close literally, sorry for not making that
> > clear, but just model the state transitions after tcp_close. At least it
> > seems like a normal close to me.
> 
> But it shouldn't be a normal close. Consider calling SOCK_DESTROY on a
> socket that is streaming data to a peer. If SOCK_DESTROY results in
> the kernel sending a FIN, the remote side might think that the sender
> closed the connection gracefully, even though the local side aborted
> the connection.

Oh, yes, I understand. The connection wasn't closed by the application
but by the administrator forcefully. So we should never indicate a
successful TCP shutdown with FIN but with RST. A TIME_WAIT period
actuallty still seems useful to me, maybe with different semantics, only
RST incoming data?

Bye,
Hannes

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 14:56                 ` Hannes Frederic Sowa
@ 2015-11-18 15:16                   ` Eric Dumazet
  2015-11-18 15:32                     ` Hannes Frederic Sowa
  0 siblings, 1 reply; 110+ messages in thread
From: Eric Dumazet @ 2015-11-18 15:16 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Lorenzo Colitti, Stephen Hemminger, netdev, Eric Dumazet,
	Erik Kline, Maciej Żenczykowski, Dmitry Torokhov

On Wed, 2015-11-18 at 15:56 +0100, Hannes Frederic Sowa wrote:
> On Wed, Nov 18, 2015, at 15:45, Lorenzo Colitti wrote:
> > On Wed, Nov 18, 2015 at 10:31 PM, Hannes Frederic Sowa
> > <hannes@stressinduktion.org> wrote:
> > > I was not saying using tcp_close literally, sorry for not making that
> > > clear, but just model the state transitions after tcp_close. At least it
> > > seems like a normal close to me.
> > 
> > But it shouldn't be a normal close. Consider calling SOCK_DESTROY on a
> > socket that is streaming data to a peer. If SOCK_DESTROY results in
> > the kernel sending a FIN, the remote side might think that the sender
> > closed the connection gracefully, even though the local side aborted
> > the connection.
> 
> Oh, yes, I understand. The connection wasn't closed by the application
> but by the administrator forcefully. So we should never indicate a
> successful TCP shutdown with FIN but with RST. A TIME_WAIT period
> actuallty still seems useful to me, maybe with different semantics, only
> RST incoming data?

There is some confusion.

TIME_WAIT are used to be able to send ACK packets to incoming valid
packets.

To send RST, you need nothing at all.

TIME_WAIT are hints for normal ending connections, to handle old packets
(and or duplicates)

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 15:16                   ` Eric Dumazet
@ 2015-11-18 15:32                     ` Hannes Frederic Sowa
  2015-11-18 15:33                       ` Hannes Frederic Sowa
  0 siblings, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-18 15:32 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Lorenzo Colitti, Stephen Hemminger, netdev, Eric Dumazet,
	Erik Kline, Maciej Żenczykowski, Dmitry Torokhov

On Wed, Nov 18, 2015, at 16:16, Eric Dumazet wrote:
> On Wed, 2015-11-18 at 15:56 +0100, Hannes Frederic Sowa wrote:
> > On Wed, Nov 18, 2015, at 15:45, Lorenzo Colitti wrote:
> > > On Wed, Nov 18, 2015 at 10:31 PM, Hannes Frederic Sowa
> > > <hannes@stressinduktion.org> wrote:
> > > > I was not saying using tcp_close literally, sorry for not making that
> > > > clear, but just model the state transitions after tcp_close. At least it
> > > > seems like a normal close to me.
> > > 
> > > But it shouldn't be a normal close. Consider calling SOCK_DESTROY on a
> > > socket that is streaming data to a peer. If SOCK_DESTROY results in
> > > the kernel sending a FIN, the remote side might think that the sender
> > > closed the connection gracefully, even though the local side aborted
> > > the connection.
> > 
> > Oh, yes, I understand. The connection wasn't closed by the application
> > but by the administrator forcefully. So we should never indicate a
> > successful TCP shutdown with FIN but with RST. A TIME_WAIT period
> > actuallty still seems useful to me, maybe with different semantics, only
> > RST incoming data?
> 
> There is some confusion.
> 
> TIME_WAIT are used to be able to send ACK packets to incoming valid
> packets.

I was only talking to prevent fast address/port reuse on the same socket
and preventing delayed packets being accepted by a 

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 15:32                     ` Hannes Frederic Sowa
@ 2015-11-18 15:33                       ` Hannes Frederic Sowa
  0 siblings, 0 replies; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-18 15:33 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Lorenzo Colitti, Stephen Hemminger, netdev, Eric Dumazet,
	Erik Kline, Maciej Żenczykowski, Dmitry Torokhov


On Wed, Nov 18, 2015, at 16:32, Hannes Frederic Sowa wrote:
> On Wed, Nov 18, 2015, at 16:16, Eric Dumazet wrote:
> > On Wed, 2015-11-18 at 15:56 +0100, Hannes Frederic Sowa wrote:
> > > On Wed, Nov 18, 2015, at 15:45, Lorenzo Colitti wrote:
> > > > On Wed, Nov 18, 2015 at 10:31 PM, Hannes Frederic Sowa
> > > > <hannes@stressinduktion.org> wrote:
> > > > > I was not saying using tcp_close literally, sorry for not making that
> > > > > clear, but just model the state transitions after tcp_close. At least it
> > > > > seems like a normal close to me.
> > > > 
> > > > But it shouldn't be a normal close. Consider calling SOCK_DESTROY on a
> > > > socket that is streaming data to a peer. If SOCK_DESTROY results in
> > > > the kernel sending a FIN, the remote side might think that the sender
> > > > closed the connection gracefully, even though the local side aborted
> > > > the connection.
> > > 
> > > Oh, yes, I understand. The connection wasn't closed by the application
> > > but by the administrator forcefully. So we should never indicate a
> > > successful TCP shutdown with FIN but with RST. A TIME_WAIT period
> > > actuallty still seems useful to me, maybe with different semantics, only
> > > RST incoming data?
> > 
> > There is some confusion.
> > 
> > TIME_WAIT are used to be able to send ACK packets to incoming valid
> > packets.
> 
> I was only talking to prevent fast address/port reuse on the same socket
> and preventing delayed packets being accepted by a 

sorry...

accepted by a later connection reusing the same quadrupel.

Bye,
Hannes

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 10:47       ` Lorenzo Colitti
  2015-11-18 11:19         ` Hannes Frederic Sowa
@ 2015-11-18 20:35         ` David Miller
  2015-11-18 20:43           ` Hannes Frederic Sowa
  1 sibling, 1 reply; 110+ messages in thread
From: David Miller @ 2015-11-18 20:35 UTC (permalink / raw)
  To: lorenzo; +Cc: hannes, eric.dumazet, stephen, netdev, edumazet, ek, maze, dtor

From: Lorenzo Colitti <lorenzo@google.com>
Date: Wed, 18 Nov 2015 19:47:21 +0900

> On Wed, Nov 18, 2015 at 7:19 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
>> I bet there will soon be a timewaitd which handles the not configurable
>> (David has rejected all those patches so far) timeout of TIME_WAIT
>> sockets. And I bet it will be used. :/
> 
> No, SOCK_DESTROY has no effect on TCP_TIME_WAIT sockets or any other
> non-full socket.
> 
> When called on any socket where sk_fullsock(sk) is false, it returns
> EOPNOTSUPP because there is nothing to do. Its purpose is to interrupt
> blocked userspace socket calls, not to release resources.

+1

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 20:35         ` David Miller
@ 2015-11-18 20:43           ` Hannes Frederic Sowa
  2015-11-19  3:49             ` David Miller
  0 siblings, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-18 20:43 UTC (permalink / raw)
  To: David Miller, lorenzo
  Cc: eric.dumazet, stephen, netdev, edumazet, ek, maze, dtor

On Wed, Nov 18, 2015, at 21:35, David Miller wrote:
> From: Lorenzo Colitti <lorenzo@google.com>
> Date: Wed, 18 Nov 2015 19:47:21 +0900
> 
> > On Wed, Nov 18, 2015 at 7:19 PM, Hannes Frederic Sowa
> > <hannes@stressinduktion.org> wrote:
> >> I bet there will soon be a timewaitd which handles the not configurable
> >> (David has rejected all those patches so far) timeout of TIME_WAIT
> >> sockets. And I bet it will be used. :/
> > 
> > No, SOCK_DESTROY has no effect on TCP_TIME_WAIT sockets or any other
> > non-full socket.
> > 
> > When called on any socket where sk_fullsock(sk) is false, it returns
> > EOPNOTSUPP because there is nothing to do. Its purpose is to interrupt
> > blocked userspace socket calls, not to release resources.
> 
> +1

Basically my concern is the same one I tried to express in the other
patch about Florian's patch "[PATCH -next] net: tcp: move to timewait
when receiving data post active-close": we could give the socket back
way too early so the quadruple can be reused. If timestamps are not in
use or we are dealing with NAT were we have dozens of synchronized
clocks behind the masquerading device, we could end up in accepting
delayed data. Especially this scenario can come up when the address is
actually not released but someone uses this feature on a server.

Bye,
Hannes

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-18 20:43           ` Hannes Frederic Sowa
@ 2015-11-19  3:49             ` David Miller
  2015-11-19  5:12               ` Tom Herbert
  2015-11-19  5:13               ` Lorenzo Colitti
  0 siblings, 2 replies; 110+ messages in thread
From: David Miller @ 2015-11-19  3:49 UTC (permalink / raw)
  To: hannes; +Cc: lorenzo, eric.dumazet, stephen, netdev, edumazet, ek, maze, dtor

From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Wed, 18 Nov 2015 21:43:36 +0100

> Basically my concern is the same one I tried to express in the other
> patch about Florian's patch "[PATCH -next] net: tcp: move to
> timewait when receiving data post active-close": we could give the
> socket back way too early so the quadruple can be reused. If
> timestamps are not in use or we are dealing with NAT were we have
> dozens of synchronized clocks behind the masquerading device, we
> could end up in accepting delayed data. Especially this scenario can
> come up when the address is actually not released but someone uses
> this feature on a server.

Ok, these are legitimate concerns.

What if we implemented this the other way.  The operations that make
the sockets no longer connected to the world, close them.  The route
delete during address removal does the socket scan and then the done
calls on those sockets.

Likewise a VPN or network realm/namespace configuration change can do
similarly.

That way we are guaranteed to only tcp_done() these sockets strictly
in situations where we know that they have been fully disconnected
from the network.

The more I think about it more the more I agree with him and dislike
having user space make sure "it's ok", that isn't where TCP protocol
semantic rules are implemented.  It belongs in the kernel.

Whether we do this or not, that's the policy part and userland can
therefore tell us what it wants when it removes addresses or whatever.

But userland should not be doing the socket scan and triggering the
closes, _no_ _way_.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19  3:49             ` David Miller
@ 2015-11-19  5:12               ` Tom Herbert
  2015-11-19 15:54                 ` Hannes Frederic Sowa
  2015-11-19  5:13               ` Lorenzo Colitti
  1 sibling, 1 reply; 110+ messages in thread
From: Tom Herbert @ 2015-11-19  5:12 UTC (permalink / raw)
  To: David Miller
  Cc: Hannes Frederic Sowa, Lorenzo Colitti, Eric Dumazet,
	Stephen Hemminger, Linux Kernel Network Developers, Eric Dumazet,
	Erik Kline, Maciej Żenczykowski, Dmitry Torokhov

On Wed, Nov 18, 2015 at 7:49 PM, David Miller <davem@davemloft.net> wrote:
> From: Hannes Frederic Sowa <hannes@stressinduktion.org>
> Date: Wed, 18 Nov 2015 21:43:36 +0100
>
>> Basically my concern is the same one I tried to express in the other
>> patch about Florian's patch "[PATCH -next] net: tcp: move to
>> timewait when receiving data post active-close": we could give the
>> socket back way too early so the quadruple can be reused. If
>> timestamps are not in use or we are dealing with NAT were we have
>> dozens of synchronized clocks behind the masquerading device, we
>> could end up in accepting delayed data. Especially this scenario can
>> come up when the address is actually not released but someone uses
>> this feature on a server.
>
> Ok, these are legitimate concerns.
>
> What if we implemented this the other way.  The operations that make
> the sockets no longer connected to the world, close them.  The route
> delete during address removal does the socket scan and then the done
> calls on those sockets.
>
> Likewise a VPN or network realm/namespace configuration change can do
> similarly.
>
> That way we are guaranteed to only tcp_done() these sockets strictly
> in situations where we know that they have been fully disconnected
> from the network.
>
> The more I think about it more the more I agree with him and dislike
> having user space make sure "it's ok", that isn't where TCP protocol
> semantic rules are implemented.  It belongs in the kernel.
>
> Whether we do this or not, that's the policy part and userland can
> therefore tell us what it wants when it removes addresses or whatever.
>
> But userland should not be doing the socket scan and triggering the
> closes, _no_ _way_.

I think this solution presumes some out of band signaling about a path
failure deep in the network that is not reported via the TCP
connection. This solution is obviously only as good as the signaling,
but clearly the most general solution to maintaining reachability has
been (and unfortunately will probably continue to be) application
keepalives.

btw, the commit log mentioned that the long timeout problem applies to
sockets doing reads or connects. For connects this should be
uninteresting since the connect can be configured for a quick timeout.

> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19  3:49             ` David Miller
  2015-11-19  5:12               ` Tom Herbert
@ 2015-11-19  5:13               ` Lorenzo Colitti
  2015-11-19  5:53                 ` David Miller
  1 sibling, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-19  5:13 UTC (permalink / raw)
  To: David Miller
  Cc: Hannes Frederic Sowa, Eric Dumazet, Stephen Hemminger, netdev,
	Eric Dumazet, Erik Kline, Maciej Żenczykowski,
	Dmitry Torokhov

On Thu, Nov 19, 2015 at 12:49 PM, David Miller <davem@davemloft.net> wrote:
> What if we implemented this the other way.  The operations that make
> the sockets no longer connected to the world, close them.  The route
> delete during address removal does the socket scan and then the done
> calls on those sockets.

In many cases it's not that simple. Routing can be as complex as the
RPDB allows it to be, and in general the kernel cannot know if a
socket is routable or not. As an example, a system might use
mark-based routing, like so:

100 from all fwmark aaaa/0xffff lookup wifi
200 from all fwmark bbbb/0xffff lookup cell
9999 from all lookup wifi

(This is the basic idea of what Android >= 5.0 does). Suppose that a
VPN connects and routing needs to be moved to the VPN. The system
might implement this by adding the following rule:

50 from all fwmark 0x0/0x10000 lookup vpn

Now all sockets where the fwmark matches aaaa/0x1ffff are dead in the
water. They have the wifi source address, but they are routed to the
VPN and go nowhere. The system can't remove the wifi rule or take wifi
down, because the VPN socket itself (which will have a mark of
0x1aaaa/0x1000) needs to continue to work on wifi. It can't route
those sockets over wifi, because the user expects that the VPN is
securing all network traffic.

In this situation, even if the kernel were to examine all sockets when
the rule is added, how would it know that sockets with a mark of 1aaaa
should now be closed? The IP address is still there. Routing lookups
on those sockets will succeed just fine - they just now point to the
VPN, which doesn't work.

> The more I think about it more the more I agree with him and dislike
> having user space make sure "it's ok", that isn't where TCP protocol
> semantic rules are implemented.  It belongs in the kernel.

Today any app can always, on one of its sockets, set SO_LINGER with a
timeout of 0 and call tcp_close. That results in immediately sending a
RST and forgetting about local state. (Those semantics are the ones of
RFC 793 ABORT.) If SOCK_DESTROY did that instead of just calling
tcp_done, would that be acceptable?

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19  5:13               ` Lorenzo Colitti
@ 2015-11-19  5:53                 ` David Miller
  2015-11-19  7:19                   ` Maciej Żenczykowski
  2015-11-20  0:19                   ` Lorenzo Colitti
  0 siblings, 2 replies; 110+ messages in thread
From: David Miller @ 2015-11-19  5:53 UTC (permalink / raw)
  To: lorenzo; +Cc: hannes, eric.dumazet, stephen, netdev, edumazet, ek, maze, dtor

From: Lorenzo Colitti <lorenzo@google.com>
Date: Thu, 19 Nov 2015 14:13:48 +0900

> On Thu, Nov 19, 2015 at 12:49 PM, David Miller <davem@davemloft.net> wrote:
>> The more I think about it more the more I agree with him and dislike
>> having user space make sure "it's ok", that isn't where TCP protocol
>> semantic rules are implemented.  It belongs in the kernel.
> 
> Today any app can always, on one of its sockets, set SO_LINGER with a
> timeout of 0 and call tcp_close. That results in immediately sending a
> RST and forgetting about local state. (Those semantics are the ones of
> RFC 793 ABORT.) If SOCK_DESTROY did that instead of just calling
> tcp_done, would that be acceptable?

What I object to is userspace making reachability decisions, not
whether SOCK_DESTROY closes the socket in one way or the other.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19  5:53                 ` David Miller
@ 2015-11-19  7:19                   ` Maciej Żenczykowski
  2015-11-19 15:48                     ` David Miller
  2015-11-20  0:19                   ` Lorenzo Colitti
  1 sibling, 1 reply; 110+ messages in thread
From: Maciej Żenczykowski @ 2015-11-19  7:19 UTC (permalink / raw)
  To: David Miller
  Cc: Lorenzo Colitti, hannes, Eric Dumazet, stephen, Linux NetDev,
	Eric Dumazet, Erik Kline, Dmitry Torokhov

> What I object to is userspace making reachability decisions, not
> whether SOCK_DESTROY closes the socket in one way or the other.

Privileged userspace can already make these decisions today, whether
it is by killing processes with open sockets, or by turning interfaces
up and down or by reconfiguring the firewall and/or the routing
rules/tables, or by injecting spoofed TCP reset packets (via tap).
It's just *very* inconvenient to do and error prone.

Another example: privileged userspace could ptrace the userspace apps
and via code injection call close() on the app's behalf and reopen the
file descriptor to some null routed destination so it behaves like if
it was timed out / unreachable.

In general the statement that (appropriately privileged) 'userspace
should not be able to do something' is wrong - about the only
exception I can think of being crashing the machine or burning the
hardware.

The kernel's purpose in life should be to enable userspace to do things.

You make things too hard for people to do stuff via the kernel and you
end up with the kernel being out of a job.  People either implement
things entirely in userspace (userspace tcp stacks on raw sockets or
tap or udp) or switch to a different kernel or create a local fork...

I'm not saying the proposed solution is ideal (although honestly it
doesn't seem bad), but this problem does deserve a clean *kernel*
solution instead of insane userspace hacks or non vanilla Linux
patches which then propagate to a billion devices...

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19  7:19                   ` Maciej Żenczykowski
@ 2015-11-19 15:48                     ` David Miller
  2015-11-19 16:19                       ` Eric Dumazet
  0 siblings, 1 reply; 110+ messages in thread
From: David Miller @ 2015-11-19 15:48 UTC (permalink / raw)
  To: zenczykowski
  Cc: lorenzo, hannes, eric.dumazet, stephen, netdev, edumazet, ek, dtor

From: Maciej Żenczykowski <zenczykowski@gmail.com>
Date: Wed, 18 Nov 2015 23:19:03 -0800

> Privileged userspace can already make these decisions today, whether
> it is by killing processes with open sockets, or by turning interfaces
> up and down or by reconfiguring the firewall and/or the routing
> rules/tables, or by injecting spoofed TCP reset packets (via tap).
> It's just *very* inconvenient to do and error prone.
> 
> Another example: privileged userspace could ptrace the userspace apps
> and via code injection call close() on the app's behalf and reopen the
> file descriptor to some null routed destination so it behaves like if
> it was timed out / unreachable.

At least if they do it this way, and someone claims that Linux TCP
behaves outside the spec or improperly, it's not directly because of
any code I am responsible for.

That's the difference, and frankly an important one to me.

If I'm going to give userspace a direct tool by which to do things,
then it's suddenly my responsibility and my problem.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19  5:12               ` Tom Herbert
@ 2015-11-19 15:54                 ` Hannes Frederic Sowa
  2015-11-19 23:54                   ` Maciej Żenczykowski
  0 siblings, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-19 15:54 UTC (permalink / raw)
  To: Tom Herbert, David Miller
  Cc: Lorenzo Colitti, Eric Dumazet, Stephen Hemminger,
	Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Maciej Żenczykowski, Dmitry Torokhov

On Thu, Nov 19, 2015, at 06:12, Tom Herbert wrote:
> I think this solution presumes some out of band signaling about a path
> failure deep in the network that is not reported via the TCP
> connection. This solution is obviously only as good as the signaling,
> but clearly the most general solution to maintaining reachability has
> been (and unfortunately will probably continue to be) application
> keepalives.

Ack. With emphasis of *application*. :)

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 15:48                     ` David Miller
@ 2015-11-19 16:19                       ` Eric Dumazet
  2015-11-19 16:33                         ` David Miller
                                           ` (3 more replies)
  0 siblings, 4 replies; 110+ messages in thread
From: Eric Dumazet @ 2015-11-19 16:19 UTC (permalink / raw)
  To: David Miller
  Cc: zenczykowski, lorenzo, hannes, stephen, netdev, edumazet, ek, dtor

On Thu, 2015-11-19 at 10:48 -0500, David Miller wrote:
> At least if they do it this way, and someone claims that Linux TCP
> behaves outside the spec or improperly, it's not directly because of
> any code I am responsible for.
> 
> That's the difference, and frankly an important one to me.
> 
> If I'm going to give userspace a direct tool by which to do things,
> then it's suddenly my responsibility and my problem.

Here is the thing :

- Android powered phones and devices have a similar code since 2008.
There is _no_ way they can avoid having this functionality.

Every-time I make a change in linux TCP stack, this code breaks, and
this a  real pain because Android changes need to be carried over to
vendors.

I worked closely with Lorenzo, removing the ugly code Android had, and
proposed an implementation based on inet_diag.

We have a clear business case here, and I would like we find a solution.

Having this code in upstream kernel will save me time and energy to deal
with real issues and improvements, not with bugs opened by Android
engineers 9 months after I did changes in upstream kernels.

Having comments like "look, just implement application keepalives" is
not going to work [1][2]. This is terrible, and show lack of
understanding of the problem. We are not dealing with DC communications
here. (I wish !)

We (TCP stack) compete with QUIC, based on UDP, which has no issues like
that. We need to allow TCP sessions being signaled of a non temporary
network disruption.

[1] Phones are very often in very polluted radio environments.

  I remember the terrible wifi we had during Linux Kernel Summit in
Chicago, where ping had more than 40 seconds delay.

Here the network manager can avoid this 40 seconds penalty.

[2] Taking air time to send keepalive(s) and receive their answers is
going to make all radio providers shout really loud. They are already
installing stupid proxies filtering/compressing ACK messages and
breaking TCP clocking.

Please add

Signed-off-by: Eric Dumazet <edumazet@google.com>

Please wait one month for people submitting a working alternative,
I am fine with it as long it is upstream.

Thanks.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 16:19                       ` Eric Dumazet
@ 2015-11-19 16:33                         ` David Miller
  2015-11-19 16:43                           ` Eric Dumazet
                                             ` (2 more replies)
  2015-11-19 17:08                         ` Hannes Frederic Sowa
                                           ` (2 subsequent siblings)
  3 siblings, 3 replies; 110+ messages in thread
From: David Miller @ 2015-11-19 16:33 UTC (permalink / raw)
  To: eric.dumazet
  Cc: zenczykowski, lorenzo, hannes, stephen, netdev, edumazet, ek, dtor

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 19 Nov 2015 08:19:24 -0800

> Here is the thing :
> 
> - Android powered phones and devices have a similar code since 2008.
> There is _no_ way they can avoid having this functionality.

Weren't we given similar story about initial wake locks
implementation?  There is no other way, Android does this forever,
we have to have this!

> Every-time I make a change in linux TCP stack, this code breaks, and
> this a  real pain because Android changes need to be carried over to
> vendors.

I'm very glad that you felt the pain enough that you finally had
to reluctantly try and upstream something.  I'm sorry that doing
hackish things locally in the Android kernel tree is so painful :-/

Android folks really do not care about upstream, and it is probably
bottom of their priority.  Their actions consistently support this.

Now I am supposed to urgently care about some of their problems.

What kind of relationship is this?  It sounds quite one sided if you
ask me.

> We have a clear business case here, and I would like we find a solution.

I'm sorry, I thought the plan was to stay on 3.10 kernel in Android
forever, that putting upstream issues as far down the road as
possible.

> Having this code in upstream kernel will save me time and energy to deal
> with real issues and improvements, not with bugs opened by Android
> engineers 9 months after I did changes in upstream kernels.

I think the timeline is more like 2+ years.

Guess what, it's frustrating for me too!

> [2] Taking air time to send keepalive(s) and receive their answers is
> going to make all radio providers shout really loud. They are already
> installing stupid proxies filtering/compressing ACK messages and
> breaking TCP clocking.

I understand.

But you know what, if you just toss some major facility like this onto
the list you have to expect some discussion, disagreement, and
suggestions of alternate schemes.

Some of which you may not like nor consider appropriate at all.

You have been considering this non-stop for whatever time you have
been working on this, everyone else is now considering and thinking
about this for the first time right now.

Therefore you must be understanding and patient.  Just like I've been
patiently waiting for my Nexus 6 to be updated to something newer than
2+ year old kernel technology.

You cannot just say "I signoff on this, it's the only reasonable
scheme, apply it."  That's not how we do things here.

Frankly, that kind of attitude makes me want to apply this series even
less, and encourage attempts to find alternate schemes even more.

Thanks.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 16:33                         ` David Miller
@ 2015-11-19 16:43                           ` Eric Dumazet
  2015-11-19 16:50                             ` David Miller
  2015-11-19 16:47                           ` Eric Dumazet
  2015-11-19 22:55                           ` Lorenzo Colitti
  2 siblings, 1 reply; 110+ messages in thread
From: Eric Dumazet @ 2015-11-19 16:43 UTC (permalink / raw)
  To: David Miller
  Cc: zenczykowski, lorenzo, hannes, stephen, netdev, edumazet, ek, dtor

On Thu, 2015-11-19 at 11:33 -0500, David Miller wrote:

> You cannot just say "I signoff on this, it's the only reasonable
> scheme, apply it."  That's not how we do things here.


I added my SOB because I effectively worked a lot on this patch.

Not because it is a sign of "This is the only way it can be done"

You already rejected some of my patches, obviously.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 16:33                         ` David Miller
  2015-11-19 16:43                           ` Eric Dumazet
@ 2015-11-19 16:47                           ` Eric Dumazet
  2015-11-19 17:02                             ` David Miller
  2015-11-19 22:55                           ` Lorenzo Colitti
  2 siblings, 1 reply; 110+ messages in thread
From: Eric Dumazet @ 2015-11-19 16:47 UTC (permalink / raw)
  To: David Miller
  Cc: zenczykowski, lorenzo, hannes, stephen, netdev, edumazet, ek, dtor

On Thu, 2015-11-19 at 11:33 -0500, David Miller wrote:

> Android folks really do not care about upstream, and it is probably
> bottom of their priority.  Their actions consistently support this.

Well, in this case they contacted me and we worked on a modern
solution, candidate for upstream kernel.

It took lot of iterations and I felt the result was quite nice.

So if the reaction of this is "Patch is coming from Android, must be yet
another hack", it is quite not fair.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 16:43                           ` Eric Dumazet
@ 2015-11-19 16:50                             ` David Miller
  0 siblings, 0 replies; 110+ messages in thread
From: David Miller @ 2015-11-19 16:50 UTC (permalink / raw)
  To: eric.dumazet
  Cc: zenczykowski, lorenzo, hannes, stephen, netdev, edumazet, ek, dtor

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 19 Nov 2015 08:43:34 -0800

> You already rejected some of my patches, obviously.

Of course, and I applied quickly all the really nice ones,
many of them... :-)

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 16:47                           ` Eric Dumazet
@ 2015-11-19 17:02                             ` David Miller
  2015-11-19 17:44                               ` Eric Dumazet
  0 siblings, 1 reply; 110+ messages in thread
From: David Miller @ 2015-11-19 17:02 UTC (permalink / raw)
  To: eric.dumazet
  Cc: zenczykowski, lorenzo, hannes, stephen, netdev, edumazet, ek, dtor

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 19 Nov 2015 08:47:44 -0800

> So if the reaction of this is "Patch is coming from Android, must be
> yet another hack", it is quite not fair.

I have not said this.

But the attitude of that all of a sudden we must care urgently about
some Android issue they have waited as long as possible to address
upstream... that stinks.

How come our opinions and ideas about how to approach solving this
problem were not of any value two years ago?  Why?

We've waited two years, surely Android folks can wait however long it
takes to discuss this design and alternative approaches.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 16:19                       ` Eric Dumazet
  2015-11-19 16:33                         ` David Miller
@ 2015-11-19 17:08                         ` Hannes Frederic Sowa
  2015-11-19 17:38                           ` Tom Herbert
  2015-11-19 21:29                         ` Tom Herbert
  2015-11-20  0:12                         ` Add a SOCK_DESTROY operation to close sockets from userspace Maciej Żenczykowski
  3 siblings, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-19 17:08 UTC (permalink / raw)
  To: Eric Dumazet, David Miller
  Cc: zenczykowski, lorenzo, stephen, netdev, edumazet, ek, dtor

On Thu, Nov 19, 2015, at 17:19, Eric Dumazet wrote:
> On Thu, 2015-11-19 at 10:48 -0500, David Miller wrote:
> > At least if they do it this way, and someone claims that Linux TCP
> > behaves outside the spec or improperly, it's not directly because of
> > any code I am responsible for.
> > 
> > That's the difference, and frankly an important one to me.
> > 
> > If I'm going to give userspace a direct tool by which to do things,
> > then it's suddenly my responsibility and my problem.
> 
> Here is the thing :
> 
> - Android powered phones and devices have a similar code since 2008.
> There is _no_ way they can avoid having this functionality.
> 
> Every-time I make a change in linux TCP stack, this code breaks, and
> this a  real pain because Android changes need to be carried over to
> vendors.
> 
> I worked closely with Lorenzo, removing the ugly code Android had, and
> proposed an implementation based on inet_diag.
> 
> We have a clear business case here, and I would like we find a solution.
> 
> Having this code in upstream kernel will save me time and energy to deal
> with real issues and improvements, not with bugs opened by Android
> engineers 9 months after I did changes in upstream kernels.
> 
> Having comments like "look, just implement application keepalives" is
> not going to work [1][2]. This is terrible, and show lack of
> understanding of the problem. We are not dealing with DC communications
> here. (I wish !)
> 
> We (TCP stack) compete with QUIC, based on UDP, which has no issues like
> that. We need to allow TCP sessions being signaled of a non temporary
> network disruption.
> 
> [1] Phones are very often in very polluted radio environments.
> 
>   I remember the terrible wifi we had during Linux Kernel Summit in
> Chicago, where ping had more than 40 seconds delay.
> 
> Here the network manager can avoid this 40 seconds penalty.
> 
> [2] Taking air time to send keepalive(s) and receive their answers is
> going to make all radio providers shout really loud. They are already
> installing stupid proxies filtering/compressing ACK messages and
> breaking TCP clocking.
> 
> Please add
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> 
> Please wait one month for people submitting a working alternative,
> I am fine with it as long it is upstream.

I actually don't have an issue with killing from user space that much. I
still recommend (and actually have started to look at it today) to add a
new substate for TCP TIMEWAIT and don't have any issue if we block the
socket for 60 seconds and send RSTs to all incoming data. This way we
can solve the problem Florian indicated as well as this problem. Users
can happily kill TCP connections then.

Bye,
Hannes

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 17:08                         ` Hannes Frederic Sowa
@ 2015-11-19 17:38                           ` Tom Herbert
  2015-11-19 18:09                             ` David Miller
  2015-11-19 22:33                             ` Lorenzo Colitti
  0 siblings, 2 replies; 110+ messages in thread
From: Tom Herbert @ 2015-11-19 17:38 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Eric Dumazet, David Miller, zenczykowski, Lorenzo Colitti,
	Stephen Hemminger, Linux Kernel Network Developers, Eric Dumazet,
	Erik Kline, Dmitry Torokhov

> I actually don't have an issue with killing from user space that much. I
> still recommend (and actually have started to look at it today) to add a
> new substate for TCP TIMEWAIT and don't have any issue if we block the
> socket for 60 seconds and send RSTs to all incoming data. This way we
> can solve the problem Florian indicated as well as this problem. Users
> can happily kill TCP connections then.
>
Neither do I have a problem with killing connections from userspace,
but we do have to acknowledge that this is a powerful and invasive
mechanism. I suggest:

1) We need transparency. If a third party kills a TCP connection then
the application should be informed of specifically that. This seems
easy enough to just pick an appropriate error number as I suggested.
2) We need constraints. This feature seems to be specific to a very
narrow use case. It is not at all clear to me if there are any
legitimate uses cases beyond Android, enabling this by default in the
stack creates a non-zero amount of risk and liability for abuse. It
seems like this should be an opt-in sort of feature, with a kernel
CONFIG or maybe opt-in per socket.

Tom

> Bye,
> Hannes
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 17:02                             ` David Miller
@ 2015-11-19 17:44                               ` Eric Dumazet
  0 siblings, 0 replies; 110+ messages in thread
From: Eric Dumazet @ 2015-11-19 17:44 UTC (permalink / raw)
  To: David Miller
  Cc: zenczykowski, lorenzo, hannes, stephen, netdev, edumazet, ek, dtor

On Thu, 2015-11-19 at 12:02 -0500, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Thu, 19 Nov 2015 08:47:44 -0800
> 
> > So if the reaction of this is "Patch is coming from Android, must be
> > yet another hack", it is quite not fair.
> 
> I have not said this.
> 
> But the attitude of that all of a sudden we must care urgently about
> some Android issue they have waited as long as possible to address
> upstream... that stinks.
> 
> How come our opinions and ideas about how to approach solving this
> problem were not of any value two years ago?  Why?
> 
> We've waited two years, surely Android folks can wait however long it
> takes to discuss this design and alternative approaches.


Then we are in perfect agreement.

I did not say there was a sudden urgency, sorry for the
misunderstanding.
 I only know that TCP listener rewrite in 4.4 will again trigger all
these pesky Android bugs, I would like avoiding the mess I had to cope
when TIME_WAIT were merged into regular ehash established chains.

This is the major reason I worked closely with Lorenzo.

Thanks.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 17:38                           ` Tom Herbert
@ 2015-11-19 18:09                             ` David Miller
  2015-11-19 18:27                               ` Hannes Frederic Sowa
  2015-11-19 22:33                             ` Lorenzo Colitti
  1 sibling, 1 reply; 110+ messages in thread
From: David Miller @ 2015-11-19 18:09 UTC (permalink / raw)
  To: tom
  Cc: hannes, eric.dumazet, zenczykowski, lorenzo, stephen, netdev,
	edumazet, ek, dtor

From: Tom Herbert <tom@herbertland.com>
Date: Thu, 19 Nov 2015 09:38:37 -0800

> 1) We need transparency. If a third party kills a TCP connection then
> the application should be informed of specifically that. This seems
> easy enough to just pick an appropriate error number as I suggested.

Agreed.

> 2) We need constraints. This feature seems to be specific to a very
> narrow use case. It is not at all clear to me if there are any
> legitimate uses cases beyond Android, enabling this by default in the
> stack creates a non-zero amount of risk and liability for abuse. It
> seems like this should be an opt-in sort of feature, with a kernel
> CONFIG or maybe opt-in per socket.

And this will probably be the point of contention.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 18:09                             ` David Miller
@ 2015-11-19 18:27                               ` Hannes Frederic Sowa
  2015-11-19 23:02                                 ` Hannes Frederic Sowa
  0 siblings, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-19 18:27 UTC (permalink / raw)
  To: David Miller, tom
  Cc: eric.dumazet, zenczykowski, lorenzo, stephen, netdev, edumazet, ek, dtor

On Thu, Nov 19, 2015, at 19:09, David Miller wrote:
> From: Tom Herbert <tom@herbertland.com>
> Date: Thu, 19 Nov 2015 09:38:37 -0800
> 
> > 1) We need transparency. If a third party kills a TCP connection then
> > the application should be informed of specifically that. This seems
> > easy enough to just pick an appropriate error number as I suggested.
> 
> Agreed.
> 
> > 2) We need constraints. This feature seems to be specific to a very
> > narrow use case. It is not at all clear to me if there are any
> > legitimate uses cases beyond Android, enabling this by default in the
> > stack creates a non-zero amount of risk and liability for abuse. It
> > seems like this should be an opt-in sort of feature, with a kernel
> > CONFIG or maybe opt-in per socket.
> 
> And this will probably be the point of contention.

For me this looks like the revoke(const char *) syscall for TCP. I would
be willing to enable this by default if we can guarantee that for
TIMEWAIT_LEN we don't have reuse of that quadruple. I don't see anything
bad happening by tearing down any connection besides too quick reuse of
that identifier, so why not enable it by default for everyone? I
certainly am often in the situation that I would like to kill
connections on my laptop which are hanging and I know the reason,
actually. A nice user interface would be certainly a place. Also we
should of course wake up any poll_wait_time wchans waiting on the socket
so user space can move forward.

Probably I would really like to see that we send a reset, even though it
won't probably reach its destination in case the source address is
removed from the system.

I will research the semantics behind tcpdrop:
<http://www.freebsd.org/cgi/man.cgi?query=tcpdrop&apropos=0&sektion=0&manpath=FreeBSD+10.2-RELEASE&arch=default&format=html>

Bye,
Hannes

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 16:19                       ` Eric Dumazet
  2015-11-19 16:33                         ` David Miller
  2015-11-19 17:08                         ` Hannes Frederic Sowa
@ 2015-11-19 21:29                         ` Tom Herbert
  2015-11-19 21:41                           ` Eric Dumazet
  2015-11-20  0:12                         ` Add a SOCK_DESTROY operation to close sockets from userspace Maciej Żenczykowski
  3 siblings, 1 reply; 110+ messages in thread
From: Tom Herbert @ 2015-11-19 21:29 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, zenczykowski, Lorenzo Colitti,
	Hannes Frederic Sowa, Stephen Hemminger,
	Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Dmitry Torokhov

> We (TCP stack) compete with QUIC, based on UDP, which has no issues like
> that. We need to allow TCP sessions being signaled of a non temporary
> network disruption.
>

Eric, can you provide some detail on this statement?

I don't understand why QUIC wouldn't have this same issue. Seems like
it is still connection oriented just like TCP, so if the application
does a read expecting data from a peer and reverse reachability is
lost, the the read on the socket hang just like reading a TCP would.
If this is true, then the TCP solution would might actually be a
better since it allows a means for a third party (presumably a daemon
monitoring the network) to signal the application via closing specific
TCP sockets. I don't see how this could work in UDP especially if
these are unconnected sockets. What am I missing?

Thanks,
Tom

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 21:29                         ` Tom Herbert
@ 2015-11-19 21:41                           ` Eric Dumazet
  2015-11-19 21:53                             ` Hannes Frederic Sowa
  2015-11-19 21:53                             ` Tom Herbert
  0 siblings, 2 replies; 110+ messages in thread
From: Eric Dumazet @ 2015-11-19 21:41 UTC (permalink / raw)
  To: Tom Herbert
  Cc: David Miller, zenczykowski, Lorenzo Colitti,
	Hannes Frederic Sowa, Stephen Hemminger,
	Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Dmitry Torokhov

On Thu, 2015-11-19 at 13:29 -0800, Tom Herbert wrote:
> > We (TCP stack) compete with QUIC, based on UDP, which has no issues like
> > that. We need to allow TCP sessions being signaled of a non temporary
> > network disruption.
> >
> 
> Eric, can you provide some detail on this statement?
> 
> I don't understand why QUIC wouldn't have this same issue. Seems like
> it is still connection oriented just like TCP, so if the application
> does a read expecting data from a peer and reverse reachability is
> lost, the the read on the socket hang just like reading a TCP would.
> If this is true, then the TCP solution would might actually be a
> better since it allows a means for a third party (presumably a daemon
> monitoring the network) to signal the application via closing specific
> TCP sockets. I don't see how this could work in UDP especially if
> these are unconnected sockets. What am I missing?

Quic simply sends UDP packets to a destination IP, port 443 (generally)

Say your UDP client binds to 0.0.0.0:<allocated/ephemeral port>

Kernel pick up source address given current working routing, on a per
packet basis.

Their notion of 'flow' is provided by the use of an unique connection
ID, included somewhere in the payload.

The replies from QUIC server will then reach the UDP port, because
server learned the latest source IP known for the client.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 21:41                           ` Eric Dumazet
@ 2015-11-19 21:53                             ` Hannes Frederic Sowa
  2015-11-19 22:04                               ` Eric Dumazet
  2015-11-19 21:53                             ` Tom Herbert
  1 sibling, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-19 21:53 UTC (permalink / raw)
  To: Eric Dumazet, Tom Herbert
  Cc: David Miller, zenczykowski, Lorenzo Colitti, Stephen Hemminger,
	Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Dmitry Torokhov



On Thu, Nov 19, 2015, at 22:41, Eric Dumazet wrote:
> On Thu, 2015-11-19 at 13:29 -0800, Tom Herbert wrote:
> > > We (TCP stack) compete with QUIC, based on UDP, which has no issues like
> > > that. We need to allow TCP sessions being signaled of a non temporary
> > > network disruption.
> > >
> > 
> > Eric, can you provide some detail on this statement?
> > 
> > I don't understand why QUIC wouldn't have this same issue. Seems like
> > it is still connection oriented just like TCP, so if the application
> > does a read expecting data from a peer and reverse reachability is
> > lost, the the read on the socket hang just like reading a TCP would.
> > If this is true, then the TCP solution would might actually be a
> > better since it allows a means for a third party (presumably a daemon
> > monitoring the network) to signal the application via closing specific
> > TCP sockets. I don't see how this could work in UDP especially if
> > these are unconnected sockets. What am I missing?
> 
> Quic simply sends UDP packets to a destination IP, port 443 (generally)
> 
> Say your UDP client binds to 0.0.0.0:<allocated/ephemeral port>
> 
> Kernel pick up source address given current working routing, on a per
> packet basis.
> 
> Their notion of 'flow' is provided by the use of an unique connection
> ID, included somewhere in the payload.
> 
> The replies from QUIC server will then reach the UDP port, because
> server learned the latest source IP known for the client.

You don't steer QUIC source addresses at all? I think most networking
failures are of transient nature thus the kernel routing subsystem is
not aware of link quality and packets get lost anyway e.g. in the air?
Thus binding on multiple interfaces and keepalives seem still
appropriate, no?

Bye,
Hannes

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 21:41                           ` Eric Dumazet
  2015-11-19 21:53                             ` Hannes Frederic Sowa
@ 2015-11-19 21:53                             ` Tom Herbert
  2015-11-19 22:07                               ` Eric Dumazet
  1 sibling, 1 reply; 110+ messages in thread
From: Tom Herbert @ 2015-11-19 21:53 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, zenczykowski, Lorenzo Colitti,
	Hannes Frederic Sowa, Stephen Hemminger,
	Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Dmitry Torokhov

On Thu, Nov 19, 2015 at 1:41 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2015-11-19 at 13:29 -0800, Tom Herbert wrote:
>> > We (TCP stack) compete with QUIC, based on UDP, which has no issues like
>> > that. We need to allow TCP sessions being signaled of a non temporary
>> > network disruption.
>> >
>>
>> Eric, can you provide some detail on this statement?
>>
>> I don't understand why QUIC wouldn't have this same issue. Seems like
>> it is still connection oriented just like TCP, so if the application
>> does a read expecting data from a peer and reverse reachability is
>> lost, the the read on the socket hang just like reading a TCP would.
>> If this is true, then the TCP solution would might actually be a
>> better since it allows a means for a third party (presumably a daemon
>> monitoring the network) to signal the application via closing specific
>> TCP sockets. I don't see how this could work in UDP especially if
>> these are unconnected sockets. What am I missing?
>
> Quic simply sends UDP packets to a destination IP, port 443 (generally)
>
> Say your UDP client binds to 0.0.0.0:<allocated/ephemeral port>
>
> Kernel pick up source address given current working routing, on a per
> packet basis.
>
> Their notion of 'flow' is provided by the use of an unique connection
> ID, included somewhere in the payload.
>
> The replies from QUIC server will then reach the UDP port, because
> server learned the latest source IP known for the client.
>
>
That covers the case where the local address is removed, but the not
the case where the network manager is informed of an error in the path
and wants to signal the application. My understanding was that
SIOCKILLADDR would work for the first case, but this patch was need to
cover the second case.

btw, instead of closing the TCP socket can we just report an error and
wake up the application without affecting the connection? That is this
just becomes an error on the socket. The response by the application
will be the same in any case, porbablly just close the socket and try
to reestablish the connection.

Tom

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 21:53                             ` Hannes Frederic Sowa
@ 2015-11-19 22:04                               ` Eric Dumazet
  2015-11-19 22:09                                 ` Hannes Frederic Sowa
  0 siblings, 1 reply; 110+ messages in thread
From: Eric Dumazet @ 2015-11-19 22:04 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Tom Herbert, David Miller, zenczykowski, Lorenzo Colitti,
	Stephen Hemminger, Linux Kernel Network Developers, Eric Dumazet,
	Erik Kline, Dmitry Torokhov

On Thu, 2015-11-19 at 22:53 +0100, Hannes Frederic Sowa wrote:
> 

> You don't steer QUIC source addresses at all? I think most networking
> failures are of transient nature thus the kernel routing subsystem is
> not aware of link quality and packets get lost anyway e.g. in the air?
> Thus binding on multiple interfaces and keepalives seem still
> appropriate, no?

Imagine you are in your home near a wifi AP, then you close a door and
switch to 3G, or another AP.

No down time. packet will eventually reach its destination.

Application does not have to care.

Why QUIC should absolutely use '4-tuple UDP connections' when this is
likely to fail in this scenario ?

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 21:53                             ` Tom Herbert
@ 2015-11-19 22:07                               ` Eric Dumazet
  2015-11-19 22:14                                 ` Tom Herbert
  0 siblings, 1 reply; 110+ messages in thread
From: Eric Dumazet @ 2015-11-19 22:07 UTC (permalink / raw)
  To: Tom Herbert
  Cc: David Miller, zenczykowski, Lorenzo Colitti,
	Hannes Frederic Sowa, Stephen Hemminger,
	Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Dmitry Torokhov

On Thu, 2015-11-19 at 13:53 -0800, Tom Herbert wrote:

> That covers the case where the local address is removed, but the not
> the case where the network manager is informed of an error in the path
> and wants to signal the application. My understanding was that
> SIOCKILLADDR would work for the first case, but this patch was need to
> cover the second case.
> 
> btw, instead of closing the TCP socket can we just report an error and
> wake up the application without affecting the connection? That is this
> just becomes an error on the socket. The response by the application
> will be the same in any case, porbablly just close the socket and try
> to reestablish the connection.

I thought this was the patch intent ?

Application gets a EPOLLIN|EPOLLOUT|POLLERR notification (if it is
willing to receive it, or blocked in a socket syscall) and closes the
socket.

         sk->sk_err = ETIMEDOUT;
         sk->sk_error_report(sk);
         tcp_done(sk);

If this sequence is not doing the job, then we have other worries in our
stack.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 22:04                               ` Eric Dumazet
@ 2015-11-19 22:09                                 ` Hannes Frederic Sowa
  2015-11-19 22:15                                   ` Eric Dumazet
  0 siblings, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-19 22:09 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Tom Herbert, David Miller, zenczykowski, Lorenzo Colitti,
	Stephen Hemminger, Linux Kernel Network Developers, Eric Dumazet,
	Erik Kline, Dmitry Torokhov



On Thu, Nov 19, 2015, at 23:04, Eric Dumazet wrote:
> On Thu, 2015-11-19 at 22:53 +0100, Hannes Frederic Sowa wrote:
> > 
> 
> > You don't steer QUIC source addresses at all? I think most networking
> > failures are of transient nature thus the kernel routing subsystem is
> > not aware of link quality and packets get lost anyway e.g. in the air?
> > Thus binding on multiple interfaces and keepalives seem still
> > appropriate, no?
> 
> Imagine you are in your home near a wifi AP, then you close a door and
> switch to 3G, or another AP.
> 
> No down time. packet will eventually reach its destination.
> 
> Application does not have to care.
> 
> Why QUIC should absolutely use '4-tuple UDP connections' when this is
> likely to fail in this scenario ?

My point is the "eventually" and the very much increased latency until
the kernel learns about new better source addresses it has available. I
would monitor link quality over time and decide source address based on
this on the sending side.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 22:07                               ` Eric Dumazet
@ 2015-11-19 22:14                                 ` Tom Herbert
  2015-11-19 22:33                                   ` Eric Dumazet
  0 siblings, 1 reply; 110+ messages in thread
From: Tom Herbert @ 2015-11-19 22:14 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, zenczykowski, Lorenzo Colitti,
	Hannes Frederic Sowa, Stephen Hemminger,
	Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Dmitry Torokhov

On Thu, Nov 19, 2015 at 2:07 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2015-11-19 at 13:53 -0800, Tom Herbert wrote:
>
>> That covers the case where the local address is removed, but the not
>> the case where the network manager is informed of an error in the path
>> and wants to signal the application. My understanding was that
>> SIOCKILLADDR would work for the first case, but this patch was need to
>> cover the second case.
>>
>> btw, instead of closing the TCP socket can we just report an error and
>> wake up the application without affecting the connection? That is this
>> just becomes an error on the socket. The response by the application
>> will be the same in any case, porbablly just close the socket and try
>> to reestablish the connection.
>
> I thought this was the patch intent ?
>
> Application gets a EPOLLIN|EPOLLOUT|POLLERR notification (if it is
> willing to receive it, or blocked in a socket syscall) and closes the
> socket.
>
>          sk->sk_err = ETIMEDOUT;
>          sk->sk_error_report(sk);
>          tcp_done(sk);
>
The tcp_done is not needed here. This is the difference between the
application having the connection closed underneath them, versus the
application performing a close to terminate the connection. The latter
behavior preserves the semantics that only the stack or the
application owning the socket can initiate a state change on the
connection.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 22:09                                 ` Hannes Frederic Sowa
@ 2015-11-19 22:15                                   ` Eric Dumazet
  2015-11-19 22:31                                     ` Hannes Frederic Sowa
  0 siblings, 1 reply; 110+ messages in thread
From: Eric Dumazet @ 2015-11-19 22:15 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Tom Herbert, David Miller, zenczykowski, Lorenzo Colitti,
	Stephen Hemminger, Linux Kernel Network Developers, Eric Dumazet,
	Erik Kline, Dmitry Torokhov

On Thu, 2015-11-19 at 23:09 +0100, Hannes Frederic Sowa wrote:

> My point is the "eventually" and the very much increased latency until
> the kernel learns about new better source addresses it has available. I
> would monitor link quality over time and decide source address based on
> this on the sending side.

We are here speaking of code running in a browser (like Chrome speaking
to Google servers). Written in java script or whatever language du jour.

It has no business of managing routes, or interfaces.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 22:15                                   ` Eric Dumazet
@ 2015-11-19 22:31                                     ` Hannes Frederic Sowa
  2015-11-19 22:36                                       ` Eric Dumazet
  0 siblings, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-19 22:31 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Tom Herbert, David Miller, zenczykowski, Lorenzo Colitti,
	Stephen Hemminger, Linux Kernel Network Developers, Eric Dumazet,
	Erik Kline, Dmitry Torokhov



On Thu, Nov 19, 2015, at 23:15, Eric Dumazet wrote:
> On Thu, 2015-11-19 at 23:09 +0100, Hannes Frederic Sowa wrote:
> 
> > My point is the "eventually" and the very much increased latency until
> > the kernel learns about new better source addresses it has available. I
> > would monitor link quality over time and decide source address based on
> > this on the sending side.
> 
> We are here speaking of code running in a browser (like Chrome speaking
> to Google servers). Written in java script or whatever language du jour.
> 
> It has no business of managing routes, or interfaces.

I thought it would be necessary to support multipathing in QUIC?

Interesting and thanks,
Hannes

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 22:14                                 ` Tom Herbert
@ 2015-11-19 22:33                                   ` Eric Dumazet
  2015-11-20  0:04                                     ` Tom Herbert
  0 siblings, 1 reply; 110+ messages in thread
From: Eric Dumazet @ 2015-11-19 22:33 UTC (permalink / raw)
  To: Tom Herbert
  Cc: David Miller, zenczykowski, Lorenzo Colitti,
	Hannes Frederic Sowa, Stephen Hemminger,
	Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Dmitry Torokhov

On Thu, 2015-11-19 at 14:14 -0800, Tom Herbert wrote:
> On Thu, Nov 19, 2015 at 2:07 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > On Thu, 2015-11-19 at 13:53 -0800, Tom Herbert wrote:
> >
> >> That covers the case where the local address is removed, but the not
> >> the case where the network manager is informed of an error in the path
> >> and wants to signal the application. My understanding was that
> >> SIOCKILLADDR would work for the first case, but this patch was need to
> >> cover the second case.
> >>
> >> btw, instead of closing the TCP socket can we just report an error and
> >> wake up the application without affecting the connection? That is this
> >> just becomes an error on the socket. The response by the application
> >> will be the same in any case, porbablly just close the socket and try
> >> to reestablish the connection.
> >
> > I thought this was the patch intent ?
> >
> > Application gets a EPOLLIN|EPOLLOUT|POLLERR notification (if it is
> > willing to receive it, or blocked in a socket syscall) and closes the
> > socket.
> >
> >          sk->sk_err = ETIMEDOUT;
> >          sk->sk_error_report(sk);
> >          tcp_done(sk);
> >
> The tcp_done is not needed here. This is the difference between the
> application having the connection closed underneath them, versus the
> application performing a close to terminate the connection. The latter
> behavior preserves the semantics that only the stack or the
> application owning the socket can initiate a state change on the
> connection.


The code behaves like we received a formal RST : 

Please do not even bother trying to send additional data, it is not
worth wasting precious resource.

There is currently no system call telling the stack : 

Please remove all outstanding data for this socket.

And even if it was present, how many applications need to be changed,
because all they do is probably a close(), after a shutdown() if they
really were cautious...

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 17:38                           ` Tom Herbert
  2015-11-19 18:09                             ` David Miller
@ 2015-11-19 22:33                             ` Lorenzo Colitti
  2015-11-19 22:38                               ` Hannes Frederic Sowa
  1 sibling, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-19 22:33 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Hannes Frederic Sowa, Eric Dumazet, David Miller,
	Maciej Żenczykowski, Stephen Hemminger,
	Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Dmitry Torokhov

On Fri, Nov 20, 2015 at 2:38 AM, Tom Herbert <tom@herbertland.com> wrote:
>> I actually don't have an issue with killing from user space that much. I
>> still recommend (and actually have started to look at it today) to add a
>> new substate for TCP TIMEWAIT and don't have any issue if we block the
>> socket for 60 seconds and send RSTs to all incoming data. This way we
>> can solve the problem Florian indicated as well as this problem. Users
>> can happily kill TCP connections then.
>>
> Neither do I have a problem with killing connections from userspace,
> but we do have to acknowledge that this is a powerful and invasive
> mechanism. I suggest:
>
> 1) We need transparency. If a third party kills a TCP connection then
> the application should be informed of specifically that. This seems
> easy enough to just pick an appropriate error number as I suggested.

I'm not wedded to ETIMEDOUT. If it means we can get this code
upstream, then we can likely do the userspace work that is needed to
ensure that applications respond correctly. Mot

> 2) We need constraints. This feature seems to be specific to a very
> narrow use case. It is not at all clear to me if there are any
> legitimate uses cases beyond Android, enabling this by default in the
> stack creates a non-zero amount of risk and liability for abuse. It
> seems like this should be an opt-in sort of feature, with a kernel
> CONFIG or maybe opt-in per socket.

I am perfectly happy for this to be behind a config option.

I do think this kernel functionality is useful in general, and as a
linux-on-laptop user I wish it was available to NetworkManager as
well, because I use Linux as well, but I think it will work for
Android if this requires a per-socket opt in setsockopt. For other
reasons we pipe all connected sockets through a userspace daemon
anyway. (But please don't tell me that that daemon should keep state
on *all* connected sockets it ever sees :-))

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 22:31                                     ` Hannes Frederic Sowa
@ 2015-11-19 22:36                                       ` Eric Dumazet
  0 siblings, 0 replies; 110+ messages in thread
From: Eric Dumazet @ 2015-11-19 22:36 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Tom Herbert, David Miller, zenczykowski, Lorenzo Colitti,
	Stephen Hemminger, Linux Kernel Network Developers, Eric Dumazet,
	Erik Kline, Dmitry Torokhov

On Thu, 2015-11-19 at 23:31 +0100, Hannes Frederic Sowa wrote:

> I thought it would be necessary to support multipathing in QUIC?

It is not necessary ;)

There are billions of flows sharing Internet pipes, no need to double
their numbers, it wont help at all.

Simply use one path with appropriate Congestion Control, and it is fine.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 22:33                             ` Lorenzo Colitti
@ 2015-11-19 22:38                               ` Hannes Frederic Sowa
  2015-11-19 23:24                                 ` Tom Herbert
  0 siblings, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-19 22:38 UTC (permalink / raw)
  To: Lorenzo Colitti, Tom Herbert
  Cc: Eric Dumazet, David Miller, Maciej Żenczykowski,
	Stephen Hemminger, Linux Kernel Network Developers, Eric Dumazet,
	Erik Kline, Dmitry Torokhov



On Thu, Nov 19, 2015, at 23:33, Lorenzo Colitti wrote:
> On Fri, Nov 20, 2015 at 2:38 AM, Tom Herbert <tom@herbertland.com> wrote:
> >> I actually don't have an issue with killing from user space that much. I
> >> still recommend (and actually have started to look at it today) to add a
> >> new substate for TCP TIMEWAIT and don't have any issue if we block the
> >> socket for 60 seconds and send RSTs to all incoming data. This way we
> >> can solve the problem Florian indicated as well as this problem. Users
> >> can happily kill TCP connections then.
> >>
> > Neither do I have a problem with killing connections from userspace,
> > but we do have to acknowledge that this is a powerful and invasive
> > mechanism. I suggest:
> >
> > 1) We need transparency. If a third party kills a TCP connection then
> > the application should be informed of specifically that. This seems
> > easy enough to just pick an appropriate error number as I suggested.
> 
> I'm not wedded to ETIMEDOUT. If it means we can get this code
> upstream, then we can likely do the userspace work that is needed to
> ensure that applications respond correctly. Mot
> 
> > 2) We need constraints. This feature seems to be specific to a very
> > narrow use case. It is not at all clear to me if there are any
> > legitimate uses cases beyond Android, enabling this by default in the
> > stack creates a non-zero amount of risk and liability for abuse. It
> > seems like this should be an opt-in sort of feature, with a kernel
> > CONFIG or maybe opt-in per socket.
> 
> I am perfectly happy for this to be behind a config option.

Why? If it is an administrator only option it does not make sense to
hide it behind a sysctl. Applications using this interface could also
easily change the sysctl because they probably have the same privileges.
A Kconfig option seems to be not useful to me either.

> I do think this kernel functionality is useful in general, and as a
> linux-on-laptop user I wish it was available to NetworkManager as
> well, because I use Linux as well, but I think it will work for
> Android if this requires a per-socket opt in setsockopt. For other
> reasons we pipe all connected sockets through a userspace daemon
> anyway. (But please don't tell me that that daemon should keep state
> on *all* connected sockets it ever sees :-))

Exactly!

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 16:33                         ` David Miller
  2015-11-19 16:43                           ` Eric Dumazet
  2015-11-19 16:47                           ` Eric Dumazet
@ 2015-11-19 22:55                           ` Lorenzo Colitti
  2 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-19 22:55 UTC (permalink / raw)
  To: David Miller
  Cc: Eric Dumazet, Maciej Żenczykowski, Hannes Frederic Sowa,
	Stephen Hemminger, netdev, Eric Dumazet, Erik Kline,
	Dmitry Torokhov

On Fri, Nov 20, 2015 at 1:33 AM, David Miller <davem@davemloft.net> wrote:
>> Every-time I make a change in linux TCP stack, this code breaks, and
>> this a  real pain because Android changes need to be carried over to
>> vendors.
>
> I'm very glad that you felt the pain enough that you finally had
> to reluctantly try and upstream something.  I'm sorry that doing
> hackish things locally in the Android kernel tree is so painful :-/

Actually, when SIOCKILLADDR crashes users' phones, Eric is not the one
that needs to fix this. I am. Because I am not a TCP expert or a
hardcode kernel developer, I ask Eric for help, and he is good enough
to provide it.

> Android folks really do not care about upstream, and it is probably
> bottom of their priority.  Their actions consistently support this.

Speaking as the one of the Android networking team leads, I would love
it if that changed. It would certainly make my life easier if these
features were upstream. I am trying to upstream all of them so that I
don't have to worry about ensuring that all devices pick them up as
opposed to shipping kernels that lack important commits, and thus
contain subtle bugs that cause real networking problems for real
users.

Pretty much everything we have done in networking code in the past two
years we have tried to upstream - see patches by myself and Erik Kline
that got merged. Some of the things we do are ugly hacks that we've
were expedient at the time, like SIOCKILLADDR. Eric rightly says it's
evil and racy. For those, we try to come up with something that's a
better solution, and we send it upstream. This is one of the results.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 18:27                               ` Hannes Frederic Sowa
@ 2015-11-19 23:02                                 ` Hannes Frederic Sowa
  2015-11-19 23:47                                   ` Lorenzo Colitti
  0 siblings, 1 reply; 110+ messages in thread
From: Hannes Frederic Sowa @ 2015-11-19 23:02 UTC (permalink / raw)
  To: David Miller, tom
  Cc: eric.dumazet, zenczykowski, lorenzo, stephen, netdev, edumazet, ek, dtor

On Thu, Nov 19, 2015, at 19:27, Hannes Frederic Sowa wrote:
> I will research the semantics behind tcpdrop:
> <http://www.freebsd.org/cgi/man.cgi?query=tcpdrop&apropos=0&sektion=0&manpath=FreeBSD+10.2-RELEASE&arch=default&format=html>

Fyi, my research shows that it just sends RST and discards complete
socket state.

Bye,
Hannes

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 22:38                               ` Hannes Frederic Sowa
@ 2015-11-19 23:24                                 ` Tom Herbert
  0 siblings, 0 replies; 110+ messages in thread
From: Tom Herbert @ 2015-11-19 23:24 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Lorenzo Colitti, Eric Dumazet, David Miller,
	Maciej Żenczykowski, Stephen Hemminger,
	Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Dmitry Torokhov

On Thu, Nov 19, 2015 at 2:38 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
>
>
> On Thu, Nov 19, 2015, at 23:33, Lorenzo Colitti wrote:
>> On Fri, Nov 20, 2015 at 2:38 AM, Tom Herbert <tom@herbertland.com> wrote:
>> >> I actually don't have an issue with killing from user space that much. I
>> >> still recommend (and actually have started to look at it today) to add a
>> >> new substate for TCP TIMEWAIT and don't have any issue if we block the
>> >> socket for 60 seconds and send RSTs to all incoming data. This way we
>> >> can solve the problem Florian indicated as well as this problem. Users
>> >> can happily kill TCP connections then.
>> >>
>> > Neither do I have a problem with killing connections from userspace,
>> > but we do have to acknowledge that this is a powerful and invasive
>> > mechanism. I suggest:
>> >
>> > 1) We need transparency. If a third party kills a TCP connection then
>> > the application should be informed of specifically that. This seems
>> > easy enough to just pick an appropriate error number as I suggested.
>>
>> I'm not wedded to ETIMEDOUT. If it means we can get this code
>> upstream, then we can likely do the userspace work that is needed to
>> ensure that applications respond correctly. Mot
>>
>> > 2) We need constraints. This feature seems to be specific to a very
>> > narrow use case. It is not at all clear to me if there are any
>> > legitimate uses cases beyond Android, enabling this by default in the
>> > stack creates a non-zero amount of risk and liability for abuse. It
>> > seems like this should be an opt-in sort of feature, with a kernel
>> > CONFIG or maybe opt-in per socket.
>>
>> I am perfectly happy for this to be behind a config option.
>
> Why? If it is an administrator only option it does not make sense to
> hide it behind a sysctl. Applications using this interface could also
> easily change the sysctl because they probably have the same privileges.
> A Kconfig option seems to be not useful to me either.
>

A sysctl is obviously useless, but Kconfig option is good. There is no
way I want this option enabled in my data center without any
demonstrated need for it. History has shown many times, that once we
allow options like this to be enabled in the kernel, they will
inevitably be used often without our (kernel experts') knowledge or
supervision. In too many cases this blows up in our faces (there has
been at least one outage @FB caused by someone inadvertently
exercising an otherwise obscure kernel feature).

Tom

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 23:02                                 ` Hannes Frederic Sowa
@ 2015-11-19 23:47                                   ` Lorenzo Colitti
  0 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-19 23:47 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: David Miller, Tom Herbert, Eric Dumazet,
	Maciej Żenczykowski, Stephen Hemminger, netdev,
	Eric Dumazet, Erik Kline, Dmitry Torokhov

On Fri, Nov 20, 2015 at 8:02 AM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> On Thu, Nov 19, 2015, at 19:27, Hannes Frederic Sowa wrote:
>> I will research the semantics behind tcpdrop:
>> <http://www.freebsd.org/cgi/man.cgi?query=tcpdrop&apropos=0&sektion=0&manpath=FreeBSD+10.2-RELEASE&arch=default&format=html>
>
> Fyi, my research shows that it just sends RST and discards complete
> socket state.

That makes sense. It is what RFC 793 calls ABORT:

http://tools.ietf.org/html/rfc793#page-50

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 15:54                 ` Hannes Frederic Sowa
@ 2015-11-19 23:54                   ` Maciej Żenczykowski
  0 siblings, 0 replies; 110+ messages in thread
From: Maciej Żenczykowski @ 2015-11-19 23:54 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Tom Herbert, David Miller, Lorenzo Colitti, Eric Dumazet,
	Stephen Hemminger, Linux Kernel Network Developers, Eric Dumazet,
	Erik Kline, Dmitry Torokhov

> Ack. With emphasis of *application*. :)

The problem with application anything is that changing apps is
outright impossible.
There are far too many of them written by far too many people.
And until you fix them all (which is infeasible) and roll those
changes out (unlikely) users will continue to suffer.

You are free to change the kernel, you are free to change some (or a
few) network management daemon(s), you are not free to update every
application out there.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 22:33                                   ` Eric Dumazet
@ 2015-11-20  0:04                                     ` Tom Herbert
  2015-11-20  0:09                                       ` Lorenzo Colitti
  0 siblings, 1 reply; 110+ messages in thread
From: Tom Herbert @ 2015-11-20  0:04 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, zenczykowski, Lorenzo Colitti,
	Hannes Frederic Sowa, Stephen Hemminger,
	Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Dmitry Torokhov

On Thu, Nov 19, 2015 at 2:33 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Thu, 2015-11-19 at 14:14 -0800, Tom Herbert wrote:
>> On Thu, Nov 19, 2015 at 2:07 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> > On Thu, 2015-11-19 at 13:53 -0800, Tom Herbert wrote:
>> >
>> >> That covers the case where the local address is removed, but the not
>> >> the case where the network manager is informed of an error in the path
>> >> and wants to signal the application. My understanding was that
>> >> SIOCKILLADDR would work for the first case, but this patch was need to
>> >> cover the second case.
>> >>
>> >> btw, instead of closing the TCP socket can we just report an error and
>> >> wake up the application without affecting the connection? That is this
>> >> just becomes an error on the socket. The response by the application
>> >> will be the same in any case, porbablly just close the socket and try
>> >> to reestablish the connection.
>> >
>> > I thought this was the patch intent ?
>> >
>> > Application gets a EPOLLIN|EPOLLOUT|POLLERR notification (if it is
>> > willing to receive it, or blocked in a socket syscall) and closes the
>> > socket.
>> >
>> >          sk->sk_err = ETIMEDOUT;
>> >          sk->sk_error_report(sk);
>> >          tcp_done(sk);
>> >
>> The tcp_done is not needed here. This is the difference between the
>> application having the connection closed underneath them, versus the
>> application performing a close to terminate the connection. The latter
>> behavior preserves the semantics that only the stack or the
>> application owning the socket can initiate a state change on the
>> connection.
>
>
> The code behaves like we received a formal RST :
>
But we didn't get a RST, closing the connection is not being done
under the auspices of the protocol. The most comparable event to be a
timeout on the socket read operation.

> Please do not even bother trying to send additional data, it is not
> worth wasting precious resource.
>
It is reasonable to think of this mechanism as way to indicate loss of
reachability. But if this is being viewed as a way to do resource
management that is a little worrisome. What is to stop someone from
using this mechanism from implementing to implement a security layer,
or imposing a global time limit to how long someone can be connected,
or to start killing connections based on some arbitrary policy when
under memory pressure. I am not necessarily saying this stuff is
necessarily bad or an abuse of the mechanism, but the flexibility and
power of the mechanism opens the door for these use cases.

Tom

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-20  0:04                                     ` Tom Herbert
@ 2015-11-20  0:09                                       ` Lorenzo Colitti
  2015-11-20  0:15                                         ` Tom Herbert
  0 siblings, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-20  0:09 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Eric Dumazet, David Miller, zenczykowski, Hannes Frederic Sowa,
	Stephen Hemminger, Linux Kernel Network Developers, Eric Dumazet,
	Erik Kline, Dmitry Torokhov

On Fri, Nov 20, 2015 at 9:04 AM, Tom Herbert <tom@herbertland.com> wrote:
> or to start killing connections based on some arbitrary policy when
> under memory pressure.

You mean like the OOM killer starts killing entire processes based on
some arbitrary policy when under memory pressure? :-)

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19 16:19                       ` Eric Dumazet
                                           ` (2 preceding siblings ...)
  2015-11-19 21:29                         ` Tom Herbert
@ 2015-11-20  0:12                         ` Maciej Żenczykowski
  3 siblings, 0 replies; 110+ messages in thread
From: Maciej Żenczykowski @ 2015-11-20  0:12 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, Lorenzo Colitti, hannes, stephen, Linux NetDev,
	Eric Dumazet, ek, dtor

> Having comments like "look, just implement application keepalives" is
> not going to work [1][2]. This is terrible, and show lack of
> understanding of the problem. We are not dealing with DC communications
> here. (I wish !)

There's a 3rd reason: keepalives (tcp or application) are actually
undesirable because they burn through your battery.

It's far better to do keepalives on one dedicated connection and kill
all the other non keepalive connections
if you determine via that one that the network is bad (or if you have
some other signals about network badness).

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-20  0:09                                       ` Lorenzo Colitti
@ 2015-11-20  0:15                                         ` Tom Herbert
  2015-11-20  2:25                                           ` Maciej Żenczykowski
  0 siblings, 1 reply; 110+ messages in thread
From: Tom Herbert @ 2015-11-20  0:15 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: Eric Dumazet, David Miller, zenczykowski, Hannes Frederic Sowa,
	Stephen Hemminger, Linux Kernel Network Developers, Eric Dumazet,
	Erik Kline, Dmitry Torokhov

On Thu, Nov 19, 2015 at 4:09 PM, Lorenzo Colitti <lorenzo@google.com> wrote:
> On Fri, Nov 20, 2015 at 9:04 AM, Tom Herbert <tom@herbertland.com> wrote:
>> or to start killing connections based on some arbitrary policy when
>> under memory pressure.
>
> You mean like the OOM killer starts killing entire processes based on
> some arbitrary policy when under memory pressure? :-)

No, I mean something that kills connections where previously this did
not happen. The fact that this is done at the process level does not
justify that it is a right to at do connections. Besides, if you
really intend to do this then provide a privileged process a means to
close *any* open file descriptor in the system (why stop at TCP
sockets)!

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-19  5:53                 ` David Miller
  2015-11-19  7:19                   ` Maciej Żenczykowski
@ 2015-11-20  0:19                   ` Lorenzo Colitti
  2015-11-20  0:55                     ` David Miller
  1 sibling, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-20  0:19 UTC (permalink / raw)
  To: David Miller
  Cc: Hannes Frederic Sowa, Eric Dumazet, Stephen Hemminger, netdev,
	Eric Dumazet, Erik Kline, Maciej Żenczykowski,
	Dmitry Torokhov

On Thu, Nov 19, 2015 at 2:53 PM, David Miller <davem@davemloft.net> wrote:
> What I object to is userspace making reachability decisions, not
> whether SOCK_DESTROY closes the socket in one way or the other.

To be fair: userspace already makes reachability decisions. Using
iptables and ip rules, it can configure arbitrarily complex policies
such as "cause new TCP connections from port 12742 to port 443 by UID
12345 to fail with fast ECONNRESET". Userspace can change those
policies at any time.

I think what's new in this patchset is that today userspace can only
do that at connection establishment time; this patchset would allow
re-evaluating reachability for established connections as well.

But I do think this is needed. In addition to the "network
disconnected" and "VPN connected" examples, I already provided here's
another example where it's hard for the kernel to make reachability
decisions.

Android and iOS have settings that the user can use to allow or
disallow an app from using cellular data. If the user disables cell
data for app X, userspace will tell the kernel to drop all that app's
packets, but it can't prevent an app from hanging on read() or write()
on a socket that the app already had (e.g., one it had opened before
the user changed the setting and had left open for latency reasons).

In this case, userspace knows that that app's connections are now
unusable because it configured an iptables rule to block them. The
kernel doesn't really know until it the time comes to send a packet,
and maybe not even then.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-20  0:19                   ` Lorenzo Colitti
@ 2015-11-20  0:55                     ` David Miller
  2015-11-20  1:00                       ` Maciej Żenczykowski
  2015-11-20  1:55                       ` Lorenzo Colitti
  0 siblings, 2 replies; 110+ messages in thread
From: David Miller @ 2015-11-20  0:55 UTC (permalink / raw)
  To: lorenzo; +Cc: hannes, eric.dumazet, stephen, netdev, edumazet, ek, maze, dtor

From: Lorenzo Colitti <lorenzo@google.com>
Date: Fri, 20 Nov 2015 09:19:25 +0900

> In this case, userspace knows that that app's connections are now
> unusable because it configured an iptables rule to block them. The
> kernel doesn't really know until it the time comes to send a packet,
> and maybe not even then.

Netfilter could perform signalling on skb->sk when it drops packets.

Your example is actually a argument _for_ doing this in the kernel.

:-)

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-20  0:55                     ` David Miller
@ 2015-11-20  1:00                       ` Maciej Żenczykowski
  2015-11-20  1:55                       ` Lorenzo Colitti
  1 sibling, 0 replies; 110+ messages in thread
From: Maciej Żenczykowski @ 2015-11-20  1:00 UTC (permalink / raw)
  To: David Miller
  Cc: Lorenzo Colitti, Hannes Frederic Sowa, Eric Dumazet,
	Stephen Hemminger, Linux NetDev, Eric Dumazet, Erik Kline,
	Dmitry Torokhov

>> In this case, userspace knows that that app's connections are now
>> unusable because it configured an iptables rule to block them. The
>> kernel doesn't really know until it the time comes to send a packet,
>> and maybe not even then.
>
> Netfilter could perform signalling on skb->sk when it drops packets.
>
> Your example is actually a argument _for_ doing this in the kernel.

That only (currently) works if a socket actually tries to send something.

Idle sockets (for example a socket used for push notification from the
remote server) still end up blocking forever.

If you were to, whenever the firewall configuration is changed,
iterate through all sockets in the system and generate a pair of fake
0-data packets (for both directions) for every socket to see if it
would get blocked by the firewall... but that seems quite insane.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-20  0:55                     ` David Miller
  2015-11-20  1:00                       ` Maciej Żenczykowski
@ 2015-11-20  1:55                       ` Lorenzo Colitti
  2015-11-20 16:51                         ` David Ahern
  1 sibling, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-11-20  1:55 UTC (permalink / raw)
  To: David Miller
  Cc: Hannes Frederic Sowa, Eric Dumazet, Stephen Hemminger, netdev,
	Eric Dumazet, Erik Kline, Maciej Żenczykowski,
	Dmitry Torokhov

On Fri, Nov 20, 2015 at 9:55 AM, David Miller <davem@davemloft.net> wrote:
> Netfilter could perform signalling on skb->sk when it drops packets.
>
> Your example is actually a argument _for_ doing this in the kernel.
>
> :-)

Well, I did say that was a simple example. :-) How do I make the VPN
case work? In that case, the packets will go out just fine, they'll
just be sent through the VPN tunnel and get dropped by the VPN server.
Again, the issue there is:

1. TCP connection is established on wifi network.
2. VPN comes up, adds high-priority routing rule that ensures that no
unprivileged app traffic leaks to the wifi network directly, but
everything is sent on VPN.
3. Any packet sent on TCP connection in #1 is now sent on VPN network
with wifi source address.

We don't have a solution for this today; SIOCKILLADDR only takes an IP
address. Today, apps just get stuck. We could fix this with
SOCK_DESTROY.

Here's another thing that could be fixed with SOCK_DESTROY. Today,
when the device switches from cell data to wifi, it tears down cell
data after a 30-second seamless handover period. One major reason to
do this is that if it doesn't, some already-established, long-lived
TCP connection might continue to use expensive cell data because the
app did not close the connection and reconnect on wifi.

If we had SOCK_DESTROY, the system could instead do the following:

1. Device connects to wifi.
2. After 30-second seamless handover, sockets currently established on
cell get closed by SOCK_DESTROY (instead of being closed by tearing
down cell data and calling SIOCKILLADDR). As happens today, apps see
the error and reconnect, and policy steers them to wifi.
3. Unlike today, cell stays up, so that the next time the user needs
it (fast network switching? multipath? app that already has permission
to use cell data?), it's already there and the user doesn't need to
wait 500ms for it to come up again.
4. Unprivileged apps can't use expensive mobile data any more than
they could in previous releases.

As a more general point, I think part of the issue is that there are
lots of problems in the mobile space that might not be immediately
obvious to people on this list. In the past, this has led to us
(Android networking team) proposing non-trivial patches that provided
functionality that makes sense on a mobile device (e.g., UID-based
routing, SOCK_DESTROY), and those patches being rejected on the
grounds that the use cases didn't make sense or that the people
proposing them didn't understand networking. :-)

Better communication might help. I will try to publicly document how
we currently use the stack; hopefully that will provide more context
for discussions such as these, and allow future work to unwind some of
the out-of-tree hacks that we currently rely on and replace it with
upstream alternatives. We might even be able to show up at netdev 1.1
for some higher-bandwidth conversations.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-20  0:15                                         ` Tom Herbert
@ 2015-11-20  2:25                                           ` Maciej Żenczykowski
  2015-12-01  2:32                                             ` Lorenzo Colitti
  0 siblings, 1 reply; 110+ messages in thread
From: Maciej Żenczykowski @ 2015-11-20  2:25 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Lorenzo Colitti, Eric Dumazet, David Miller,
	Hannes Frederic Sowa, Stephen Hemminger,
	Linux Kernel Network Developers, Eric Dumazet, Erik Kline,
	Dmitry Torokhov

> No, I mean something that kills connections where previously this did
> not happen. The fact that this is done at the process level does not
> justify that it is a right to at do connections. Besides, if you
> really intend to do this then provide a privileged process a means to
> close *any* open file descriptor in the system (why stop at TCP
> sockets)!

There is little reason to be able to kill random processes arbitrary
file descriptors,
while there is ample reason to be able to kill network connections.

Network connections can already (and often are) be killed by the network, but
that's far less true for files (although IO errors and emergency fs remount r/o
can cause similar behaviour).

Userspace doesn't expect file descriptors to local files to suddenly break,
while it already has to handle network connections timing out and the like.

Perhaps we shouldn't be using the word 'kill'.  We're not really killing
these sockets, nor closing them, we just need an out-of-band mechanism
for flagging them with some sort of error (ie. force timing them out, or force
resetting them).

That said, one can certainly come up with cases where killing arbitrary file
descriptors would make sense (or more likely switching them from r/w to r/o
mode or replacing them with some sort of /dev/null like file
descriptor instead).
Example use cases would be umounting a removable usb key which is in the
process of being pulled out of the machine (or was already pulled
out), or remounting
a file system readonly (which still has r/w open files).

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-20  1:55                       ` Lorenzo Colitti
@ 2015-11-20 16:51                         ` David Ahern
  0 siblings, 0 replies; 110+ messages in thread
From: David Ahern @ 2015-11-20 16:51 UTC (permalink / raw)
  To: Lorenzo Colitti, David Miller
  Cc: Hannes Frederic Sowa, Eric Dumazet, Stephen Hemminger, netdev,
	Eric Dumazet, Erik Kline, Maciej Żenczykowski,
	Dmitry Torokhov

On 11/19/15 6:55 PM, Lorenzo Colitti wrote:
> upstream alternatives. We might even be able to show up at netdev 1.1
> for some higher-bandwidth conversations.

This use case would make a great talk for netdev.

There are similar problems when netdev's are moved between namespaces 
(and VRFs).

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-11-20  2:25                                           ` Maciej Żenczykowski
@ 2015-12-01  2:32                                             ` Lorenzo Colitti
  2015-12-01  2:32                                               ` [PATCH v3 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
                                                                 ` (4 more replies)
  0 siblings, 5 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-01  2:32 UTC (permalink / raw)
  To: netdev; +Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski

Here is an updated version of the SOCK_DESTROY patch
incorporating some of the feedback received.

There were two substantial concerns expressed on the approach
taken in this patch. The first was that it allows applications
to cause the Linux TCP stack to behave improperly. I believe
this is addressed as follows:

1. This new patchset sends a RST in addition to clearing state.
   This is compliant behaviour: it is the ABORT operation
   specified in RFC 793 [1]. Any app today can do this by
   enabling SO_LINGER with a timeout of 0 and calling close.
2. Multiple other operating systems implement this behaviour:
   - FreeBSD has had this since 5.4 in 2005 [2]. It is available
     to privileged userspace and there is a tool to use it [3].
   - The FreeBSD commit description states that the idea came
     from OpenBSD.
   - iOS has been administratively closing app sockets since
     iOS 4 [see 4, which states that a socket "might get
     reclaimed by the kernel" and after that will return EBADF].

The second concern was that userspace should not be in the
business of making reachability determinations for TCP sockets;
that job belongs to the kernel. But userspace makes reachability
determinations all the time. Most relevant to this patchset:
"-j REJECT --reject-with tcp-reset" has exactly the same
effect as SOCK_DESTROY, except it only does so when the app does
write or the kernel sends a keepalive, not when blocked on read.

Also, there are real use cases where the kernel does not have
enough information to know that a connection is now inoperable.
The kernel can know if a packet can't be routed, but in general
it won't if a TCP connection is dead in the water because it is
now routed to a network where its source address is no longer
valid [5][6]. 

Other concerns have been addressed in this version, as follows:

1. tcp_diag_destroy now does a proper RFC 793 ABORT, i.e., sends
   a RST to the peer. This is consistent with BSD's tcpdrop, and
   is more correct in general, even though in most use cases
   SOCK_DESTROY will only be called when sending a RST is no
   longer possible (e.g., the network has disconnected).
2. Blocking socket operations are interrupted with ECONNABORTED
   instead of ETIMEDOUT. This addresses Tom's point that
   ETIMEDOUT is vague and an explicit notification is needed.
   ECONNABORTED was chosen because it is consistent with BSD.
3. SOCK_DESTROY is placed behind an INET_DIAG_DESTROY
   configuration option, which is off by default.

[1] http://tools.ietf.org/html/rfc793#page-50
[2] http://svnweb.freebsd.org/base?view=revision&revision=141381
[3] https://www.freebsd.org/cgi/man.cgi?query=tcpdrop&sektion=8&manpath=FreeBSD+5.4-RELEASE
[4] https://developer.apple.com/library/ios/technotes/tn2277/_index.html#//apple_ref/doc/uid/DTS40010841-CH1-SUBSECTION3
[5] http://www.spinics.net/lists/netdev/msg352775.html
[6] http://www.spinics.net/lists/netdev/msg352952.html

^ permalink raw reply	[flat|nested] 110+ messages in thread

* [PATCH v3 1/4] net: diag: split inet_diag_dump_one_icsk into two
  2015-12-01  2:32                                             ` Lorenzo Colitti
@ 2015-12-01  2:32                                               ` Lorenzo Colitti
  2015-12-01  2:32                                               ` [PATCH v3 2/4] net: diag: Add the ability to destroy a socket from userspace Lorenzo Colitti
                                                                 ` (3 subsequent siblings)
  4 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-01  2:32 UTC (permalink / raw)
  To: netdev
  Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski, Lorenzo Colitti

Currently, inet_diag_dump_one_icsk finds a socket and then dumps
its information to userspace. Split it into a part that finds the
socket and a part that dumps the information.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/linux/inet_diag.h |  5 +++++
 net/ipv4/inet_diag.c      | 42 +++++++++++++++++++++++++++---------------
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 0e707f0..e7032f04 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -3,6 +3,7 @@
 
 #include <uapi/linux/inet_diag.h>
 
+struct net;
 struct sock;
 struct inet_hashinfo;
 struct nlattr;
@@ -41,6 +42,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 			    struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 			    const struct inet_diag_req_v2 *req);
 
+struct sock *inet_diag_find_one_icsk(struct net *net,
+				     struct inet_hashinfo *hashinfo,
+				     const struct inet_diag_req_v2 *req);
+
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ab9f8a6..cfabb8f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 				  nlmsg_flags, unlh);
 }
 
-int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
-			    struct sk_buff *in_skb,
-			    const struct nlmsghdr *nlh,
-			    const struct inet_diag_req_v2 *req)
+struct sock *inet_diag_find_one_icsk(struct net *net,
+				     struct inet_hashinfo *hashinfo,
+				     const struct inet_diag_req_v2 *req)
 {
-	struct net *net = sock_net(in_skb->sk);
-	struct sk_buff *rep;
 	struct sock *sk;
-	int err;
 
-	err = -EINVAL;
 	if (req->sdiag_family == AF_INET)
 		sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
 				 req->id.idiag_dport, req->id.idiag_src[0],
@@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 				  req->id.idiag_if);
 #endif
 	else
-		goto out_nosk;
+		return ERR_PTR(-EINVAL);
 
-	err = -ENOENT;
 	if (!sk)
-		goto out_nosk;
+		return ERR_PTR(-ENOENT);
 
-	err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
-	if (err)
-		goto out;
+	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
+		sock_gen_put(sk);
+		return ERR_PTR(-ENOENT);
+	}
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);
+
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
+			    struct sk_buff *in_skb,
+			    const struct nlmsghdr *nlh,
+			    const struct inet_diag_req_v2 *req)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sk_buff *rep;
+	struct sock *sk;
+	int err;
+
+	sk = inet_diag_find_one_icsk(net, hashinfo, req);
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
 
 	rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
 	if (!rep) {
@@ -409,7 +422,6 @@ out:
 	if (sk)
 		sock_gen_put(sk);
 
-out_nosk:
 	return err;
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v3 2/4] net: diag: Add the ability to destroy a socket from userspace.
  2015-12-01  2:32                                             ` Lorenzo Colitti
  2015-12-01  2:32                                               ` [PATCH v3 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
@ 2015-12-01  2:32                                               ` Lorenzo Colitti
  2015-12-01  2:32                                               ` [PATCH v3 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
                                                                 ` (2 subsequent siblings)
  4 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-01  2:32 UTC (permalink / raw)
  To: netdev
  Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski, Lorenzo Colitti

This allows a privileged userspace process, such as a connection
manager or system administration tool, to close sockets belonging
to other apps when the network they were established on has
disconnected. It is needed on laptops and mobile hosts to ensure
that network switches / disconnects do not result in applications
being blocked for long periods of time (minutes) in read or
connect calls on TCP sockets that will never succeed because the
IP address they are bound to is no longer on the system. Closing
the sockets causes these calls to fail fast and allows the apps
to reconnect on another network.

For many years Android kernels have supported this via an
out-of-tree SIOCKILLADDR ioctl that is called on every
RTM_DELADDR event, but this solution is cleaner, more robust and
more flexible: the connection manager can iterate over all
connections on the deleted IP address and close all of them. It
can also be used to close all sockets opened by a given app
process, for example if the user has restricted that app from
using the network.

This patch adds a SOCK_DESTROY operation and a destroy function
pointer to sock_diag_handler. It does not include any
implementation code.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/linux/sock_diag.h      |  1 +
 include/uapi/linux/sock_diag.h |  1 +
 net/core/sock_diag.c           | 11 ++++++++---
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index fddebc6..e15e8e2 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -15,6 +15,7 @@ struct sock_diag_handler {
 	__u8 family;
 	int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
 	int (*get_info)(struct sk_buff *skb, struct sock *sk);
+	int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh);
 };
 
 int sock_diag_register(const struct sock_diag_handler *h);
diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
index 49230d3..bae2d80 100644
--- a/include/uapi/linux/sock_diag.h
+++ b/include/uapi/linux/sock_diag.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 
 #define SOCK_DIAG_BY_FAMILY 20
+#define SOCK_DESTROY 21
 
 struct sock_diag_req {
 	__u8	sdiag_family;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 0c1d58d..64e3d81 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld)
 }
 EXPORT_SYMBOL_GPL(sock_diag_unregister);
 
-static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	int err;
 	struct sock_diag_req *req = nlmsg_data(nlh);
@@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	hndl = sock_diag_handlers[req->sdiag_family];
 	if (hndl == NULL)
 		err = -ENOENT;
-	else
+	else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
 		err = hndl->dump(skb, nlh);
+	else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
+		err = hndl->destroy(skb, nlh);
+	else
+		err = -EOPNOTSUPP;
 	mutex_unlock(&sock_diag_table_mutex);
 
 	return err;
@@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 		return ret;
 	case SOCK_DIAG_BY_FAMILY:
-		return __sock_diag_rcv_msg(skb, nlh);
+	case SOCK_DESTROY:
+		return __sock_diag_cmd(skb, nlh);
 	default:
 		return -EINVAL;
 	}
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v3 3/4] net: diag: Support SOCK_DESTROY for inet sockets.
  2015-12-01  2:32                                             ` Lorenzo Colitti
  2015-12-01  2:32                                               ` [PATCH v3 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
  2015-12-01  2:32                                               ` [PATCH v3 2/4] net: diag: Add the ability to destroy a socket from userspace Lorenzo Colitti
@ 2015-12-01  2:32                                               ` Lorenzo Colitti
  2015-12-01  2:32                                               ` [PATCH v3 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
  2015-12-01  2:53                                               ` Add a SOCK_DESTROY operation to close sockets from userspace Tom Herbert
  4 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-01  2:32 UTC (permalink / raw)
  To: netdev
  Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski, Lorenzo Colitti

This passes the SOCK_DESTROY operation to the underlying
protocol diag handler, or returns -EINVAL if that handler does
not define a destroy operation.

Most of this patch is just renaming functions. This is not
strictly necessary, but it would be fairly counterintuitive to
have the code to destroy inet sockets be in a function whose name
starts with inet_diag_get.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/linux/inet_diag.h |  4 ++++
 net/ipv4/inet_diag.c      | 23 +++++++++++++++--------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index e7032f04..7c27fa1 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -24,6 +24,10 @@ struct inet_diag_handler {
 	void		(*idiag_get_info)(struct sock *sk,
 					  struct inet_diag_msg *r,
 					  void *info);
+
+	int		(*destroy)(struct sk_buff *in_skb,
+				   const struct inet_diag_req_v2 *req);
+
 	__u16		idiag_type;
 	__u16		idiag_info_size;
 };
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index cfabb8f..8bb8e7a 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -426,7 +426,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 
-static int inet_diag_get_exact(struct sk_buff *in_skb,
+static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
 			       const struct inet_diag_req_v2 *req)
 {
@@ -436,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 	handler = inet_diag_lock_handler(req->sdiag_protocol);
 	if (IS_ERR(handler))
 		err = PTR_ERR(handler);
-	else
+	else if (cmd == SOCK_DIAG_BY_FAMILY)
 		err = handler->dump_one(in_skb, nlh, req);
+	else if (cmd == SOCK_DESTROY && handler->destroy)
+		err = handler->destroy(in_skb, req);
+	else
+		err = -EOPNOTSUPP;
 	inet_diag_unlock_handler(handler);
 
 	return err;
@@ -950,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
 	req.idiag_states = rc->idiag_states;
 	req.id = rc->id;
 
-	return inet_diag_get_exact(in_skb, nlh, &req);
+	return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
 }
 
 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -984,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 	return inet_diag_get_exact_compat(skb, nlh);
 }
 
-static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
 {
 	int hdrlen = sizeof(struct inet_diag_req_v2);
 	struct net *net = sock_net(skb->sk);
@@ -992,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 	if (nlmsg_len(h) < hdrlen)
 		return -EINVAL;
 
-	if (h->nlmsg_flags & NLM_F_DUMP) {
+	if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
+	    h->nlmsg_flags & NLM_F_DUMP) {
 		if (nlmsg_attrlen(h, hdrlen)) {
 			struct nlattr *attr;
 
@@ -1011,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 		}
 	}
 
-	return inet_diag_get_exact(skb, h, nlmsg_data(h));
+	return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
 }
 
 static
@@ -1062,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
 
 static const struct sock_diag_handler inet_diag_handler = {
 	.family = AF_INET,
-	.dump = inet_diag_handler_dump,
+	.dump = inet_diag_handler_cmd,
 	.get_info = inet_diag_handler_get_info,
+	.destroy = inet_diag_handler_cmd,
 };
 
 static const struct sock_diag_handler inet6_diag_handler = {
 	.family = AF_INET6,
-	.dump = inet_diag_handler_dump,
+	.dump = inet_diag_handler_cmd,
 	.get_info = inet_diag_handler_get_info,
+	.destroy = inet_diag_handler_cmd,
 };
 
 int inet_diag_register(const struct inet_diag_handler *h)
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v3 4/4] net: diag: Support destroying TCP sockets.
  2015-12-01  2:32                                             ` Lorenzo Colitti
                                                                 ` (2 preceding siblings ...)
  2015-12-01  2:32                                               ` [PATCH v3 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
@ 2015-12-01  2:32                                               ` Lorenzo Colitti
  2015-12-01  6:23                                                 ` kbuild test robot
  2015-12-01  2:53                                               ` Add a SOCK_DESTROY operation to close sockets from userspace Tom Herbert
  4 siblings, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-01  2:32 UTC (permalink / raw)
  To: netdev
  Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski, Lorenzo Colitti

This implements SOCK_DESTROY for TCP sockets. It causes all
blocking calls on the socket to fail fast with ECONNABORTED and
causes a protocol close of the socket. It informs the other end
of the connection by sending a RST, i.e., initiating a TCP ABORT
as per RFC 793. ECONNABORTED was chosen for consistency with
FreeBSD.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 net/ipv4/Kconfig    | 13 +++++++++++++
 net/ipv4/tcp_diag.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 416dfa0..31c4496 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -436,6 +436,19 @@ config INET_UDP_DIAG
 	  Support for UDP socket monitoring interface used by the ss tool.
 	  If unsure, say Y.
 
+config INET_DIAG_DESTROY
+	bool "INET: allow privileged process to administratively close sockets"
+	depends on INET_DIAG && (IPV6 || IPV6=n)
+	default n
+	---help---
+	  Provides a SOCK_DESTROY operation that allows privileged processes
+	  (e.g., a connection manager or a network administration tool such as
+	  ss) to close sockets opened by other processes. Closing a socket in
+	  this way interrupts any blocking read/writes/connect operations on
+	  the socket and causes future socket calls to behave as if the socket
+	  had been disconnected.
+	  If unsure, say N.
+
 menuconfig TCP_CONG_ADVANCED
 	bool "TCP: advanced congestion control"
 	---help---
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index b316040..31d3e0c 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/net.h>
 #include <linux/inet_diag.h>
 
 #include <linux/tcp.h>
@@ -46,12 +47,57 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 	return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
 }
 
+#if IS_ENABLED(CONFIG_INET_DIAG_DESTROY)
+static int tcp_diag_destroy(struct sk_buff *in_skb,
+			    const struct inet_diag_req_v2 *req)
+{
+	struct sock *sk;
+	struct net *net = sock_net(in_skb->sk);
+
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
+	sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
+
+	if (!sk_fullsock(sk)) {
+		sock_gen_put(sk);
+		return -EOPNOTSUPP;
+	}
+
+	/* Don't race with userspace socket closes such as tcp_close. */
+	lock_sock(sk);
+
+	/* Don't race with BH socket closes such as inet_csk_listen_stop. */
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		smp_wmb();  /* Be consistent with tcp_reset */
+		sk->sk_err = ECONNABORTED;
+		sk->sk_error_report(sk);
+		tcp_send_active_reset(sk, GFP_ATOMIC);
+		tcp_done(sk);
+	}
+
+	bh_unlock_sock(sk);
+	local_bh_enable();
+	release_sock(sk);
+	sock_put(sk);
+	return 0;
+}
+#endif
+
 static const struct inet_diag_handler tcp_diag_handler = {
 	.dump		 = tcp_diag_dump,
 	.dump_one	 = tcp_diag_dump_one,
 	.idiag_get_info	 = tcp_diag_get_info,
 	.idiag_type	 = IPPROTO_TCP,
 	.idiag_info_size = sizeof(struct tcp_info),
+#if IS_ENABLED(CONFIG_INET_DIAG_DESTROY)
+	.destroy	 = tcp_diag_destroy,
+#endif
 };
 
 static int __init tcp_diag_init(void)
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-12-01  2:32                                             ` Lorenzo Colitti
                                                                 ` (3 preceding siblings ...)
  2015-12-01  2:32                                               ` [PATCH v3 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
@ 2015-12-01  2:53                                               ` Tom Herbert
  2015-12-02 15:18                                                 ` Lorenzo Colitti
  4 siblings, 1 reply; 110+ messages in thread
From: Tom Herbert @ 2015-12-01  2:53 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: Linux Kernel Network Developers, David S. Miller,
	Hannes Frederic Sowa, Eric Dumazet, Erik Kline,
	Maciej Żenczykowski

On Mon, Nov 30, 2015 at 6:32 PM, Lorenzo Colitti <lorenzo@google.com> wrote:
> Here is an updated version of the SOCK_DESTROY patch
> incorporating some of the feedback received.
>
> There were two substantial concerns expressed on the approach
> taken in this patch. The first was that it allows applications
> to cause the Linux TCP stack to behave improperly. I believe
> this is addressed as follows:
>
> 1. This new patchset sends a RST in addition to clearing state.
>    This is compliant behaviour: it is the ABORT operation
>    specified in RFC 793 [1]. Any app today can do this by
>    enabling SO_LINGER with a timeout of 0 and calling close.
> 2. Multiple other operating systems implement this behaviour:
>    - FreeBSD has had this since 5.4 in 2005 [2]. It is available
>      to privileged userspace and there is a tool to use it [3].
>    - The FreeBSD commit description states that the idea came
>      from OpenBSD.
>    - iOS has been administratively closing app sockets since
>      iOS 4 [see 4, which states that a socket "might get
>      reclaimed by the kernel" and after that will return EBADF].
>
> The second concern was that userspace should not be in the
> business of making reachability determinations for TCP sockets;
> that job belongs to the kernel. But userspace makes reachability
> determinations all the time. Most relevant to this patchset:
> "-j REJECT --reject-with tcp-reset" has exactly the same
> effect as SOCK_DESTROY, except it only does so when the app does
> write or the kernel sends a keepalive, not when blocked on read.
>
> Also, there are real use cases where the kernel does not have
> enough information to know that a connection is now inoperable.
> The kernel can know if a packet can't be routed, but in general
> it won't if a TCP connection is dead in the water because it is
> now routed to a network where its source address is no longer
> valid [5][6].
>
> Other concerns have been addressed in this version, as follows:
>
> 1. tcp_diag_destroy now does a proper RFC 793 ABORT, i.e., sends
>    a RST to the peer. This is consistent with BSD's tcpdrop, and
>    is more correct in general, even though in most use cases
>    SOCK_DESTROY will only be called when sending a RST is no
>    longer possible (e.g., the network has disconnected).
> 2. Blocking socket operations are interrupted with ECONNABORTED
>    instead of ETIMEDOUT. This addresses Tom's point that
>    ETIMEDOUT is vague and an explicit notification is needed.
>    ECONNABORTED was chosen because it is consistent with BSD.
> 3. SOCK_DESTROY is placed behind an INET_DIAG_DESTROY
>    configuration option, which is off by default.
>
Lorenzo,

This is awesome! The only thing I would suggest is to make
sock_destroy a proto_op so that it can be called from within the
kernel. This should be preferred to externally calling tcp_done
(hopefully we can unexport that symbol then).

Tom

> [1] http://tools.ietf.org/html/rfc793#page-50
> [2] http://svnweb.freebsd.org/base?view=revision&revision=141381
> [3] https://www.freebsd.org/cgi/man.cgi?query=tcpdrop&sektion=8&manpath=FreeBSD+5.4-RELEASE
> [4] https://developer.apple.com/library/ios/technotes/tn2277/_index.html#//apple_ref/doc/uid/DTS40010841-CH1-SUBSECTION3
> [5] http://www.spinics.net/lists/netdev/msg352775.html
> [6] http://www.spinics.net/lists/netdev/msg352952.html
>

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 4/4] net: diag: Support destroying TCP sockets.
  2015-12-01  2:32                                               ` [PATCH v3 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
@ 2015-12-01  6:23                                                 ` kbuild test robot
  2015-12-01  7:12                                                   ` Lorenzo Colitti
  0 siblings, 1 reply; 110+ messages in thread
From: kbuild test robot @ 2015-12-01  6:23 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: kbuild-all, netdev, davem, hannes, eric.dumazet, ek, tom,
	zenczykowski, Lorenzo Colitti

[-- Attachment #1: Type: text/plain, Size: 814 bytes --]

Hi Lorenzo,

[auto build test ERROR on net/master]
[also build test ERROR on v4.4-rc3 next-20151127]

url:    https://github.com/0day-ci/linux/commits/Lorenzo-Colitti/net-diag-split-inet_diag_dump_one_icsk-into-two/20151201-103636
config: i386-allmodconfig (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

Note: the linux-review/Lorenzo-Colitti/net-diag-split-inet_diag_dump_one_icsk-into-two/20151201-103636 HEAD 6cbafe8ef93ff2ce6969e4e07c792a326e2ef09b builds fine.
      It only hurts bisectibility.

All errors (new ones prefixed by >>):

>> ERROR: "tcp_send_active_reset" undefined!

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 52574 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 4/4] net: diag: Support destroying TCP sockets.
  2015-12-01  6:23                                                 ` kbuild test robot
@ 2015-12-01  7:12                                                   ` Lorenzo Colitti
  0 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-01  7:12 UTC (permalink / raw)
  To: kbuild test robot
  Cc: kbuild-all, netdev, David Miller, Hannes Frederic Sowa,
	Eric Dumazet, Erik Kline, Tom Herbert, Maciej Żenczykowski

On Tue, Dec 1, 2015 at 3:23 PM, kbuild test robot <lkp@intel.com> wrote:
>>> ERROR: "tcp_send_active_reset" undefined!

I used tcp_send_active_reset from tcp_diag.ko without an
EXPORT_SYMBOL_GPL in net/ipv4/tcp_output.c. Fixed in patch v4.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-12-01  2:53                                               ` Add a SOCK_DESTROY operation to close sockets from userspace Tom Herbert
@ 2015-12-02 15:18                                                 ` Lorenzo Colitti
  2015-12-02 16:12                                                   ` Tom Herbert
  0 siblings, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-02 15:18 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Linux Kernel Network Developers, David S. Miller,
	Hannes Frederic Sowa, Eric Dumazet, Erik Kline,
	Maciej Żenczykowski

On Tue, Dec 1, 2015 at 11:53 AM, Tom Herbert <tom@herbertland.com> wrote:
> This is awesome! The only thing I would suggest is to make
> sock_destroy a proto_op so that it can be called from within the
> kernel. This should be preferred to externally calling tcp_done
> (hopefully we can unexport that symbol then).

I'm not sure there is value in making it a proto op. The sock_diag
code that finds the socket based on the netlink diag request is
specific to both the protocol family (e.g., the sock_diag structures
for inet and unix differ) and protocol (e.g., TCP, UDPv4 and UDPv6 use
different hash tables).

So even if we add a proto_op (or struct proto function pointer) to
destroy a socket, we can't just have a generic function (or even an
inet-specific function) that just finds a socket and does "return
sk->sk_prot->diag_destroy(sk)" regardless of what protocol that socket
is.

The code does look better if the protocol-specific code is moved to a
new tcp_abort function that just takes a pointer to the sk.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-12-02 15:18                                                 ` Lorenzo Colitti
@ 2015-12-02 16:12                                                   ` Tom Herbert
  2015-12-02 16:30                                                     ` Lorenzo Colitti
  2015-12-14 17:29                                                     ` Lorenzo Colitti
  0 siblings, 2 replies; 110+ messages in thread
From: Tom Herbert @ 2015-12-02 16:12 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: Linux Kernel Network Developers, David S. Miller,
	Hannes Frederic Sowa, Eric Dumazet, Erik Kline,
	Maciej Żenczykowski

On Wed, Dec 2, 2015 at 7:18 AM, Lorenzo Colitti <lorenzo@google.com> wrote:
> On Tue, Dec 1, 2015 at 11:53 AM, Tom Herbert <tom@herbertland.com> wrote:
>> This is awesome! The only thing I would suggest is to make
>> sock_destroy a proto_op so that it can be called from within the
>> kernel. This should be preferred to externally calling tcp_done
>> (hopefully we can unexport that symbol then).
>
> I'm not sure there is value in making it a proto op. The sock_diag
> code that finds the socket based on the netlink diag request is
> specific to both the protocol family (e.g., the sock_diag structures
> for inet and unix differ) and protocol (e.g., TCP, UDPv4 and UDPv6 use
> different hash tables).
>
> So even if we add a proto_op (or struct proto function pointer) to
> destroy a socket, we can't just have a generic function (or even an
> inet-specific function) that just finds a socket and does "return
> sk->sk_prot->diag_destroy(sk)" regardless of what protocol that socket
> is.
>
> The code does look better if the protocol-specific code is moved to a
> new tcp_abort function that just takes a pointer to the sk.

The in kernel caller would already have a pointer to the socket so the
call would just be sk->sk_prot->destroy(sk). That call should make its
way down to same backend function in TCP that the diag path would use.
We need this in the kernel for the same reasons you want this in
userspace, if a third party hits an unrecoverable error on the socket
it needs to signal this condition to the owner of the socket but can't
actually close the socket (useful to RDS, KCM, probably
TLS-in-kernel).

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-12-02 16:12                                                   ` Tom Herbert
@ 2015-12-02 16:30                                                     ` Lorenzo Colitti
  2015-12-02 17:09                                                       ` Tom Herbert
  2015-12-14 17:29                                                     ` Lorenzo Colitti
  1 sibling, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-02 16:30 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Linux Kernel Network Developers, David S. Miller,
	Hannes Frederic Sowa, Eric Dumazet, Erik Kline,
	Maciej Żenczykowski

On Thu, Dec 3, 2015 at 1:12 AM, Tom Herbert <tom@herbertland.com> wrote:
> The in kernel caller would already have a pointer to the socket so the
> call would just be sk->sk_prot->destroy(sk). That call should make its
> way down to same backend function in TCP that the diag path would use.
> We need this in the kernel for the same reasons you want this in
> userspace, if a third party hits an unrecoverable error on the socket
> it needs to signal this condition to the owner of the socket but can't
> actually close the socket (useful to RDS, KCM, probably
> TLS-in-kernel).

Oh, I see, yes. You're thinking of a case where multiple protocols
support this destroy operation. An in-kernel caller has a pointer to a
sk that it wants to unwedge, but doesn't need to know which destroy
function to use for which protocol, it just does "if
(sk->sk_prot->destroy != NULL) sk->sk_prot_destroy(sk)". I think that
should be a trivial change on top of what I have.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-12-02 16:30                                                     ` Lorenzo Colitti
@ 2015-12-02 17:09                                                       ` Tom Herbert
  0 siblings, 0 replies; 110+ messages in thread
From: Tom Herbert @ 2015-12-02 17:09 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: Linux Kernel Network Developers, David S. Miller,
	Hannes Frederic Sowa, Eric Dumazet, Erik Kline,
	Maciej Żenczykowski

On Wed, Dec 2, 2015 at 8:30 AM, Lorenzo Colitti <lorenzo@google.com> wrote:
> On Thu, Dec 3, 2015 at 1:12 AM, Tom Herbert <tom@herbertland.com> wrote:
>> The in kernel caller would already have a pointer to the socket so the
>> call would just be sk->sk_prot->destroy(sk). That call should make its
>> way down to same backend function in TCP that the diag path would use.
>> We need this in the kernel for the same reasons you want this in
>> userspace, if a third party hits an unrecoverable error on the socket
>> it needs to signal this condition to the owner of the socket but can't
>> actually close the socket (useful to RDS, KCM, probably
>> TLS-in-kernel).
>
> Oh, I see, yes. You're thinking of a case where multiple protocols
> support this destroy operation. An in-kernel caller has a pointer to a
> sk that it wants to unwedge, but doesn't need to know which destroy
> function to use for which protocol, it just does "if
> (sk->sk_prot->destroy != NULL) sk->sk_prot_destroy(sk)". I think that
> should be a trivial change on top of what I have.

Thanks, and that's a good description for the commit log :-)

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: Add a SOCK_DESTROY operation to close sockets from userspace
  2015-12-02 16:12                                                   ` Tom Herbert
  2015-12-02 16:30                                                     ` Lorenzo Colitti
@ 2015-12-14 17:29                                                     ` Lorenzo Colitti
  2015-12-14 17:29                                                       ` [PATCH v5 1/4] net: diag: Add the ability to destroy a socket Lorenzo Colitti
                                                                         ` (3 more replies)
  1 sibling, 4 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-14 17:29 UTC (permalink / raw)
  To: netdev; +Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski

Here is a an updated version. The external behaviour of this
patchset is the same as v4; for more details, see that cover
letter at http://www.spinics.net/lists/netdev/msg354303.html .

This version fixes two bugs spotted by Eric, and implements Tom's
suggestion of making the socket destroy code a per-protocol
function pointer so that in-kernel callers can use it. The
resulting code is a bit longer but a bit more generic, and
exposes fewer TCP implementation details.

The operation is still called SOCK_DESTROY, but given that its
main implementation is the TCP ABORT operation, and that the
word "destroy" is used in the inet_csk code to refer to freeing
a socket, and in the inet_diag code to refer to broadcasts about
sockets being freed, perhaps it could be renamed to SOCK_ABORT.

Tested using net_test. Tests check that TCP resets are sent in
the right states, that accept(), read() and connect() are
interrupted, that closing sockets makes the socket unusable,
and that destroying non-TCP sockets returns EOPNOTSUPP. Tests
at https://android-review.googlesource.com/#/c/187491/ .

^ permalink raw reply	[flat|nested] 110+ messages in thread

* [PATCH v5 1/4] net: diag: Add the ability to destroy a socket.
  2015-12-14 17:29                                                     ` Lorenzo Colitti
@ 2015-12-14 17:29                                                       ` Lorenzo Colitti
  2015-12-14 17:29                                                       ` [PATCH v5 2/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
                                                                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-14 17:29 UTC (permalink / raw)
  To: netdev
  Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski, Lorenzo Colitti

This adds a diag_destroy pointer to struct proto that allows a
socket to be administratively closed without any action from the
process owning the socket or the socket protocol.

This allows a privileged userspace process, such as a connection
manager or system administration tool, to close sockets belonging
to other apps when the network they were established on has
disconnected. It is needed on laptops and mobile hosts to ensure
that network switches / disconnects do not result in applications
being blocked for long periods of time (minutes) in read or
connect calls on TCP sockets that will never succeed because the
IP address they are bound to is no longer on the system. Closing
the sockets causes these calls to fail fast and allows the apps
to reconnect on another network.

For many years Android kernels have supported this via an
out-of-tree SIOCKILLADDR ioctl that is called on every
RTM_DELADDR event, but this solution is cleaner, more robust and
more flexible: the connection manager can iterate over all
connections on the deleted IP address and close all of them. It
can also be used to close all sockets opened by a given app
process, for example if the user has restricted that app from
using the network.

It also allows in-kernel callers to perform the same sort of
operation by invoking sk->sk_prot->diag_destroy(sk) directly.

This patch adds a SOCK_DESTROY operation, a destroy function
pointer to sock_diag_handler, and a diag_destroy function
pointer.  It does not include any implementation code.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/linux/sock_diag.h      |  2 ++
 include/net/sock.h             |  1 +
 include/uapi/linux/sock_diag.h |  1 +
 net/core/sock_diag.c           | 23 ++++++++++++++++++++---
 4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index fddebc6..15072fc 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -15,6 +15,7 @@ struct sock_diag_handler {
 	__u8 family;
 	int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
 	int (*get_info)(struct sk_buff *skb, struct sock *sk);
+	int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh);
 };
 
 int sock_diag_register(const struct sock_diag_handler *h);
@@ -68,4 +69,5 @@ bool sock_diag_has_destroy_listeners(const struct sock *sk)
 }
 void sock_diag_broadcast_destroy(struct sock *sk);
 
+int sock_diag_destroy(struct sock *sk);
 #endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 0ca22b0..a1b30d7f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1060,6 +1060,7 @@ struct proto {
 	void			(*destroy_cgroup)(struct mem_cgroup *memcg);
 	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
 #endif
+	int			(*diag_destroy)(struct sock *sk);
 };
 
 int proto_register(struct proto *prot, int alloc_slab);
diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
index 49230d3..bae2d80 100644
--- a/include/uapi/linux/sock_diag.h
+++ b/include/uapi/linux/sock_diag.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 
 #define SOCK_DIAG_BY_FAMILY 20
+#define SOCK_DESTROY 21
 
 struct sock_diag_req {
 	__u8	sdiag_family;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 0c1d58d..967d89f 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld)
 }
 EXPORT_SYMBOL_GPL(sock_diag_unregister);
 
-static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	int err;
 	struct sock_diag_req *req = nlmsg_data(nlh);
@@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	hndl = sock_diag_handlers[req->sdiag_family];
 	if (hndl == NULL)
 		err = -ENOENT;
-	else
+	else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
 		err = hndl->dump(skb, nlh);
+	else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
+		err = hndl->destroy(skb, nlh);
+	else
+		err = -EOPNOTSUPP;
 	mutex_unlock(&sock_diag_table_mutex);
 
 	return err;
@@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 		return ret;
 	case SOCK_DIAG_BY_FAMILY:
-		return __sock_diag_rcv_msg(skb, nlh);
+	case SOCK_DESTROY:
+		return __sock_diag_cmd(skb, nlh);
 	default:
 		return -EINVAL;
 	}
@@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group)
 	return 0;
 }
 
+int sock_diag_destroy(struct sock *sk)
+{
+	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!sk->sk_prot->diag_destroy)
+		return -EOPNOTSUPP;
+
+	return sk->sk_prot->diag_destroy(sk);
+}
+EXPORT_SYMBOL_GPL(sock_diag_destroy);
+
 static int __net_init diag_net_init(struct net *net)
 {
 	struct netlink_kernel_cfg cfg = {
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v5 2/4] net: diag: split inet_diag_dump_one_icsk into two
  2015-12-14 17:29                                                     ` Lorenzo Colitti
  2015-12-14 17:29                                                       ` [PATCH v5 1/4] net: diag: Add the ability to destroy a socket Lorenzo Colitti
@ 2015-12-14 17:29                                                       ` Lorenzo Colitti
  2015-12-14 17:29                                                       ` [PATCH v5 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
  2015-12-14 17:29                                                       ` [PATCH v5 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
  3 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-14 17:29 UTC (permalink / raw)
  To: netdev
  Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski, Lorenzo Colitti

Currently, inet_diag_dump_one_icsk finds a socket and then dumps
its information to userspace. Split it into a part that finds the
socket and a part that dumps the information.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/linux/inet_diag.h |  5 +++++
 net/ipv4/inet_diag.c      | 42 +++++++++++++++++++++++++++---------------
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 0e707f0..e7032f04 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -3,6 +3,7 @@
 
 #include <uapi/linux/inet_diag.h>
 
+struct net;
 struct sock;
 struct inet_hashinfo;
 struct nlattr;
@@ -41,6 +42,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 			    struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 			    const struct inet_diag_req_v2 *req);
 
+struct sock *inet_diag_find_one_icsk(struct net *net,
+				     struct inet_hashinfo *hashinfo,
+				     const struct inet_diag_req_v2 *req);
+
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ab9f8a6..cfabb8f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 				  nlmsg_flags, unlh);
 }
 
-int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
-			    struct sk_buff *in_skb,
-			    const struct nlmsghdr *nlh,
-			    const struct inet_diag_req_v2 *req)
+struct sock *inet_diag_find_one_icsk(struct net *net,
+				     struct inet_hashinfo *hashinfo,
+				     const struct inet_diag_req_v2 *req)
 {
-	struct net *net = sock_net(in_skb->sk);
-	struct sk_buff *rep;
 	struct sock *sk;
-	int err;
 
-	err = -EINVAL;
 	if (req->sdiag_family == AF_INET)
 		sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
 				 req->id.idiag_dport, req->id.idiag_src[0],
@@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 				  req->id.idiag_if);
 #endif
 	else
-		goto out_nosk;
+		return ERR_PTR(-EINVAL);
 
-	err = -ENOENT;
 	if (!sk)
-		goto out_nosk;
+		return ERR_PTR(-ENOENT);
 
-	err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
-	if (err)
-		goto out;
+	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
+		sock_gen_put(sk);
+		return ERR_PTR(-ENOENT);
+	}
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);
+
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
+			    struct sk_buff *in_skb,
+			    const struct nlmsghdr *nlh,
+			    const struct inet_diag_req_v2 *req)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sk_buff *rep;
+	struct sock *sk;
+	int err;
+
+	sk = inet_diag_find_one_icsk(net, hashinfo, req);
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
 
 	rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
 	if (!rep) {
@@ -409,7 +422,6 @@ out:
 	if (sk)
 		sock_gen_put(sk);
 
-out_nosk:
 	return err;
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v5 3/4] net: diag: Support SOCK_DESTROY for inet sockets.
  2015-12-14 17:29                                                     ` Lorenzo Colitti
  2015-12-14 17:29                                                       ` [PATCH v5 1/4] net: diag: Add the ability to destroy a socket Lorenzo Colitti
  2015-12-14 17:29                                                       ` [PATCH v5 2/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
@ 2015-12-14 17:29                                                       ` Lorenzo Colitti
  2015-12-14 17:29                                                       ` [PATCH v5 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
  3 siblings, 0 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-14 17:29 UTC (permalink / raw)
  To: netdev
  Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski, Lorenzo Colitti

This passes the SOCK_DESTROY operation to the underlying
protocol diag handler, or returns -EINVAL if that handler does
not define a destroy operation.

Most of this patch is just renaming functions. This is not
strictly necessary, but it would be fairly counterintuitive to
have the code to destroy inet sockets be in a function whose name
starts with inet_diag_get.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/linux/inet_diag.h |  4 ++++
 net/ipv4/inet_diag.c      | 23 +++++++++++++++--------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index e7032f04..7c27fa1 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -24,6 +24,10 @@ struct inet_diag_handler {
 	void		(*idiag_get_info)(struct sock *sk,
 					  struct inet_diag_msg *r,
 					  void *info);
+
+	int		(*destroy)(struct sk_buff *in_skb,
+				   const struct inet_diag_req_v2 *req);
+
 	__u16		idiag_type;
 	__u16		idiag_info_size;
 };
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index cfabb8f..8bb8e7a 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -426,7 +426,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 
-static int inet_diag_get_exact(struct sk_buff *in_skb,
+static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
 			       const struct inet_diag_req_v2 *req)
 {
@@ -436,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 	handler = inet_diag_lock_handler(req->sdiag_protocol);
 	if (IS_ERR(handler))
 		err = PTR_ERR(handler);
-	else
+	else if (cmd == SOCK_DIAG_BY_FAMILY)
 		err = handler->dump_one(in_skb, nlh, req);
+	else if (cmd == SOCK_DESTROY && handler->destroy)
+		err = handler->destroy(in_skb, req);
+	else
+		err = -EOPNOTSUPP;
 	inet_diag_unlock_handler(handler);
 
 	return err;
@@ -950,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
 	req.idiag_states = rc->idiag_states;
 	req.id = rc->id;
 
-	return inet_diag_get_exact(in_skb, nlh, &req);
+	return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
 }
 
 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -984,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 	return inet_diag_get_exact_compat(skb, nlh);
 }
 
-static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
 {
 	int hdrlen = sizeof(struct inet_diag_req_v2);
 	struct net *net = sock_net(skb->sk);
@@ -992,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 	if (nlmsg_len(h) < hdrlen)
 		return -EINVAL;
 
-	if (h->nlmsg_flags & NLM_F_DUMP) {
+	if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
+	    h->nlmsg_flags & NLM_F_DUMP) {
 		if (nlmsg_attrlen(h, hdrlen)) {
 			struct nlattr *attr;
 
@@ -1011,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 		}
 	}
 
-	return inet_diag_get_exact(skb, h, nlmsg_data(h));
+	return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
 }
 
 static
@@ -1062,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
 
 static const struct sock_diag_handler inet_diag_handler = {
 	.family = AF_INET,
-	.dump = inet_diag_handler_dump,
+	.dump = inet_diag_handler_cmd,
 	.get_info = inet_diag_handler_get_info,
+	.destroy = inet_diag_handler_cmd,
 };
 
 static const struct sock_diag_handler inet6_diag_handler = {
 	.family = AF_INET6,
-	.dump = inet_diag_handler_dump,
+	.dump = inet_diag_handler_cmd,
 	.get_info = inet_diag_handler_get_info,
+	.destroy = inet_diag_handler_cmd,
 };
 
 int inet_diag_register(const struct inet_diag_handler *h)
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v5 4/4] net: diag: Support destroying TCP sockets.
  2015-12-14 17:29                                                     ` Lorenzo Colitti
                                                                         ` (2 preceding siblings ...)
  2015-12-14 17:29                                                       ` [PATCH v5 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
@ 2015-12-14 17:29                                                       ` Lorenzo Colitti
  2015-12-14 17:51                                                         ` kbuild test robot
                                                                           ` (3 more replies)
  3 siblings, 4 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-14 17:29 UTC (permalink / raw)
  To: netdev
  Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski, Lorenzo Colitti

This implements SOCK_DESTROY for TCP sockets. It causes all
blocking calls on the socket to fail fast with ECONNABORTED and
causes a protocol close of the socket. It informs the other end
of the connection by sending a RST, i.e., initiating a TCP ABORT
as per RFC 793. ECONNABORTED was chosen for consistency with
FreeBSD.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/net/tcp.h   |  4 ++++
 net/ipv4/Kconfig    | 13 +++++++++++++
 net/ipv4/tcp.c      | 34 ++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_diag.c | 19 +++++++++++++++++++
 net/ipv4/tcp_ipv4.c |  3 +++
 net/ipv6/tcp_ipv6.c |  3 +++
 6 files changed, 76 insertions(+)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f80e74c..505cef5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1170,6 +1170,10 @@ void tcp_set_state(struct sock *sk, int state);
 
 void tcp_done(struct sock *sk);
 
+#if CONFIG_INET_DIAG_DESTROY
+int tcp_abort(struct sock *sk);
+#endif
+
 static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 {
 	rx_opt->dsack = 0;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 416dfa0..31c4496 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -436,6 +436,19 @@ config INET_UDP_DIAG
 	  Support for UDP socket monitoring interface used by the ss tool.
 	  If unsure, say Y.
 
+config INET_DIAG_DESTROY
+	bool "INET: allow privileged process to administratively close sockets"
+	depends on INET_DIAG && (IPV6 || IPV6=n)
+	default n
+	---help---
+	  Provides a SOCK_DESTROY operation that allows privileged processes
+	  (e.g., a connection manager or a network administration tool such as
+	  ss) to close sockets opened by other processes. Closing a socket in
+	  this way interrupts any blocking read/writes/connect operations on
+	  the socket and causes future socket calls to behave as if the socket
+	  had been disconnected.
+	  If unsure, say N.
+
 menuconfig TCP_CONG_ADVANCED
 	bool "TCP: advanced congestion control"
 	---help---
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c82cca1..fc5068d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3080,6 +3080,40 @@ void tcp_done(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_done);
 
+#ifdef CONFIG_INET_DIAG_DESTROY
+int tcp_abort(struct sock *sk)
+{
+	if (!sk_fullsock(sk)) {
+		sock_gen_put(sk);
+		return -EOPNOTSUPP;
+	}
+
+	/* Don't race with userspace socket closes such as tcp_close. */
+	lock_sock(sk);
+
+	/* Don't race with BH socket closes such as inet_csk_listen_stop. */
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_err = ECONNABORTED;
+		/* This barrier is coupled with smp_rmb() in tcp_poll() */
+		smp_wmb();
+		sk->sk_error_report(sk);
+		if (tcp_need_reset(sk->sk_state))
+			tcp_send_active_reset(sk, GFP_ATOMIC);
+		tcp_done(sk);
+	}
+
+	bh_unlock_sock(sk);
+	local_bh_enable();
+	release_sock(sk);
+	sock_put(sk);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tcp_abort);
+#endif
+
 extern struct tcp_congestion_ops tcp_reno;
 
 static __initdata unsigned long thash_entries;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index b316040..8d435f17 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -10,6 +10,8 @@
  */
 
 #include <linux/module.h>
+#include <linux/net.h>
+#include <linux/sock_diag.h>
 #include <linux/inet_diag.h>
 
 #include <linux/tcp.h>
@@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 	return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
 }
 
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int tcp_diag_destroy(struct sk_buff *in_skb,
+			    const struct inet_diag_req_v2 *req)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
+
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
+
+	return sock_diag_destroy(sk);
+}
+#endif
+
 static const struct inet_diag_handler tcp_diag_handler = {
 	.dump		 = tcp_diag_dump,
 	.dump_one	 = tcp_diag_dump_one,
 	.idiag_get_info	 = tcp_diag_get_info,
 	.idiag_type	 = IPPROTO_TCP,
 	.idiag_info_size = sizeof(struct tcp_info),
+#ifdef CONFIG_INET_DIAG_DESTROY
+	.destroy	 = tcp_diag_destroy,
+#endif
 };
 
 static int __init tcp_diag_init(void)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index db00343..5e28bf1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2342,6 +2342,9 @@ struct proto tcp_prot = {
 	.destroy_cgroup		= tcp_destroy_cgroup,
 	.proto_cgroup		= tcp_proto_cgroup,
 #endif
+#ifdef CONFIG_INET_DIAG_DESTROY
+	.diag_destroy		= tcp_abort,
+#endif
 };
 EXPORT_SYMBOL(tcp_prot);
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c16e3fb..289c5db 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1890,6 +1890,9 @@ struct proto tcpv6_prot = {
 	.proto_cgroup		= tcp_proto_cgroup,
 #endif
 	.clear_sk		= tcp_v6_clear_sk,
+#ifdef CONFIG_INET_DIAG_DESTROY
+	.diag_destroy		= tcp_abort,
+#endif
 };
 
 static const struct inet6_protocol tcpv6_protocol = {
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* Re: [PATCH v5 4/4] net: diag: Support destroying TCP sockets.
  2015-12-14 17:29                                                       ` [PATCH v5 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
@ 2015-12-14 17:51                                                         ` kbuild test robot
  2015-12-14 17:52                                                         ` Tom Herbert
                                                                           ` (2 subsequent siblings)
  3 siblings, 0 replies; 110+ messages in thread
From: kbuild test robot @ 2015-12-14 17:51 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: kbuild-all, netdev, davem, hannes, eric.dumazet, ek, tom,
	zenczykowski, Lorenzo Colitti

[-- Attachment #1: Type: text/plain, Size: 1944 bytes --]

Hi Lorenzo,

[auto build test WARNING on net/master]
[also build test WARNING on v4.4-rc5]
[cannot apply to next-20151214]

url:    https://github.com/0day-ci/linux/commits/Lorenzo-Colitti/net-diag-Add-the-ability-to-destroy-a-socket/20151215-013254
config: x86_64-randconfig-x013-12141150 (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   In file included from net/ipv4/route.c:103:0:
>> include/net/tcp.h:1173:5: warning: "CONFIG_INET_DIAG_DESTROY" is not defined [-Wundef]
    #if CONFIG_INET_DIAG_DESTROY
        ^
   In file included from net/ipv4/route.c:103:0:
>> include/net/tcp.h:1173:5: warning: "CONFIG_INET_DIAG_DESTROY" is not defined [-Wundef]
    #if CONFIG_INET_DIAG_DESTROY
        ^
--
   In file included from net/ipv4/ip_forward.c:31:0:
>> include/net/tcp.h:1173:5: warning: "CONFIG_INET_DIAG_DESTROY" is not defined [-Wundef]
    #if CONFIG_INET_DIAG_DESTROY
        ^

vim +/CONFIG_INET_DIAG_DESTROY +1173 include/net/tcp.h

  1157	
  1158	bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
  1159	
  1160	#undef STATE_TRACE
  1161	
  1162	#ifdef STATE_TRACE
  1163	static const char *statename[]={
  1164		"Unused","Established","Syn Sent","Syn Recv",
  1165		"Fin Wait 1","Fin Wait 2","Time Wait", "Close",
  1166		"Close Wait","Last ACK","Listen","Closing"
  1167	};
  1168	#endif
  1169	void tcp_set_state(struct sock *sk, int state);
  1170	
  1171	void tcp_done(struct sock *sk);
  1172	
> 1173	#if CONFIG_INET_DIAG_DESTROY
  1174	int tcp_abort(struct sock *sk);
  1175	#endif
  1176	
  1177	static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
  1178	{
  1179		rx_opt->dsack = 0;
  1180		rx_opt->num_sacks = 0;
  1181	}

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 28096 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v5 4/4] net: diag: Support destroying TCP sockets.
  2015-12-14 17:29                                                       ` [PATCH v5 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
  2015-12-14 17:51                                                         ` kbuild test robot
@ 2015-12-14 17:52                                                         ` Tom Herbert
  2015-12-14 18:03                                                         ` Eric Dumazet
  2015-12-14 19:37                                                         ` David Miller
  3 siblings, 0 replies; 110+ messages in thread
From: Tom Herbert @ 2015-12-14 17:52 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: Linux Kernel Network Developers, David S. Miller,
	Hannes Frederic Sowa, Eric Dumazet, Erik Kline,
	Maciej Żenczykowski

On Mon, Dec 14, 2015 at 9:29 AM, Lorenzo Colitti <lorenzo@google.com> wrote:
> This implements SOCK_DESTROY for TCP sockets. It causes all
> blocking calls on the socket to fail fast with ECONNABORTED and
> causes a protocol close of the socket. It informs the other end
> of the connection by sending a RST, i.e., initiating a TCP ABORT
> as per RFC 793. ECONNABORTED was chosen for consistency with
> FreeBSD.
>
> Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
> ---
>  include/net/tcp.h   |  4 ++++
>  net/ipv4/Kconfig    | 13 +++++++++++++
>  net/ipv4/tcp.c      | 34 ++++++++++++++++++++++++++++++++++
>  net/ipv4/tcp_diag.c | 19 +++++++++++++++++++
>  net/ipv4/tcp_ipv4.c |  3 +++
>  net/ipv6/tcp_ipv6.c |  3 +++
>  6 files changed, 76 insertions(+)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index f80e74c..505cef5 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1170,6 +1170,10 @@ void tcp_set_state(struct sock *sk, int state);
>
>  void tcp_done(struct sock *sk);
>
> +#if CONFIG_INET_DIAG_DESTROY
> +int tcp_abort(struct sock *sk);
> +#endif
> +
>  static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
>  {
>         rx_opt->dsack = 0;
> diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
> index 416dfa0..31c4496 100644
> --- a/net/ipv4/Kconfig
> +++ b/net/ipv4/Kconfig
> @@ -436,6 +436,19 @@ config INET_UDP_DIAG
>           Support for UDP socket monitoring interface used by the ss tool.
>           If unsure, say Y.
>
> +config INET_DIAG_DESTROY
> +       bool "INET: allow privileged process to administratively close sockets"
> +       depends on INET_DIAG && (IPV6 || IPV6=n)
> +       default n
> +       ---help---
> +         Provides a SOCK_DESTROY operation that allows privileged processes
> +         (e.g., a connection manager or a network administration tool such as
> +         ss) to close sockets opened by other processes. Closing a socket in
> +         this way interrupts any blocking read/writes/connect operations on
> +         the socket and causes future socket calls to behave as if the socket
> +         had been disconnected.
> +         If unsure, say N.
> +
>  menuconfig TCP_CONG_ADVANCED
>         bool "TCP: advanced congestion control"
>         ---help---
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index c82cca1..fc5068d 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -3080,6 +3080,40 @@ void tcp_done(struct sock *sk)
>  }
>  EXPORT_SYMBOL_GPL(tcp_done);
>
> +#ifdef CONFIG_INET_DIAG_DESTROY

This is a general use function it should not be under a CONFIG.

> +int tcp_abort(struct sock *sk)

Please add an err argument for setting sk->err. A value of zero could
mean to use the default value (ECONNABORTED).

> +{
> +       if (!sk_fullsock(sk)) {
> +               sock_gen_put(sk);
> +               return -EOPNOTSUPP;
> +       }
> +
> +       /* Don't race with userspace socket closes such as tcp_close. */
> +       lock_sock(sk);
> +
> +       /* Don't race with BH socket closes such as inet_csk_listen_stop. */
> +       local_bh_disable();
> +       bh_lock_sock(sk);
> +
> +       if (!sock_flag(sk, SOCK_DEAD)) {
> +               sk->sk_err = ECONNABORTED;
> +               /* This barrier is coupled with smp_rmb() in tcp_poll() */
> +               smp_wmb();
> +               sk->sk_error_report(sk);
> +               if (tcp_need_reset(sk->sk_state))
> +                       tcp_send_active_reset(sk, GFP_ATOMIC);
> +               tcp_done(sk);
> +       }
> +
> +       bh_unlock_sock(sk);
> +       local_bh_enable();
> +       release_sock(sk);
> +       sock_put(sk);
> +       return 0;
> +}
> +EXPORT_SYMBOL_GPL(tcp_abort);
> +#endif
> +
>  extern struct tcp_congestion_ops tcp_reno;
>
>  static __initdata unsigned long thash_entries;
> diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
> index b316040..8d435f17 100644
> --- a/net/ipv4/tcp_diag.c
> +++ b/net/ipv4/tcp_diag.c
> @@ -10,6 +10,8 @@
>   */
>
>  #include <linux/module.h>
> +#include <linux/net.h>
> +#include <linux/sock_diag.h>
>  #include <linux/inet_diag.h>
>
>  #include <linux/tcp.h>
> @@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
>         return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
>  }
>
> +#ifdef CONFIG_INET_DIAG_DESTROY
> +static int tcp_diag_destroy(struct sk_buff *in_skb,
> +                           const struct inet_diag_req_v2 *req)
> +{
> +       struct net *net = sock_net(in_skb->sk);
> +       struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
> +
> +       if (IS_ERR(sk))
> +               return PTR_ERR(sk);
> +
> +       return sock_diag_destroy(sk);
> +}
> +#endif
> +
>  static const struct inet_diag_handler tcp_diag_handler = {
>         .dump            = tcp_diag_dump,
>         .dump_one        = tcp_diag_dump_one,
>         .idiag_get_info  = tcp_diag_get_info,
>         .idiag_type      = IPPROTO_TCP,
>         .idiag_info_size = sizeof(struct tcp_info),
> +#ifdef CONFIG_INET_DIAG_DESTROY
> +       .destroy         = tcp_diag_destroy,
> +#endif
>  };
>
>  static int __init tcp_diag_init(void)
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index db00343..5e28bf1 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -2342,6 +2342,9 @@ struct proto tcp_prot = {
>         .destroy_cgroup         = tcp_destroy_cgroup,
>         .proto_cgroup           = tcp_proto_cgroup,
>  #endif
> +#ifdef CONFIG_INET_DIAG_DESTROY
> +       .diag_destroy           = tcp_abort,
> +#endif
>  };
>  EXPORT_SYMBOL(tcp_prot);
>
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index c16e3fb..289c5db 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -1890,6 +1890,9 @@ struct proto tcpv6_prot = {
>         .proto_cgroup           = tcp_proto_cgroup,
>  #endif
>         .clear_sk               = tcp_v6_clear_sk,
> +#ifdef CONFIG_INET_DIAG_DESTROY
> +       .diag_destroy           = tcp_abort,
> +#endif
>  };
>
>  static const struct inet6_protocol tcpv6_protocol = {
> --
> 2.6.0.rc2.230.g3dd15c0
>

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v5 4/4] net: diag: Support destroying TCP sockets.
  2015-12-14 17:29                                                       ` [PATCH v5 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
  2015-12-14 17:51                                                         ` kbuild test robot
  2015-12-14 17:52                                                         ` Tom Herbert
@ 2015-12-14 18:03                                                         ` Eric Dumazet
  2015-12-14 19:37                                                         ` David Miller
  3 siblings, 0 replies; 110+ messages in thread
From: Eric Dumazet @ 2015-12-14 18:03 UTC (permalink / raw)
  To: Lorenzo Colitti; +Cc: netdev, davem, hannes, ek, tom, zenczykowski

On Tue, 2015-12-15 at 02:29 +0900, Lorenzo Colitti wrote:

> +config INET_DIAG_DESTROY
> +	bool "INET: allow privileged process to administratively close sockets"
> +	depends on INET_DIAG && (IPV6 || IPV6=n)
> +	default n

I can see the effect of (IPV6 || IPv6=n) for a tristate,
but what is the intent for a bool ?

It seems INET_DIAG_DESTROY can be selected regardless of IPV6 being off,
on, or a module ?

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v5 4/4] net: diag: Support destroying TCP sockets.
  2015-12-14 17:29                                                       ` [PATCH v5 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
                                                                           ` (2 preceding siblings ...)
  2015-12-14 18:03                                                         ` Eric Dumazet
@ 2015-12-14 19:37                                                         ` David Miller
  2015-12-15 17:17                                                           ` [PATCH v5 4/4] net: diag: Support destroying TCP socketsr Lorenzo Colitti
  3 siblings, 1 reply; 110+ messages in thread
From: David Miller @ 2015-12-14 19:37 UTC (permalink / raw)
  To: lorenzo; +Cc: netdev, hannes, eric.dumazet, ek, tom, zenczykowski

From: Lorenzo Colitti <lorenzo@google.com>
Date: Tue, 15 Dec 2015 02:29:57 +0900

> +#if CONFIG_INET_DIAG_DESTROY

Config symbols are to be checked with "ifdef" not "if".

> +config INET_DIAG_DESTROY
> +	bool "INET: allow privileged process to administratively close sockets"
> +	depends on INET_DIAG && (IPV6 || IPV6=n)
> +	default n

As Eric Dumazet stated, this "(X || X=n)" construct only makes sense for
tristate Kconfig entries.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v5 4/4] net: diag: Support destroying TCP socketsr
  2015-12-14 19:37                                                         ` David Miller
@ 2015-12-15 17:17                                                           ` Lorenzo Colitti
  2015-12-15 17:17                                                             ` [PATCH v6 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
                                                                               ` (5 more replies)
  0 siblings, 6 replies; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-15 17:17 UTC (permalink / raw)
  To: netdev; +Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski

Thanks for the reviews. This version fixes a few issues and
addresses Tom's comments on v5. Specifically:

1. As requested by Tom, tcp_abort is no longer behind a config
   option, and now allows the caller to specify the error with
   which to interrupt blocking operations. (The SOCK_DIAG
   codepath to close TCP sockets from userspace always uses
   ECONNABORTED.)

2. Config symbols are checked with "ifdef" not "if".

3. The boolean Kconfig option no longer depends on
   INET_DIAG && (IPV6 || IPV6=n) but only on INET_DIAG

The name of the operation is still SOCK_DESTROY, and the
functions that implement it still have "destroy" in their
name. I'm leaning towards changing this to SOCK_ABORT instead,
both to reduce confusion with e.g., sock_diag_destroy_group,
and because it seems more correct in general, at least for
TCP. Thoughts on the name?

^ permalink raw reply	[flat|nested] 110+ messages in thread

* [PATCH v6 1/4] net: diag: split inet_diag_dump_one_icsk into two
  2015-12-15 17:17                                                           ` [PATCH v5 4/4] net: diag: Support destroying TCP socketsr Lorenzo Colitti
@ 2015-12-15 17:17                                                             ` Lorenzo Colitti
  2015-12-15 17:44                                                               ` Eric Dumazet
  2015-12-15 17:17                                                             ` [PATCH v6 2/4] net: diag: Add the ability to destroy a socket Lorenzo Colitti
                                                                               ` (4 subsequent siblings)
  5 siblings, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-15 17:17 UTC (permalink / raw)
  To: netdev
  Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski, Lorenzo Colitti

Currently, inet_diag_dump_one_icsk finds a socket and then dumps
its information to userspace. Split it into a part that finds the
socket and a part that dumps the information.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/linux/inet_diag.h |  5 +++++
 net/ipv4/inet_diag.c      | 42 +++++++++++++++++++++++++++---------------
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 0e707f0..e7032f04 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -3,6 +3,7 @@
 
 #include <uapi/linux/inet_diag.h>
 
+struct net;
 struct sock;
 struct inet_hashinfo;
 struct nlattr;
@@ -41,6 +42,10 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 			    struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 			    const struct inet_diag_req_v2 *req);
 
+struct sock *inet_diag_find_one_icsk(struct net *net,
+				     struct inet_hashinfo *hashinfo,
+				     const struct inet_diag_req_v2 *req);
+
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ab9f8a6..cfabb8f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -350,17 +350,12 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 				  nlmsg_flags, unlh);
 }
 
-int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
-			    struct sk_buff *in_skb,
-			    const struct nlmsghdr *nlh,
-			    const struct inet_diag_req_v2 *req)
+struct sock *inet_diag_find_one_icsk(struct net *net,
+				     struct inet_hashinfo *hashinfo,
+				     const struct inet_diag_req_v2 *req)
 {
-	struct net *net = sock_net(in_skb->sk);
-	struct sk_buff *rep;
 	struct sock *sk;
-	int err;
 
-	err = -EINVAL;
 	if (req->sdiag_family == AF_INET)
 		sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
 				 req->id.idiag_dport, req->id.idiag_src[0],
@@ -375,15 +370,33 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 				  req->id.idiag_if);
 #endif
 	else
-		goto out_nosk;
+		return ERR_PTR(-EINVAL);
 
-	err = -ENOENT;
 	if (!sk)
-		goto out_nosk;
+		return ERR_PTR(-ENOENT);
 
-	err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
-	if (err)
-		goto out;
+	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
+		sock_gen_put(sk);
+		return ERR_PTR(-ENOENT);
+	}
+
+	return sk;
+}
+EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);
+
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
+			    struct sk_buff *in_skb,
+			    const struct nlmsghdr *nlh,
+			    const struct inet_diag_req_v2 *req)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sk_buff *rep;
+	struct sock *sk;
+	int err;
+
+	sk = inet_diag_find_one_icsk(net, hashinfo, req);
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
 
 	rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
 	if (!rep) {
@@ -409,7 +422,6 @@ out:
 	if (sk)
 		sock_gen_put(sk);
 
-out_nosk:
 	return err;
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v6 2/4] net: diag: Add the ability to destroy a socket.
  2015-12-15 17:17                                                           ` [PATCH v5 4/4] net: diag: Support destroying TCP socketsr Lorenzo Colitti
  2015-12-15 17:17                                                             ` [PATCH v6 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
@ 2015-12-15 17:17                                                             ` Lorenzo Colitti
  2015-12-15 17:44                                                               ` Eric Dumazet
  2015-12-15 17:17                                                             ` [PATCH v6 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
                                                                               ` (3 subsequent siblings)
  5 siblings, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-15 17:17 UTC (permalink / raw)
  To: netdev
  Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski, Lorenzo Colitti

This adds a diag_destroy pointer to struct proto that allows a
socket to be administratively closed without any action from the
process owning the socket or the socket protocol.

This allows a privileged userspace process, such as a connection
manager or system administration tool, to close sockets belonging
to other apps when the network they were established on has
disconnected. It is needed on laptops and mobile hosts to ensure
that network switches / disconnects do not result in applications
being blocked for long periods of time (minutes) in read or
connect calls on TCP sockets that will never succeed because the
IP address they are bound to is no longer on the system. Closing
the sockets causes these calls to fail fast and allows the apps
to reconnect on another network.

For many years Android kernels have supported this via an
out-of-tree SIOCKILLADDR ioctl that is called on every
RTM_DELADDR event, but this solution is cleaner, more robust and
more flexible: the connection manager can iterate over all
connections on the deleted IP address and close all of them. It
can also be used to close all sockets opened by a given app
process, for example if the user has restricted that app from
using the network.

It also allows in-kernel callers to perform the same sort of
operation by invoking sk->sk_prot->diag_destroy(sk) directly.

This patch adds a SOCK_DESTROY operation, a destroy function
pointer to sock_diag_handler, and a diag_destroy function
pointer.  It does not include any implementation code.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/linux/sock_diag.h      |  2 ++
 include/net/sock.h             |  1 +
 include/uapi/linux/sock_diag.h |  1 +
 net/core/sock_diag.c           | 23 ++++++++++++++++++++---
 4 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index fddebc6..4018b48 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -15,6 +15,7 @@ struct sock_diag_handler {
 	__u8 family;
 	int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
 	int (*get_info)(struct sk_buff *skb, struct sock *sk);
+	int (*destroy)(struct sk_buff *skb, struct nlmsghdr *nlh);
 };
 
 int sock_diag_register(const struct sock_diag_handler *h);
@@ -68,4 +69,5 @@ bool sock_diag_has_destroy_listeners(const struct sock *sk)
 }
 void sock_diag_broadcast_destroy(struct sock *sk);
 
+int sock_diag_destroy(struct sock *sk, int err);
 #endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 0ca22b0..c9218f5 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1060,6 +1060,7 @@ struct proto {
 	void			(*destroy_cgroup)(struct mem_cgroup *memcg);
 	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
 #endif
+	int			(*diag_destroy)(struct sock *sk, int err);
 };
 
 int proto_register(struct proto *prot, int alloc_slab);
diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
index 49230d3..bae2d80 100644
--- a/include/uapi/linux/sock_diag.h
+++ b/include/uapi/linux/sock_diag.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 
 #define SOCK_DIAG_BY_FAMILY 20
+#define SOCK_DESTROY 21
 
 struct sock_diag_req {
 	__u8	sdiag_family;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 0c1d58d..a996ce8 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -214,7 +214,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld)
 }
 EXPORT_SYMBOL_GPL(sock_diag_unregister);
 
-static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	int err;
 	struct sock_diag_req *req = nlmsg_data(nlh);
@@ -234,8 +234,12 @@ static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	hndl = sock_diag_handlers[req->sdiag_family];
 	if (hndl == NULL)
 		err = -ENOENT;
-	else
+	else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
 		err = hndl->dump(skb, nlh);
+	else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
+		err = hndl->destroy(skb, nlh);
+	else
+		err = -EOPNOTSUPP;
 	mutex_unlock(&sock_diag_table_mutex);
 
 	return err;
@@ -261,7 +265,8 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 		return ret;
 	case SOCK_DIAG_BY_FAMILY:
-		return __sock_diag_rcv_msg(skb, nlh);
+	case SOCK_DESTROY:
+		return __sock_diag_cmd(skb, nlh);
 	default:
 		return -EINVAL;
 	}
@@ -295,6 +300,18 @@ static int sock_diag_bind(struct net *net, int group)
 	return 0;
 }
 
+int sock_diag_destroy(struct sock *sk, int err)
+{
+	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!sk->sk_prot->diag_destroy)
+		return -EOPNOTSUPP;
+
+	return sk->sk_prot->diag_destroy(sk, err);
+}
+EXPORT_SYMBOL_GPL(sock_diag_destroy);
+
 static int __net_init diag_net_init(struct net *net)
 {
 	struct netlink_kernel_cfg cfg = {
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v6 3/4] net: diag: Support SOCK_DESTROY for inet sockets.
  2015-12-15 17:17                                                           ` [PATCH v5 4/4] net: diag: Support destroying TCP socketsr Lorenzo Colitti
  2015-12-15 17:17                                                             ` [PATCH v6 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
  2015-12-15 17:17                                                             ` [PATCH v6 2/4] net: diag: Add the ability to destroy a socket Lorenzo Colitti
@ 2015-12-15 17:17                                                             ` Lorenzo Colitti
  2015-12-15 17:45                                                               ` Eric Dumazet
  2015-12-15 17:17                                                             ` [PATCH v6 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
                                                                               ` (2 subsequent siblings)
  5 siblings, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-15 17:17 UTC (permalink / raw)
  To: netdev
  Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski, Lorenzo Colitti

This passes the SOCK_DESTROY operation to the underlying protocol
diag handler, or returns -EOPNOTSUPP if that handler does not
define a destroy operation.

Most of this patch is just renaming functions. This is not
strictly necessary, but it would be fairly counterintuitive to
have the code to destroy inet sockets be in a function whose name
starts with inet_diag_get.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/linux/inet_diag.h |  4 ++++
 net/ipv4/inet_diag.c      | 23 +++++++++++++++--------
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index e7032f04..7c27fa1 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -24,6 +24,10 @@ struct inet_diag_handler {
 	void		(*idiag_get_info)(struct sock *sk,
 					  struct inet_diag_msg *r,
 					  void *info);
+
+	int		(*destroy)(struct sk_buff *in_skb,
+				   const struct inet_diag_req_v2 *req);
+
 	__u16		idiag_type;
 	__u16		idiag_info_size;
 };
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index cfabb8f..8bb8e7a 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -426,7 +426,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 
-static int inet_diag_get_exact(struct sk_buff *in_skb,
+static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
 			       const struct inet_diag_req_v2 *req)
 {
@@ -436,8 +436,12 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 	handler = inet_diag_lock_handler(req->sdiag_protocol);
 	if (IS_ERR(handler))
 		err = PTR_ERR(handler);
-	else
+	else if (cmd == SOCK_DIAG_BY_FAMILY)
 		err = handler->dump_one(in_skb, nlh, req);
+	else if (cmd == SOCK_DESTROY && handler->destroy)
+		err = handler->destroy(in_skb, req);
+	else
+		err = -EOPNOTSUPP;
 	inet_diag_unlock_handler(handler);
 
 	return err;
@@ -950,7 +954,7 @@ static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
 	req.idiag_states = rc->idiag_states;
 	req.id = rc->id;
 
-	return inet_diag_get_exact(in_skb, nlh, &req);
+	return inet_diag_cmd_exact(SOCK_DIAG_BY_FAMILY, in_skb, nlh, &req);
 }
 
 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -984,7 +988,7 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 	return inet_diag_get_exact_compat(skb, nlh);
 }
 
-static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h)
 {
 	int hdrlen = sizeof(struct inet_diag_req_v2);
 	struct net *net = sock_net(skb->sk);
@@ -992,7 +996,8 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 	if (nlmsg_len(h) < hdrlen)
 		return -EINVAL;
 
-	if (h->nlmsg_flags & NLM_F_DUMP) {
+	if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY &&
+	    h->nlmsg_flags & NLM_F_DUMP) {
 		if (nlmsg_attrlen(h, hdrlen)) {
 			struct nlattr *attr;
 
@@ -1011,7 +1016,7 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 		}
 	}
 
-	return inet_diag_get_exact(skb, h, nlmsg_data(h));
+	return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h));
 }
 
 static
@@ -1062,14 +1067,16 @@ int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
 
 static const struct sock_diag_handler inet_diag_handler = {
 	.family = AF_INET,
-	.dump = inet_diag_handler_dump,
+	.dump = inet_diag_handler_cmd,
 	.get_info = inet_diag_handler_get_info,
+	.destroy = inet_diag_handler_cmd,
 };
 
 static const struct sock_diag_handler inet6_diag_handler = {
 	.family = AF_INET6,
-	.dump = inet_diag_handler_dump,
+	.dump = inet_diag_handler_cmd,
 	.get_info = inet_diag_handler_get_info,
+	.destroy = inet_diag_handler_cmd,
 };
 
 int inet_diag_register(const struct inet_diag_handler *h)
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v6 4/4] net: diag: Support destroying TCP sockets.
  2015-12-15 17:17                                                           ` [PATCH v5 4/4] net: diag: Support destroying TCP socketsr Lorenzo Colitti
                                                                               ` (2 preceding siblings ...)
  2015-12-15 17:17                                                             ` [PATCH v6 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
@ 2015-12-15 17:17                                                             ` Lorenzo Colitti
  2015-12-15 17:46                                                               ` Eric Dumazet
  2015-12-15 18:36                                                             ` [PATCH v5 4/4] net: diag: Support destroying TCP socketsr Maciej Żenczykowski
  2015-12-15 18:38                                                             ` David Miller
  5 siblings, 1 reply; 110+ messages in thread
From: Lorenzo Colitti @ 2015-12-15 17:17 UTC (permalink / raw)
  To: netdev
  Cc: davem, hannes, eric.dumazet, ek, tom, zenczykowski, Lorenzo Colitti

This implements SOCK_DESTROY for TCP sockets. It causes all
blocking calls on the socket to fail fast with ECONNABORTED and
causes a protocol close of the socket. It informs the other end
of the connection by sending a RST, i.e., initiating a TCP ABORT
as per RFC 793. ECONNABORTED was chosen for consistency with
FreeBSD.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
---
 include/net/tcp.h   |  2 ++
 net/ipv4/Kconfig    | 13 +++++++++++++
 net/ipv4/tcp.c      | 32 ++++++++++++++++++++++++++++++++
 net/ipv4/tcp_diag.c | 19 +++++++++++++++++++
 net/ipv4/tcp_ipv4.c |  1 +
 net/ipv6/tcp_ipv6.c |  1 +
 6 files changed, 68 insertions(+)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f80e74c..3077735b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1170,6 +1170,8 @@ void tcp_set_state(struct sock *sk, int state);
 
 void tcp_done(struct sock *sk);
 
+int tcp_abort(struct sock *sk, int err);
+
 static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 {
 	rx_opt->dsack = 0;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 416dfa0..c229205 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -436,6 +436,19 @@ config INET_UDP_DIAG
 	  Support for UDP socket monitoring interface used by the ss tool.
 	  If unsure, say Y.
 
+config INET_DIAG_DESTROY
+	bool "INET: allow privileged process to administratively close sockets"
+	depends on INET_DIAG
+	default n
+	---help---
+	  Provides a SOCK_DESTROY operation that allows privileged processes
+	  (e.g., a connection manager or a network administration tool such as
+	  ss) to close sockets opened by other processes. Closing a socket in
+	  this way interrupts any blocking read/write/connect operations on
+	  the socket and causes future socket calls to behave as if the socket
+	  had been disconnected.
+	  If unsure, say N.
+
 menuconfig TCP_CONG_ADVANCED
 	bool "TCP: advanced congestion control"
 	---help---
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c82cca1..c270205 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3080,6 +3080,38 @@ void tcp_done(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_done);
 
+int tcp_abort(struct sock *sk, int err)
+{
+	if (!sk_fullsock(sk)) {
+		sock_gen_put(sk);
+		return -EOPNOTSUPP;
+	}
+
+	/* Don't race with userspace socket closes such as tcp_close. */
+	lock_sock(sk);
+
+	/* Don't race with BH socket closes such as inet_csk_listen_stop. */
+	local_bh_disable();
+	bh_lock_sock(sk);
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		sk->sk_err = err;
+		/* This barrier is coupled with smp_rmb() in tcp_poll() */
+		smp_wmb();
+		sk->sk_error_report(sk);
+		if (tcp_need_reset(sk->sk_state))
+			tcp_send_active_reset(sk, GFP_ATOMIC);
+		tcp_done(sk);
+	}
+
+	bh_unlock_sock(sk);
+	local_bh_enable();
+	release_sock(sk);
+	sock_put(sk);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(tcp_abort);
+
 extern struct tcp_congestion_ops tcp_reno;
 
 static __initdata unsigned long thash_entries;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index b316040..4d61093 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -10,6 +10,8 @@
  */
 
 #include <linux/module.h>
+#include <linux/net.h>
+#include <linux/sock_diag.h>
 #include <linux/inet_diag.h>
 
 #include <linux/tcp.h>
@@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 	return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
 }
 
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int tcp_diag_destroy(struct sk_buff *in_skb,
+			    const struct inet_diag_req_v2 *req)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
+
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
+
+	return sock_diag_destroy(sk, ECONNABORTED);
+}
+#endif
+
 static const struct inet_diag_handler tcp_diag_handler = {
 	.dump		 = tcp_diag_dump,
 	.dump_one	 = tcp_diag_dump_one,
 	.idiag_get_info	 = tcp_diag_get_info,
 	.idiag_type	 = IPPROTO_TCP,
 	.idiag_info_size = sizeof(struct tcp_info),
+#ifdef CONFIG_INET_DIAG_DESTROY
+	.destroy	 = tcp_diag_destroy,
+#endif
 };
 
 static int __init tcp_diag_init(void)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index db00343..7aa13bd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2342,6 +2342,7 @@ struct proto tcp_prot = {
 	.destroy_cgroup		= tcp_destroy_cgroup,
 	.proto_cgroup		= tcp_proto_cgroup,
 #endif
+	.diag_destroy		= tcp_abort,
 };
 EXPORT_SYMBOL(tcp_prot);
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c16e3fb..5382c26 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1890,6 +1890,7 @@ struct proto tcpv6_prot = {
 	.proto_cgroup		= tcp_proto_cgroup,
 #endif
 	.clear_sk		= tcp_v6_clear_sk,
+	.diag_destroy		= tcp_abort,
 };
 
 static const struct inet6_protocol tcpv6_protocol = {
-- 
2.6.0.rc2.230.g3dd15c0

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* Re: [PATCH v6 1/4] net: diag: split inet_diag_dump_one_icsk into two
  2015-12-15 17:17                                                             ` [PATCH v6 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
@ 2015-12-15 17:44                                                               ` Eric Dumazet
  0 siblings, 0 replies; 110+ messages in thread
From: Eric Dumazet @ 2015-12-15 17:44 UTC (permalink / raw)
  To: Lorenzo Colitti; +Cc: netdev, davem, hannes, ek, tom, zenczykowski

On Wed, 2015-12-16 at 02:17 +0900, Lorenzo Colitti wrote:
> Currently, inet_diag_dump_one_icsk finds a socket and then dumps
> its information to userspace. Split it into a part that finds the
> socket and a part that dumps the information.
> 
> Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
> ---
>  include/linux/inet_diag.h |  5 +++++
>  net/ipv4/inet_diag.c      | 42 +++++++++++++++++++++++++++---------------
>  2 files changed, 32 insertions(+), 15 deletions(-)

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v6 2/4] net: diag: Add the ability to destroy a socket.
  2015-12-15 17:17                                                             ` [PATCH v6 2/4] net: diag: Add the ability to destroy a socket Lorenzo Colitti
@ 2015-12-15 17:44                                                               ` Eric Dumazet
  0 siblings, 0 replies; 110+ messages in thread
From: Eric Dumazet @ 2015-12-15 17:44 UTC (permalink / raw)
  To: Lorenzo Colitti; +Cc: netdev, davem, hannes, ek, tom, zenczykowski

On Wed, 2015-12-16 at 02:17 +0900, Lorenzo Colitti wrote:
> This adds a diag_destroy pointer to struct proto that allows a
> socket to be administratively closed without any action from the
> process owning the socket or the socket protocol.

> 
> Signed-off-by: Lorenzo Colitti <lorenzo@google.com>

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v6 3/4] net: diag: Support SOCK_DESTROY for inet sockets.
  2015-12-15 17:17                                                             ` [PATCH v6 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
@ 2015-12-15 17:45                                                               ` Eric Dumazet
  0 siblings, 0 replies; 110+ messages in thread
From: Eric Dumazet @ 2015-12-15 17:45 UTC (permalink / raw)
  To: Lorenzo Colitti; +Cc: netdev, davem, hannes, ek, tom, zenczykowski

On Wed, 2015-12-16 at 02:17 +0900, Lorenzo Colitti wrote:
> This passes the SOCK_DESTROY operation to the underlying protocol
> diag handler, or returns -EOPNOTSUPP if that handler does not
> define a destroy operation.
> 
> Most of this patch is just renaming functions. This is not
> strictly necessary, but it would be fairly counterintuitive to
> have the code to destroy inet sockets be in a function whose name
> starts with inet_diag_get.
> 
> Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
> ---

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v6 4/4] net: diag: Support destroying TCP sockets.
  2015-12-15 17:17                                                             ` [PATCH v6 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
@ 2015-12-15 17:46                                                               ` Eric Dumazet
  0 siblings, 0 replies; 110+ messages in thread
From: Eric Dumazet @ 2015-12-15 17:46 UTC (permalink / raw)
  To: Lorenzo Colitti; +Cc: netdev, davem, hannes, ek, tom, zenczykowski

On Wed, 2015-12-16 at 02:17 +0900, Lorenzo Colitti wrote:
> This implements SOCK_DESTROY for TCP sockets. It causes all
> blocking calls on the socket to fail fast with ECONNABORTED and
> causes a protocol close of the socket. It informs the other end
> of the connection by sending a RST, i.e., initiating a TCP ABORT
> as per RFC 793. ECONNABORTED was chosen for consistency with
> FreeBSD.
> 
> Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
> ---

Acked-by: Eric Dumazet <edumazet@google.com>

Thanks Lorenzo !

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v5 4/4] net: diag: Support destroying TCP socketsr
  2015-12-15 17:17                                                           ` [PATCH v5 4/4] net: diag: Support destroying TCP socketsr Lorenzo Colitti
                                                                               ` (3 preceding siblings ...)
  2015-12-15 17:17                                                             ` [PATCH v6 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
@ 2015-12-15 18:36                                                             ` Maciej Żenczykowski
  2015-12-15 18:46                                                               ` Rustad, Mark D
  2015-12-15 18:38                                                             ` David Miller
  5 siblings, 1 reply; 110+ messages in thread
From: Maciej Żenczykowski @ 2015-12-15 18:36 UTC (permalink / raw)
  To: Lorenzo Colitti
  Cc: Linux NetDev, David Miller, Hannes Frederic Sowa, Eric Dumazet,
	Erik Kline, Tom Herbert

I'd tend to agree that reset or abort would be preferable to destroy.
After all... the socket doesn't actually go away.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v5 4/4] net: diag: Support destroying TCP socketsr
  2015-12-15 17:17                                                           ` [PATCH v5 4/4] net: diag: Support destroying TCP socketsr Lorenzo Colitti
                                                                               ` (4 preceding siblings ...)
  2015-12-15 18:36                                                             ` [PATCH v5 4/4] net: diag: Support destroying TCP socketsr Maciej Żenczykowski
@ 2015-12-15 18:38                                                             ` David Miller
  5 siblings, 0 replies; 110+ messages in thread
From: David Miller @ 2015-12-15 18:38 UTC (permalink / raw)
  To: lorenzo; +Cc: netdev, hannes, eric.dumazet, ek, tom, zenczykowski


Please submit this with a proper "[PATCH vX 0/4] xxx" cover-letter.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v5 4/4] net: diag: Support destroying TCP socketsr
  2015-12-15 18:36                                                             ` [PATCH v5 4/4] net: diag: Support destroying TCP socketsr Maciej Żenczykowski
@ 2015-12-15 18:46                                                               ` Rustad, Mark D
  0 siblings, 0 replies; 110+ messages in thread
From: Rustad, Mark D @ 2015-12-15 18:46 UTC (permalink / raw)
  To: Maciej Żenczykowski
  Cc: Lorenzo Colitti, Linux NetDev, David Miller,
	Hannes Frederic Sowa, Eric Dumazet, Erik Kline, Tom Herbert

[-- Attachment #1: Type: text/plain, Size: 406 bytes --]

Maciej Żenczykowski <zenczykowski@gmail.com> wrote:

> I'd tend to agree that reset or abort would be preferable to destroy.
> After all... the socket doesn't actually go away.

Or maybe terminate? Reset kind of implies to me that it may resume operation. Abort could be ok. I think terminate is somewhat more neutral, if that makes sense.

--
Mark Rustad, Networking Division, Intel Corporation

[-- Attachment #2: Message signed with OpenPGP using GPGMail --]
[-- Type: application/pgp-signature, Size: 841 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

end of thread, other threads:[~2015-12-15 18:46 UTC | newest]

Thread overview: 110+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-18  1:43 Add a SOCK_DESTROY operation to close sockets from userspace Lorenzo Colitti
2015-11-18  1:43 ` [PATCH 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
2015-11-18  1:43 ` [PATCH 2/4] net: diag: Add the ability to destroy a socket from userspace Lorenzo Colitti
2015-11-18  1:43 ` [PATCH 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
2015-11-18  1:43 ` [PATCH 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
2015-11-18  3:43   ` kbuild test robot
2015-11-18  4:46     ` Lorenzo Colitti
2015-11-18  4:25   ` kbuild test robot
2015-11-18  3:27 ` Add a SOCK_DESTROY operation to close sockets from userspace Stephen Hemminger
     [not found]   ` <CAAedzxqiXnKzCyevNipNnXEc_+TEjnVphLfseTo4ykZ8SAVt_w@mail.gmail.com>
2015-11-18  3:36     ` Erik Kline
2015-11-18  3:57   ` Maciej Żenczykowski
2015-11-18 11:56     ` David Laight
2015-11-18  4:04   ` Eric Dumazet
2015-11-18 10:19     ` Hannes Frederic Sowa
2015-11-18 10:47       ` Lorenzo Colitti
2015-11-18 11:19         ` Hannes Frederic Sowa
2015-11-18 12:54           ` Eric Dumazet
2015-11-18 13:04           ` Lorenzo Colitti
2015-11-18 13:31             ` Hannes Frederic Sowa
2015-11-18 14:45               ` Lorenzo Colitti
2015-11-18 14:56                 ` Hannes Frederic Sowa
2015-11-18 15:16                   ` Eric Dumazet
2015-11-18 15:32                     ` Hannes Frederic Sowa
2015-11-18 15:33                       ` Hannes Frederic Sowa
2015-11-18 20:35         ` David Miller
2015-11-18 20:43           ` Hannes Frederic Sowa
2015-11-19  3:49             ` David Miller
2015-11-19  5:12               ` Tom Herbert
2015-11-19 15:54                 ` Hannes Frederic Sowa
2015-11-19 23:54                   ` Maciej Żenczykowski
2015-11-19  5:13               ` Lorenzo Colitti
2015-11-19  5:53                 ` David Miller
2015-11-19  7:19                   ` Maciej Żenczykowski
2015-11-19 15:48                     ` David Miller
2015-11-19 16:19                       ` Eric Dumazet
2015-11-19 16:33                         ` David Miller
2015-11-19 16:43                           ` Eric Dumazet
2015-11-19 16:50                             ` David Miller
2015-11-19 16:47                           ` Eric Dumazet
2015-11-19 17:02                             ` David Miller
2015-11-19 17:44                               ` Eric Dumazet
2015-11-19 22:55                           ` Lorenzo Colitti
2015-11-19 17:08                         ` Hannes Frederic Sowa
2015-11-19 17:38                           ` Tom Herbert
2015-11-19 18:09                             ` David Miller
2015-11-19 18:27                               ` Hannes Frederic Sowa
2015-11-19 23:02                                 ` Hannes Frederic Sowa
2015-11-19 23:47                                   ` Lorenzo Colitti
2015-11-19 22:33                             ` Lorenzo Colitti
2015-11-19 22:38                               ` Hannes Frederic Sowa
2015-11-19 23:24                                 ` Tom Herbert
2015-11-19 21:29                         ` Tom Herbert
2015-11-19 21:41                           ` Eric Dumazet
2015-11-19 21:53                             ` Hannes Frederic Sowa
2015-11-19 22:04                               ` Eric Dumazet
2015-11-19 22:09                                 ` Hannes Frederic Sowa
2015-11-19 22:15                                   ` Eric Dumazet
2015-11-19 22:31                                     ` Hannes Frederic Sowa
2015-11-19 22:36                                       ` Eric Dumazet
2015-11-19 21:53                             ` Tom Herbert
2015-11-19 22:07                               ` Eric Dumazet
2015-11-19 22:14                                 ` Tom Herbert
2015-11-19 22:33                                   ` Eric Dumazet
2015-11-20  0:04                                     ` Tom Herbert
2015-11-20  0:09                                       ` Lorenzo Colitti
2015-11-20  0:15                                         ` Tom Herbert
2015-11-20  2:25                                           ` Maciej Żenczykowski
2015-12-01  2:32                                             ` Lorenzo Colitti
2015-12-01  2:32                                               ` [PATCH v3 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
2015-12-01  2:32                                               ` [PATCH v3 2/4] net: diag: Add the ability to destroy a socket from userspace Lorenzo Colitti
2015-12-01  2:32                                               ` [PATCH v3 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
2015-12-01  2:32                                               ` [PATCH v3 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
2015-12-01  6:23                                                 ` kbuild test robot
2015-12-01  7:12                                                   ` Lorenzo Colitti
2015-12-01  2:53                                               ` Add a SOCK_DESTROY operation to close sockets from userspace Tom Herbert
2015-12-02 15:18                                                 ` Lorenzo Colitti
2015-12-02 16:12                                                   ` Tom Herbert
2015-12-02 16:30                                                     ` Lorenzo Colitti
2015-12-02 17:09                                                       ` Tom Herbert
2015-12-14 17:29                                                     ` Lorenzo Colitti
2015-12-14 17:29                                                       ` [PATCH v5 1/4] net: diag: Add the ability to destroy a socket Lorenzo Colitti
2015-12-14 17:29                                                       ` [PATCH v5 2/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
2015-12-14 17:29                                                       ` [PATCH v5 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
2015-12-14 17:29                                                       ` [PATCH v5 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
2015-12-14 17:51                                                         ` kbuild test robot
2015-12-14 17:52                                                         ` Tom Herbert
2015-12-14 18:03                                                         ` Eric Dumazet
2015-12-14 19:37                                                         ` David Miller
2015-12-15 17:17                                                           ` [PATCH v5 4/4] net: diag: Support destroying TCP socketsr Lorenzo Colitti
2015-12-15 17:17                                                             ` [PATCH v6 1/4] net: diag: split inet_diag_dump_one_icsk into two Lorenzo Colitti
2015-12-15 17:44                                                               ` Eric Dumazet
2015-12-15 17:17                                                             ` [PATCH v6 2/4] net: diag: Add the ability to destroy a socket Lorenzo Colitti
2015-12-15 17:44                                                               ` Eric Dumazet
2015-12-15 17:17                                                             ` [PATCH v6 3/4] net: diag: Support SOCK_DESTROY for inet sockets Lorenzo Colitti
2015-12-15 17:45                                                               ` Eric Dumazet
2015-12-15 17:17                                                             ` [PATCH v6 4/4] net: diag: Support destroying TCP sockets Lorenzo Colitti
2015-12-15 17:46                                                               ` Eric Dumazet
2015-12-15 18:36                                                             ` [PATCH v5 4/4] net: diag: Support destroying TCP socketsr Maciej Żenczykowski
2015-12-15 18:46                                                               ` Rustad, Mark D
2015-12-15 18:38                                                             ` David Miller
2015-11-20  0:12                         ` Add a SOCK_DESTROY operation to close sockets from userspace Maciej Żenczykowski
2015-11-20  0:19                   ` Lorenzo Colitti
2015-11-20  0:55                     ` David Miller
2015-11-20  1:00                       ` Maciej Żenczykowski
2015-11-20  1:55                       ` Lorenzo Colitti
2015-11-20 16:51                         ` David Ahern
2015-11-18  3:56 ` Tom Herbert
2015-11-18  4:23   ` Lorenzo Colitti
2015-11-18  4:31     ` Tom Herbert
2015-11-18 10:12 ` Hannes Frederic Sowa

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.