All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
@ 2016-09-13 17:19 Cyrill Gorcunov
  2016-09-13 18:33 ` Greg
  2016-09-15 19:53 ` David Ahern
  0 siblings, 2 replies; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-13 17:19 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: David Miller, dsa, eric.dumazet, kuznet, jmorris, yoshfuji,
	kaber, avagin, stephen

In criu we actively use the diag interface to collect the sockets
present in the system when dumping applications. While this works
as expected for unix, tcp, udp[lite], packet and netlink sockets,
raw sockets have no diag interface yet. Thus add one.

v2:
 - add missing sock_put calls in raw_diag_dump_one (by eric.dumazet@)
 - implement @destroy for diag requests (by dsa@)

v3:
 - add export of raw_abort for IPv6 (by dsa@)
 - pass net-admin flag into inet_sk_diag_fill due to
   changes in net-next branch (by dsa@)

CC: David S. Miller <davem@davemloft.net>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Ahern <dsa@cumulusnetworks.com>
CC: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
CC: James Morris <jmorris@namei.org>
CC: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
CC: Patrick McHardy <kaber@trash.net>
CC: Andrey Vagin <avagin@openvz.org>
CC: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---

 include/net/raw.h   |    6 +
 include/net/rawv6.h |    7 +
 net/ipv4/Kconfig    |    8 +
 net/ipv4/Makefile   |    1 
 net/ipv4/raw.c      |   21 ++++
 net/ipv4/raw_diag.c |  226 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/raw.c      |    7 +
 7 files changed, 272 insertions(+), 4 deletions(-)

Index: linux-ml.git/include/net/raw.h
===================================================================
--- linux-ml.git.orig/include/net/raw.h
+++ linux-ml.git/include/net/raw.h
@@ -23,6 +23,12 @@
 
 extern struct proto raw_prot;
 
+extern struct raw_hashinfo raw_v4_hashinfo;
+struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
+			     unsigned short num, __be32 raddr,
+			     __be32 laddr, int dif);
+
+int raw_abort(struct sock *sk, int err);
 void raw_icmp_error(struct sk_buff *, int, u32);
 int raw_local_deliver(struct sk_buff *, int);
 
Index: linux-ml.git/include/net/rawv6.h
===================================================================
--- linux-ml.git.orig/include/net/rawv6.h
+++ linux-ml.git/include/net/rawv6.h
@@ -3,6 +3,13 @@
 
 #include <net/protocol.h>
 
+extern struct raw_hashinfo raw_v6_hashinfo;
+struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
+			     unsigned short num, const struct in6_addr *loc_addr,
+			     const struct in6_addr *rmt_addr, int dif);
+
+int raw_abort(struct sock *sk, int err);
+
 void raw6_icmp_error(struct sk_buff *, int nexthdr,
 		u8 type, u8 code, int inner_offset, __be32);
 bool raw6_local_deliver(struct sk_buff *, int);
Index: linux-ml.git/net/ipv4/Kconfig
===================================================================
--- linux-ml.git.orig/net/ipv4/Kconfig
+++ linux-ml.git/net/ipv4/Kconfig
@@ -430,6 +430,14 @@ config INET_UDP_DIAG
 	  Support for UDP socket monitoring interface used by the ss tool.
 	  If unsure, say Y.
 
+config INET_RAW_DIAG
+	tristate "RAW: socket monitoring interface"
+	depends on INET_DIAG && (IPV6 || IPV6=n)
+	default n
+	---help---
+	  Support for RAW socket monitoring interface used by the ss tool.
+	  If unsure, say Y.
+
 config INET_DIAG_DESTROY
 	bool "INET: allow privileged process to administratively close sockets"
 	depends on INET_DIAG
Index: linux-ml.git/net/ipv4/Makefile
===================================================================
--- linux-ml.git.orig/net/ipv4/Makefile
+++ linux-ml.git/net/ipv4/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_NETFILTER)	+= netfilter.o n
 obj-$(CONFIG_INET_DIAG) += inet_diag.o 
 obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
 obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
+obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
 obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
 obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
 obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
Index: linux-ml.git/net/ipv4/raw.c
===================================================================
--- linux-ml.git.orig/net/ipv4/raw.c
+++ linux-ml.git/net/ipv4/raw.c
@@ -89,9 +89,10 @@ struct raw_frag_vec {
 	int hlen;
 };
 
-static struct raw_hashinfo raw_v4_hashinfo = {
+struct raw_hashinfo raw_v4_hashinfo = {
 	.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
 };
+EXPORT_SYMBOL_GPL(raw_v4_hashinfo);
 
 int raw_hash_sk(struct sock *sk)
 {
@@ -120,7 +121,7 @@ void raw_unhash_sk(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(raw_unhash_sk);
 
-static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
+struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
 		unsigned short num, __be32 raddr, __be32 laddr, int dif)
 {
 	sk_for_each_from(sk) {
@@ -136,6 +137,7 @@ static struct sock *__raw_v4_lookup(stru
 found:
 	return sk;
 }
+EXPORT_SYMBOL_GPL(__raw_v4_lookup);
 
 /*
  *	0 - deliver
@@ -918,6 +920,20 @@ static int compat_raw_ioctl(struct sock
 }
 #endif
 
+int raw_abort(struct sock *sk, int err)
+{
+	lock_sock(sk);
+
+	sk->sk_err = err;
+	sk->sk_error_report(sk);
+	udp_disconnect(sk, 0);
+
+	release_sock(sk);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(raw_abort);
+
 struct proto raw_prot = {
 	.name		   = "RAW",
 	.owner		   = THIS_MODULE,
@@ -943,6 +959,7 @@ struct proto raw_prot = {
 	.compat_getsockopt = compat_raw_getsockopt,
 	.compat_ioctl	   = compat_raw_ioctl,
 #endif
+	.diag_destroy	   = raw_abort,
 };
 
 #ifdef CONFIG_PROC_FS
Index: linux-ml.git/net/ipv4/raw_diag.c
===================================================================
--- /dev/null
+++ linux-ml.git/net/ipv4/raw_diag.c
@@ -0,0 +1,226 @@
+#include <linux/module.h>
+
+#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
+
+#include <net/raw.h>
+#include <net/rawv6.h>
+
+#ifdef pr_fmt
+# undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+static struct raw_hashinfo *
+raw_get_hashinfo(const struct inet_diag_req_v2 *r)
+{
+	if (r->sdiag_family == AF_INET) {
+		return &raw_v4_hashinfo;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (r->sdiag_family == AF_INET6) {
+		return &raw_v6_hashinfo;
+#endif
+	} else {
+		pr_warn_once("Unexpected inet family %d\n",
+			     r->sdiag_family);
+		WARN_ON_ONCE(1);
+		return ERR_PTR(-EINVAL);
+	}
+}
+
+static struct sock *raw_lookup(struct net *net, struct sock *from,
+			       const struct inet_diag_req_v2 *r)
+{
+	struct sock *sk = NULL;
+
+	if (r->sdiag_family == AF_INET)
+		sk = __raw_v4_lookup(net, from, r->sdiag_protocol,
+				     r->id.idiag_dst[0],
+				     r->id.idiag_src[0],
+				     r->id.idiag_if);
+#if IS_ENABLED(CONFIG_IPV6)
+	else
+		sk = __raw_v6_lookup(net, from, r->sdiag_protocol,
+				     (const struct in6_addr *)r->id.idiag_src,
+				     (const struct in6_addr *)r->id.idiag_dst,
+				     r->id.idiag_if);
+#endif
+	return sk;
+}
+
+static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r)
+{
+	struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
+	struct sock *sk = NULL, *s;
+	int slot;
+
+	if (IS_ERR(hashinfo))
+		return ERR_CAST(hashinfo);
+
+	read_lock(&hashinfo->lock);
+	for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) {
+		sk_for_each(s, &hashinfo->ht[slot]) {
+			sk = raw_lookup(net, s, r);
+			if (sk)
+				break;
+		}
+	}
+	if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+		sk = NULL;
+	read_unlock(&hashinfo->lock);
+
+	return sk ? sk : ERR_PTR(-ENOENT);
+}
+
+static int raw_diag_dump_one(struct sk_buff *in_skb,
+			     const struct nlmsghdr *nlh,
+			     const struct inet_diag_req_v2 *r)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sk_buff *rep;
+	struct sock *sk;
+	int err;
+
+	sk = raw_sock_get(net, r);
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
+
+	rep = nlmsg_new(sizeof(struct inet_diag_msg) +
+			sizeof(struct inet_diag_meminfo) + 64,
+			GFP_KERNEL);
+	if (!rep) {
+		sock_put(sk);
+		return -ENOMEM;
+	}
+
+	err = inet_sk_diag_fill(sk, NULL, rep, r,
+				sk_user_ns(NETLINK_CB(in_skb).sk),
+				NETLINK_CB(in_skb).portid,
+				nlh->nlmsg_seq, 0, nlh,
+				netlink_net_capable(in_skb, CAP_NET_ADMIN));
+	sock_put(sk);
+
+	if (err < 0) {
+		kfree_skb(rep);
+		return err;
+	}
+
+	err = netlink_unicast(net->diag_nlsk, rep,
+			      NETLINK_CB(in_skb).portid,
+			      MSG_DONTWAIT);
+	if (err > 0)
+		err = 0;
+	return err;
+}
+
+static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
+			struct netlink_callback *cb,
+			const struct inet_diag_req_v2 *r,
+			struct nlattr *bc, bool net_admin)
+{
+	if (!inet_diag_bc_sk(bc, sk))
+		return 0;
+
+	return inet_sk_diag_fill(sk, NULL, skb, r,
+			sk_user_ns(NETLINK_CB(cb->skb).sk),
+			NETLINK_CB(cb->skb).portid,
+			cb->nlh->nlmsg_seq, NLM_F_MULTI,
+			cb->nlh, net_admin);
+}
+
+static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+			  const struct inet_diag_req_v2 *r, struct nlattr *bc)
+{
+	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+	struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
+	struct net *net = sock_net(skb->sk);
+	int num, s_num, slot, s_slot;
+	struct sock *sk = NULL;
+
+	if (IS_ERR(hashinfo))
+		return;
+
+	s_slot = cb->args[0];
+	num = s_num = cb->args[1];
+
+	read_lock(&hashinfo->lock);
+	for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) {
+		num = 0;
+
+		sk_for_each(sk, &hashinfo->ht[slot]) {
+			struct inet_sock *inet = inet_sk(sk);
+
+			if (!net_eq(sock_net(sk), net))
+				continue;
+			if (num < s_num)
+				goto next;
+			if (sk->sk_family != r->sdiag_family)
+				goto next;
+			if (r->id.idiag_sport != inet->inet_sport &&
+			    r->id.idiag_sport)
+				goto next;
+			if (r->id.idiag_dport != inet->inet_dport &&
+			    r->id.idiag_dport)
+				goto next;
+			if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0)
+				goto out_unlock;
+next:
+			num++;
+		}
+	}
+
+out_unlock:
+	read_unlock(&hashinfo->lock);
+
+	cb->args[0] = slot;
+	cb->args[1] = num;
+}
+
+static void raw_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+			      void *info)
+{
+	r->idiag_rqueue = sk_rmem_alloc_get(sk);
+	r->idiag_wqueue = sk_wmem_alloc_get(sk);
+}
+
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int raw_diag_destroy(struct sk_buff *in_skb,
+			    const struct inet_diag_req_v2 *r)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sock *sk;
+
+	sk = raw_sock_get(net, r);
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
+	return sock_diag_destroy(sk, ECONNABORTED);
+}
+#endif
+
+static const struct inet_diag_handler raw_diag_handler = {
+	.dump			= raw_diag_dump,
+	.dump_one		= raw_diag_dump_one,
+	.idiag_get_info		= raw_diag_get_info,
+	.idiag_type		= IPPROTO_RAW,
+	.idiag_info_size	= 0,
+#ifdef CONFIG_INET_DIAG_DESTROY
+	.destroy		= raw_diag_destroy,
+#endif
+};
+
+static int __init raw_diag_init(void)
+{
+	return inet_diag_register(&raw_diag_handler);
+}
+
+static void __exit raw_diag_exit(void)
+{
+	inet_diag_unregister(&raw_diag_handler);
+}
+
+module_init(raw_diag_init);
+module_exit(raw_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */);
Index: linux-ml.git/net/ipv6/raw.c
===================================================================
--- linux-ml.git.orig/net/ipv6/raw.c
+++ linux-ml.git/net/ipv6/raw.c
@@ -65,11 +65,12 @@
 
 #define	ICMPV6_HDRLEN	4	/* ICMPv6 header, RFC 4443 Section 2.1 */
 
-static struct raw_hashinfo raw_v6_hashinfo = {
+struct raw_hashinfo raw_v6_hashinfo = {
 	.lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
 };
+EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
 
-static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
+struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
 		unsigned short num, const struct in6_addr *loc_addr,
 		const struct in6_addr *rmt_addr, int dif)
 {
@@ -102,6 +103,7 @@ static struct sock *__raw_v6_lookup(stru
 found:
 	return sk;
 }
+EXPORT_SYMBOL_GPL(__raw_v6_lookup);
 
 /*
  *	0 - deliver
@@ -1252,6 +1254,7 @@ struct proto rawv6_prot = {
 	.compat_getsockopt = compat_rawv6_getsockopt,
 	.compat_ioctl	   = compat_rawv6_ioctl,
 #endif
+	.diag_destroy	   = raw_abort,
 };
 
 #ifdef CONFIG_PROC_FS

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-13 17:19 [PATCH v3] net: ip, diag -- Add diag interface for raw sockets Cyrill Gorcunov
@ 2016-09-13 18:33 ` Greg
  2016-09-13 20:18   ` Rustad, Mark D
  2016-09-15 19:53 ` David Ahern
  1 sibling, 1 reply; 28+ messages in thread
From: Greg @ 2016-09-13 18:33 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: netdev, linux-kernel, David Miller, dsa, eric.dumazet, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On Tue, 2016-09-13 at 20:19 +0300, Cyrill Gorcunov wrote:
> In criu we are actively using diag interface to collect sockets
> present in the system when dumping applications. And while for
> unix, tcp, udp[lite], packet, netlink it works as expected,
> the raw sockets do not have. Thus add it.
> 
> v2:
>  - add missing sock_put calls in raw_diag_dump_one (by eric.dumazet@)
>  - implement @destroy for diag requests (by dsa@)
> 
> v3:
>  - add export of raw_abort for IPv6 (by dsa@)
>  - pass net-admin flag into inet_sk_diag_fill due to
>    changes in net-next branch (by dsa@)
> 
> CC: David S. Miller <davem@davemloft.net>
> CC: Eric Dumazet <eric.dumazet@gmail.com>
> CC: David Ahern <dsa@cumulusnetworks.com>
> CC: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
> CC: James Morris <jmorris@namei.org>
> CC: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
> CC: Patrick McHardy <kaber@trash.net>
> CC: Andrey Vagin <avagin@openvz.org>
> CC: Stephen Hemminger <stephen@networkplumber.org>
> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> ---
> 
>  include/net/raw.h   |    6 +
>  include/net/rawv6.h |    7 +
>  net/ipv4/Kconfig    |    8 +
>  net/ipv4/Makefile   |    1 
>  net/ipv4/raw.c      |   21 ++++
>  net/ipv4/raw_diag.c |  226 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  net/ipv6/raw.c      |    7 +
>  7 files changed, 272 insertions(+), 4 deletions(-)
> 
> Index: linux-ml.git/include/net/raw.h
> ===================================================================
> --- linux-ml.git.orig/include/net/raw.h
> +++ linux-ml.git/include/net/raw.h
> @@ -23,6 +23,12 @@
>  
>  extern struct proto raw_prot;
>  
> +extern struct raw_hashinfo raw_v4_hashinfo;
> +struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
> +			     unsigned short num, __be32 raddr,
> +			     __be32 laddr, int dif);
> +
> +int raw_abort(struct sock *sk, int err);
>  void raw_icmp_error(struct sk_buff *, int, u32);
>  int raw_local_deliver(struct sk_buff *, int);
>  
> Index: linux-ml.git/include/net/rawv6.h
> ===================================================================
> --- linux-ml.git.orig/include/net/rawv6.h
> +++ linux-ml.git/include/net/rawv6.h
> @@ -3,6 +3,13 @@
>  
>  #include <net/protocol.h>
>  
> +extern struct raw_hashinfo raw_v6_hashinfo;
> +struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
> +			     unsigned short num, const struct in6_addr *loc_addr,
> +			     const struct in6_addr *rmt_addr, int dif);
> +
> +int raw_abort(struct sock *sk, int err);
> +
>  void raw6_icmp_error(struct sk_buff *, int nexthdr,
>  		u8 type, u8 code, int inner_offset, __be32);
>  bool raw6_local_deliver(struct sk_buff *, int);
> Index: linux-ml.git/net/ipv4/Kconfig
> ===================================================================
> --- linux-ml.git.orig/net/ipv4/Kconfig
> +++ linux-ml.git/net/ipv4/Kconfig
> @@ -430,6 +430,14 @@ config INET_UDP_DIAG
>  	  Support for UDP socket monitoring interface used by the ss tool.
>  	  If unsure, say Y.
>  
> +config INET_RAW_DIAG
> +	tristate "RAW: socket monitoring interface"
> +	depends on INET_DIAG && (IPV6 || IPV6=n)
> +	default n
> +	---help---
> +	  Support for RAW socket monitoring interface used by the ss tool.
> +	  If unsure, say Y.
> +
>  config INET_DIAG_DESTROY
>  	bool "INET: allow privileged process to administratively close sockets"
>  	depends on INET_DIAG
> Index: linux-ml.git/net/ipv4/Makefile
> ===================================================================
> --- linux-ml.git.orig/net/ipv4/Makefile
> +++ linux-ml.git/net/ipv4/Makefile
> @@ -40,6 +40,7 @@ obj-$(CONFIG_NETFILTER)	+= netfilter.o n
>  obj-$(CONFIG_INET_DIAG) += inet_diag.o 
>  obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
>  obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
> +obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
>  obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
>  obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
>  obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
> Index: linux-ml.git/net/ipv4/raw.c
> ===================================================================
> --- linux-ml.git.orig/net/ipv4/raw.c
> +++ linux-ml.git/net/ipv4/raw.c
> @@ -89,9 +89,10 @@ struct raw_frag_vec {
>  	int hlen;
>  };
>  
> -static struct raw_hashinfo raw_v4_hashinfo = {
> +struct raw_hashinfo raw_v4_hashinfo = {
>  	.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
>  };
> +EXPORT_SYMBOL_GPL(raw_v4_hashinfo);
>  
>  int raw_hash_sk(struct sock *sk)
>  {
> @@ -120,7 +121,7 @@ void raw_unhash_sk(struct sock *sk)
>  }
>  EXPORT_SYMBOL_GPL(raw_unhash_sk);
>  
> -static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
> +struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
>  		unsigned short num, __be32 raddr, __be32 laddr, int dif)
>  {
>  	sk_for_each_from(sk) {
> @@ -136,6 +137,7 @@ static struct sock *__raw_v4_lookup(stru
>  found:
>  	return sk;
>  }
> +EXPORT_SYMBOL_GPL(__raw_v4_lookup);
>  
>  /*
>   *	0 - deliver
> @@ -918,6 +920,20 @@ static int compat_raw_ioctl(struct sock
>  }
>  #endif
>  
> +int raw_abort(struct sock *sk, int err)
> +{
> +	lock_sock(sk);
> +
> +	sk->sk_err = err;
> +	sk->sk_error_report(sk);
> +	udp_disconnect(sk, 0);
> +
> +	release_sock(sk);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(raw_abort);
> +
>  struct proto raw_prot = {
>  	.name		   = "RAW",
>  	.owner		   = THIS_MODULE,
> @@ -943,6 +959,7 @@ struct proto raw_prot = {
>  	.compat_getsockopt = compat_raw_getsockopt,
>  	.compat_ioctl	   = compat_raw_ioctl,
>  #endif
> +	.diag_destroy	   = raw_abort,
>  };
>  
>  #ifdef CONFIG_PROC_FS
> Index: linux-ml.git/net/ipv4/raw_diag.c
> ===================================================================
> --- /dev/null
> +++ linux-ml.git/net/ipv4/raw_diag.c
> @@ -0,0 +1,226 @@
> +#include <linux/module.h>
> +
> +#include <linux/inet_diag.h>
> +#include <linux/sock_diag.h>
> +
> +#include <net/raw.h>
> +#include <net/rawv6.h>
> +
> +#ifdef pr_fmt
> +# undef pr_fmt
> +#endif
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +static struct raw_hashinfo *
> +raw_get_hashinfo(const struct inet_diag_req_v2 *r)
> +{
> +	if (r->sdiag_family == AF_INET) {
> +		return &raw_v4_hashinfo;
> +#if IS_ENABLED(CONFIG_IPV6)
> +	} else if (r->sdiag_family == AF_INET6) {
> +		return &raw_v6_hashinfo;
> +#endif

Someday Linux will be a modern OS that just includes IPV6 and forces a
config option to NOT have it.

That'll be great.  All the IS_ENABLED(CONFIG_IPV6) checks scattered
everywhere are nuts.

</editorial comment>

- Greg

> +	} else {
> +		pr_warn_once("Unexpected inet family %d\n",
> +			     r->sdiag_family);
> +		WARN_ON_ONCE(1);
> +		return ERR_PTR(-EINVAL);
> +	}
> +}
> +
> +static struct sock *raw_lookup(struct net *net, struct sock *from,
> +			       const struct inet_diag_req_v2 *r)
> +{
> +	struct sock *sk = NULL;
> +
> +	if (r->sdiag_family == AF_INET)
> +		sk = __raw_v4_lookup(net, from, r->sdiag_protocol,
> +				     r->id.idiag_dst[0],
> +				     r->id.idiag_src[0],
> +				     r->id.idiag_if);
> +#if IS_ENABLED(CONFIG_IPV6)
> +	else
> +		sk = __raw_v6_lookup(net, from, r->sdiag_protocol,
> +				     (const struct in6_addr *)r->id.idiag_src,
> +				     (const struct in6_addr *)r->id.idiag_dst,
> +				     r->id.idiag_if);
> +#endif
> +	return sk;
> +}
> +
> +static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r)
> +{
> +	struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
> +	struct sock *sk = NULL, *s;
> +	int slot;
> +
> +	if (IS_ERR(hashinfo))
> +		return ERR_CAST(hashinfo);
> +
> +	read_lock(&hashinfo->lock);
> +	for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) {
> +		sk_for_each(s, &hashinfo->ht[slot]) {
> +			sk = raw_lookup(net, s, r);
> +			if (sk)
> +				break;
> +		}
> +	}
> +	if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
> +		sk = NULL;
> +	read_unlock(&hashinfo->lock);
> +
> +	return sk ? sk : ERR_PTR(-ENOENT);
> +}
> +
> +static int raw_diag_dump_one(struct sk_buff *in_skb,
> +			     const struct nlmsghdr *nlh,
> +			     const struct inet_diag_req_v2 *r)
> +{
> +	struct net *net = sock_net(in_skb->sk);
> +	struct sk_buff *rep;
> +	struct sock *sk;
> +	int err;
> +
> +	sk = raw_sock_get(net, r);
> +	if (IS_ERR(sk))
> +		return PTR_ERR(sk);
> +
> +	rep = nlmsg_new(sizeof(struct inet_diag_msg) +
> +			sizeof(struct inet_diag_meminfo) + 64,
> +			GFP_KERNEL);
> +	if (!rep) {
> +		sock_put(sk);
> +		return -ENOMEM;
> +	}
> +
> +	err = inet_sk_diag_fill(sk, NULL, rep, r,
> +				sk_user_ns(NETLINK_CB(in_skb).sk),
> +				NETLINK_CB(in_skb).portid,
> +				nlh->nlmsg_seq, 0, nlh,
> +				netlink_net_capable(in_skb, CAP_NET_ADMIN));
> +	sock_put(sk);
> +
> +	if (err < 0) {
> +		kfree_skb(rep);
> +		return err;
> +	}
> +
> +	err = netlink_unicast(net->diag_nlsk, rep,
> +			      NETLINK_CB(in_skb).portid,
> +			      MSG_DONTWAIT);
> +	if (err > 0)
> +		err = 0;
> +	return err;
> +}
> +
> +static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
> +			struct netlink_callback *cb,
> +			const struct inet_diag_req_v2 *r,
> +			struct nlattr *bc, bool net_admin)
> +{
> +	if (!inet_diag_bc_sk(bc, sk))
> +		return 0;
> +
> +	return inet_sk_diag_fill(sk, NULL, skb, r,
> +			sk_user_ns(NETLINK_CB(cb->skb).sk),
> +			NETLINK_CB(cb->skb).portid,
> +			cb->nlh->nlmsg_seq, NLM_F_MULTI,
> +			cb->nlh, net_admin);
> +}
> +
> +static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
> +			  const struct inet_diag_req_v2 *r, struct nlattr *bc)
> +{
> +	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
> +	struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
> +	struct net *net = sock_net(skb->sk);
> +	int num, s_num, slot, s_slot;
> +	struct sock *sk = NULL;
> +
> +	if (IS_ERR(hashinfo))
> +		return;
> +
> +	s_slot = cb->args[0];
> +	num = s_num = cb->args[1];
> +
> +	read_lock(&hashinfo->lock);
> +	for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) {
> +		num = 0;
> +
> +		sk_for_each(sk, &hashinfo->ht[slot]) {
> +			struct inet_sock *inet = inet_sk(sk);
> +
> +			if (!net_eq(sock_net(sk), net))
> +				continue;
> +			if (num < s_num)
> +				goto next;
> +			if (sk->sk_family != r->sdiag_family)
> +				goto next;
> +			if (r->id.idiag_sport != inet->inet_sport &&
> +			    r->id.idiag_sport)
> +				goto next;
> +			if (r->id.idiag_dport != inet->inet_dport &&
> +			    r->id.idiag_dport)
> +				goto next;
> +			if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0)
> +				goto out_unlock;
> +next:
> +			num++;
> +		}
> +	}
> +
> +out_unlock:
> +	read_unlock(&hashinfo->lock);
> +
> +	cb->args[0] = slot;
> +	cb->args[1] = num;
> +}
> +
> +static void raw_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
> +			      void *info)
> +{
> +	r->idiag_rqueue = sk_rmem_alloc_get(sk);
> +	r->idiag_wqueue = sk_wmem_alloc_get(sk);
> +}
> +
> +#ifdef CONFIG_INET_DIAG_DESTROY
> +static int raw_diag_destroy(struct sk_buff *in_skb,
> +			    const struct inet_diag_req_v2 *r)
> +{
> +	struct net *net = sock_net(in_skb->sk);
> +	struct sock *sk;
> +
> +	sk = raw_sock_get(net, r);
> +	if (IS_ERR(sk))
> +		return PTR_ERR(sk);
> +	return sock_diag_destroy(sk, ECONNABORTED);
> +}
> +#endif
> +
> +static const struct inet_diag_handler raw_diag_handler = {
> +	.dump			= raw_diag_dump,
> +	.dump_one		= raw_diag_dump_one,
> +	.idiag_get_info		= raw_diag_get_info,
> +	.idiag_type		= IPPROTO_RAW,
> +	.idiag_info_size	= 0,
> +#ifdef CONFIG_INET_DIAG_DESTROY
> +	.destroy		= raw_diag_destroy,
> +#endif
> +};
> +
> +static int __init raw_diag_init(void)
> +{
> +	return inet_diag_register(&raw_diag_handler);
> +}
> +
> +static void __exit raw_diag_exit(void)
> +{
> +	inet_diag_unregister(&raw_diag_handler);
> +}
> +
> +module_init(raw_diag_init);
> +module_exit(raw_diag_exit);
> +MODULE_LICENSE("GPL");
> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */);
> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */);
> Index: linux-ml.git/net/ipv6/raw.c
> ===================================================================
> --- linux-ml.git.orig/net/ipv6/raw.c
> +++ linux-ml.git/net/ipv6/raw.c
> @@ -65,11 +65,12 @@
>  
>  #define	ICMPV6_HDRLEN	4	/* ICMPv6 header, RFC 4443 Section 2.1 */
>  
> -static struct raw_hashinfo raw_v6_hashinfo = {
> +struct raw_hashinfo raw_v6_hashinfo = {
>  	.lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
>  };
> +EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
>  
> -static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
> +struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
>  		unsigned short num, const struct in6_addr *loc_addr,
>  		const struct in6_addr *rmt_addr, int dif)
>  {
> @@ -102,6 +103,7 @@ static struct sock *__raw_v6_lookup(stru
>  found:
>  	return sk;
>  }
> +EXPORT_SYMBOL_GPL(__raw_v6_lookup);
>  
>  /*
>   *	0 - deliver
> @@ -1252,6 +1254,7 @@ struct proto rawv6_prot = {
>  	.compat_getsockopt = compat_rawv6_getsockopt,
>  	.compat_ioctl	   = compat_rawv6_ioctl,
>  #endif
> +	.diag_destroy	   = raw_abort,
>  };
>  
>  #ifdef CONFIG_PROC_FS

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-13 18:33 ` Greg
@ 2016-09-13 20:18   ` Rustad, Mark D
  2016-09-13 20:32     ` Greg
  0 siblings, 1 reply; 28+ messages in thread
From: Rustad, Mark D @ 2016-09-13 20:18 UTC (permalink / raw)
  To: Greg
  Cc: Cyrill Gorcunov, Linux Kernel Network Developers, LKML,
	David Miller, dsa, eric.dumazet, kuznet, jmorris, yoshfuji,
	kaber, avagin, stephen

[-- Attachment #1: Type: text/plain, Size: 1160 bytes --]

Greg <gvrose8192@gmail.com> wrote:

> Someday Linux will be a modern OS that just includes IPV6 and forces a
> config option to NOT have it.
>
> That'll be great.  All the IS_ENABLED_(CONFIG_IPV6) scattered everywhere
> is nuts.
>
> </editorial comment>

Better wait until everyone at least *has* IPv6! I have yet to have IPv6  
deployed on any of my employer's networks or get IPv6 service from any ISP  
at my home. When I was at Apple in the 90's I was told that Apple needed  
IPv6 by next year or "we were dead". Well Apple nearly died, but IPv6 had  
nothing to do with that! And I still haven't experienced an IPv6  
deployment! Yeah, I have run it a bit point-to-point to resolve technical  
issues, but that isn't a "deployment" and not very interesting.

As much as we would like things to move faster, much of the world just  
doesn't. Witness the e1000 discussion today for example. Hardware doesn't  
vanish overnight, and I know that my ISP has a network full of CPE that  
doesn't do IPv6, so I'm not expecting their status to change any time soon.

It would be great though.
</pipedream>

--
Mark Rustad, Networking Division, Intel Corporation

[-- Attachment #2: Message signed with OpenPGP using GPGMail --]
[-- Type: application/pgp-signature, Size: 841 bytes --]

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-13 20:18   ` Rustad, Mark D
@ 2016-09-13 20:32     ` Greg
  0 siblings, 0 replies; 28+ messages in thread
From: Greg @ 2016-09-13 20:32 UTC (permalink / raw)
  To: Rustad, Mark D
  Cc: Cyrill Gorcunov, Linux Kernel Network Developers, LKML,
	David Miller, dsa, eric.dumazet, kuznet, jmorris, yoshfuji,
	kaber, avagin, stephen

On Tue, 2016-09-13 at 20:18 +0000, Rustad, Mark D wrote:
> Greg <gvrose8192@gmail.com> wrote:
> 
> > Someday Linux will be a modern OS that just includes IPV6 and forces a
> > config option to NOT have it.
> >
> > That'll be great.  All the IS_ENABLED_(CONFIG_IPV6) scattered everywhere
> > is nuts.
> >
> > </editorial comment>
> 
> Better wait until everyone at least *has* IPv6! I have yet to have IPv6  
> deployed on any of my employer's networks or get IPv6 service from any ISP  
> at my home. When I was at Apple in the 90's I was told that Apple needed  
> IPv6 by next year or "we were dead". Well Apple nearly died, but IPv6 had  
> nothing to do with that! And I still haven't experienced an IPv6  
> deployment! Yeah, I have run it a bit point-to-point to resolve technical  
> issues, but that isn't a "deployment" and not very interesting.
> 
> As much as we would like things to move faster, much of the world just  
> doesn't. Witness the e1000 discussion today for example. Hardware doesn't  
> vanish overnight, and I know that my ISP has a network full of CPE that  
> doesn't do IPv6, so I'm not expecting their status to change any time soon.

Well that's why we can have a configuration to turn it off...

But yeah.  /pipedream

- Greg

> 
> It would be great though.
> </pipedream>
> 
> --
> Mark Rustad, Networking Division, Intel Corporation

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-13 17:19 [PATCH v3] net: ip, diag -- Add diag interface for raw sockets Cyrill Gorcunov
  2016-09-13 18:33 ` Greg
@ 2016-09-15 19:53 ` David Ahern
  2016-09-15 20:22   ` Cyrill Gorcunov
  1 sibling, 1 reply; 28+ messages in thread
From: David Ahern @ 2016-09-15 19:53 UTC (permalink / raw)
  To: Cyrill Gorcunov, netdev, linux-kernel
  Cc: David Miller, eric.dumazet, kuznet, jmorris, yoshfuji, kaber,
	avagin, stephen

On 9/13/16 11:19 AM, Cyrill Gorcunov wrote:
> In criu we are actively using diag interface to collect sockets
> present in the system when dumping applications. And while for
> unix, tcp, udp[lite], packet, netlink it works as expected,
> the raw sockets do not have. Thus add it.
> 
> v2:
>  - add missing sock_put calls in raw_diag_dump_one (by eric.dumazet@)
>  - implement @destroy for diag requests (by dsa@)
> 
> v3:
>  - add export of raw_abort for IPv6 (by dsa@)
>  - pass net-admin flag into inet_sk_diag_fill due to
>    changes in net-next branch (by dsa@)
> 
> CC: David S. Miller <davem@davemloft.net>
> CC: Eric Dumazet <eric.dumazet@gmail.com>
> CC: David Ahern <dsa@cumulusnetworks.com>
> CC: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
> CC: James Morris <jmorris@namei.org>
> CC: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
> CC: Patrick McHardy <kaber@trash.net>
> CC: Andrey Vagin <avagin@openvz.org>
> CC: Stephen Hemminger <stephen@networkplumber.org>
> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> ---

ss -K is not working. Socket lookup fails to find a match due to a protocol mismatch.

I haven't had time to track down why there is a mismatch, given that the kill uses the socket returned from the dump. I won't have time to come back to this until early next week.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-15 19:53 ` David Ahern
@ 2016-09-15 20:22   ` Cyrill Gorcunov
  2016-09-15 20:25     ` David Ahern
  2016-09-15 20:54     ` David Ahern
  0 siblings, 2 replies; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-15 20:22 UTC (permalink / raw)
  To: David Ahern
  Cc: netdev, linux-kernel, David Miller, eric.dumazet, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On Thu, Sep 15, 2016 at 01:53:13PM -0600, David Ahern wrote:
> On 9/13/16 11:19 AM, Cyrill Gorcunov wrote:
> > In criu we are actively using diag interface to collect sockets
> > present in the system when dumping applications. And while for
> > unix, tcp, udp[lite], packet, netlink it works as expected,
> > the raw sockets do not have. Thus add it.
> > 
> > v2:
> >  - add missing sock_put calls in raw_diag_dump_one (by eric.dumazet@)
> >  - implement @destroy for diag requests (by dsa@)
> > 
> > v3:
> >  - add export of raw_abort for IPv6 (by dsa@)
> >  - pass net-admin flag into inet_sk_diag_fill due to
> >    changes in net-next branch (by dsa@)
> > 
> > CC: David S. Miller <davem@davemloft.net>
> > CC: Eric Dumazet <eric.dumazet@gmail.com>
> > CC: David Ahern <dsa@cumulusnetworks.com>
> > CC: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
> > CC: James Morris <jmorris@namei.org>
> > CC: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
> > CC: Patrick McHardy <kaber@trash.net>
> > CC: Andrey Vagin <avagin@openvz.org>
> > CC: Stephen Hemminger <stephen@networkplumber.org>
> > Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> > ---
> 
> ss -K is not working. Socket lookup fails to find a match due to a protocol mismatch.
> 
> haven't had time to track down why there is a mismatch since the kill uses the socket returned
> from the dump. Won't have time to come back to this until early next week.

Have you run the patched iproute2? I just ran ss -K and all sockets got closed
(including raw ones), which actually kicked me off the testing machine's sshd :/

	Cyrill

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-15 20:22   ` Cyrill Gorcunov
@ 2016-09-15 20:25     ` David Ahern
  2016-09-15 20:36       ` Eric Dumazet
  2016-09-15 20:54     ` David Ahern
  1 sibling, 1 reply; 28+ messages in thread
From: David Ahern @ 2016-09-15 20:25 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: netdev, linux-kernel, David Miller, eric.dumazet, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On 9/15/16 2:22 PM, Cyrill Gorcunov wrote:
>> ss -K is not working. Socket lookup fails to find a match due to a protocol mismatch.
>>
>> haven't had time to track down why there is a mismatch since the kill uses the socket returned
>> from the dump. Won't have time to come back to this until early next week.
> 
> Have you ran iproute2 patched? I just ran ss -K and all sockets get closed
> (including raw ones), which actually kicked me off the testing machine sshd :/

yes.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-15 20:25     ` David Ahern
@ 2016-09-15 20:36       ` Eric Dumazet
  2016-09-15 20:39         ` David Ahern
  0 siblings, 1 reply; 28+ messages in thread
From: Eric Dumazet @ 2016-09-15 20:36 UTC (permalink / raw)
  To: David Ahern
  Cc: Cyrill Gorcunov, netdev, linux-kernel, David Miller, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On Thu, 2016-09-15 at 14:25 -0600, David Ahern wrote:
> On 9/15/16 2:22 PM, Cyrill Gorcunov wrote:
> >> ss -K is not working. Socket lookup fails to find a match due to a protocol mismatch.
> >>
> >> haven't had time to track down why there is a mismatch since the kill uses the socket returned
> >> from the dump. Won't have time to come back to this until early next week.
> > 
> > Have you ran iproute2 patched? I just ran ss -K and all sockets get closed
> > (including raw ones), which actually kicked me off the testing machine sshd :/
> 
> yes.
> 

And CONFIG_INET_DIAG_DESTROY is also set in your .config ?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-15 20:36       ` Eric Dumazet
@ 2016-09-15 20:39         ` David Ahern
  0 siblings, 0 replies; 28+ messages in thread
From: David Ahern @ 2016-09-15 20:39 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Cyrill Gorcunov, netdev, linux-kernel, David Miller, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On 9/15/16 2:36 PM, Eric Dumazet wrote:
> On Thu, 2016-09-15 at 14:25 -0600, David Ahern wrote:
>> On 9/15/16 2:22 PM, Cyrill Gorcunov wrote:
>>>> ss -K is not working. Socket lookup fails to find a match due to a protocol mismatch.
>>>>
>>>> haven't had time to track down why there is a mismatch since the kill uses the socket returned
>>>> from the dump. Won't have time to come back to this until early next week.
>>>
>>> Have you ran iproute2 patched? I just ran ss -K and all sockets get closed
>>> (including raw ones), which actually kicked me off the testing machine sshd :/
>>
>> yes.
>>
> 
> And CONFIG_INET_DIAG_DESTROY is also set in your .config ?
yes

dsa@kenny:~/kernel.git$ grep INET_DIAG_DESTROY kbuild/perf/.config
CONFIG_INET_DIAG_DESTROY=y

raw_diag_destroy is getting called, but protocol is 255:

diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index c730e14618ab..95542b3dad76 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -192,6 +192,11 @@ static int raw_diag_destroy(struct sk_buff *in_skb,
        struct sock *sk;

        sk = raw_sock_get(net, r);
+
+if (r->sdiag_family == AF_INET)
+pr_warn("raw_diag_destroy: family IPv4 protocol %d dst %pI4 src %pI4 dev %d sk %p\n",
+        r->sdiag_protocol, &r->id.idiag_dst[0], &r->id.idiag_src[0], r->id.idiag_if, sk);
+
        if (IS_ERR(sk))
                return PTR_ERR(sk);
        return sock_diag_destroy(sk, ECONNABORTED);



so it never finds a match to an actual raw socket:

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 03618ed03532..6d0489629e74 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -124,9 +124,14 @@ EXPORT_SYMBOL_GPL(raw_unhash_sk);
 struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
                unsigned short num, __be32 raddr, __be32 laddr, int dif)
 {
+pr_warn("num %d raddr %pI4 laddr %pI4 dif %d\n", num, &raddr, &laddr, dif);
+
        sk_for_each_from(sk) {
                struct inet_sock *inet = inet_sk(sk);

+pr_warn("sk: num %d raddr %pI4 laddr %pI4 dif %d\n",
+       inet->inet_num, &inet->inet_daddr, &inet->inet_rcv_saddr,sk->sk_bound_dev_if);
+
                if (net_eq(sock_net(sk), net) && inet->inet_num == num  &&
                    !(inet->inet_daddr && inet->inet_daddr != raddr)    &&
                    !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&

so raw_abort is not called.

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-15 20:22   ` Cyrill Gorcunov
  2016-09-15 20:25     ` David Ahern
@ 2016-09-15 20:54     ` David Ahern
  2016-09-15 21:01       ` Cyrill Gorcunov
  1 sibling, 1 reply; 28+ messages in thread
From: David Ahern @ 2016-09-15 20:54 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: netdev, linux-kernel, David Miller, eric.dumazet, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On 9/15/16 2:22 PM, Cyrill Gorcunov wrote:
>> ss -K is not working. Socket lookup fails to find a match due to a protocol mismatch.
>>
>> haven't had time to track down why there is a mismatch since the kill uses the socket returned
>> from the dump. Won't have time to come back to this until early next week.
> 
> Have you ran iproute2 patched? I just ran ss -K and all sockets get closed
> (including raw ones), which actually kicked me off the testing machine sshd :/
> 


This is the patch I applied to iproute2; the change in your goo.gl link plus a debug to confirm the kill action is initiated by ss:

diff --git a/misc/ss.c b/misc/ss.c
index 3b268d999426..4d98411738ea 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -2334,6 +2334,10 @@ static int show_one_inet_sock(const struct sockaddr_nl *addr,
        if (diag_arg->f->f && run_ssfilter(diag_arg->f->f, &s) == 0)
                return 0;

+       if (diag_arg->f->kill) {
+printf("want to kill:\n");
+       err = inet_show_sock(h, &s, diag_arg->protocol);
+       }
        if (diag_arg->f->kill && kill_inet_sock(h, arg) != 0) {
                if (errno == EOPNOTSUPP || errno == ENOENT) {
                        /* Socket can't be closed, or is already closed. */
@@ -2631,6 +2635,10 @@ static int raw_show(struct filter *f)

        dg_proto = RAW_PROTO;

+if (!getenv("PROC_NET_RAW") && !getenv("PROC_ROOT") &&
+inet_show_netlink(f, NULL, IPPROTO_RAW) == 0)
+return 0;
+
        if (f->families&(1<<AF_INET)) {
                if ((fp = net_raw_open()) == NULL)
                        goto outerr;

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-15 20:54     ` David Ahern
@ 2016-09-15 21:01       ` Cyrill Gorcunov
  2016-09-15 22:48         ` Eric Dumazet
  0 siblings, 1 reply; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-15 21:01 UTC (permalink / raw)
  To: David Ahern
  Cc: netdev, linux-kernel, David Miller, eric.dumazet, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On Thu, Sep 15, 2016 at 02:54:57PM -0600, David Ahern wrote:
> On 9/15/16 2:22 PM, Cyrill Gorcunov wrote:
> >> ss -K is not working. Socket lookup fails to find a match due to a protocol mismatch.
> >>
> >> haven't had time to track down why there is a mismatch since the kill uses the socket returned
> >> from the dump. Won't have time to come back to this until early next week.
> > 
> > Have you ran iproute2 patched? I just ran ss -K and all sockets get closed
> > (including raw ones), which actually kicked me off the testing machine sshd :/
> > 
> 
> 
> This is the patch I applied to iproute2; the change in your goo.gl link plus a debug to confirm the kill action is initiated by ss:
> 
> diff --git a/misc/ss.c b/misc/ss.c
> index 3b268d999426..4d98411738ea 100644
> --- a/misc/ss.c
> +++ b/misc/ss.c
> @@ -2334,6 +2334,10 @@ static int show_one_inet_sock(const struct sockaddr_nl *addr,
>         if (diag_arg->f->f && run_ssfilter(diag_arg->f->f, &s) == 0)
>                 return 0;
> 
> +       if (diag_arg->f->kill) {
> +printf("want to kill:\n");
> +       err = inet_show_sock(h, &s, diag_arg->protocol);
> +       }
>         if (diag_arg->f->kill && kill_inet_sock(h, arg) != 0) {
>                 if (errno == EOPNOTSUPP || errno == ENOENT) {
>                         /* Socket can't be closed, or is already closed. */
> @@ -2631,6 +2635,10 @@ static int raw_show(struct filter *f)
> 
>         dg_proto = RAW_PROTO;
> 
> +if (!getenv("PROC_NET_RAW") && !getenv("PROC_ROOT") &&
> +inet_show_netlink(f, NULL, IPPROTO_RAW) == 0)
> +return 0;
> +
>         if (f->families&(1<<AF_INET)) {
>                 if ((fp = net_raw_open()) == NULL)
>                         goto outerr;
> 

Hmm. Weird. I'm running net-next kernel
---
[root@pcs7 ~]# /root/sock &
[1] 5108

This is a trivial program which opens raw sockets 

[root@pcs7 iproute2]# misc/ss -A raw
State      Recv-Q Send-Q                                Local Address:Port                                                 Peer Address:Port                
ESTAB      0      0                                         127.0.0.1:ipproto-255                                            127.0.0.10:ipproto-9090         
UNCONN     0      0                                        127.0.0.10:ipproto-255                                                     *:*                    
UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
ESTAB      0      0                                               ::1:ipproto-255                                                   ::1:ipproto-9091         
UNCONN     0      0                                               ::1:ipproto-255                                                    :::*                    
[root@pcs7 iproute2]# 

[root@pcs7 iproute2]# misc/ss -K
Netid  State      Recv-Q Send-Q                             Local Address:Port                                              Peer Address:Port                
u_str  ESTAB      0      0                /var/run/dbus/system_bus_socket 18071                                                        * 16297                
u_str  ESTAB      0      0                    /run/systemd/journal/stdout 18756                                                        * 16188                
u_str  ESTAB      0      0                    /run/systemd/journal/stdout 23014                                                        * 23013                
u_str  ESTAB      0      0                                              * 18909                                                        * 16298                
u_str  ESTAB      0      0                /var/run/dbus/system_bus_socket 19154                                                        * 18163                
...
???    ESTAB      0      0                                      127.0.0.1:ipproto-255                                         127.0.0.10:ipproto-9090         
???    UNCONN     0      0                                     127.0.0.10:ipproto-255                                                  *:*                    
???    ESTAB      0      0                                            ::1:ipproto-255                                                ::1:ipproto-9091         
???    UNCONN     0      0                                            ::1:ipproto-255                                                 :::*            
---

Here I get kicked off the server. Logging back in:

[cyrill@uranus ~] ssh root@pcs7 
Last login: Thu Sep 15 23:20:42 2016 from gateway
[root@pcs7 ~]# cd /home/iproute2/
[root@pcs7 iproute2]# misc/ss -A raw
State      Recv-Q Send-Q                                Local Address:Port                                                 Peer Address:Port                
UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    

Maybe I am doing something wrong in my testing?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-15 21:01       ` Cyrill Gorcunov
@ 2016-09-15 22:48         ` Eric Dumazet
  2016-09-15 23:45           ` David Ahern
  0 siblings, 1 reply; 28+ messages in thread
From: Eric Dumazet @ 2016-09-15 22:48 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: David Ahern, netdev, linux-kernel, David Miller, kuznet, jmorris,
	yoshfuji, kaber, avagin, stephen

On Fri, 2016-09-16 at 00:01 +0300, Cyrill Gorcunov wrote:

> Here I get kicked off the server. Login back
> 
> [cyrill@uranus ~] ssh root@pcs7 
> Last login: Thu Sep 15 23:20:42 2016 from gateway
> [root@pcs7 ~]# cd /home/iproute2/
> [root@pcs7 iproute2]# misc/ss -A raw
> State      Recv-Q Send-Q                                Local Address:Port                                                 Peer Address:Port                
> UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
> UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
> 
> Maybe I do something wrong for testing?

If you kill your shell, maybe /root/sock is killed as well, thus its raw
sockets are closed.

Try to be selective with -K: do not kill tcp sockets?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-15 22:48         ` Eric Dumazet
@ 2016-09-15 23:45           ` David Ahern
  2016-09-16  7:06             ` Cyrill Gorcunov
  0 siblings, 1 reply; 28+ messages in thread
From: David Ahern @ 2016-09-15 23:45 UTC (permalink / raw)
  To: Eric Dumazet, Cyrill Gorcunov
  Cc: netdev, linux-kernel, David Miller, kuznet, jmorris, yoshfuji,
	kaber, avagin, stephen

On 9/15/16 4:48 PM, Eric Dumazet wrote:
> On Fri, 2016-09-16 at 00:01 +0300, Cyrill Gorcunov wrote:
> 
>> Here I get kicked off the server. Login back
>>
>> [cyrill@uranus ~] ssh root@pcs7 
>> Last login: Thu Sep 15 23:20:42 2016 from gateway
>> [root@pcs7 ~]# cd /home/iproute2/
>> [root@pcs7 iproute2]# misc/ss -A raw
>> State      Recv-Q Send-Q                                Local Address:Port                                                 Peer Address:Port                
>> UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
>> UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
>>
>> Maybe I do something wrong for testing?
> 
> If you kill your shell, maybe /root/sock is killer as well, thus its raw
> sockets are closed.
> 
> Try to be selective in the -K , do not kill tcp sockets ?
> 
> 

I am running
   ss -aKw 'dev == red'

to kill raw sockets bound to device named 'red'.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-15 23:45           ` David Ahern
@ 2016-09-16  7:06             ` Cyrill Gorcunov
  2016-09-16 19:00               ` Cyrill Gorcunov
  0 siblings, 1 reply; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-16  7:06 UTC (permalink / raw)
  To: David Ahern, Eric Dumazet
  Cc: netdev, linux-kernel, David Miller, kuznet, jmorris, yoshfuji,
	kaber, avagin, stephen

On Thu, Sep 15, 2016 at 05:45:02PM -0600, David Ahern wrote:
> > 
> > Try to be selective in the -K , do not kill tcp sockets ?
> 
> I am running
>    ss -aKw 'dev == red'
> 
> to kill raw sockets bound to device named 'red'.

Thanks David, Eric! I'll play with this option today and report the results.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-16  7:06             ` Cyrill Gorcunov
@ 2016-09-16 19:00               ` Cyrill Gorcunov
  2016-09-16 19:30                 ` David Ahern
  0 siblings, 1 reply; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-16 19:00 UTC (permalink / raw)
  To: David Ahern, Eric Dumazet
  Cc: netdev, linux-kernel, David Miller, kuznet, jmorris, yoshfuji,
	kaber, avagin, stephen

On Fri, Sep 16, 2016 at 10:06:23AM +0300, Cyrill Gorcunov wrote:
> On Thu, Sep 15, 2016 at 05:45:02PM -0600, David Ahern wrote:
> > > 
> > > Try to be selective in the -K , do not kill tcp sockets ?
> > 
> > I am running
> >    ss -aKw 'dev == red'
> > 
> > to kill raw sockets bound to device named 'red'.
> 
> Thanks David, Eric! I'll play with this option today and report the results.

I created a veth pair and bound a raw socket to it.

[root@pcs7 iproute2]# misc/ss -A raw
State      Recv-Q Send-Q                                Local Address:Port                                                 Peer Address:Port                
ESTAB      0      0                                         127.0.0.1:ipproto-255                                            127.0.0.10:ipproto-9090         
UNCONN     0      0                                        127.0.0.10:ipproto-255                                                     *:*                    
UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
ESTAB      0      0                                               ::1:ipproto-255                                                   ::1:ipproto-9091         
UNCONN     0      0                                           ::1%vm1:ipproto-255                                                    :::*                    
[root@pcs7 iproute2]# 

[root@pcs7 iproute2]# misc/ss -aKw 'dev == vm1'
State      Recv-Q Send-Q                                Local Address:Port                                                 Peer Address:Port                
UNCONN     0      0                                           ::1%vm1:ipproto-255                                                    :::*                    

[root@pcs7 iproute2]# misc/ss -A raw
State      Recv-Q Send-Q                                Local Address:Port                                                 Peer Address:Port                
ESTAB      0      0                                         127.0.0.1:ipproto-255                                            127.0.0.10:ipproto-9090         
UNCONN     0      0                                        127.0.0.10:ipproto-255                                                     *:*                    
UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
ESTAB      0      0                                               ::1:ipproto-255                                                   ::1:ipproto-9091         

so it gets zapped. Is there some other way to test it?

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-16 19:00               ` Cyrill Gorcunov
@ 2016-09-16 19:30                 ` David Ahern
  2016-09-16 19:39                   ` Cyrill Gorcunov
  0 siblings, 1 reply; 28+ messages in thread
From: David Ahern @ 2016-09-16 19:30 UTC (permalink / raw)
  To: Cyrill Gorcunov, Eric Dumazet
  Cc: netdev, linux-kernel, David Miller, kuznet, jmorris, yoshfuji,
	kaber, avagin, stephen

On 9/16/16 1:00 PM, Cyrill Gorcunov wrote:
> I created veth pair and bound raw socket into it.
> 
> [root@pcs7 iproute2]# misc/ss -A raw
> State      Recv-Q Send-Q                                Local Address:Port                                                 Peer Address:Port                
> ESTAB      0      0                                         127.0.0.1:ipproto-255                                            127.0.0.10:ipproto-9090         
> UNCONN     0      0                                        127.0.0.10:ipproto-255                                                     *:*                    
> UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
> UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
> ESTAB      0      0                                               ::1:ipproto-255                                                   ::1:ipproto-9091         
> UNCONN     0      0                                           ::1%vm1:ipproto-255                                                    :::*                    
> [root@pcs7 iproute2]# 
> 
> [root@pcs7 iproute2]# misc/ss -aKw 'dev == vm1'
> State      Recv-Q Send-Q                                Local Address:Port                                                 Peer Address:Port                
> UNCONN     0      0                                           ::1%vm1:ipproto-255                                                    :::*                    
> 
> [root@pcs7 iproute2]# misc/ss -A raw
> State      Recv-Q Send-Q                                Local Address:Port                                                 Peer Address:Port                
> ESTAB      0      0                                         127.0.0.1:ipproto-255                                            127.0.0.10:ipproto-9090         
> UNCONN     0      0                                        127.0.0.10:ipproto-255                                                     *:*                    
> UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
> UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
> ESTAB      0      0                                               ::1:ipproto-255                                                   ::1:ipproto-9091         
> 
> so it get zapped out. Is there some other way to test it?
> 

I'm guessing you passed IPPROTO_RAW (255) as the protocol to socket(). If you pass something else (IPPROTO_ICMP for example) it won't work.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-16 19:30                 ` David Ahern
@ 2016-09-16 19:39                   ` Cyrill Gorcunov
  2016-09-16 19:47                     ` David Ahern
  0 siblings, 1 reply; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-16 19:39 UTC (permalink / raw)
  To: David Ahern
  Cc: Eric Dumazet, netdev, linux-kernel, David Miller, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On Fri, Sep 16, 2016 at 01:30:28PM -0600, David Ahern wrote:
> > [root@pcs7 iproute2]# misc/ss -A raw
> > State      Recv-Q Send-Q                                Local Address:Port                                                 Peer Address:Port                
> > ESTAB      0      0                                         127.0.0.1:ipproto-255                                            127.0.0.10:ipproto-9090         
> > UNCONN     0      0                                        127.0.0.10:ipproto-255                                                     *:*                    
> > UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
> > UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
> > ESTAB      0      0                                               ::1:ipproto-255                                                   ::1:ipproto-9091         
> > 
> > so it get zapped out. Is there some other way to test it?
> > 
> 
> I'm guessing you passed IPPROTO_RAW (255) as the protocol to socket(). If you pass something
> else (IPPROTO_ICMP for example) it won't work.

True. To support IPPROTO_ICMP it needs enhancement. I thought I would start with
plain _RAW first and then extend it to support _ICMP.

	Cyrill

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-16 19:39                   ` Cyrill Gorcunov
@ 2016-09-16 19:47                     ` David Ahern
  2016-09-16 19:52                       ` Cyrill Gorcunov
  0 siblings, 1 reply; 28+ messages in thread
From: David Ahern @ 2016-09-16 19:47 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Eric Dumazet, netdev, linux-kernel, David Miller, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On 9/16/16 1:39 PM, Cyrill Gorcunov wrote:
> On Fri, Sep 16, 2016 at 01:30:28PM -0600, David Ahern wrote:
>>> [root@pcs7 iproute2]# misc/ss -A raw
>>> State      Recv-Q Send-Q                                Local Address:Port                                                 Peer Address:Port                
>>> ESTAB      0      0                                         127.0.0.1:ipproto-255                                            127.0.0.10:ipproto-9090         
>>> UNCONN     0      0                                        127.0.0.10:ipproto-255                                                     *:*                    
>>> UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
>>> UNCONN     0      0                                                :::ipv6-icmp                                                      :::*                    
>>> ESTAB      0      0                                               ::1:ipproto-255                                                   ::1:ipproto-9091         
>>>
>>> so it get zapped out. Is there some other way to test it?
>>>
>>
>> I'm guessing you passed IPPROTO_RAW (255) as the protocol to socket(). If you pass something
>> else (IPPROTO_ICMP for example) it won't work.
> 
> True. To support IPPROTO_ICMP it need enhancement. I thought start with
> plain _RAW first and then extend to support _ICMP.

I thought raw in this case was SOCK_RAW as in the socket type.

Since the display is showing sockets in addition to IPPROTO_RAW:

$ ss -A raw
State      Recv-Q Send-Q        Local Address:Port                         Peer Address:Port
UNCONN     0      0                    *%eth0:icmp                                    *:*

It is going to be confusing if only ipproto-255 sockets can be killed.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-16 19:47                     ` David Ahern
@ 2016-09-16 19:52                       ` Cyrill Gorcunov
  2016-09-16 19:55                         ` David Ahern
  0 siblings, 1 reply; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-16 19:52 UTC (permalink / raw)
  To: David Ahern
  Cc: Eric Dumazet, netdev, linux-kernel, David Miller, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On Fri, Sep 16, 2016 at 01:47:57PM -0600, David Ahern wrote:
> >>
> >> I'm guessing you passed IPPROTO_RAW (255) as the protocol to socket(). If you pass something
> >> else (IPPROTO_ICMP for example) it won't work.
> > 
> > True. To support IPPROTO_ICMP it need enhancement. I thought start with
> > plain _RAW first and then extend to support _ICMP.
> 
> I thought raw in this case was SOCK_RAW as in the socket type.
> 
> Since the display is showing sockets in addition to IPPROTO_RAW:
> 
> $ ss -A raw
> State      Recv-Q Send-Q        Local Address:Port                         Peer Address:Port
> UNCONN     0      0                    *%eth0:icmp                                    *:*
> 
> It is going to be confusing if only ipproto-255 sockets can be killed.

OK, give me some time to implement it. Hopefully over the weekend or on Monday.
Thanks a lot for the feedback!

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-16 19:52                       ` Cyrill Gorcunov
@ 2016-09-16 19:55                         ` David Ahern
  2016-09-16 20:07                           ` Cyrill Gorcunov
  0 siblings, 1 reply; 28+ messages in thread
From: David Ahern @ 2016-09-16 19:55 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Eric Dumazet, netdev, linux-kernel, David Miller, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On 9/16/16 1:52 PM, Cyrill Gorcunov wrote:
> On Fri, Sep 16, 2016 at 01:47:57PM -0600, David Ahern wrote:
>>>>
>>>> I'm guessing you passed IPPROTO_RAW (255) as the protocol to socket(). If you pass something
>>>> else (IPPROTO_ICMP for example) it won't work.
>>>
>>> True. To support IPPROTO_ICMP it need enhancement. I thought start with
>>> plain _RAW first and then extend to support _ICMP.
>>
>> I thought raw in this case was SOCK_RAW as in the socket type.
>>
>> Since the display is showing sockets in addition to IPPROTO_RAW:
>>
>> $ ss -A raw
>> State      Recv-Q Send-Q        Local Address:Port                         Peer Address:Port
>> UNCONN     0      0                    *%eth0:icmp                                    *:*
>>
>> It is going to be confusing if only ipproto-255 sockets can be killed.
> 
> OK, gimme some time to implement it. Hopefully on the weekend or monday.
> Thanks a huge for feedback!
> 

It may well be a ss bug / problem. As I mentioned I am always seeing 255 for the protocol which is odd since ss does a dump and takes the matches and invokes the kill. Thanks for taking the time to do the kill piece.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-16 19:55                         ` David Ahern
@ 2016-09-16 20:07                           ` Cyrill Gorcunov
  2016-09-20 21:13                             ` Cyrill Gorcunov
  0 siblings, 1 reply; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-16 20:07 UTC (permalink / raw)
  To: David Ahern
  Cc: Eric Dumazet, netdev, linux-kernel, David Miller, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On Fri, Sep 16, 2016 at 01:55:42PM -0600, David Ahern wrote:
> >> Since the display is showing sockets in addition to IPPROTO_RAW:
> >>
> >> $ ss -A raw
> >> State      Recv-Q Send-Q        Local Address:Port                         Peer Address:Port
> >> UNCONN     0      0                    *%eth0:icmp                                    *:*
> >>
> >> It is going to be confusing if only ipproto-255 sockets can be killed.
> > 
> > OK, gimme some time to implement it. Hopefully on the weekend or monday.
> > Thanks a huge for feedback!
> > 
> 
> It may well be a ss bug / problem. As I mentioned I am always seeing 255 for the protocol which

It is rather that this is not addressed in ss. I mean, look, when we send out a diag packet
the kernel looks up a handler, which for the raw protocol we register as

static const struct inet_diag_handler raw_diag_handler = {
	.dump= raw_diag_dump,
	.dump_one= raw_diag_dump_one,
	.idiag_get_info= raw_diag_get_info,
	.idiag_type= IPPROTO_RAW,
	.idiag_info_size= 0,
#ifdef CONFIG_INET_DIAG_DESTROY
	.destroy= raw_diag_destroy,
#endif
};

so if we patch ss and ask for IPPROTO_ICMP in the netlink packet the
kernel simply won't find anything. Thus I think we need (well, I need)
to extend the patch and register an IPPROTO_ICMP diag type, then
extend ss as well. (Unless I missed something obvious.)

> is odd since ss does a dump and takes the matches and invokes the kill. Thanks for taking
> the time to do the kill piece.

Sure!

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-16 20:07                           ` Cyrill Gorcunov
@ 2016-09-20 21:13                             ` Cyrill Gorcunov
  2016-09-26 22:38                               ` Cyrill Gorcunov
  0 siblings, 1 reply; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-20 21:13 UTC (permalink / raw)
  To: David Ahern, Eric Dumazet
  Cc: netdev, linux-kernel, David Miller, kuznet, jmorris, yoshfuji,
	kaber, avagin, stephen

On Fri, Sep 16, 2016 at 11:07:22PM +0300, Cyrill Gorcunov wrote:
> > It may well be a ss bug / problem. As I mentioned I am always seeing 255 for the protocol which
> 
> It is rather not addressed in ss. I mean, look, when we send out a diag packet
> the kernel look ups for a handler, which for raw protocol we register as
> 
> static const struct inet_diag_handler raw_diag_handler = {
> 	.dump= raw_diag_dump,
> 	.dump_one= raw_diag_dump_one,
> 	.idiag_get_info= raw_diag_get_info,
> 	.idiag_type= IPPROTO_RAW,
> 	.idiag_info_size= 0,
> #ifdef CONFIG_INET_DIAG_DESTROY
> 	.destroy= raw_diag_destroy,
> #endif
> };
> 
> so if we patch ss and ask for IPPROTO_ICMP in netlink packet the
> kernel simply won't find anything. Thus I think we need (well, I need)
> to extend the patch and register IPPROTO_ICMP diag type, then
> extend ss as well. (If only I didn't miss somethin obvious).
> 
> > is odd since ss does a dump and takes the matches and invokes the kill.
> > Thanks for taking the time to do the kill piece.

Sorry for the delay in replying (I unexpectedly caught the flu). You know, it eventually
turned out to be difficult to implement handling for sock-raw sockets because they are special.
They are described as ipproto-ip in net/ipv4/af_inet.c, so the entry matches any
protocol specified in the socket() call. In turn, the inet-diag module handles
predefined protocols only, in particular IPPROTO_RAW in our case. Thus
to fetch some real protocol sitting in the raw socket hashes we need some
kind of additional argument passed in the request. I guess we may
use the @idiag_ext field for this purpose? Or require @idiag_ext to have
an INET_DIAG_PROTOCOL bit set and then fetch the real protocol from
an additional attribute? Sounds ok?

	Cyrill

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-20 21:13                             ` Cyrill Gorcunov
@ 2016-09-26 22:38                               ` Cyrill Gorcunov
  2016-09-27  1:54                                 ` David Ahern
  0 siblings, 1 reply; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-26 22:38 UTC (permalink / raw)
  To: David Ahern, Eric Dumazet
  Cc: netdev, linux-kernel, David Miller, kuznet, jmorris, yoshfuji,
	kaber, avagin, stephen

On Wed, Sep 21, 2016 at 12:13:43AM +0300, Cyrill Gorcunov wrote:
> On Fri, Sep 16, 2016 at 11:07:22PM +0300, Cyrill Gorcunov wrote:
> > > It may well be a ss bug / problem. As I mentioned I am always seeing 255 for the protocol which
> > 
> > It is rather not addressed in ss. I mean, look, when we send out a diag packet
> > the kernel look ups for a handler, which for raw protocol we register as
> > 
> > static const struct inet_diag_handler raw_diag_handler = {
> > 	.dump= raw_diag_dump,
> > 	.dump_one= raw_diag_dump_one,
> > 	.idiag_get_info= raw_diag_get_info,
> > 	.idiag_type= IPPROTO_RAW,
> > 	.idiag_info_size= 0,
> > #ifdef CONFIG_INET_DIAG_DESTROY
> > 	.destroy= raw_diag_destroy,
> > #endif
> > };
> > 
> > so if we patch ss and ask for IPPROTO_ICMP in netlink packet the
> > kernel simply won't find anything. Thus I think we need (well, I need)
> > to extend the patch and register IPPROTO_ICMP diag type, then
> > extend ss as well. (If only I didn't miss somethin obvious).
> > 
> > > is odd since ss does a dump and takes the matches and invokes the kill.
> > > Thanks for taking the time to do the kill piece.
> 
> Sorry for delay in reply (I got flu unexpectedly). You know, it eventually
> become uneasy to implement handling for sock-raw because they are special.
> They described as ipproto-ip in net/ipv4/af_inet.c, so it matches any
> protocol specified with the socket call. In turn inet-diag module handled
> predefined protocols only, in particular IPPROTO_RAW in our case. Thus
> to fecth some real protocol sitting in raw sockets hashes we need some
> kind of additional argument passed in the request. I guess we may
> use @idiag_ext field for this sake? Or require @idiag_ext to have
> INET_DIAG_PROTOCOL bit set and then fetch real protocol from
> additional attribute? Sounds ok?

Something like

Index: linux-ml.git/include/uapi/linux/inet_diag.h
===================================================================
--- linux-ml.git.orig/include/uapi/linux/inet_diag.h    2016-09-11 20:56:18.191584145 +0300
+++ linux-ml.git/include/uapi/linux/inet_diag.h 2016-09-27 01:34:08.413172394 +0300
@@ -38,7 +38,7 @@ struct inet_diag_req_v2 {
        __u8    sdiag_family;
        __u8    sdiag_protocol;
        __u8    idiag_ext;
-       __u8    pad;
+       __u8    sdiag_raw_protocol;     /* SOCK_RAW only, @pad for others */
        __u32   idiag_states;
        struct inet_diag_sockid id;
 };

and in the raw-diag module we will use @sdiag_raw_protocol instead of the
@sdiag_protocol field. I haven't looked at the ss tool source code yet, but
I think the idea is clear. Still not sure if starting to use @pad here
is a good idea (it's uapi); maybe it is better to require an nla attribute which would
come right after the inet_diag_req_v2 message?

	Cyrill

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-26 22:38                               ` Cyrill Gorcunov
@ 2016-09-27  1:54                                 ` David Ahern
  2016-09-27  7:48                                   ` Cyrill Gorcunov
  0 siblings, 1 reply; 28+ messages in thread
From: David Ahern @ 2016-09-27  1:54 UTC (permalink / raw)
  To: Cyrill Gorcunov, Eric Dumazet
  Cc: netdev, linux-kernel, David Miller, kuznet, jmorris, yoshfuji,
	kaber, avagin, stephen

On 9/26/16 4:38 PM, Cyrill Gorcunov wrote:
> Something like
> 
> Index: linux-ml.git/include/uapi/linux/inet_diag.h
> ===================================================================
> --- linux-ml.git.orig/include/uapi/linux/inet_diag.h    2016-09-11 20:56:18.191584145 +0300
> +++ linux-ml.git/include/uapi/linux/inet_diag.h 2016-09-27 01:34:08.413172394 +0300
> @@ -38,7 +38,7 @@ struct inet_diag_req_v2 {
>         __u8    sdiag_family;
>         __u8    sdiag_protocol;
>         __u8    idiag_ext;
> -       __u8    pad;
> +       __u8    sdiag_raw_protocol;     /* SOCK_RAW only, @pad for others */

Seems like that should be a union to keep the API.


>         __u32   idiag_states;
>         struct inet_diag_sockid id;
>  };
> 
> and in raw-diag module we will use @sdiag_raw_protocol instead of
> @sdiag_protocol field. Didn't cover ss tool source code yet but
> I think the idea is seen. Still not sure if start using @pad here
> is a good idea (it's uapi), maybe beter to ask nla attribute which would
> come right afterh the inet_diag_req_v2 message?
> 

seems reasonable to me since 2 protocols need to be sent to the kernel.

Alternatively, sdiag_protocol could be the actual protocol and the pad union be a flag field with say bit 0 = INET_DIAG_FLAG_SOCK_RAW. Allows other overrides in the future if needed.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-27  1:54                                 ` David Ahern
@ 2016-09-27  7:48                                   ` Cyrill Gorcunov
  0 siblings, 0 replies; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-27  7:48 UTC (permalink / raw)
  To: David Ahern
  Cc: Eric Dumazet, netdev, linux-kernel, David Miller, kuznet,
	jmorris, yoshfuji, kaber, avagin, stephen

On Mon, Sep 26, 2016 at 07:54:37PM -0600, David Ahern wrote:
> On 9/26/16 4:38 PM, Cyrill Gorcunov wrote:
> > Something like
> > 
> > Index: linux-ml.git/include/uapi/linux/inet_diag.h
> > ===================================================================
> > --- linux-ml.git.orig/include/uapi/linux/inet_diag.h    2016-09-11 20:56:18.191584145 +0300
> > +++ linux-ml.git/include/uapi/linux/inet_diag.h 2016-09-27 01:34:08.413172394 +0300
> > @@ -38,7 +38,7 @@ struct inet_diag_req_v2 {
> >         __u8    sdiag_family;
> >         __u8    sdiag_protocol;
> >         __u8    idiag_ext;
> > -       __u8    pad;
> > +       __u8    sdiag_raw_protocol;     /* SOCK_RAW only, @pad for others */
> 
> Seems like that should be a union to keep the API.

Are anonymous unions (which are not part of C99) acceptable in uapi?
Initially I declared it as a union but then started wondering whether this
would be acceptable.

> 
> 
> >         __u32   idiag_states;
> >         struct inet_diag_sockid id;
> >  };
> > 
> > and in raw-diag module we will use @sdiag_raw_protocol instead of
> > @sdiag_protocol field. Didn't cover ss tool source code yet but
> > I think the idea is seen. Still not sure if start using @pad here
> > is a good idea (it's uapi), maybe beter to ask nla attribute which would
> > come right afterh the inet_diag_req_v2 message?
> > 
> 
> seems reasonable to me since 2 protocols need to be sent to the kernel.
> 
> Alternatively, sdiag_protocol could be the actual protocol and the pad union be a flag field
> with say bit 0 = INET_DIAG_FLAG_SOCK_RAW. Allows other overrides in the future if needed.

The @sdiag_protocol field is used for matching in the diag module handler, so no, I think
we should not change its semantics. I would stick with using @pad, and if
anonymous unions are acceptable this would be just great.

	Cyrill

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-13 15:57         ` David Miller
@ 2016-09-13 16:31           ` Cyrill Gorcunov
  0 siblings, 0 replies; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-13 16:31 UTC (permalink / raw)
  To: David Miller
  Cc: dsa, netdev, linux-kernel, eric.dumazet, kuznet, jmorris,
	yoshfuji, kaber, avagin, stephen

On Tue, Sep 13, 2016 at 11:57:35AM -0400, David Miller wrote:
> > 
> > Thanks for review, David. I updated against net-next.
> 
> Please do not post new versions of patches as replies to existing
> discussions.
> 
> Instead, make fresh patch postings to the list.

Oh, will do. Sorry for the inconvenience.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-11 19:17       ` [PATCH v3] " Cyrill Gorcunov
@ 2016-09-13 15:57         ` David Miller
  2016-09-13 16:31           ` Cyrill Gorcunov
  0 siblings, 1 reply; 28+ messages in thread
From: David Miller @ 2016-09-13 15:57 UTC (permalink / raw)
  To: gorcunov
  Cc: dsa, netdev, linux-kernel, eric.dumazet, kuznet, jmorris,
	yoshfuji, kaber, avagin, stephen

From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Sun, 11 Sep 2016 22:17:15 +0300

> On Sat, Sep 10, 2016 at 04:28:40PM -0600, David Ahern wrote:
>> On 9/10/16 4:05 PM, Cyrill Gorcunov wrote:
>> > On Sat, Sep 10, 2016 at 10:31:35AM -0600, David Ahern wrote:
>> >>
>> >> Would you mind adding the destroy capability as well? The udp version
>> >> should be close to what is needed for raw sockets. See udp_diag_destroy
>> >> and udp_abort.
>> > 
>> > Should be something like below. Didn't tested it yet so for review only.
>> > Will do testing at Monday.
>> 
>> doesn't compile:
>> - raw_abort needs to be in a header for ipv6, and
>> - inet_sk_diag_fill args have changed due to a recent commit
> 
> Thanks for review, David. I updated against net-next.

Please do not post new versions of patches as replies to existing
discussions.

Instead, make fresh patch postings to the list.

Thanks.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* [PATCH v3] net: ip, diag -- Add diag interface for raw sockets
  2016-09-10 22:28     ` David Ahern
@ 2016-09-11 19:17       ` Cyrill Gorcunov
  2016-09-13 15:57         ` David Miller
  0 siblings, 1 reply; 28+ messages in thread
From: Cyrill Gorcunov @ 2016-09-11 19:17 UTC (permalink / raw)
  To: David Ahern
  Cc: NETDEV, LKML, David S. Miller, Eric Dumazet, Alexey Kuznetsov,
	James Morris, Hideaki YOSHIFUJI, Patrick McHardy, Andrey Vagin,
	Stephen Hemminger

On Sat, Sep 10, 2016 at 04:28:40PM -0600, David Ahern wrote:
> On 9/10/16 4:05 PM, Cyrill Gorcunov wrote:
> > On Sat, Sep 10, 2016 at 10:31:35AM -0600, David Ahern wrote:
> >>
> >> Would you mind adding the destroy capability as well? The udp version
> >> should be close to what is needed for raw sockets. See udp_diag_destroy
> >> and udp_abort.
> > 
> > Should be something like below. Didn't tested it yet so for review only.
> > Will do testing at Monday.
> 
> doesn't compile:
> - raw_abort needs to be in a header for ipv6, and
> - inet_sk_diag_fill args have changed due to a recent commit

Thanks for review, David. I updated against net-next.
---
From: Cyrill Gorcunov <gorcunov@openvz.org>
Subject: [PATCH v3] net: ip, diag -- Add diag interface for raw sockets

In criu we are actively using the diag interface to collect sockets
present in the system when dumping applications. And while for
unix, tcp, udp[lite], packet, netlink it works as expected,
raw sockets do not have such an interface. Thus add it.

v2:
 - add missing sock_put calls in raw_diag_dump_one (by eric.dumazet@)
 - implement @destroy for diag requests (by dsa@)

v3:
 - add export of raw_abort for IPv6 (by dsa@)
 - pass net-admin flag into inet_sk_diag_fill due to
   changes in net-next branch (by dsa@)

CC: David S. Miller <davem@davemloft.net>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Ahern <dsa@cumulusnetworks.com>
CC: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
CC: James Morris <jmorris@namei.org>
CC: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
CC: Patrick McHardy <kaber@trash.net>
CC: Andrey Vagin <avagin@openvz.org>
CC: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---

 include/net/raw.h   |    5 +
 include/net/rawv6.h |    7 +
 net/ipv4/Kconfig    |    8 +
 net/ipv4/Makefile   |    1 
 net/ipv4/raw.c      |   21 ++++
 net/ipv4/raw_diag.c |  226 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/raw.c      |    7 +
 7 files changed, 271 insertions(+), 4 deletions(-)

Index: linux-ml.git/include/net/raw.h
===================================================================
--- linux-ml.git.orig/include/net/raw.h
+++ linux-ml.git/include/net/raw.h
@@ -23,6 +23,11 @@
 
 extern struct proto raw_prot;
 
+extern struct raw_hashinfo raw_v4_hashinfo;
+struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
+			     unsigned short num, __be32 raddr,
+			     __be32 laddr, int dif);
+
 void raw_icmp_error(struct sk_buff *, int, u32);
 int raw_local_deliver(struct sk_buff *, int);
 
Index: linux-ml.git/include/net/rawv6.h
===================================================================
--- linux-ml.git.orig/include/net/rawv6.h
+++ linux-ml.git/include/net/rawv6.h
@@ -3,6 +3,13 @@
 
 #include <net/protocol.h>
 
+extern struct raw_hashinfo raw_v6_hashinfo;
+struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
+			     unsigned short num, const struct in6_addr *loc_addr,
+			     const struct in6_addr *rmt_addr, int dif);
+
+int raw_abort(struct sock *sk, int err);
+
 void raw6_icmp_error(struct sk_buff *, int nexthdr,
 		u8 type, u8 code, int inner_offset, __be32);
 bool raw6_local_deliver(struct sk_buff *, int);
Index: linux-ml.git/net/ipv4/Kconfig
===================================================================
--- linux-ml.git.orig/net/ipv4/Kconfig
+++ linux-ml.git/net/ipv4/Kconfig
@@ -430,6 +430,14 @@ config INET_UDP_DIAG
 	  Support for UDP socket monitoring interface used by the ss tool.
 	  If unsure, say Y.
 
+config INET_RAW_DIAG
+	tristate "RAW: socket monitoring interface"
+	depends on INET_DIAG && (IPV6 || IPV6=n)
+	default n
+	---help---
+	  Support for RAW socket monitoring interface used by the ss tool.
+	  If unsure, say Y.
+
 config INET_DIAG_DESTROY
 	bool "INET: allow privileged process to administratively close sockets"
 	depends on INET_DIAG
Index: linux-ml.git/net/ipv4/Makefile
===================================================================
--- linux-ml.git.orig/net/ipv4/Makefile
+++ linux-ml.git/net/ipv4/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_NETFILTER)	+= netfilter.o n
 obj-$(CONFIG_INET_DIAG) += inet_diag.o 
 obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
 obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
+obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o
 obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
 obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
 obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
Index: linux-ml.git/net/ipv4/raw.c
===================================================================
--- linux-ml.git.orig/net/ipv4/raw.c
+++ linux-ml.git/net/ipv4/raw.c
@@ -89,9 +89,10 @@ struct raw_frag_vec {
 	int hlen;
 };
 
-static struct raw_hashinfo raw_v4_hashinfo = {
+struct raw_hashinfo raw_v4_hashinfo = {
 	.lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock),
 };
+EXPORT_SYMBOL_GPL(raw_v4_hashinfo);
 
 int raw_hash_sk(struct sock *sk)
 {
@@ -120,7 +121,7 @@ void raw_unhash_sk(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(raw_unhash_sk);
 
-static struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
+struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
 		unsigned short num, __be32 raddr, __be32 laddr, int dif)
 {
 	sk_for_each_from(sk) {
@@ -136,6 +137,7 @@ static struct sock *__raw_v4_lookup(stru
 found:
 	return sk;
 }
+EXPORT_SYMBOL_GPL(__raw_v4_lookup);
 
 /*
  *	0 - deliver
@@ -918,6 +920,20 @@ static int compat_raw_ioctl(struct sock
 }
 #endif
 
+int raw_abort(struct sock *sk, int err)
+{
+	lock_sock(sk);
+
+	sk->sk_err = err;
+	sk->sk_error_report(sk);
+	udp_disconnect(sk, 0);
+
+	release_sock(sk);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(raw_abort);
+
 struct proto raw_prot = {
 	.name		   = "RAW",
 	.owner		   = THIS_MODULE,
@@ -943,6 +959,7 @@ struct proto raw_prot = {
 	.compat_getsockopt = compat_raw_getsockopt,
 	.compat_ioctl	   = compat_raw_ioctl,
 #endif
+	.diag_destroy	   = raw_abort,
 };
 
 #ifdef CONFIG_PROC_FS
Index: linux-ml.git/net/ipv4/raw_diag.c
===================================================================
--- /dev/null
+++ linux-ml.git/net/ipv4/raw_diag.c
@@ -0,0 +1,226 @@
+#include <linux/module.h>
+
+#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
+
+#include <net/raw.h>
+#include <net/rawv6.h>
+
+#ifdef pr_fmt
+# undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+static struct raw_hashinfo *
+raw_get_hashinfo(const struct inet_diag_req_v2 *r)
+{
+	if (r->sdiag_family == AF_INET) {
+		return &raw_v4_hashinfo;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (r->sdiag_family == AF_INET6) {
+		return &raw_v6_hashinfo;
+#endif
+	} else {
+		pr_warn_once("Unexpected inet family %d\n",
+			     r->sdiag_family);
+		WARN_ON_ONCE(1);
+		return ERR_PTR(-EINVAL);
+	}
+}
+
+static struct sock *raw_lookup(struct net *net, struct sock *from,
+			       const struct inet_diag_req_v2 *r)
+{
+	struct sock *sk = NULL;
+
+	if (r->sdiag_family == AF_INET)
+		sk = __raw_v4_lookup(net, from, r->sdiag_protocol,
+				     r->id.idiag_dst[0],
+				     r->id.idiag_src[0],
+				     r->id.idiag_if);
+#if IS_ENABLED(CONFIG_IPV6)
+	else
+		sk = __raw_v6_lookup(net, from, r->sdiag_protocol,
+				     (const struct in6_addr *)r->id.idiag_src,
+				     (const struct in6_addr *)r->id.idiag_dst,
+				     r->id.idiag_if);
+#endif
+	return sk;
+}
+
+static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r)
+{
+	struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
+	struct sock *sk = NULL, *s;
+	int slot;
+
+	if (IS_ERR(hashinfo))
+		return ERR_CAST(hashinfo);
+
+	read_lock(&hashinfo->lock);
+	for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) {
+		sk_for_each(s, &hashinfo->ht[slot]) {
+			sk = raw_lookup(net, s, r);
+			if (sk)
+				break;
+		}
+	}
+	if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+		sk = NULL;
+	read_unlock(&hashinfo->lock);
+
+	return sk ? sk : ERR_PTR(-ENOENT);
+}
+
+static int raw_diag_dump_one(struct sk_buff *in_skb,
+			     const struct nlmsghdr *nlh,
+			     const struct inet_diag_req_v2 *r)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sk_buff *rep;
+	struct sock *sk;
+	int err;
+
+	sk = raw_sock_get(net, r);
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
+
+	rep = nlmsg_new(sizeof(struct inet_diag_msg) +
+			sizeof(struct inet_diag_meminfo) + 64,
+			GFP_KERNEL);
+	if (!rep) {
+		sock_put(sk);
+		return -ENOMEM;
+	}
+
+	err = inet_sk_diag_fill(sk, NULL, rep, r,
+				sk_user_ns(NETLINK_CB(in_skb).sk),
+				NETLINK_CB(in_skb).portid,
+				nlh->nlmsg_seq, 0, nlh,
+				netlink_net_capable(in_skb, CAP_NET_ADMIN));
+	sock_put(sk);
+
+	if (err < 0) {
+		kfree_skb(rep);
+		return err;
+	}
+
+	err = netlink_unicast(net->diag_nlsk, rep,
+			      NETLINK_CB(in_skb).portid,
+			      MSG_DONTWAIT);
+	if (err > 0)
+		err = 0;
+	return err;
+}
+
+static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
+			struct netlink_callback *cb,
+			const struct inet_diag_req_v2 *r,
+			struct nlattr *bc, bool net_admin)
+{
+	if (!inet_diag_bc_sk(bc, sk))
+		return 0;
+
+	return inet_sk_diag_fill(sk, NULL, skb, r,
+			sk_user_ns(NETLINK_CB(cb->skb).sk),
+			NETLINK_CB(cb->skb).portid,
+			cb->nlh->nlmsg_seq, NLM_F_MULTI,
+			cb->nlh, net_admin);
+}
+
+static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+			  const struct inet_diag_req_v2 *r, struct nlattr *bc)
+{
+	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+	struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
+	struct net *net = sock_net(skb->sk);
+	int num, s_num, slot, s_slot;
+	struct sock *sk = NULL;
+
+	if (IS_ERR(hashinfo))
+		return;
+
+	s_slot = cb->args[0];
+	num = s_num = cb->args[1];
+
+	read_lock(&hashinfo->lock);
+	for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) {
+		num = 0;
+
+		sk_for_each(sk, &hashinfo->ht[slot]) {
+			struct inet_sock *inet = inet_sk(sk);
+
+			if (!net_eq(sock_net(sk), net))
+				continue;
+			if (num < s_num)
+				goto next;
+			if (sk->sk_family != r->sdiag_family)
+				goto next;
+			if (r->id.idiag_sport != inet->inet_sport &&
+			    r->id.idiag_sport)
+				goto next;
+			if (r->id.idiag_dport != inet->inet_dport &&
+			    r->id.idiag_dport)
+				goto next;
+			if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0)
+				goto out_unlock;
+next:
+			num++;
+		}
+	}
+
+out_unlock:
+	read_unlock(&hashinfo->lock);
+
+	cb->args[0] = slot;
+	cb->args[1] = num;
+}
+
+static void raw_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
+			      void *info)
+{
+	r->idiag_rqueue = sk_rmem_alloc_get(sk);
+	r->idiag_wqueue = sk_wmem_alloc_get(sk);
+}
+
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int raw_diag_destroy(struct sk_buff *in_skb,
+			    const struct inet_diag_req_v2 *r)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sock *sk;
+
+	sk = raw_sock_get(net, r);
+	if (IS_ERR(sk))
+		return PTR_ERR(sk);
+	return sock_diag_destroy(sk, ECONNABORTED);
+}
+#endif
+
+static const struct inet_diag_handler raw_diag_handler = {
+	.dump			= raw_diag_dump,
+	.dump_one		= raw_diag_dump_one,
+	.idiag_get_info		= raw_diag_get_info,
+	.idiag_type		= IPPROTO_RAW,
+	.idiag_info_size	= 0,
+#ifdef CONFIG_INET_DIAG_DESTROY
+	.destroy		= raw_diag_destroy,
+#endif
+};
+
+static int __init raw_diag_init(void)
+{
+	return inet_diag_register(&raw_diag_handler);
+}
+
+static void __exit raw_diag_exit(void)
+{
+	inet_diag_unregister(&raw_diag_handler);
+}
+
+module_init(raw_diag_init);
+module_exit(raw_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */);
Index: linux-ml.git/net/ipv6/raw.c
===================================================================
--- linux-ml.git.orig/net/ipv6/raw.c
+++ linux-ml.git/net/ipv6/raw.c
@@ -65,11 +65,12 @@
 
 #define	ICMPV6_HDRLEN	4	/* ICMPv6 header, RFC 4443 Section 2.1 */
 
-static struct raw_hashinfo raw_v6_hashinfo = {
+struct raw_hashinfo raw_v6_hashinfo = {
 	.lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
 };
+EXPORT_SYMBOL_GPL(raw_v6_hashinfo);
 
-static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
+struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
 		unsigned short num, const struct in6_addr *loc_addr,
 		const struct in6_addr *rmt_addr, int dif)
 {
@@ -102,6 +103,7 @@ static struct sock *__raw_v6_lookup(stru
 found:
 	return sk;
 }
+EXPORT_SYMBOL_GPL(__raw_v6_lookup);
 
 /*
  *	0 - deliver
@@ -1252,6 +1254,7 @@ struct proto rawv6_prot = {
 	.compat_getsockopt = compat_rawv6_getsockopt,
 	.compat_ioctl	   = compat_rawv6_ioctl,
 #endif
+	.diag_destroy	   = raw_abort,
 };
 
 #ifdef CONFIG_PROC_FS

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2016-09-27  7:49 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-13 17:19 [PATCH v3] net: ip, diag -- Add diag interface for raw sockets Cyrill Gorcunov
2016-09-13 18:33 ` Greg
2016-09-13 20:18   ` Rustad, Mark D
2016-09-13 20:32     ` Greg
2016-09-15 19:53 ` David Ahern
2016-09-15 20:22   ` Cyrill Gorcunov
2016-09-15 20:25     ` David Ahern
2016-09-15 20:36       ` Eric Dumazet
2016-09-15 20:39         ` David Ahern
2016-09-15 20:54     ` David Ahern
2016-09-15 21:01       ` Cyrill Gorcunov
2016-09-15 22:48         ` Eric Dumazet
2016-09-15 23:45           ` David Ahern
2016-09-16  7:06             ` Cyrill Gorcunov
2016-09-16 19:00               ` Cyrill Gorcunov
2016-09-16 19:30                 ` David Ahern
2016-09-16 19:39                   ` Cyrill Gorcunov
2016-09-16 19:47                     ` David Ahern
2016-09-16 19:52                       ` Cyrill Gorcunov
2016-09-16 19:55                         ` David Ahern
2016-09-16 20:07                           ` Cyrill Gorcunov
2016-09-20 21:13                             ` Cyrill Gorcunov
2016-09-26 22:38                               ` Cyrill Gorcunov
2016-09-27  1:54                                 ` David Ahern
2016-09-27  7:48                                   ` Cyrill Gorcunov
  -- strict thread matches above, loose matches on Subject: below --
2016-09-09 18:26 [PATCH] " Cyrill Gorcunov
2016-09-10 16:31 ` David Ahern
2016-09-10 22:05   ` [PATCH v2] " Cyrill Gorcunov
2016-09-10 22:28     ` David Ahern
2016-09-11 19:17       ` [PATCH v3] " Cyrill Gorcunov
2016-09-13 15:57         ` David Miller
2016-09-13 16:31           ` Cyrill Gorcunov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.