From mboxrd@z Thu Jan 1 00:00:00 1970 From: Tonghao Zhang Subject: [PATCH v5 2/2] sock: Move the socket inuse to namespace. Date: Thu, 7 Dec 2017 08:45:48 -0800 Message-ID: <1512665148-2413-2-git-send-email-xiangxia.m.yue@gmail.com> References: <1512665148-2413-1-git-send-email-xiangxia.m.yue@gmail.com> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: netdev@vger.kernel.org, Tonghao Zhang To: davem@davemloft.net, xiyou.wangcong@gmail.com, edumazet@google.com, willemb@google.com Return-path: Received: from mail-pg0-f68.google.com ([74.125.83.68]:37767 "EHLO mail-pg0-f68.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754295AbdLGQqj (ORCPT ); Thu, 7 Dec 2017 11:46:39 -0500 Received: by mail-pg0-f68.google.com with SMTP id y6so4842524pgp.4 for ; Thu, 07 Dec 2017 08:46:39 -0800 (PST) In-Reply-To: <1512665148-2413-1-git-send-email-xiangxia.m.yue@gmail.com> Sender: netdev-owner@vger.kernel.org List-ID: In some cases, we want to know how many sockets are in use in different _net_ namespaces. It's a key resource metric. This patch adds a member to struct netns_core. It is a counter of the sockets in use in the _net_ namespace. The patch increments/decrements the counter in sk_alloc, sk_clone_lock and __sk_free. The main reasons for doing this are: 1. When linux calls 'do_exit' for a process to exit, the functions 'exit_task_namespaces' and 'exit_task_work' are called sequentially. 'exit_task_namespaces' may have destroyed the _net_ namespace, but 'sock_release', called in 'exit_task_work', may use the _net_ namespace if we count the socket-inuse in sock_release. 2. socket and sock come in pairs. More importantly, sock holds the _net_ namespace. We count the socket-inuse in sock to avoid holding the _net_ namespace again in socket. It's an easy way to maintain the code. 
Signed-off-by: Martin Zhang Signed-off-by: Tonghao Zhang --- include/net/netns/core.h | 1 + include/net/sock.h | 1 + net/core/sock.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++-- net/socket.c | 21 ++----------------- 4 files changed, 54 insertions(+), 21 deletions(-) diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 45cfb5d..d1b4748f 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -11,6 +11,7 @@ struct netns_core { int sysctl_somaxconn; + int __percpu *sock_inuse; struct prot_inuse __percpu *prot_inuse; }; diff --git a/include/net/sock.h b/include/net/sock.h index 79e1a2c..0809b31 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1266,6 +1266,7 @@ static inline void sk_sockets_allocated_inc(struct sock *sk) /* Called with local bh disabled */ void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); int sock_prot_inuse_get(struct net *net, struct proto *proto); +int sock_inuse_get(struct net *net); #else static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc) diff --git a/net/core/sock.c b/net/core/sock.c index c2dd2d3..a11680a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -145,6 +145,8 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); +static void sock_inuse_add(struct net *net, int val); + /** * sk_ns_capable - General socket capability test * @sk: Socket to use a capability on or through @@ -1534,6 +1536,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, if (likely(sk->sk_net_refcnt)) get_net(net); sock_net_set(sk, net); + sock_inuse_add(net, 1); refcount_set(&sk->sk_wmem_alloc, 1); mem_cgroup_sk_alloc(sk); @@ -1595,6 +1598,8 @@ void sk_destruct(struct sock *sk) static void __sk_free(struct sock *sk) { + sock_inuse_add(sock_net(sk), -1); + if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) sock_diag_broadcast_destroy(sk); else @@ -1716,6 +1721,7 @@ struct sock *sk_clone_lock(const 
struct sock *sk, const gfp_t priority) newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); + sock_inuse_add(sock_net(newsk), 1); /* * Before updating sk_refcnt, we must commit prior changes to memory @@ -3061,15 +3067,53 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot) } EXPORT_SYMBOL_GPL(sock_prot_inuse_get); +static void sock_inuse_add(struct net *net, int val) +{ + if (net->core.sock_inuse) /* guard the pointer that is dereferenced below */ + this_cpu_add(*net->core.sock_inuse, val); +} + +int sock_inuse_get(struct net *net) +{ + int cpu, res = 0; + + if (!net->core.sock_inuse) /* nothing to sum when the counter is not allocated */ + return 0; + + for_each_possible_cpu(cpu) + res += *per_cpu_ptr(net->core.sock_inuse, cpu); + + return res >= 0 ? res : 0; +} +EXPORT_SYMBOL_GPL(sock_inuse_get); + static int __net_init sock_inuse_init_net(struct net *net) { net->core.prot_inuse = alloc_percpu(struct prot_inuse); - return net->core.prot_inuse ? 0 : -ENOMEM; + if (!net->core.prot_inuse) + return -ENOMEM; + + net->core.sock_inuse = alloc_percpu(int); + if (!net->core.sock_inuse) + goto out; + + return 0; +out: + free_percpu(net->core.prot_inuse); + return -ENOMEM; } static void __net_exit sock_inuse_exit_net(struct net *net) { - free_percpu(net->core.prot_inuse); + if (net->core.prot_inuse) { + free_percpu(net->core.prot_inuse); + net->core.prot_inuse = NULL; + } + + if (net->core.sock_inuse) { + free_percpu(net->core.sock_inuse); + net->core.sock_inuse = NULL; /* clear the pointer that was just freed */ + } } static struct pernet_operations net_inuse_ops = { @@ -3112,6 +3156,10 @@ static inline void assign_proto_idx(struct proto *prot) static inline void release_proto_idx(struct proto *prot) { } + +static void sock_inuse_add(struct net *net, int val) +{ +} #endif static void req_prot_cleanup(struct request_sock_ops *rsk_prot) diff --git a/net/socket.c b/net/socket.c index 42d8e9c..183de8f01 100644 --- a/net/socket.c +++ b/net/socket.c @@ -163,12 +163,6 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, static const struct 
net_proto_family __rcu *net_families[NPROTO] __read_mostly; /* - * Statistics counters of the socket lists - */ - -static DEFINE_PER_CPU(int, sockets_in_use); - -/* * Support routines. * Move socket addresses back and forth across the kernel/user * divide and look after the messy bits. @@ -574,7 +568,6 @@ struct socket *sock_alloc(void) inode->i_gid = current_fsgid(); inode->i_op = &sockfs_inode_ops; - this_cpu_add(sockets_in_use, 1); return sock; } EXPORT_SYMBOL(sock_alloc); @@ -601,7 +594,6 @@ void sock_release(struct socket *sock) if (rcu_dereference_protected(sock->wq, 1)->fasync_list) pr_err("%s: fasync list not empty!\n", __func__); - this_cpu_sub(sockets_in_use, 1); if (!sock->file) { iput(SOCK_INODE(sock)); return; @@ -2644,17 +2636,8 @@ static int __init sock_init(void) #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { - int cpu; - int counter = 0; - - for_each_possible_cpu(cpu) - counter += per_cpu(sockets_in_use, cpu); - - /* It can be negative, by the way. 8) */ - if (counter < 0) - counter = 0; - - seq_printf(seq, "sockets: used %d\n", counter); + seq_printf(seq, "sockets: used %d\n", + sock_inuse_get(seq->private)); } #endif /* CONFIG_PROC_FS */ -- 1.8.3.1