From mboxrd@z Thu Jan 1 00:00:00 1970 From: Tonghao Zhang Subject: [PATCH v6 2/3] sock: Move the socket inuse to namespace. Date: Sun, 10 Dec 2017 07:12:05 -0800 Message-ID: <1512918726-2731-2-git-send-email-xiangxia.m.yue@gmail.com> References: <1512918726-2731-1-git-send-email-xiangxia.m.yue@gmail.com> Cc: netdev@vger.kernel.org, Tonghao Zhang To: davem@davemloft.net, xiyou.wangcong@gmail.com, edumazet@google.com, willemb@google.com, xemul@openvz.org Return-path: Received: from mail-pl0-f67.google.com ([209.85.160.67]:40981 "EHLO mail-pl0-f67.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751726AbdLJPMy (ORCPT ); Sun, 10 Dec 2017 10:12:54 -0500 Received: by mail-pl0-f67.google.com with SMTP id g2so3091784pli.8 for ; Sun, 10 Dec 2017 07:12:53 -0800 (PST) In-Reply-To: <1512918726-2731-1-git-send-email-xiangxia.m.yue@gmail.com> Sender: netdev-owner@vger.kernel.org List-ID: In some cases, we want to know how many sockets are in use in different _net_ namespaces. It's a key resource metric. This patch adds a member to struct netns_core. This is a counter for socket-inuse in the _net_ namespace. The patch increments/decrements the counter in sk_alloc, sk_clone_lock and __sk_free. The main reasons for doing this are: 1. When Linux calls 'do_exit' for an exiting process, the functions 'exit_task_namespaces' and 'exit_task_work' are called sequentially. 'exit_task_namespaces' may have destroyed the _net_ namespace, but 'sock_release' called in 'exit_task_work' may use the _net_ namespace if we count the socket-inuse in sock_release. 2. A socket and its sock come in pairs. More importantly, the sock holds the _net_ namespace. We count the socket-inuse in the sock to avoid holding the _net_ namespace again in the socket. It's an easy way to maintain the code. 3. We allocate the sock_inuse in net_alloc() and free it in net_free() because we must make sure that the sock_inuse is not used anymore after we release it.
Note that some sockets (e.g. a netlink socket created in the kernel) will be released after all of the network namespace exit methods have run. For more details, see cleanup_net(). Therefore, we should not use the per-network-namespace operations to allocate the sock_inuse. Signed-off-by: Martin Zhang Signed-off-by: Tonghao Zhang --- include/net/netns/core.h | 3 +++ include/net/sock.h | 1 + net/core/net_namespace.c | 10 ++++++++++ net/core/sock.c | 26 ++++++++++++++++++++++++++ net/socket.c | 21 ++------------------- 5 files changed, 42 insertions(+), 19 deletions(-) diff --git a/include/net/netns/core.h b/include/net/netns/core.h index 45cfb5d..a5e8a66 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -11,6 +11,9 @@ struct netns_core { int sysctl_somaxconn; +#ifdef CONFIG_PROC_FS + int __percpu *sock_inuse; +#endif struct prot_inuse __percpu *prot_inuse; }; diff --git a/include/net/sock.h b/include/net/sock.h index 9155da4..44f4890 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1262,6 +1262,7 @@ static inline void sk_sockets_allocated_inc(struct sock *sk) /* Called with local bh disabled */ void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); int sock_prot_inuse_get(struct net *net, struct proto *proto); +int sock_inuse_get(struct net *net); #else static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b797832..6c191fb 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -363,6 +363,13 @@ static struct net *net_alloc(void) if (!net) goto out_free; +#ifdef CONFIG_PROC_FS + net->core.sock_inuse = alloc_percpu(int); + if (!net->core.sock_inuse) { + kmem_cache_free(net_cachep, net); + goto out_free; + } +#endif rcu_assign_pointer(net->gen, ng); out: return net; @@ -374,6 +381,9 @@ static struct net *net_alloc(void) static void net_free(struct net *net) { +#ifdef CONFIG_PROC_FS + free_percpu(net->core.sock_inuse);
+#endif kfree(rcu_access_pointer(net->gen)); kmem_cache_free(net_cachep, net); } diff --git a/net/core/sock.c b/net/core/sock.c index c2dd2d3..f6974eb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -145,6 +145,8 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); +static void sock_inuse_add(struct net *net, int val); + /** * sk_ns_capable - General socket capability test * @sk: Socket to use a capability on or through @@ -1534,6 +1536,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, if (likely(sk->sk_net_refcnt)) get_net(net); sock_net_set(sk, net); + sock_inuse_add(net, 1); refcount_set(&sk->sk_wmem_alloc, 1); mem_cgroup_sk_alloc(sk); @@ -1595,6 +1598,8 @@ void sk_destruct(struct sock *sk) static void __sk_free(struct sock *sk) { + sock_inuse_add(sock_net(sk), -1); + if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) sock_diag_broadcast_destroy(sk); else @@ -1716,6 +1721,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); + sock_inuse_add(sock_net(newsk), 1); /* * Before updating sk_refcnt, we must commit prior changes to memory @@ -3061,6 +3067,22 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot) } EXPORT_SYMBOL_GPL(sock_prot_inuse_get); +static void sock_inuse_add(struct net *net, int val) +{ + this_cpu_add(*net->core.sock_inuse, val); +} + +int sock_inuse_get(struct net *net) +{ + int cpu, res = 0; + + for_each_possible_cpu(cpu) + res += *per_cpu_ptr(net->core.sock_inuse, cpu); + + return res >= 0 ? 
res : 0; +} +EXPORT_SYMBOL_GPL(sock_inuse_get); + static int __net_init sock_inuse_init_net(struct net *net) { net->core.prot_inuse = alloc_percpu(struct prot_inuse); @@ -3112,6 +3134,10 @@ static inline void assign_proto_idx(struct proto *prot) static inline void release_proto_idx(struct proto *prot) { } + +static void sock_inuse_add(struct net *net, int val) +{ +} #endif static void req_prot_cleanup(struct request_sock_ops *rsk_prot) diff --git a/net/socket.c b/net/socket.c index 05f361f..bbd2e9c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -163,12 +163,6 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; /* - * Statistics counters of the socket lists - */ - -static DEFINE_PER_CPU(int, sockets_in_use); - -/* * Support routines. * Move socket addresses back and forth across the kernel/user * divide and look after the messy bits. @@ -578,7 +572,6 @@ struct socket *sock_alloc(void) inode->i_gid = current_fsgid(); inode->i_op = &sockfs_inode_ops; - this_cpu_add(sockets_in_use, 1); return sock; } EXPORT_SYMBOL(sock_alloc); @@ -605,7 +598,6 @@ void sock_release(struct socket *sock) if (rcu_dereference_protected(sock->wq, 1)->fasync_list) pr_err("%s: fasync list not empty!\n", __func__); - this_cpu_sub(sockets_in_use, 1); if (!sock->file) { iput(SOCK_INODE(sock)); return; @@ -2622,17 +2614,8 @@ static int __init sock_init(void) #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { - int cpu; - int counter = 0; - - for_each_possible_cpu(cpu) - counter += per_cpu(sockets_in_use, cpu); - - /* It can be negative, by the way. 8) */ - if (counter < 0) - counter = 0; - - seq_printf(seq, "sockets: used %d\n", counter); + seq_printf(seq, "sockets: used %d\n", + sock_inuse_get(seq->private)); } #endif /* CONFIG_PROC_FS */ -- 1.8.3.1