linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jiebin Sun <jiebin.sun@intel.com>
To: akpm@linux-foundation.org, vasily.averin@linux.dev,
	shakeelb@google.com, dennis@kernel.org, tj@kernel.org,
	cl@linux.com, ebiederm@xmission.com, legion@kernel.org,
	manfred@colorfullife.com, alexander.mikhalitsyn@virtuozzo.com,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: tim.c.chen@intel.com, feng.tang@intel.com, ying.huang@intel.com,
	tianyou.li@intel.com, wangyang.guo@intel.com,
	jiebin.sun@intel.com
Subject: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter
Date: Thu,  8 Sep 2022 01:25:16 +0800	[thread overview]
Message-ID: <20220907172516.1210842-1-jiebin.sun@intel.com> (raw)
In-Reply-To: <CALvZod44uUFnwfF4StC24t+d1s_XE10hkmSCgb04FjtTATo6xQ@mail.gmail.com>

The msg_bytes and msg_hdrs atomic counters are frequently
updated when the IPC msg queue is in heavy use, causing heavy
cache bouncing and overhead. Changing them to percpu_counter
greatly improves performance. Since there is one percpu
struct per namespace, the additional memory cost is minimal.
The counts are read in the msgctl call, which is infrequent,
so the need to sum up the per-CPU counts is infrequent as well.


Tested with the patch applied, using pts/stress-ng-1.4.0
-- System V message passing (160 threads).

Score gain: 3.17x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
---
 include/linux/ipc_namespace.h |  5 ++--
 ipc/msg.c                     | 47 ++++++++++++++++++++++++-----------
 ipc/namespace.c               |  5 +++-
 ipc/util.h                    |  4 +--
 4 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
 #include <linux/refcount.h>
 #include <linux/rhashtable-types.h>
 #include <linux/sysctl.h>
+#include <linux/percpu_counter.h>
 
 struct user_namespace;
 
@@ -36,8 +37,8 @@ struct ipc_namespace {
 	unsigned int	msg_ctlmax;
 	unsigned int	msg_ctlmnb;
 	unsigned int	msg_ctlmni;
-	atomic_t	msg_bytes;
-	atomic_t	msg_hdrs;
+	struct percpu_counter percpu_msg_bytes;
+	struct percpu_counter percpu_msg_hdrs;
 
 	size_t		shm_ctlmax;
 	size_t		shm_ctlall;
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..040cfc93d7ef 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,11 +39,15 @@
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
 #include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>
 
 #include <asm/current.h>
 #include <linux/uaccess.h>
 #include "util.h"
 
+/* a large batch size reduces how often percpu deltas are folded into the global count */
+#define MSG_PERCPU_COUNTER_BATCH 1024
+
 /* one msq_queue structure for each present queue on the system */
 struct msg_queue {
 	struct kern_ipc_perm q_perm;
@@ -285,10 +289,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	rcu_read_unlock();
 
 	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
-		atomic_dec(&ns->msg_hdrs);
+		percpu_counter_add_batch(&ns->percpu_msg_hdrs, -1, MSG_PERCPU_COUNTER_BATCH);
 		free_msg(msg);
 	}
-	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+	percpu_counter_add_batch(&ns->percpu_msg_bytes, -(msq->q_cbytes), MSG_PERCPU_COUNTER_BATCH);
 	ipc_update_pid(&msq->q_lspid, NULL);
 	ipc_update_pid(&msq->q_lrpid, NULL);
 	ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +499,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
 	msginfo->msgssz = MSGSSZ;
 	msginfo->msgseg = MSGSEG;
 	down_read(&msg_ids(ns).rwsem);
-	if (cmd == MSG_INFO) {
+	if (cmd == MSG_INFO)
 		msginfo->msgpool = msg_ids(ns).in_use;
-		msginfo->msgmap = atomic_read(&ns->msg_hdrs);
-		msginfo->msgtql = atomic_read(&ns->msg_bytes);
+	max_idx = ipc_get_maxidx(&msg_ids(ns));
+	up_read(&msg_ids(ns).rwsem);
+	if (cmd == MSG_INFO) {
+		msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
+		msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
 	} else {
 		msginfo->msgmap = MSGMAP;
 		msginfo->msgpool = MSGPOOL;
 		msginfo->msgtql = MSGTQL;
 	}
-	max_idx = ipc_get_maxidx(&msg_ids(ns));
-	up_read(&msg_ids(ns).rwsem);
 	return (max_idx < 0) ? 0 : max_idx;
 }
 
@@ -935,8 +940,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		list_add_tail(&msg->m_list, &msq->q_messages);
 		msq->q_cbytes += msgsz;
 		msq->q_qnum++;
-		atomic_add(msgsz, &ns->msg_bytes);
-		atomic_inc(&ns->msg_hdrs);
+		percpu_counter_add_batch(&ns->percpu_msg_bytes, msgsz, MSG_PERCPU_COUNTER_BATCH);
+		percpu_counter_add_batch(&ns->percpu_msg_hdrs, 1, MSG_PERCPU_COUNTER_BATCH);
 	}
 
 	err = 0;
@@ -1159,8 +1164,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
 			msq->q_rtime = ktime_get_real_seconds();
 			ipc_update_pid(&msq->q_lrpid, task_tgid(current));
 			msq->q_cbytes -= msg->m_ts;
-			atomic_sub(msg->m_ts, &ns->msg_bytes);
-			atomic_dec(&ns->msg_hdrs);
+			percpu_counter_add_batch(&ns->percpu_msg_bytes, -(msg->m_ts), MSG_PERCPU_COUNTER_BATCH);
+			percpu_counter_add_batch(&ns->percpu_msg_hdrs, -1, MSG_PERCPU_COUNTER_BATCH);
 			ss_wakeup(msq, &wake_q, false);
 
 			goto out_unlock0;
@@ -1297,20 +1302,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
 }
 #endif
 
-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
 {
+	int ret;
+
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
 	ns->msg_ctlmni = MSGMNI;
 
-	atomic_set(&ns->msg_bytes, 0);
-	atomic_set(&ns->msg_hdrs, 0);
+	ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+	if (ret)
+		goto fail_msg_bytes;
+	ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+	if (ret)
+		goto fail_msg_hdrs;
 	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+	return 0;
+
+	fail_msg_hdrs:
+		percpu_counter_destroy(&ns->percpu_msg_bytes);
+	fail_msg_bytes:
+		return ret;
 }
 
 #ifdef CONFIG_IPC_NS
 void msg_exit_ns(struct ipc_namespace *ns)
 {
+	percpu_counter_destroy(&ns->percpu_msg_bytes);
+	percpu_counter_destroy(&ns->percpu_msg_hdrs);
 	free_ipcs(ns, &msg_ids(ns), freeque);
 	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
 	rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index e1fcaedba4fa..8316ea585733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	if (!setup_ipc_sysctls(ns))
 		goto fail_mq;
 
+	err = msg_init_ns(ns);
+	if (err)
+		goto fail_put;
+
 	sem_init_ns(ns);
-	msg_init_ns(ns);
 	shm_init_ns(ns);
 
 	return ns;
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8..1b0086c6346f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }
 
 #ifdef CONFIG_SYSVIPC
 void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
 void shm_init_ns(struct ipc_namespace *ns);
 
 void sem_exit_ns(struct ipc_namespace *ns);
@@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns);
 void shm_exit_ns(struct ipc_namespace *ns);
 #else
 static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0;}
 static inline void shm_init_ns(struct ipc_namespace *ns) { }
 
 static inline void sem_exit_ns(struct ipc_namespace *ns) { }
-- 
2.31.1


  parent reply	other threads:[~2022-09-07  9:06 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-02 15:22 [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter Jiebin Sun
2022-09-02 16:06 ` Andrew Morton
2022-09-05 11:54   ` Sun, Jiebin
2022-09-02 16:27 ` Shakeel Butt
2022-09-05 12:02   ` Sun, Jiebin
2022-09-06 18:44   ` Tim Chen
2022-09-07  9:39     ` Sun, Jiebin
2022-09-07 20:43       ` Andrew Morton
2022-09-07 17:25   ` Jiebin Sun [this message]
2022-09-07 16:01     ` [PATCH v4] ipc/msg: " Tim Chen
2022-09-07 21:34       ` Andrew Morton
2022-09-07 22:10         ` Tim Chen
2022-09-08  8:25         ` Sun, Jiebin
2022-09-08 15:38           ` Andrew Morton
2022-09-08 16:15             ` Dennis Zhou
2022-09-03 19:35 ` [PATCH] ipc/msg.c: " Manfred Spraul
2022-09-05 12:12   ` Sun, Jiebin
2022-09-05 19:35 ` [PATCH v2 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
2022-09-05 19:35   ` [PATCH v2 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
2022-09-05 19:35   ` [PATCH v2 1/2] percpu: Add percpu_counter_add_local Jiebin Sun
2022-09-05 19:31     ` Shakeel Butt
2022-09-06  8:41       ` Sun, Jiebin
2022-09-06 16:54 ` [PATCH v3 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
2022-09-06 16:54   ` [PATCH v3 1/2] percpu: Add percpu_counter_add_local Jiebin Sun
2022-09-06 16:54   ` [PATCH v3 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
2022-09-09 20:36 ` [PATCH v5 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
2022-09-09 20:36   ` [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
2022-09-09 16:37     ` Tim Chen
2022-09-10  1:37     ` kernel test robot
2022-09-10  8:15     ` kernel test robot
2022-09-10  8:26     ` kernel test robot
2022-09-09 20:36   ` [PATCH v5 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
2022-09-09 16:11     ` Tim Chen
2022-09-13 19:25 ` [PATCH v6 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
2022-09-13 19:25   ` [PATCH v6 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
2022-09-18 11:08     ` Manfred Spraul
2022-09-20  6:01       ` Sun, Jiebin
2022-09-13 19:25   ` [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
2022-09-18 12:53     ` Manfred Spraul
2022-09-20  2:36       ` Sun, Jiebin
2022-09-20  4:53         ` Manfred Spraul
2022-09-20  5:50           ` Sun, Jiebin
2022-09-20 15:08           ` [PATCH] ipc/msg: avoid negative value by overflow in msginfo Jiebin Sun

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220907172516.1210842-1-jiebin.sun@intel.com \
    --to=jiebin.sun@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.mikhalitsyn@virtuozzo.com \
    --cc=cl@linux.com \
    --cc=dennis@kernel.org \
    --cc=ebiederm@xmission.com \
    --cc=feng.tang@intel.com \
    --cc=legion@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=manfred@colorfullife.com \
    --cc=shakeelb@google.com \
    --cc=tianyou.li@intel.com \
    --cc=tim.c.chen@intel.com \
    --cc=tj@kernel.org \
    --cc=vasily.averin@linux.dev \
    --cc=wangyang.guo@intel.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).