* [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter
@ 2022-09-02 15:22 Jiebin Sun
  2022-09-02 16:06 ` Andrew Morton
                   ` (6 more replies)
  0 siblings, 7 replies; 40+ messages in thread
From: Jiebin Sun @ 2022-09-02 15:22 UTC (permalink / raw)
  To: akpm, vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, jiebin.sun

The msg_bytes and msg_hdrs atomic counters are frequently
updated when the IPC msg queue is in heavy use, causing heavy
cache bouncing and overhead. Changing them to percpu_counters
greatly improves the performance. Since there is one unique
ipc namespace, the additional memory cost is minimal. Reading
of the counts is done in the msgctl call, which is infrequent,
so the need to sum up the per-CPU counts is infrequent.

Apply the patch and test the pts/stress-ng-1.4.0
-- system v message passing (160 threads).

Score gain: 3.38x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
---
 include/linux/ipc_namespace.h  |  5 +++--
 include/linux/percpu_counter.h |  9 +++++++++
 ipc/msg.c                      | 30 +++++++++++++++++-------------
 lib/percpu_counter.c           |  6 ++++++
 4 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
 #include <linux/refcount.h>
 #include <linux/rhashtable-types.h>
 #include <linux/sysctl.h>
+#include <linux/percpu_counter.h>
 
 struct user_namespace;
 
@@ -36,8 +37,8 @@ struct ipc_namespace {
 	unsigned int	msg_ctlmax;
 	unsigned int	msg_ctlmnb;
 	unsigned int	msg_ctlmni;
-	atomic_t	msg_bytes;
-	atomic_t	msg_hdrs;
+	struct percpu_counter percpu_msg_bytes;
+	struct percpu_counter percpu_msg_hdrs;
 
 	size_t		shm_ctlmax;
 	size_t		shm_ctlall;
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 01861eebed79..6eec30122cc3 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -40,6 +40,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
 
 void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
+void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount);
 void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
 			      s32 batch);
 s64 __percpu_counter_sum(struct percpu_counter *fbc);
@@ -138,6 +139,14 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 	preempt_enable();
 }
 
+static inline void
+percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+	preempt_disable();
+	fbc->count += amount;
+	preempt_enable();
+}
+
 static inline void
 percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..1b498537f05e 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,6 +39,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
 #include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>
 
 #include <asm/current.h>
 #include <linux/uaccess.h>
@@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	rcu_read_unlock();
 
 	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
-		atomic_dec(&ns->msg_hdrs);
+		percpu_counter_add_local(&ns->percpu_msg_hdrs, -1);
 		free_msg(msg);
 	}
-	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+	percpu_counter_add_local(&ns->percpu_msg_bytes, -(msq->q_cbytes));
 	ipc_update_pid(&msq->q_lspid, NULL);
 	ipc_update_pid(&msq->q_lrpid, NULL);
 	ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
 	msginfo->msgssz = MSGSSZ;
 	msginfo->msgseg = MSGSEG;
 	down_read(&msg_ids(ns).rwsem);
-	if (cmd == MSG_INFO) {
+	if (cmd == MSG_INFO)
 		msginfo->msgpool = msg_ids(ns).in_use;
-		msginfo->msgmap = atomic_read(&ns->msg_hdrs);
-		msginfo->msgtql = atomic_read(&ns->msg_bytes);
+	max_idx = ipc_get_maxidx(&msg_ids(ns));
+	up_read(&msg_ids(ns).rwsem);
+	if (cmd == MSG_INFO) {
+		msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
+		msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
 	} else {
 		msginfo->msgmap = MSGMAP;
 		msginfo->msgpool = MSGPOOL;
 		msginfo->msgtql = MSGTQL;
 	}
-	max_idx = ipc_get_maxidx(&msg_ids(ns));
-	up_read(&msg_ids(ns).rwsem);
 	return (max_idx < 0) ? 0 : max_idx;
 }
 
@@ -935,8 +937,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		list_add_tail(&msg->m_list, &msq->q_messages);
 		msq->q_cbytes += msgsz;
 		msq->q_qnum++;
-		atomic_add(msgsz, &ns->msg_bytes);
-		atomic_inc(&ns->msg_hdrs);
+		percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
+		percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
 	}
 
 	err = 0;
@@ -1159,8 +1161,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
 			msq->q_rtime = ktime_get_real_seconds();
 			ipc_update_pid(&msq->q_lrpid, task_tgid(current));
 			msq->q_cbytes -= msg->m_ts;
-			atomic_sub(msg->m_ts, &ns->msg_bytes);
-			atomic_dec(&ns->msg_hdrs);
+			percpu_counter_add_local(&ns->percpu_msg_bytes, -(msg->m_ts));
+			percpu_counter_add_local(&ns->percpu_msg_hdrs, -1);
 			ss_wakeup(msq, &wake_q, false);
 
 			goto out_unlock0;
@@ -1303,14 +1305,16 @@ void msg_init_ns(struct ipc_namespace *ns)
 	ns->msg_ctlmnb = MSGMNB;
 	ns->msg_ctlmni = MSGMNI;
 
-	atomic_set(&ns->msg_bytes, 0);
-	atomic_set(&ns->msg_hdrs, 0);
+	percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+	percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
 	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
 }
 
 #ifdef CONFIG_IPC_NS
 void msg_exit_ns(struct ipc_namespace *ns)
 {
+	percpu_counter_destroy(&ns->percpu_msg_bytes);
+	percpu_counter_destroy(&ns->percpu_msg_hdrs);
 	free_ipcs(ns, &msg_ids(ns), freeque);
 	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
 	rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index ed610b75dc32..d33cb750962a 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -72,6 +72,12 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 }
 EXPORT_SYMBOL(percpu_counter_set);
 
+void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+	this_cpu_add(*fbc->counters, amount);
+}
+EXPORT_SYMBOL(percpu_counter_add_local);
+
 /*
  * This function is both preempt and irq safe. The former is due to explicit
  * preemption disable. The latter is guaranteed by the fact that the slow path
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter
  2022-09-02 15:22 [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter Jiebin Sun
@ 2022-09-02 16:06 ` Andrew Morton
  2022-09-05 11:54   ` Sun, Jiebin
  2022-09-02 16:27 ` Shakeel Butt
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 40+ messages in thread
From: Andrew Morton @ 2022-09-02 16:06 UTC (permalink / raw)
  To: Jiebin Sun
  Cc: vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel,
	tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo

On Fri,  2 Sep 2022 23:22:43 +0800 Jiebin Sun <jiebin.sun@intel.com> wrote:

> The msg_bytes and msg_hdrs atomic counters are frequently
> updated when IPC msg queue is in heavy use, causing heavy
> cache bounce and overhead. Change them to percpu_counters
> greatly improve the performance. Since there is one unique
> ipc namespace, additional memory cost is minimal. Reading
> of the count done in msgctl call, which is infrequent. So
> the need to sum up the counts in each CPU is infrequent.
> 
> Apply the patch and test the pts/stress-ng-1.4.0
> -- system v message passing (160 threads).
> 
> Score gain: 3.38x

So this test became 3x faster?

> CPU: ICX 8380 x 2 sockets
> Core number: 40 x 2 physical cores
> Benchmark: pts/stress-ng-1.4.0
> -- system v message passing (160 threads)
> 
> ...
>
> @@ -138,6 +139,14 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
>  	preempt_enable();
>  }
>  
> +static inline void
> +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> +{
> +	preempt_disable();
> +	fbc->count += amount;
> +	preempt_enable();
> +}

What's this and why is it added?

It would be best to propose this as a separate preparatory patch. 
Fully changelogged and perhaps even with a code comment explaining why
and when it should be used.

Thanks.


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter
  2022-09-02 15:22 [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter Jiebin Sun
  2022-09-02 16:06 ` Andrew Morton
@ 2022-09-02 16:27 ` Shakeel Butt
  2022-09-05 12:02   ` Sun, Jiebin
                     ` (2 more replies)
  2022-09-03 19:35 ` [PATCH] ipc/msg.c: " Manfred Spraul
                   ` (4 subsequent siblings)
  6 siblings, 3 replies; 40+ messages in thread
From: Shakeel Butt @ 2022-09-02 16:27 UTC (permalink / raw)
  To: Jiebin Sun
  Cc: Andrew Morton, vasily.averin, Dennis Zhou, Tejun Heo,
	Christoph Lameter, Eric W. Biederman, Alexey Gladkov,
	Manfred Spraul, alexander.mikhalitsyn, Linux MM, LKML, Chen,
	Tim C, Feng Tang, Huang Ying, tianyou.li, wangyang.guo

On Fri, Sep 2, 2022 at 12:04 AM Jiebin Sun <jiebin.sun@intel.com> wrote:
>
> The msg_bytes and msg_hdrs atomic counters are frequently
> updated when IPC msg queue is in heavy use, causing heavy
> cache bounce and overhead. Change them to percpu_counters
> greatly improve the performance. Since there is one unique
> ipc namespace, additional memory cost is minimal. Reading
> of the count done in msgctl call, which is infrequent. So
> the need to sum up the counts in each CPU is infrequent.
>
> Apply the patch and test the pts/stress-ng-1.4.0
> -- system v message passing (160 threads).
>
> Score gain: 3.38x
>
> CPU: ICX 8380 x 2 sockets
> Core number: 40 x 2 physical cores
> Benchmark: pts/stress-ng-1.4.0
> -- system v message passing (160 threads)
>
> Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
[...]
>
> +void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> +{
> +       this_cpu_add(*fbc->counters, amount);
> +}
> +EXPORT_SYMBOL(percpu_counter_add_local);

Why not percpu_counter_add()? This may let fbc->count drift by more than
batch*nr_cpus. I am assuming that is not an issue for you, as you
always do an expensive sum in the slow path. As Andrew asked, this
should be a separate patch.
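
For reference, a rough sketch of the fold-over that percpu_counter_add_batch()
does (simplified; not the exact lib/percpu_counter.c code) -- it is this
fold-over that bounds the drift to batch*nr_cpus, and a purely local add
skips it:

	/*
	 * Simplified sketch: fold the per-CPU delta into fbc->count once it
	 * exceeds the batch.  A local-only add never takes this branch, so
	 * fbc->count can drift without bound until percpu_counter_sum().
	 */
	static void sketch_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
	{
		s64 count;

		preempt_disable();
		count = __this_cpu_read(*fbc->counters) + amount;
		if (abs(count) >= batch) {
			raw_spin_lock(&fbc->lock);
			fbc->count += count;
			__this_cpu_write(*fbc->counters, 0);
			raw_spin_unlock(&fbc->lock);
		} else {
			__this_cpu_write(*fbc->counters, count);
		}
		preempt_enable();
	}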


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter
  2022-09-02 15:22 [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter Jiebin Sun
  2022-09-02 16:06 ` Andrew Morton
  2022-09-02 16:27 ` Shakeel Butt
@ 2022-09-03 19:35 ` Manfred Spraul
  2022-09-05 12:12   ` Sun, Jiebin
       [not found] ` <20220905193516.846647-1-jiebin.sun@intel.com>
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 40+ messages in thread
From: Manfred Spraul @ 2022-09-03 19:35 UTC (permalink / raw)
  To: Jiebin Sun, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo

Hi Jiebin,

On 9/2/22 17:22, Jiebin Sun wrote:
> The msg_bytes and msg_hdrs atomic counters are frequently
> updated when IPC msg queue is in heavy use, causing heavy
> cache bounce and overhead. Change them to percpu_counters
> greatly improve the performance. Since there is one unique
> ipc namespace, additional memory cost is minimal.

With ipc namespaces, there is one struct per namespace, correct?

The cost is probably still ok, but the change log should be correct.


> @@ -1303,14 +1305,16 @@ void msg_init_ns(struct ipc_namespace *ns)
>   	ns->msg_ctlmnb = MSGMNB;
>   	ns->msg_ctlmni = MSGMNI;
>   
> -	atomic_set(&ns->msg_bytes, 0);
> -	atomic_set(&ns->msg_hdrs, 0);
> +	percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
> +	percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
>   	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);

These calls can fail. You must add error handling.

--

     Manfred



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter
  2022-09-02 16:06 ` Andrew Morton
@ 2022-09-05 11:54   ` Sun, Jiebin
  0 siblings, 0 replies; 40+ messages in thread
From: Sun, Jiebin @ 2022-09-05 11:54 UTC (permalink / raw)
  To: Andrew Morton
  Cc: vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel,
	tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo


On 9/3/2022 12:06 AM, Andrew Morton wrote:
> On Fri,  2 Sep 2022 23:22:43 +0800 Jiebin Sun <jiebin.sun@intel.com> wrote:
>
>> The msg_bytes and msg_hdrs atomic counters are frequently
>> updated when IPC msg queue is in heavy use, causing heavy
>> cache bounce and overhead. Change them to percpu_counters
>> greatly improve the performance. Since there is one unique
>> ipc namespace, additional memory cost is minimal. Reading
>> of the count done in msgctl call, which is infrequent. So
>> the need to sum up the counts in each CPU is infrequent.
>>
>> Apply the patch and test the pts/stress-ng-1.4.0
>> -- system v message passing (160 threads).
>>
>> Score gain: 3.38x
> So this test became 3x faster?

Yes. It is from the Phoronix Test Suite stress-ng-1.4.0 -- system v message
passing, run on a dual-socket ICX server. In this benchmark there are 160
pairs of threads doing msgsnd and msgrcv. The patch benefits more as the
number of workload threads increases.
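
For anyone unfamiliar with the workload, a minimal user-space sketch of the
System V message passing pattern each thread pair exercises (illustrative
only; the real stress-ng worker loops much more aggressively and checks
return values):

	#include <string.h>
	#include <sys/types.h>
	#include <sys/ipc.h>
	#include <sys/msg.h>

	struct msgbuf_example {
		long mtype;		/* message type, must be > 0 */
		char mtext[64];		/* payload */
	};

	int main(void)
	{
		/* one private queue shared by a sender/receiver pair */
		int qid = msgget(IPC_PRIVATE, IPC_CREAT | 0600);
		struct msgbuf_example msg = { .mtype = 1 };

		strcpy(msg.mtext, "ping");
		/* each msgsnd()/msgrcv() pair updates msg_bytes and msg_hdrs */
		msgsnd(qid, &msg, sizeof(msg.mtext), 0);
		msgrcv(qid, &msg, sizeof(msg.mtext), 1, 0);

		msgctl(qid, IPC_RMID, NULL);	/* remove the queue */
		return 0;
	}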

>
>> CPU: ICX 8380 x 2 sockets
>> Core number: 40 x 2 physical cores
>> Benchmark: pts/stress-ng-1.4.0
>> -- system v message passing (160 threads)
>>
>> ...
>>
>> @@ -138,6 +139,14 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
>>   	preempt_enable();
>>   }
>>   
>> +static inline void
>> +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
>> +{
>> +	preempt_disable();
>> +	fbc->count += amount;
>> +	preempt_enable();
>> +}
> What's this and why is it added?
>
> It would be best to propose this as a separate preparatory patch.
> Fully changelogged and perhaps even with a code comment explaining why
> and when it should be used.
>
> Thanks.

Since msgctl_info always does a full sum, there is no need to use
percpu_counter_add_batch, which updates the global count whenever the local
counter reaches the batch size. So we add percpu_counter_add_local for both
SMP and non-SMP, which only adds to the local percpu counter.
I have separated the original patch into two patches.

Thanks.



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter
  2022-09-02 16:27 ` Shakeel Butt
@ 2022-09-05 12:02   ` Sun, Jiebin
  2022-09-06 18:44   ` Tim Chen
  2022-09-07 17:25   ` [PATCH v4] ipc/msg: " Jiebin Sun
  2 siblings, 0 replies; 40+ messages in thread
From: Sun, Jiebin @ 2022-09-05 12:02 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Andrew Morton, vasily.averin, Dennis Zhou, Tejun Heo,
	Christoph Lameter, Eric W. Biederman, Alexey Gladkov,
	Manfred Spraul, alexander.mikhalitsyn, Linux MM, LKML, Chen,
	Tim C, Feng Tang, Huang Ying, tianyou.li, wangyang.guo,
	jiebin.sun


On 9/3/2022 12:27 AM, Shakeel Butt wrote:
> On Fri, Sep 2, 2022 at 12:04 AM Jiebin Sun <jiebin.sun@intel.com> wrote:
>> The msg_bytes and msg_hdrs atomic counters are frequently
>> updated when IPC msg queue is in heavy use, causing heavy
>> cache bounce and overhead. Change them to percpu_counters
>> greatly improve the performance. Since there is one unique
>> ipc namespace, additional memory cost is minimal. Reading
>> of the count done in msgctl call, which is infrequent. So
>> the need to sum up the counts in each CPU is infrequent.
>>
>> Apply the patch and test the pts/stress-ng-1.4.0
>> -- system v message passing (160 threads).
>>
>> Score gain: 3.38x
>>
>> CPU: ICX 8380 x 2 sockets
>> Core number: 40 x 2 physical cores
>> Benchmark: pts/stress-ng-1.4.0
>> -- system v message passing (160 threads)
>>
>> Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
> [...]
>> +void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
>> +{
>> +       this_cpu_add(*fbc->counters, amount);
>> +}
>> +EXPORT_SYMBOL(percpu_counter_add_local);
> Why not percpu_counter_add()? This may drift the fbc->count more than
> batch*nr_cpus. I am assuming that is not the issue for you as you
> always do an expensive sum in the slow path. As Andrew asked, this
> should be a separate patch.

Yes. msgctl_info always does a full sum, so there is no need for
the global update that percpu_counter_add does when the percpu counter
reaches the batch size. We add percpu_counter_add_local for this
case. The sum in the slow path is infrequent, so its additional cost
is much lower than atomically updating the counters on every do_msgsnd
and do_msgrcv. I have separated the original patch into two
patches.

Thanks.



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter
  2022-09-03 19:35 ` [PATCH] ipc/msg.c: " Manfred Spraul
@ 2022-09-05 12:12   ` Sun, Jiebin
  0 siblings, 0 replies; 40+ messages in thread
From: Sun, Jiebin @ 2022-09-05 12:12 UTC (permalink / raw)
  To: Manfred Spraul, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, jiebin.sun


On 9/4/2022 3:35 AM, Manfred Spraul wrote:
> Hi Jiebin,
>
> On 9/2/22 17:22, Jiebin Sun wrote:
>> The msg_bytes and msg_hdrs atomic counters are frequently
>> updated when IPC msg queue is in heavy use, causing heavy
>> cache bounce and overhead. Change them to percpu_counters
>> greatly improve the performance. Since there is one unique
>> ipc namespace, additional memory cost is minimal.
>
> With ipc namespaces, there is one struct per namespace, correct?
>
> The cost is probably still ok, but the change log should be correct.
>
Yes, that's what I wanted to say: there is one set of percpu counters per
IPC msg namespace.

Thanks.

>
>> @@ -1303,14 +1305,16 @@ void msg_init_ns(struct ipc_namespace *ns)
>>       ns->msg_ctlmnb = MSGMNB;
>>       ns->msg_ctlmni = MSGMNI;
>>   -    atomic_set(&ns->msg_bytes, 0);
>> -    atomic_set(&ns->msg_hdrs, 0);
>> +    percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
>> +    percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
>>       ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
>
> These calls can fail. You must add error handling.

I have added error handling for percpu_counter_init.

Thanks.

>
> -- 
>
>     Manfred
>
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2 1/2] percpu: Add percpu_counter_add_local
       [not found]   ` <20220905193516.846647-3-jiebin.sun@intel.com>
@ 2022-09-05 19:31     ` Shakeel Butt
  2022-09-06  8:41       ` Sun, Jiebin
  0 siblings, 1 reply; 40+ messages in thread
From: Shakeel Butt @ 2022-09-05 19:31 UTC (permalink / raw)
  To: Jiebin Sun
  Cc: akpm, vasily.averin, dennis, tj, cl, ebiederm, legion, manfred,
	alexander.mikhalitsyn, linux-mm, linux-kernel, tim.c.chen,
	feng.tang, ying.huang, tianyou.li, wangyang.guo

On Tue, Sep 06, 2022 at 03:35:16AM +0800, Jiebin Sun wrote:
> Add percpu_counter_add_local for only updating local counter
> without aggregating to global counter.

Please add why we need this, and who should use it and who shouldn't.

> 
> Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>

[...]

> diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
> index ed610b75dc32..d33cb750962a 100644
> --- a/lib/percpu_counter.c
> +++ b/lib/percpu_counter.c
> @@ -72,6 +72,12 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
>  }
>  EXPORT_SYMBOL(percpu_counter_set);
>

Add a doc comment here on why someone would want to use this?

> +void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> +{
> +	this_cpu_add(*fbc->counters, amount);
> +}
> +EXPORT_SYMBOL(percpu_counter_add_local);
> +
>  /*
>   * This function is both preempt and irq safe. The former is due to explicit
>   * preemption disable. The latter is guaranteed by the fact that the slow path
> -- 
> 2.31.1
> 


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH v2 2/2] ipc/msg: mitigate the lock contention with percpu counter
       [not found] ` <20220905193516.846647-1-jiebin.sun@intel.com>
       [not found]   ` <20220905193516.846647-3-jiebin.sun@intel.com>
@ 2022-09-05 19:35   ` Jiebin Sun
  1 sibling, 0 replies; 40+ messages in thread
From: Jiebin Sun @ 2022-09-05 19:35 UTC (permalink / raw)
  To: akpm, vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, jiebin.sun

The msg_bytes and msg_hdrs atomic counters are frequently
updated when the IPC msg queue is in heavy use, causing heavy
cache bouncing and overhead. Changing them to percpu_counters
greatly improves the performance. Since there is one unique
ipc namespace, the additional memory cost is minimal. Reading
of the counts is done in the msgctl call, which is infrequent,
so the need to sum up the per-CPU counts is infrequent.

Apply the patch and test the pts/stress-ng-1.4.0
-- system v message passing (160 threads).

Score gain: 3.38x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
---
 include/linux/ipc_namespace.h |  5 ++--
 ipc/msg.c                     | 44 ++++++++++++++++++++++++-----------
 ipc/namespace.c               |  5 +++-
 ipc/util.h                    |  4 ++--
 4 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
 #include <linux/refcount.h>
 #include <linux/rhashtable-types.h>
 #include <linux/sysctl.h>
+#include <linux/percpu_counter.h>
 
 struct user_namespace;
 
@@ -36,8 +37,8 @@ struct ipc_namespace {
 	unsigned int	msg_ctlmax;
 	unsigned int	msg_ctlmnb;
 	unsigned int	msg_ctlmni;
-	atomic_t	msg_bytes;
-	atomic_t	msg_hdrs;
+	struct percpu_counter percpu_msg_bytes;
+	struct percpu_counter percpu_msg_hdrs;
 
 	size_t		shm_ctlmax;
 	size_t		shm_ctlall;
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..87c30decb23f 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,6 +39,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
 #include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>
 
 #include <asm/current.h>
 #include <linux/uaccess.h>
@@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	rcu_read_unlock();
 
 	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
-		atomic_dec(&ns->msg_hdrs);
+		percpu_counter_add_local(&ns->percpu_msg_hdrs, -1);
 		free_msg(msg);
 	}
-	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+	percpu_counter_add_local(&ns->percpu_msg_bytes, -(msq->q_cbytes));
 	ipc_update_pid(&msq->q_lspid, NULL);
 	ipc_update_pid(&msq->q_lrpid, NULL);
 	ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
 	msginfo->msgssz = MSGSSZ;
 	msginfo->msgseg = MSGSEG;
 	down_read(&msg_ids(ns).rwsem);
-	if (cmd == MSG_INFO) {
+	if (cmd == MSG_INFO)
 		msginfo->msgpool = msg_ids(ns).in_use;
-		msginfo->msgmap = atomic_read(&ns->msg_hdrs);
-		msginfo->msgtql = atomic_read(&ns->msg_bytes);
+	max_idx = ipc_get_maxidx(&msg_ids(ns));
+	up_read(&msg_ids(ns).rwsem);
+	if (cmd == MSG_INFO) {
+		msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
+		msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
 	} else {
 		msginfo->msgmap = MSGMAP;
 		msginfo->msgpool = MSGPOOL;
 		msginfo->msgtql = MSGTQL;
 	}
-	max_idx = ipc_get_maxidx(&msg_ids(ns));
-	up_read(&msg_ids(ns).rwsem);
 	return (max_idx < 0) ? 0 : max_idx;
 }
 
@@ -935,8 +937,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		list_add_tail(&msg->m_list, &msq->q_messages);
 		msq->q_cbytes += msgsz;
 		msq->q_qnum++;
-		atomic_add(msgsz, &ns->msg_bytes);
-		atomic_inc(&ns->msg_hdrs);
+		percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
+		percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
 	}
 
 	err = 0;
@@ -1159,8 +1161,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
 			msq->q_rtime = ktime_get_real_seconds();
 			ipc_update_pid(&msq->q_lrpid, task_tgid(current));
 			msq->q_cbytes -= msg->m_ts;
-			atomic_sub(msg->m_ts, &ns->msg_bytes);
-			atomic_dec(&ns->msg_hdrs);
+			percpu_counter_add_local(&ns->percpu_msg_bytes, -(msg->m_ts));
+			percpu_counter_add_local(&ns->percpu_msg_hdrs, -1);
 			ss_wakeup(msq, &wake_q, false);
 
 			goto out_unlock0;
@@ -1297,20 +1299,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
 }
 #endif
 
-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
 {
+	int ret;
+
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
 	ns->msg_ctlmni = MSGMNI;
 
-	atomic_set(&ns->msg_bytes, 0);
-	atomic_set(&ns->msg_hdrs, 0);
+	ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+	if (ret)
+		goto fail_msg_bytes;
+	ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+	if (ret)
+		goto fail_msg_hdrs;
 	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+	return 0;
+
+	fail_msg_hdrs:
+		percpu_counter_destroy(&ns->percpu_msg_bytes);
+	fail_msg_bytes:
+		return ret;
 }
 
 #ifdef CONFIG_IPC_NS
 void msg_exit_ns(struct ipc_namespace *ns)
 {
+	percpu_counter_destroy(&ns->percpu_msg_bytes);
+	percpu_counter_destroy(&ns->percpu_msg_hdrs);
 	free_ipcs(ns, &msg_ids(ns), freeque);
 	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
 	rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index e1fcaedba4fa..8316ea585733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	if (!setup_ipc_sysctls(ns))
 		goto fail_mq;
 
+	err = msg_init_ns(ns);
+	if (err)
+		goto fail_put;
+
 	sem_init_ns(ns);
-	msg_init_ns(ns);
 	shm_init_ns(ns);
 
 	return ns;
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8..1b0086c6346f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }
 
 #ifdef CONFIG_SYSVIPC
 void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
 void shm_init_ns(struct ipc_namespace *ns);
 
 void sem_exit_ns(struct ipc_namespace *ns);
@@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns);
 void shm_exit_ns(struct ipc_namespace *ns);
 #else
 static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0;}
 static inline void shm_init_ns(struct ipc_namespace *ns) { }
 
 static inline void sem_exit_ns(struct ipc_namespace *ns) { }
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH v2 1/2] percpu: Add percpu_counter_add_local
  2022-09-05 19:31     ` [PATCH v2 1/2] percpu: Add percpu_counter_add_local Shakeel Butt
@ 2022-09-06  8:41       ` Sun, Jiebin
  0 siblings, 0 replies; 40+ messages in thread
From: Sun, Jiebin @ 2022-09-06  8:41 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: akpm, vasily.averin, dennis, tj, cl, ebiederm, legion, manfred,
	alexander.mikhalitsyn, linux-mm, linux-kernel, tim.c.chen,
	feng.tang, ying.huang, tianyou.li, wangyang.guo, jiebin.sun


On 9/6/2022 3:31 AM, Shakeel Butt wrote:
> On Tue, Sep 06, 2022 at 03:35:16AM +0800, Jiebin Sun wrote:
>> Add percpu_counter_add_local for only updating local counter
>> without aggregating to global counter.
> Please add why do we need this. Who should use this and who shouldn't.

Thanks. I have added the code comment and changelog in patch v3, describing
who should use it and who shouldn't.

>
>> Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
> [...]
>
>> diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
>> index ed610b75dc32..d33cb750962a 100644
>> --- a/lib/percpu_counter.c
>> +++ b/lib/percpu_counter.c
>> @@ -72,6 +72,12 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
>>   }
>>   EXPORT_SYMBOL(percpu_counter_set);
>>
> Add a doc comment here on why someone want to use this?
>
>> +void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
>> +{
>> +	this_cpu_add(*fbc->counters, amount);
>> +}
>> +EXPORT_SYMBOL(percpu_counter_add_local);
>> +
>>   /*
>>    * This function is both preempt and irq safe. The former is due to explicit
>>    * preemption disable. The latter is guaranteed by the fact that the slow path
>> -- 
>> 2.31.1
>>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH v3 0/2] ipc/msg: mitigate the lock contention in ipc/msg
  2022-09-02 15:22 [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter Jiebin Sun
                   ` (3 preceding siblings ...)
       [not found] ` <20220905193516.846647-1-jiebin.sun@intel.com>
@ 2022-09-06 16:54 ` Jiebin Sun
  2022-09-06 16:54   ` [PATCH v3 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
  2022-09-09 20:36 ` [PATCH v5 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
  2022-09-13 19:25 ` [PATCH v6 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
  6 siblings, 1 reply; 40+ messages in thread
From: Jiebin Sun @ 2022-09-06 16:54 UTC (permalink / raw)
  To: akpm, vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, jiebin.sun


Hi,

Here are two patches to mitigate the lock contention in ipc/msg.

The 1st patch adds the new function percpu_counter_add_local, which only updates the local counter without aggregating into the global counter. It can be used together with percpu_counter_sum if you need an accurate count. The combination can bring an obvious performance improvement over percpu_counter_add_batch when the add is called frequently and percpu_counter_sum is not in the critical path.

The 2nd patch switches ipc/msg from atomic updates to percpu_counter.
The msg_bytes and msg_hdrs atomic counters are frequently updated when the IPC msg queue is in heavy use, causing heavy cache bouncing and overhead. Changing them to percpu_counter greatly improves the performance. Since there is one percpu struct per namespace, the additional memory cost is minimal. Reading of the counts is done in the msgctl call, which is infrequent, so the need to sum up the per-CPU counts is infrequent.
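
As a rough usage sketch (with the interface added in patch 1), frequent
writers only touch the local counter and the rare reader pays for the full
sum:

	/* hot path (do_msgsnd/do_msgrcv): no global cacheline bouncing */
	percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
	percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);

	/* cold path (msgctl MSG_INFO): accurate but expensive */
	msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
	msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);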

Changes in v3:
1. Add a comment and changelog for the new function percpu_counter_add_local,
describing who should use it and who shouldn't.

Changes in v2:
1. Separate the original patch into two patches.
2. Add error handling for percpu_counter_init.

The performance gain increases as the number of workload threads grows.
Performance gain: 3.38x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)


Regards
Jiebin



^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH v3 2/2] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-06 16:54 ` [PATCH v3 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
@ 2022-09-06 16:54   ` Jiebin Sun
  0 siblings, 0 replies; 40+ messages in thread
From: Jiebin Sun @ 2022-09-06 16:54 UTC (permalink / raw)
  To: akpm, vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, jiebin.sun

The msg_bytes and msg_hdrs atomic counters are frequently
updated when the IPC msg queue is in heavy use, causing heavy
cache bouncing and overhead. Changing them to percpu_counter
greatly improves the performance. Since there is one percpu
struct per namespace, the additional memory cost is minimal.
Reading of the counts is done in the msgctl call, which is
infrequent, so the need to sum up the per-CPU counts is infrequent.

Apply the patch and test the pts/stress-ng-1.4.0
-- system v message passing (160 threads).

Score gain: 3.38x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
---
 include/linux/ipc_namespace.h |  5 ++--
 ipc/msg.c                     | 44 ++++++++++++++++++++++++-----------
 ipc/namespace.c               |  5 +++-
 ipc/util.h                    |  4 ++--
 4 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
 #include <linux/refcount.h>
 #include <linux/rhashtable-types.h>
 #include <linux/sysctl.h>
+#include <linux/percpu_counter.h>
 
 struct user_namespace;
 
@@ -36,8 +37,8 @@ struct ipc_namespace {
 	unsigned int	msg_ctlmax;
 	unsigned int	msg_ctlmnb;
 	unsigned int	msg_ctlmni;
-	atomic_t	msg_bytes;
-	atomic_t	msg_hdrs;
+	struct percpu_counter percpu_msg_bytes;
+	struct percpu_counter percpu_msg_hdrs;
 
 	size_t		shm_ctlmax;
 	size_t		shm_ctlall;
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..87c30decb23f 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,6 +39,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
 #include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>
 
 #include <asm/current.h>
 #include <linux/uaccess.h>
@@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	rcu_read_unlock();
 
 	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
-		atomic_dec(&ns->msg_hdrs);
+		percpu_counter_add_local(&ns->percpu_msg_hdrs, -1);
 		free_msg(msg);
 	}
-	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+	percpu_counter_add_local(&ns->percpu_msg_bytes, -(msq->q_cbytes));
 	ipc_update_pid(&msq->q_lspid, NULL);
 	ipc_update_pid(&msq->q_lrpid, NULL);
 	ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
 	msginfo->msgssz = MSGSSZ;
 	msginfo->msgseg = MSGSEG;
 	down_read(&msg_ids(ns).rwsem);
-	if (cmd == MSG_INFO) {
+	if (cmd == MSG_INFO)
 		msginfo->msgpool = msg_ids(ns).in_use;
-		msginfo->msgmap = atomic_read(&ns->msg_hdrs);
-		msginfo->msgtql = atomic_read(&ns->msg_bytes);
+	max_idx = ipc_get_maxidx(&msg_ids(ns));
+	up_read(&msg_ids(ns).rwsem);
+	if (cmd == MSG_INFO) {
+		msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
+		msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
 	} else {
 		msginfo->msgmap = MSGMAP;
 		msginfo->msgpool = MSGPOOL;
 		msginfo->msgtql = MSGTQL;
 	}
-	max_idx = ipc_get_maxidx(&msg_ids(ns));
-	up_read(&msg_ids(ns).rwsem);
 	return (max_idx < 0) ? 0 : max_idx;
 }
 
@@ -935,8 +937,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		list_add_tail(&msg->m_list, &msq->q_messages);
 		msq->q_cbytes += msgsz;
 		msq->q_qnum++;
-		atomic_add(msgsz, &ns->msg_bytes);
-		atomic_inc(&ns->msg_hdrs);
+		percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
+		percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
 	}
 
 	err = 0;
@@ -1159,8 +1161,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
 			msq->q_rtime = ktime_get_real_seconds();
 			ipc_update_pid(&msq->q_lrpid, task_tgid(current));
 			msq->q_cbytes -= msg->m_ts;
-			atomic_sub(msg->m_ts, &ns->msg_bytes);
-			atomic_dec(&ns->msg_hdrs);
+			percpu_counter_add_local(&ns->percpu_msg_bytes, -(msg->m_ts));
+			percpu_counter_add_local(&ns->percpu_msg_hdrs, -1);
 			ss_wakeup(msq, &wake_q, false);
 
 			goto out_unlock0;
@@ -1297,20 +1299,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
 }
 #endif
 
-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
 {
+	int ret;
+
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
 	ns->msg_ctlmni = MSGMNI;
 
-	atomic_set(&ns->msg_bytes, 0);
-	atomic_set(&ns->msg_hdrs, 0);
+	ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+	if (ret)
+		goto fail_msg_bytes;
+	ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+	if (ret)
+		goto fail_msg_hdrs;
 	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+	return 0;
+
+	fail_msg_hdrs:
+		percpu_counter_destroy(&ns->percpu_msg_bytes);
+	fail_msg_bytes:
+		return ret;
 }
 
 #ifdef CONFIG_IPC_NS
 void msg_exit_ns(struct ipc_namespace *ns)
 {
+	percpu_counter_destroy(&ns->percpu_msg_bytes);
+	percpu_counter_destroy(&ns->percpu_msg_hdrs);
 	free_ipcs(ns, &msg_ids(ns), freeque);
 	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
 	rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index e1fcaedba4fa..8316ea585733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	if (!setup_ipc_sysctls(ns))
 		goto fail_mq;
 
+	err = msg_init_ns(ns);
+	if (err)
+		goto fail_put;
+
 	sem_init_ns(ns);
-	msg_init_ns(ns);
 	shm_init_ns(ns);
 
 	return ns;
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8..1b0086c6346f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }
 
 #ifdef CONFIG_SYSVIPC
 void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
 void shm_init_ns(struct ipc_namespace *ns);
 
 void sem_exit_ns(struct ipc_namespace *ns);
@@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns);
 void shm_exit_ns(struct ipc_namespace *ns);
 #else
 static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0;}
 static inline void shm_init_ns(struct ipc_namespace *ns) { }
 
 static inline void sem_exit_ns(struct ipc_namespace *ns) { }
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter
  2022-09-02 16:27 ` Shakeel Butt
  2022-09-05 12:02   ` Sun, Jiebin
@ 2022-09-06 18:44   ` Tim Chen
  2022-09-07  9:39     ` Sun, Jiebin
  2022-09-07 17:25   ` [PATCH v4] ipc/msg: " Jiebin Sun
  2 siblings, 1 reply; 40+ messages in thread
From: Tim Chen @ 2022-09-06 18:44 UTC (permalink / raw)
  To: Shakeel Butt, Jiebin Sun
  Cc: Andrew Morton, vasily.averin, Dennis Zhou, Tejun Heo,
	Christoph Lameter, Eric W. Biederman, Alexey Gladkov,
	Manfred Spraul, alexander.mikhalitsyn, Linux MM, LKML, Chen,
	Tim C, Feng Tang, Huang Ying, tianyou.li, wangyang.guo

On Fri, 2022-09-02 at 09:27 -0700, Shakeel Butt wrote:
> On Fri, Sep 2, 2022 at 12:04 AM Jiebin Sun <jiebin.sun@intel.com> wrote:
> > The msg_bytes and msg_hdrs atomic counters are frequently
> > updated when IPC msg queue is in heavy use, causing heavy
> > cache bounce and overhead. Change them to percpu_counters
> > greatly improve the performance. Since there is one unique
> > ipc namespace, additional memory cost is minimal. Reading
> > of the count done in msgctl call, which is infrequent. So
> > the need to sum up the counts in each CPU is infrequent.
> > 
> > Apply the patch and test the pts/stress-ng-1.4.0
> > -- system v message passing (160 threads).
> > 
> > Score gain: 3.38x
> > 
> > CPU: ICX 8380 x 2 sockets
> > Core number: 40 x 2 physical cores
> > Benchmark: pts/stress-ng-1.4.0
> > -- system v message passing (160 threads)
> > 
> > Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
> [...]
> > +void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> > +{
> > +       this_cpu_add(*fbc->counters, amount);
> > +}
> > +EXPORT_SYMBOL(percpu_counter_add_local);
> 
> Why not percpu_counter_add()? This may drift the fbc->count more than
> batch*nr_cpus. I am assuming that is not the issue for you as you
> always do an expensive sum in the slow path. As Andrew asked, this
> should be a separate patch.

In the IPC case, the read is always done accurately with
percpu_counter_sum(), which gathers all the per-CPU counts, and
never with percpu_counter_read(), which only reads the global count.
So Jiebin was not worried about accuracy.

However, the counter is s64 and the local per-cpu counter is s32.
So the counter size effectively shrinks if we only keep the count in the
local per-cpu counter, which can overflow a lot sooner and is not okay.

Jiebin, can you try percpu_counter_add_batch with a large
batch size?  That should achieve what you want without needing
to create a percpu_counter_add_local() function, and also avoids the
overflow problem.

Tim




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter
  2022-09-06 18:44   ` Tim Chen
@ 2022-09-07  9:39     ` Sun, Jiebin
  2022-09-07 20:43       ` Andrew Morton
  0 siblings, 1 reply; 40+ messages in thread
From: Sun, Jiebin @ 2022-09-07  9:39 UTC (permalink / raw)
  To: Tim Chen, Shakeel Butt
  Cc: Andrew Morton, vasily.averin, Dennis Zhou, Tejun Heo,
	Christoph Lameter, Eric W. Biederman, Alexey Gladkov,
	Manfred Spraul, alexander.mikhalitsyn, Linux MM, LKML, Chen,
	Tim C, Feng Tang, Huang Ying, tianyou.li, wangyang.guo,
	jiebin.sun


On 9/7/2022 2:44 AM, Tim Chen wrote:
> On Fri, 2022-09-02 at 09:27 -0700, Shakeel Butt wrote:
>> On Fri, Sep 2, 2022 at 12:04 AM Jiebin Sun <jiebin.sun@intel.com> wrote:
>>> The msg_bytes and msg_hdrs atomic counters are frequently
>>> updated when IPC msg queue is in heavy use, causing heavy
>>> cache bounce and overhead. Change them to percpu_counters
>>> greatly improve the performance. Since there is one unique
>>> ipc namespace, additional memory cost is minimal. Reading
>>> of the count done in msgctl call, which is infrequent. So
>>> the need to sum up the counts in each CPU is infrequent.
>>>
>>> Apply the patch and test the pts/stress-ng-1.4.0
>>> -- system v message passing (160 threads).
>>>
>>> Score gain: 3.38x
>>>
>>> CPU: ICX 8380 x 2 sockets
>>> Core number: 40 x 2 physical cores
>>> Benchmark: pts/stress-ng-1.4.0
>>> -- system v message passing (160 threads)
>>>
>>> Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
>> [...]
>>> +void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
>>> +{
>>> +       this_cpu_add(*fbc->counters, amount);
>>> +}
>>> +EXPORT_SYMBOL(percpu_counter_add_local);
>> Why not percpu_counter_add()? This may drift the fbc->count more than
>> batch*nr_cpus. I am assuming that is not the issue for you as you
>> always do an expensive sum in the slow path. As Andrew asked, this
>> should be a separate patch.
> In the IPC case, the read is always done with the accurate read using
> percpu_counter_sum() gathering all the counts and
> never with percpu_counter_read() that only read global count.
> So Jiebin was not worry about accuracy.
>
> However, the counter is s64 and the local per cpu counter is S32.
> So the counter size has shrunk if we only keep the count in local per
> cpu counter, which can overflow a lot sooner and is not okay.
>
> Jiebin, can you try to use percpu_counter_add_batch, but using a large
> batch size.  That should achieve what you want without needing
> to create a percpu_counter_add_local() function, and also the overflow
> problem.
>
> Tim
>
I have sent out patch v4, which uses percpu_counter_add_batch. With a
tuned large batch size (1024), the performance gain is 3.17x (patch v4)
vs 3.38x (patch v3) previously in stress-ng -- message. It still gives a
significant performance improvement and a good balance between
performance gain and the overflow issue.

Jiebin

>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-07 17:25   ` [PATCH v4] ipc/msg: " Jiebin Sun
@ 2022-09-07 16:01     ` Tim Chen
  2022-09-07 21:34       ` Andrew Morton
  0 siblings, 1 reply; 40+ messages in thread
From: Tim Chen @ 2022-09-07 16:01 UTC (permalink / raw)
  To: Jiebin Sun, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, manfred, alexander.mikhalitsyn, linux-mm,
	linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo

On Thu, 2022-09-08 at 01:25 +0800, Jiebin Sun wrote:
> The msg_bytes and msg_hdrs atomic counters are frequently
> updated when IPC msg queue is in heavy use, causing heavy
> cache bounce and overhead. Change them to percpu_counter
> greatly improve the performance. Since there is one percpu
> struct per namespace, additional memory cost is minimal.
> Reading of the count done in msgctl call, which is infrequent.
> So the need to sum up the counts in each CPU is infrequent.
> 
> 
> Apply the patch and test the pts/stress-ng-1.4.0
> -- system v message passing (160 threads).
> 
> Score gain: 3.17x
> 
> 
...
>  
> +/* large batch size could reduce the times to sum up percpu counter */
> +#define MSG_PERCPU_COUNTER_BATCH 1024
> +

Jiebin, 

1024 is a small size (1/4 page).
The local per-cpu counter could overflow into the global count quickly
if it is limited to this size, since our count tracks msg size.

I'd suggest something larger, say 8*1024*1024, about
8MB, to accommodate about two large pages' worth of data.  Maybe that
will further improve throughput on stress-ng by reducing contention
on adding to the global count.

Tim




^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-02 16:27 ` Shakeel Butt
  2022-09-05 12:02   ` Sun, Jiebin
  2022-09-06 18:44   ` Tim Chen
@ 2022-09-07 17:25   ` Jiebin Sun
  2022-09-07 16:01     ` Tim Chen
  2 siblings, 1 reply; 40+ messages in thread
From: Jiebin Sun @ 2022-09-07 17:25 UTC (permalink / raw)
  To: akpm, vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, jiebin.sun

The msg_bytes and msg_hdrs atomic counters are frequently
updated when the IPC msg queue is in heavy use, causing heavy
cache bouncing and overhead. Changing them to percpu_counter
greatly improves the performance. Since there is one percpu
struct per namespace, the additional memory cost is minimal.
Reading of the counts is done in the msgctl call, which is
infrequent, so the need to sum up the per-CPU counts is infrequent.


Apply the patch and test the pts/stress-ng-1.4.0
-- system v message passing (160 threads).

Score gain: 3.17x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
---
 include/linux/ipc_namespace.h |  5 ++--
 ipc/msg.c                     | 47 ++++++++++++++++++++++++-----------
 ipc/namespace.c               |  5 +++-
 ipc/util.h                    |  4 +--
 4 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
 #include <linux/refcount.h>
 #include <linux/rhashtable-types.h>
 #include <linux/sysctl.h>
+#include <linux/percpu_counter.h>
 
 struct user_namespace;
 
@@ -36,8 +37,8 @@ struct ipc_namespace {
 	unsigned int	msg_ctlmax;
 	unsigned int	msg_ctlmnb;
 	unsigned int	msg_ctlmni;
-	atomic_t	msg_bytes;
-	atomic_t	msg_hdrs;
+	struct percpu_counter percpu_msg_bytes;
+	struct percpu_counter percpu_msg_hdrs;
 
 	size_t		shm_ctlmax;
 	size_t		shm_ctlall;
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..040cfc93d7ef 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,11 +39,15 @@
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
 #include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>
 
 #include <asm/current.h>
 #include <linux/uaccess.h>
 #include "util.h"
 
+/* large batch size could reduce the times to sum up percpu counter */
+#define MSG_PERCPU_COUNTER_BATCH 1024
+
 /* one msq_queue structure for each present queue on the system */
 struct msg_queue {
 	struct kern_ipc_perm q_perm;
@@ -285,10 +289,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	rcu_read_unlock();
 
 	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
-		atomic_dec(&ns->msg_hdrs);
+		percpu_counter_add_batch(&ns->percpu_msg_hdrs, -1, MSG_PERCPU_COUNTER_BATCH);
 		free_msg(msg);
 	}
-	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+	percpu_counter_add_batch(&ns->percpu_msg_bytes, -(msq->q_cbytes), MSG_PERCPU_COUNTER_BATCH);
 	ipc_update_pid(&msq->q_lspid, NULL);
 	ipc_update_pid(&msq->q_lrpid, NULL);
 	ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +499,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
 	msginfo->msgssz = MSGSSZ;
 	msginfo->msgseg = MSGSEG;
 	down_read(&msg_ids(ns).rwsem);
-	if (cmd == MSG_INFO) {
+	if (cmd == MSG_INFO)
 		msginfo->msgpool = msg_ids(ns).in_use;
-		msginfo->msgmap = atomic_read(&ns->msg_hdrs);
-		msginfo->msgtql = atomic_read(&ns->msg_bytes);
+	max_idx = ipc_get_maxidx(&msg_ids(ns));
+	up_read(&msg_ids(ns).rwsem);
+	if (cmd == MSG_INFO) {
+		msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
+		msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
 	} else {
 		msginfo->msgmap = MSGMAP;
 		msginfo->msgpool = MSGPOOL;
 		msginfo->msgtql = MSGTQL;
 	}
-	max_idx = ipc_get_maxidx(&msg_ids(ns));
-	up_read(&msg_ids(ns).rwsem);
 	return (max_idx < 0) ? 0 : max_idx;
 }
 
@@ -935,8 +940,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		list_add_tail(&msg->m_list, &msq->q_messages);
 		msq->q_cbytes += msgsz;
 		msq->q_qnum++;
-		atomic_add(msgsz, &ns->msg_bytes);
-		atomic_inc(&ns->msg_hdrs);
+		percpu_counter_add_batch(&ns->percpu_msg_bytes, msgsz, MSG_PERCPU_COUNTER_BATCH);
+		percpu_counter_add_batch(&ns->percpu_msg_hdrs, 1, MSG_PERCPU_COUNTER_BATCH);
 	}
 
 	err = 0;
@@ -1159,8 +1164,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
 			msq->q_rtime = ktime_get_real_seconds();
 			ipc_update_pid(&msq->q_lrpid, task_tgid(current));
 			msq->q_cbytes -= msg->m_ts;
-			atomic_sub(msg->m_ts, &ns->msg_bytes);
-			atomic_dec(&ns->msg_hdrs);
+			percpu_counter_add_batch(&ns->percpu_msg_bytes, -(msg->m_ts), MSG_PERCPU_COUNTER_BATCH);
+			percpu_counter_add_batch(&ns->percpu_msg_hdrs, -1, MSG_PERCPU_COUNTER_BATCH);
 			ss_wakeup(msq, &wake_q, false);
 
 			goto out_unlock0;
@@ -1297,20 +1302,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
 }
 #endif
 
-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
 {
+	int ret;
+
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
 	ns->msg_ctlmni = MSGMNI;
 
-	atomic_set(&ns->msg_bytes, 0);
-	atomic_set(&ns->msg_hdrs, 0);
+	ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+	if (ret)
+		goto fail_msg_bytes;
+	ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+	if (ret)
+		goto fail_msg_hdrs;
 	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+	return 0;
+
+	fail_msg_hdrs:
+		percpu_counter_destroy(&ns->percpu_msg_bytes);
+	fail_msg_bytes:
+		return ret;
 }
 
 #ifdef CONFIG_IPC_NS
 void msg_exit_ns(struct ipc_namespace *ns)
 {
+	percpu_counter_destroy(&ns->percpu_msg_bytes);
+	percpu_counter_destroy(&ns->percpu_msg_hdrs);
 	free_ipcs(ns, &msg_ids(ns), freeque);
 	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
 	rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index e1fcaedba4fa..8316ea585733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	if (!setup_ipc_sysctls(ns))
 		goto fail_mq;
 
+	err = msg_init_ns(ns);
+	if (err)
+		goto fail_put;
+
 	sem_init_ns(ns);
-	msg_init_ns(ns);
 	shm_init_ns(ns);
 
 	return ns;
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8..1b0086c6346f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }
 
 #ifdef CONFIG_SYSVIPC
 void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
 void shm_init_ns(struct ipc_namespace *ns);
 
 void sem_exit_ns(struct ipc_namespace *ns);
@@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns);
 void shm_exit_ns(struct ipc_namespace *ns);
 #else
 static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0;}
 static inline void shm_init_ns(struct ipc_namespace *ns) { }
 
 static inline void sem_exit_ns(struct ipc_namespace *ns) { }
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter
  2022-09-07  9:39     ` Sun, Jiebin
@ 2022-09-07 20:43       ` Andrew Morton
  0 siblings, 0 replies; 40+ messages in thread
From: Andrew Morton @ 2022-09-07 20:43 UTC (permalink / raw)
  To: Sun, Jiebin
  Cc: Tim Chen, Shakeel Butt, vasily.averin, Dennis Zhou, Tejun Heo,
	Christoph Lameter, Eric W. Biederman, Alexey Gladkov,
	Manfred Spraul, alexander.mikhalitsyn, Linux MM, LKML, Chen,
	Tim C, Feng Tang, Huang Ying, tianyou.li, wangyang.guo

On Wed, 7 Sep 2022 17:39:47 +0800 "Sun, Jiebin" <jiebin.sun@intel.com> wrote:

> I have sent out the patch v4 which use percpu_counter_add_batch. If we use
> a tuned large batch size (1024),

Oh.  Why not simply use a batch size of INT_MAX?

> the performance gain is 3.17x (patch v4)
> vs 3.38x (patch v3) previously in stress-ng -- message. It still has
> significant performance improvement and also good balance between
> performance gain and overflow issue.


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-07 16:01     ` Tim Chen
@ 2022-09-07 21:34       ` Andrew Morton
  2022-09-07 22:10         ` Tim Chen
  2022-09-08  8:25         ` Sun, Jiebin
  0 siblings, 2 replies; 40+ messages in thread
From: Andrew Morton @ 2022-09-07 21:34 UTC (permalink / raw)
  To: Tim Chen
  Cc: Jiebin Sun, vasily.averin, shakeelb, dennis, tj, cl, ebiederm,
	legion, manfred, alexander.mikhalitsyn, linux-mm, linux-kernel,
	tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo

On Wed, 07 Sep 2022 09:01:53 -0700 Tim Chen <tim.c.chen@linux.intel.com> wrote:

> On Thu, 2022-09-08 at 01:25 +0800, Jiebin Sun wrote:
> > The msg_bytes and msg_hdrs atomic counters are frequently
> > updated when IPC msg queue is in heavy use, causing heavy
> > cache bounce and overhead. Change them to percpu_counter
> > greatly improve the performance. Since there is one percpu
> > struct per namespace, additional memory cost is minimal.
> > Reading of the count done in msgctl call, which is infrequent.
> > So the need to sum up the counts in each CPU is infrequent.
> > 
> > 
> > Apply the patch and test the pts/stress-ng-1.4.0
> > -- system v message passing (160 threads).
> > 
> > Score gain: 3.17x
> > 
> > 
> ...
> >  
> > +/* large batch size could reduce the times to sum up percpu counter */
> > +#define MSG_PERCPU_COUNTER_BATCH 1024
> > +
> 
> Jiebin, 
> 
> 1024 is a small size (1/4 page). 
> The local per cpu counter could overflow to the gloabal count quickly
> if it is limited to this size, since our count tracks msg size.
>   
> I'll suggest something larger, say 8*1024*1024, about
> 8MB to accommodate about 2 large page worth of data.  Maybe that
> will further improve throughput on stress-ng by reducing contention
> on adding to the global count.
> 

I think this concept of a percpu_counter_add() which is massively
biased to the write side and with very rare reading is a legitimate
use-case.  Perhaps it should become an addition to the formal interface.
Something like

/* 
 * comment goes here
 */
static inline void percpu_counter_add_local(struct percpu_counter *fbc,
					    s64 amount)
{
	percpu_counter_add_batch(fbc, amount, INT_MAX);
}

and percpu_counter_sub_local(), I guess.

The only instance I can see is
block/blk-cgroup-rwstat.h:blkg_rwstat_add() which is using INT_MAX/2
because it always uses percpu_counter_sum_positive() on the read side.

But that makes two!
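
For illustration, a minimal usage sketch of the pattern discussed here
(hypothetical caller; percpu_counter_add_local() is the helper proposed
above, and the read side has to use percpu_counter_sum() rather than
percpu_counter_read() so the per-CPU deltas get folded in):

#include <linux/percpu_counter.h>

/* hot path: called on every message, accumulates in the local per-cpu counter */
static void example_account_send(struct percpu_counter *bytes, size_t msgsz)
{
	percpu_counter_add_local(bytes, msgsz);
}

/* rare path: e.g. an *_INFO request, sums the per-cpu deltas into one value */
static s64 example_report_bytes(struct percpu_counter *bytes)
{
	return percpu_counter_sum(bytes);
}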


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-07 21:34       ` Andrew Morton
@ 2022-09-07 22:10         ` Tim Chen
  2022-09-08  8:25         ` Sun, Jiebin
  1 sibling, 0 replies; 40+ messages in thread
From: Tim Chen @ 2022-09-07 22:10 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jiebin Sun, vasily.averin, shakeelb, dennis, tj, cl, ebiederm,
	legion, manfred, alexander.mikhalitsyn, linux-mm, linux-kernel,
	tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo

On Wed, 2022-09-07 at 14:34 -0700, Andrew Morton wrote:
> 
> I think this concept of a percpu_counter_add() which is massively
> biased to the write side and with very rare reading is a legitimate
> use-case.  Perhaps it should become an addition to the formal interface.
> Something like
> 
> /* 
>  * comment goes here
>  */
> static inline void percpu_counter_add_local(struct percpu_counter *fbc,
> 					    s64 amount)
> {
> 	percpu_counter_add_batch(fbc, amount, INT_MAX);
> }
> 
> and percpu_counter_sub_local(), I guess.
> 
> The only instance I can see is
> block/blk-cgroup-rwstat.h:blkg_rwstat_add() which is using INT_MAX/2
> because it always uses percpu_counter_sum_positive() on the read side.
> 
> But that makes two!

Sure. We can create this function and use it for both cases.  No objections.

Tim



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-07 21:34       ` Andrew Morton
  2022-09-07 22:10         ` Tim Chen
@ 2022-09-08  8:25         ` Sun, Jiebin
  2022-09-08 15:38           ` Andrew Morton
  1 sibling, 1 reply; 40+ messages in thread
From: Sun, Jiebin @ 2022-09-08  8:25 UTC (permalink / raw)
  To: Andrew Morton, Tim Chen
  Cc: vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel,
	tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo,
	jiebin.sun


On 9/8/2022 5:34 AM, Andrew Morton wrote:
> On Wed, 07 Sep 2022 09:01:53 -0700 Tim Chen <tim.c.chen@linux.intel.com> wrote:
>
>> On Thu, 2022-09-08 at 01:25 +0800, Jiebin Sun wrote:
>>> The msg_bytes and msg_hdrs atomic counters are frequently
>>> updated when IPC msg queue is in heavy use, causing heavy
>>> cache bounce and overhead. Change them to percpu_counter
>>> greatly improve the performance. Since there is one percpu
>>> struct per namespace, additional memory cost is minimal.
>>> Reading of the count done in msgctl call, which is infrequent.
>>> So the need to sum up the counts in each CPU is infrequent.
>>>
>>>
>>> Apply the patch and test the pts/stress-ng-1.4.0
>>> -- system v message passing (160 threads).
>>>
>>> Score gain: 3.17x
>>>
>>>
>> ...
>>>   
>>> +/* large batch size could reduce the times to sum up percpu counter */
>>> +#define MSG_PERCPU_COUNTER_BATCH 1024
>>> +
>> Jiebin,
>>
>> 1024 is a small size (1/4 page).
>> The local per cpu counter could overflow to the global count quickly
>> if it is limited to this size, since our count tracks msg size.
>>    
>> I'll suggest something larger, say 8*1024*1024, about
>> 8MB to accommodate about 2 large page worth of data.  Maybe that
>> will further improve throughput on stress-ng by reducing contention
>> on adding to the global count.
>>
> I think this concept of a percpu_counter_add() which is massively
> biased to the write side and with very rare reading is a legitimate
> use-case.  Perhaps it should become an addition to the formal interface.
> Something like
>
> /*
>   * comment goes here
>   */
> static inline void percpu_counter_add_local(struct percpu_counter *fbc,
> 					    s64 amount)
> {
> 	percpu_counter_add_batch(fbc, amount, INT_MAX);
> }
>
> and percpu_counter_sub_local(), I guess.
>
> The only instance I can see is
> block/blk-cgroup-rwstat.h:blkg_rwstat_add() which is using INT_MAX/2
> because it always uses percpu_counter_sum_positive() on the read side.
>
> But that makes two!


Yes. Using INT_MAX or INT_MAX/2 could give a big performance improvement
in the heavy-writing but rare-reading case. In our case, if the local
percpu counter is near to INT_MAX and there comes a big msgsz, the
overflow issue could happen. So I think INT_MAX/2, which is used in
blkg_rwstat_add(), might be a better choice:

	percpu_counter_add_batch(&ns->percpu_msg_bytes, msgsz, batch);

I will send the performance data and draft patch out for discussion.

Jiebin



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-08  8:25         ` Sun, Jiebin
@ 2022-09-08 15:38           ` Andrew Morton
  2022-09-08 16:15             ` Dennis Zhou
  0 siblings, 1 reply; 40+ messages in thread
From: Andrew Morton @ 2022-09-08 15:38 UTC (permalink / raw)
  To: Sun, Jiebin
  Cc: Tim Chen, vasily.averin, shakeelb, dennis, tj, cl, ebiederm,
	legion, manfred, alexander.mikhalitsyn, linux-mm, linux-kernel,
	tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo

On Thu, 8 Sep 2022 16:25:47 +0800 "Sun, Jiebin" <jiebin.sun@intel.com> wrote:

> In our case, if the local 
> percpu counter is near to INT_MAX and there comes a big msgsz, the 
> overflow issue could happen.

percpu_counter_add_batch() handles this - your big message
won't overflow an s64.


Looking at percpu_counter_add_batch(), is this tweak right?

- don't need to update *fbc->counters inside the lock
- that __this_cpu_sub() is an obscure way of zeroing the thing

--- a/lib/percpu_counter.c~a
+++ a/lib/percpu_counter.c
@@ -89,8 +89,8 @@ void percpu_counter_add_batch(struct per
 		unsigned long flags;
 		raw_spin_lock_irqsave(&fbc->lock, flags);
 		fbc->count += count;
-		__this_cpu_sub(*fbc->counters, count - amount);
 		raw_spin_unlock_irqrestore(&fbc->lock, flags);
+		__this_cpu_write(*fbc->counters, 0);
 	} else {
 		this_cpu_add(*fbc->counters, amount);
 	}
_



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v4] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-08 15:38           ` Andrew Morton
@ 2022-09-08 16:15             ` Dennis Zhou
  0 siblings, 0 replies; 40+ messages in thread
From: Dennis Zhou @ 2022-09-08 16:15 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Sun, Jiebin, Tim Chen, vasily.averin, shakeelb, tj, cl, ebiederm,
	legion, manfred, alexander.mikhalitsyn, linux-mm, linux-kernel,
	tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo

Hello,

On Thu, Sep 08, 2022 at 08:38:59AM -0700, Andrew Morton wrote:
> On Thu, 8 Sep 2022 16:25:47 +0800 "Sun, Jiebin" <jiebin.sun@intel.com> wrote:
> 
> > In our case, if the local 
> > percpu counter is near to INT_MAX and there comes a big msgsz, the 
> > overflow issue could happen.
> 
> percpu_counter_add_batch() handles this - your big message
> won't overflow an s64.
> 
> 
> Looking at percpu_counter_add_batch(), is this tweak right?
> 
> - don't need to update *fbc->counters inside the lock
> - that __this_cpu_sub() is an obscure way of zeroing the thing
> 
> --- a/lib/percpu_counter.c~a
> +++ a/lib/percpu_counter.c
> @@ -89,8 +89,8 @@ void percpu_counter_add_batch(struct per
>  		unsigned long flags;
>  		raw_spin_lock_irqsave(&fbc->lock, flags);
>  		fbc->count += count;
> -		__this_cpu_sub(*fbc->counters, count - amount);
>  		raw_spin_unlock_irqrestore(&fbc->lock, flags);
> +		__this_cpu_write(*fbc->counters, 0);

I don't think this is irq safe. It'd be best to leave it inside the
spinlock as then we can use __this_cpu_write() to 0 in there.

>  	} else {
>  		this_cpu_add(*fbc->counters, amount);
>  	}
> _
> 

Thanks,
Dennis
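
A rough sketch of the variant described above -- keeping the zeroing of the
per-cpu counter under the irq-safe lock, but still replacing the
__this_cpu_sub() with a plain write -- might look like this (illustrative
only, not a tested patch):

void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
{
	s64 count;

	preempt_disable();
	count = __this_cpu_read(*fbc->counters) + amount;
	if (abs(count) >= batch) {
		unsigned long flags;

		raw_spin_lock_irqsave(&fbc->lock, flags);
		fbc->count += count;
		/* zero the local counter while interrupts are still disabled */
		__this_cpu_write(*fbc->counters, 0);
		raw_spin_unlock_irqrestore(&fbc->lock, flags);
	} else {
		this_cpu_add(*fbc->counters, amount);
	}
	preempt_enable();
}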


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v5 2/2] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-09 20:36   ` [PATCH v5 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
@ 2022-09-09 16:11     ` Tim Chen
  0 siblings, 0 replies; 40+ messages in thread
From: Tim Chen @ 2022-09-09 16:11 UTC (permalink / raw)
  To: Jiebin Sun, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, manfred, alexander.mikhalitsyn, linux-mm,
	linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo

On Sat, 2022-09-10 at 04:36 +0800, Jiebin Sun wrote:
> The msg_bytes and msg_hdrs atomic counters are frequently
> updated when IPC msg queue is in heavy use, causing heavy
> cache bounce and overhead. Change them to percpu_counter
> greatly improve the performance. Since there is one percpu
> struct per namespace, additional memory cost is minimal.
> Reading of the count done in msgctl call, which is infrequent.
> So the need to sum up the counts in each CPU is infrequent.
> 
> Apply the patch and test the pts/stress-ng-1.4.0
> -- system v message passing (160 threads).
> 
> Score gain: 3.99x
> 
> CPU: ICX 8380 x 2 sockets
> Core number: 40 x 2 physical cores
> Benchmark: pts/stress-ng-1.4.0
> -- system v message passing (160 threads)

Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>

> 
> Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
> ---
> 



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local
  2022-09-09 20:36   ` [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
@ 2022-09-09 16:37     ` Tim Chen
  2022-09-10  1:37     ` kernel test robot
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 40+ messages in thread
From: Tim Chen @ 2022-09-09 16:37 UTC (permalink / raw)
  To: Jiebin Sun, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, manfred, alexander.mikhalitsyn, linux-mm,
	linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo

On Sat, 2022-09-10 at 04:36 +0800, Jiebin Sun wrote:
> The batch size in percpu_counter_add_batch should be very large
> in heavy writing and rare reading case. Add the "_local" version,
> and mostly it will do local adding, reduce the global updating
> and mitigate lock contention in writing.
> 
> Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
> ---
>  include/linux/percpu_counter.h | 38 ++++++++++++++++++++++++++++++++++
>  1 file changed, 38 insertions(+)
> 
> diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
> index 01861eebed79..6dd7eaba8527 100644
> --- a/include/linux/percpu_counter.h
> +++ b/include/linux/percpu_counter.h
> @@ -15,6 +15,9 @@
>  #include <linux/types.h>
>  #include <linux/gfp.h>
>  
> +/* percpu_counter batch for local add or sub */
> +#define PERCPU_COUNTER_LOCAL_BATCH	INT_MAX
> +
>  #ifdef CONFIG_SMP
>  
>  struct percpu_counter {
> @@ -56,6 +59,27 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
>  	percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
>  }
>  
> +/*
> + * Use this function in heavy writing but rare reading case. The large
> + * batch size will reduce the global updating.

Suggest revising the comment, so it is clear we need to use
percpu_counter_sum() to access the counter: 

With percpu_counter_add_local() and percpu_counter_sub_local(),
counts are accumulated in local per cpu counter and not in
fbc->count until local count overflows PERCPU_COUNTER_LOCAL_BATCH.
This makes counter write efficient.

But percpu_counter_sum(), instead of percpu_counter_read(),
needs to be used to add up the counts
from each CPU to account for all the local counts.
So percpu_counter_add_local() and percpu_counter_sub_local()
should be used when a counter is updated frequently and read
rarely.


> + */
> +static inline void
> +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> +{
> +	percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH);
> +}
> +
> +/*
> + * Similar with percpu_counter_add_local, use it in heavy writing but
> + * rare reading case. The large batch size will reduce the global
> + * updating.
> + */
> +static inline void
> +percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
> +{
> +	percpu_counter_add_batch(fbc, -amount, PERCPU_COUNTER_LOCAL_BATCH);
> +}
> +
>  static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
>  {
>  	s64 ret = __percpu_counter_sum(fbc);
> @@ -138,6 +162,20 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
>  	preempt_enable();
>  }
>  
> +/* no smp percpu_counter_add_local is the same with percpu_counter_add */
> +static inline void
> +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> +{
> +	percpu_counter_add(fbc, amount);
> +}
> +
> +/* no smp percpu_counter_sub_local is the same with percpu_counter_sub */
> +static inline void
> +percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
> +{
> +	percpu_counter_sub(fbc, amount);
> +}
> +
>  static inline void
>  percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
>  {



^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH v5 0/2] ipc/msg: mitigate the lock contention in ipc/msg
  2022-09-02 15:22 [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter Jiebin Sun
                   ` (4 preceding siblings ...)
  2022-09-06 16:54 ` [PATCH v3 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
@ 2022-09-09 20:36 ` Jiebin Sun
  2022-09-09 20:36   ` [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
  2022-09-09 20:36   ` [PATCH v5 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
  2022-09-13 19:25 ` [PATCH v6 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
  6 siblings, 2 replies; 40+ messages in thread
From: Jiebin Sun @ 2022-09-09 20:36 UTC (permalink / raw)
  To: akpm, vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, jiebin.sun


Hi,

Here are two patches to mitigate the lock contention in ipc/msg.

The 1st patch is to add the new interface percpu_counter_add_local and
percpu_counter_sub_local. The batch size in percpu_counter_add_batch should
be very large in the heavy-writing and rare-reading case. Add the "_local"
version, which will mostly do local adding, reducing the global updating and
mitigating lock contention on writes.

The 2nd patch is to use percpu_counter instead of atomic update in ipc/msg.
The msg_bytes and msg_hdrs atomic counters are frequently updated when the IPC
msg queue is in heavy use, causing heavy cache bouncing and overhead. Changing
them to percpu_counter greatly improves the performance. Since there is one
percpu struct per namespace, the additional memory cost is minimal. Reading of
the count is done in the msgctl call, which is infrequent, so the need to sum
up the counts from each CPU is infrequent.

Changes in v5:
1. Use INT_MAX as the large batch size in percpu_counter_add_local and
percpu_counter_sub_local.
2. Use the latest kernel 6.0-rc4 as the baseline for performance test.
3. Move percpu_counter_add_local and percpu_counter_sub_local from
percpu_counter.c to percpu_counter.h.

Changes in v3:
1. Add a comment and changelog for the new function percpu_counter_add_local,
explaining who should use it and who shouldn't.

Changes in v2:
1. Separate the original patch into two patches.
2. Add error handling for percpu_counter_init.

The performance gain increases as the number of workload threads grows.
Performance gain: 3.99x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)


Regards
Jiebin



^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local
  2022-09-09 20:36 ` [PATCH v5 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
@ 2022-09-09 20:36   ` Jiebin Sun
  2022-09-09 16:37     ` Tim Chen
                       ` (3 more replies)
  2022-09-09 20:36   ` [PATCH v5 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
  1 sibling, 4 replies; 40+ messages in thread
From: Jiebin Sun @ 2022-09-09 20:36 UTC (permalink / raw)
  To: akpm, vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, jiebin.sun

The batch size in percpu_counter_add_batch should be very large
in the heavy-writing and rare-reading case. Add the "_local" version,
which will mostly do local adding, reducing the global updating
and mitigating lock contention on writes.

Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
---
 include/linux/percpu_counter.h | 38 ++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 01861eebed79..6dd7eaba8527 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -15,6 +15,9 @@
 #include <linux/types.h>
 #include <linux/gfp.h>
 
+/* percpu_counter batch for local add or sub */
+#define PERCPU_COUNTER_LOCAL_BATCH	INT_MAX
+
 #ifdef CONFIG_SMP
 
 struct percpu_counter {
@@ -56,6 +59,27 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 	percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
 }
 
+/*
+ * Use this function in heavy writing but rare reading case. The large
+ * batch size will reduce the global updating.
+ */
+static inline void
+percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+	percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH);
+}
+
+/*
+ * Similar with percpu_counter_add_local, use it in heavy writing but
+ * rare reading case. The large batch size will reduce the global
+ * updating.
+ */
+static inline void
+percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
+{
+	percpu_counter_add_batch(fbc, -amount, PERCPU_COUNTER_LOCAL_BATCH);
+}
+
 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
 {
 	s64 ret = __percpu_counter_sum(fbc);
@@ -138,6 +162,20 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 	preempt_enable();
 }
 
+/* no smp percpu_counter_add_local is the same with percpu_counter_add */
+static inline void
+percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+	percpu_counter_add(fbc, amount);
+}
+
+/* no smp percpu_counter_sub_local is the same with percpu_counter_sub */
+static inline void
+percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
+{
+	percpu_counter_sub(fbc, amount);
+}
+
 static inline void
 percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH v5 2/2] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-09 20:36 ` [PATCH v5 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
  2022-09-09 20:36   ` [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
@ 2022-09-09 20:36   ` Jiebin Sun
  2022-09-09 16:11     ` Tim Chen
  1 sibling, 1 reply; 40+ messages in thread
From: Jiebin Sun @ 2022-09-09 20:36 UTC (permalink / raw)
  To: akpm, vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, jiebin.sun

The msg_bytes and msg_hdrs atomic counters are frequently
updated when the IPC msg queue is in heavy use, causing heavy
cache bouncing and overhead. Changing them to percpu_counter
greatly improves the performance. Since there is one percpu
struct per namespace, the additional memory cost is minimal.
Reading of the count is done in the msgctl call, which is
infrequent, so the need to sum up the counts from each CPU
is infrequent.

Apply the patch and test the pts/stress-ng-1.4.0
-- system v message passing (160 threads).

Score gain: 3.99x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
---
 include/linux/ipc_namespace.h |  5 ++--
 ipc/msg.c                     | 44 ++++++++++++++++++++++++-----------
 ipc/namespace.c               |  5 +++-
 ipc/util.h                    |  4 ++--
 4 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
 #include <linux/refcount.h>
 #include <linux/rhashtable-types.h>
 #include <linux/sysctl.h>
+#include <linux/percpu_counter.h>
 
 struct user_namespace;
 
@@ -36,8 +37,8 @@ struct ipc_namespace {
 	unsigned int	msg_ctlmax;
 	unsigned int	msg_ctlmnb;
 	unsigned int	msg_ctlmni;
-	atomic_t	msg_bytes;
-	atomic_t	msg_hdrs;
+	struct percpu_counter percpu_msg_bytes;
+	struct percpu_counter percpu_msg_hdrs;
 
 	size_t		shm_ctlmax;
 	size_t		shm_ctlall;
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..f2bb4c193ecf 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,6 +39,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
 #include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>
 
 #include <asm/current.h>
 #include <linux/uaccess.h>
@@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	rcu_read_unlock();
 
 	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
-		atomic_dec(&ns->msg_hdrs);
+		percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
 		free_msg(msg);
 	}
-	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+	percpu_counter_sub_local(&ns->percpu_msg_bytes, msq->q_cbytes);
 	ipc_update_pid(&msq->q_lspid, NULL);
 	ipc_update_pid(&msq->q_lrpid, NULL);
 	ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
 	msginfo->msgssz = MSGSSZ;
 	msginfo->msgseg = MSGSEG;
 	down_read(&msg_ids(ns).rwsem);
-	if (cmd == MSG_INFO) {
+	if (cmd == MSG_INFO)
 		msginfo->msgpool = msg_ids(ns).in_use;
-		msginfo->msgmap = atomic_read(&ns->msg_hdrs);
-		msginfo->msgtql = atomic_read(&ns->msg_bytes);
+	max_idx = ipc_get_maxidx(&msg_ids(ns));
+	up_read(&msg_ids(ns).rwsem);
+	if (cmd == MSG_INFO) {
+		msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
+		msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
 	} else {
 		msginfo->msgmap = MSGMAP;
 		msginfo->msgpool = MSGPOOL;
 		msginfo->msgtql = MSGTQL;
 	}
-	max_idx = ipc_get_maxidx(&msg_ids(ns));
-	up_read(&msg_ids(ns).rwsem);
 	return (max_idx < 0) ? 0 : max_idx;
 }
 
@@ -935,8 +937,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		list_add_tail(&msg->m_list, &msq->q_messages);
 		msq->q_cbytes += msgsz;
 		msq->q_qnum++;
-		atomic_add(msgsz, &ns->msg_bytes);
-		atomic_inc(&ns->msg_hdrs);
+		percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
+		percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
 	}
 
 	err = 0;
@@ -1159,8 +1161,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
 			msq->q_rtime = ktime_get_real_seconds();
 			ipc_update_pid(&msq->q_lrpid, task_tgid(current));
 			msq->q_cbytes -= msg->m_ts;
-			atomic_sub(msg->m_ts, &ns->msg_bytes);
-			atomic_dec(&ns->msg_hdrs);
+			percpu_counter_sub_local(&ns->percpu_msg_bytes, msg->m_ts);
+			percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
 			ss_wakeup(msq, &wake_q, false);
 
 			goto out_unlock0;
@@ -1297,20 +1299,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
 }
 #endif
 
-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
 {
+	int ret;
+
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
 	ns->msg_ctlmni = MSGMNI;
 
-	atomic_set(&ns->msg_bytes, 0);
-	atomic_set(&ns->msg_hdrs, 0);
+	ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+	if (ret)
+		goto fail_msg_bytes;
+	ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+	if (ret)
+		goto fail_msg_hdrs;
 	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+	return 0;
+
+	fail_msg_hdrs:
+		percpu_counter_destroy(&ns->percpu_msg_bytes);
+	fail_msg_bytes:
+		return ret;
 }
 
 #ifdef CONFIG_IPC_NS
 void msg_exit_ns(struct ipc_namespace *ns)
 {
+	percpu_counter_destroy(&ns->percpu_msg_bytes);
+	percpu_counter_destroy(&ns->percpu_msg_hdrs);
 	free_ipcs(ns, &msg_ids(ns), freeque);
 	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
 	rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index e1fcaedba4fa..8316ea585733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	if (!setup_ipc_sysctls(ns))
 		goto fail_mq;
 
+	err = msg_init_ns(ns);
+	if (err)
+		goto fail_put;
+
 	sem_init_ns(ns);
-	msg_init_ns(ns);
 	shm_init_ns(ns);
 
 	return ns;
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8..1b0086c6346f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }
 
 #ifdef CONFIG_SYSVIPC
 void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
 void shm_init_ns(struct ipc_namespace *ns);
 
 void sem_exit_ns(struct ipc_namespace *ns);
@@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns);
 void shm_exit_ns(struct ipc_namespace *ns);
 #else
 static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0;}
 static inline void shm_init_ns(struct ipc_namespace *ns) { }
 
 static inline void sem_exit_ns(struct ipc_namespace *ns) { }
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local
  2022-09-09 20:36   ` [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
  2022-09-09 16:37     ` Tim Chen
@ 2022-09-10  1:37     ` kernel test robot
  2022-09-10  8:15     ` kernel test robot
  2022-09-10  8:26     ` kernel test robot
  3 siblings, 0 replies; 40+ messages in thread
From: kernel test robot @ 2022-09-10  1:37 UTC (permalink / raw)
  To: Jiebin Sun, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, manfred, alexander.mikhalitsyn, linux-mm,
	linux-kernel
  Cc: kbuild-all, tim.c.chen, feng.tang, ying.huang, tianyou.li,
	wangyang.guo, jiebin.sun

Hi Jiebin,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.0-rc4 next-20220909]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Jiebin-Sun/percpu-Add-percpu_counter_add_local-and-percpu_counter_sub_local/20220910-053730
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: x86_64-randconfig-a013 (https://download.01.org/0day-ci/archive/20220910/202209100911.HBo9ZqIU-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-5) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/44e7288c01b9b125c7a5f97591ca26ffd90e3385
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Jiebin-Sun/percpu-Add-percpu_counter_add_local-and-percpu_counter_sub_local/20220910-053730
        git checkout 44e7288c01b9b125c7a5f97591ca26ffd90e3385
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=x86_64 prepare

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   In file included from include/linux/sched/user.h:7,
                    from include/linux/cred.h:17,
                    from include/linux/sched/signal.h:10,
                    from include/linux/rcuwait.h:6,
                    from include/linux/percpu-rwsem.h:7,
                    from include/linux/fs.h:33,
                    from include/linux/cgroup.h:17,
                    from include/linux/memcontrol.h:13,
                    from include/linux/swap.h:9,
                    from include/linux/suspend.h:5,
                    from arch/x86/kernel/asm-offsets.c:13:
   include/linux/percpu_counter.h: In function 'percpu_counter_sub_local':
>> include/linux/percpu_counter.h:176:9: error: implicit declaration of function 'percpu_counter_sub'; did you mean 'percpu_counter_set'? [-Werror=implicit-function-declaration]
     176 |         percpu_counter_sub(fbc, amount);
         |         ^~~~~~~~~~~~~~~~~~
         |         percpu_counter_set
   include/linux/percpu_counter.h: At top level:
>> include/linux/percpu_counter.h:229:20: warning: conflicting types for 'percpu_counter_sub'; have 'void(struct percpu_counter *, s64)' {aka 'void(struct percpu_counter *, long long int)'}
     229 | static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount)
         |                    ^~~~~~~~~~~~~~~~~~
>> include/linux/percpu_counter.h:229:20: error: static declaration of 'percpu_counter_sub' follows non-static declaration
   include/linux/percpu_counter.h:176:9: note: previous implicit declaration of 'percpu_counter_sub' with type 'void(struct percpu_counter *, s64)' {aka 'void(struct percpu_counter *, long long int)'}
     176 |         percpu_counter_sub(fbc, amount);
         |         ^~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors
   make[2]: *** [scripts/Makefile.build:117: arch/x86/kernel/asm-offsets.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1206: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:222: __sub-make] Error 2
   make: Target 'prepare' not remade because of errors.


vim +176 include/linux/percpu_counter.h

   171	
   172	/* no smp percpu_counter_sub_local is the same with percpu_counter_sub */
   173	static inline void
   174	percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
   175	{
 > 176		percpu_counter_sub(fbc, amount);
   177	}
   178	
   179	static inline void
   180	percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
   181	{
   182		percpu_counter_add(fbc, amount);
   183	}
   184	
   185	static inline s64 percpu_counter_read(struct percpu_counter *fbc)
   186	{
   187		return fbc->count;
   188	}
   189	
   190	/*
   191	 * percpu_counter is intended to track positive numbers. In the UP case the
   192	 * number should never be negative.
   193	 */
   194	static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc)
   195	{
   196		return fbc->count;
   197	}
   198	
   199	static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
   200	{
   201		return percpu_counter_read_positive(fbc);
   202	}
   203	
   204	static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
   205	{
   206		return percpu_counter_read(fbc);
   207	}
   208	
   209	static inline bool percpu_counter_initialized(struct percpu_counter *fbc)
   210	{
   211		return true;
   212	}
   213	
   214	static inline void percpu_counter_sync(struct percpu_counter *fbc)
   215	{
   216	}
   217	#endif	/* CONFIG_SMP */
   218	
   219	static inline void percpu_counter_inc(struct percpu_counter *fbc)
   220	{
   221		percpu_counter_add(fbc, 1);
   222	}
   223	
   224	static inline void percpu_counter_dec(struct percpu_counter *fbc)
   225	{
   226		percpu_counter_add(fbc, -1);
   227	}
   228	
 > 229	static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount)
   230	{
   231		percpu_counter_add(fbc, -amount);
   232	}
   233	

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local
  2022-09-09 20:36   ` [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
  2022-09-09 16:37     ` Tim Chen
  2022-09-10  1:37     ` kernel test robot
@ 2022-09-10  8:15     ` kernel test robot
  2022-09-10  8:26     ` kernel test robot
  3 siblings, 0 replies; 40+ messages in thread
From: kernel test robot @ 2022-09-10  8:15 UTC (permalink / raw)
  To: Jiebin Sun, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, manfred, alexander.mikhalitsyn, linux-mm,
	linux-kernel
  Cc: llvm, kbuild-all, tim.c.chen, feng.tang, ying.huang, tianyou.li,
	wangyang.guo, jiebin.sun

Hi Jiebin,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.0-rc4 next-20220909]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Jiebin-Sun/percpu-Add-percpu_counter_add_local-and-percpu_counter_sub_local/20220910-053730
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: x86_64-randconfig-a012 (https://download.01.org/0day-ci/archive/20220910/202209101659.DitbHM0V-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/44e7288c01b9b125c7a5f97591ca26ffd90e3385
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Jiebin-Sun/percpu-Add-percpu_counter_add_local-and-percpu_counter_sub_local/20220910-053730
        git checkout 44e7288c01b9b125c7a5f97591ca26ffd90e3385
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 prepare

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from arch/x86/kernel/asm-offsets.c:13:
   In file included from include/linux/suspend.h:5:
   In file included from include/linux/swap.h:9:
   In file included from include/linux/memcontrol.h:13:
   In file included from include/linux/cgroup.h:17:
   In file included from include/linux/fs.h:33:
   In file included from include/linux/percpu-rwsem.h:7:
   In file included from include/linux/rcuwait.h:6:
   In file included from include/linux/sched/signal.h:10:
   In file included from include/linux/cred.h:17:
   In file included from include/linux/sched/user.h:7:
>> include/linux/percpu_counter.h:176:2: error: implicit declaration of function 'percpu_counter_sub' is invalid in C99 [-Werror,-Wimplicit-function-declaration]
           percpu_counter_sub(fbc, amount);
           ^
   include/linux/percpu_counter.h:176:2: note: did you mean 'percpu_counter_set'?
   include/linux/percpu_counter.h:136:20: note: 'percpu_counter_set' declared here
   static inline void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
                      ^
   include/linux/percpu_counter.h:229:20: error: static declaration of 'percpu_counter_sub' follows non-static declaration
   static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount)
                      ^
   include/linux/percpu_counter.h:176:2: note: previous implicit declaration is here
           percpu_counter_sub(fbc, amount);
           ^
   2 errors generated.
   make[2]: *** [scripts/Makefile.build:117: arch/x86/kernel/asm-offsets.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1206: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:222: __sub-make] Error 2
   make: Target 'prepare' not remade because of errors.


vim +/percpu_counter_sub +176 include/linux/percpu_counter.h

   171	
   172	/* no smp percpu_counter_sub_local is the same with percpu_counter_sub */
   173	static inline void
   174	percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
   175	{
 > 176		percpu_counter_sub(fbc, amount);
   177	}
   178	

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local
  2022-09-09 20:36   ` [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
                       ` (2 preceding siblings ...)
  2022-09-10  8:15     ` kernel test robot
@ 2022-09-10  8:26     ` kernel test robot
  3 siblings, 0 replies; 40+ messages in thread
From: kernel test robot @ 2022-09-10  8:26 UTC (permalink / raw)
  To: Jiebin Sun, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, manfred, alexander.mikhalitsyn, linux-mm,
	linux-kernel
  Cc: llvm, kbuild-all, tim.c.chen, feng.tang, ying.huang, tianyou.li,
	wangyang.guo, jiebin.sun

Hi Jiebin,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.0-rc4 next-20220909]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Jiebin-Sun/percpu-Add-percpu_counter_add_local-and-percpu_counter_sub_local/20220910-053730
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: hexagon-randconfig-r041-20220909 (https://download.01.org/0day-ci/archive/20220910/202209101659.cvalTsSU-lkp@intel.com/config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 1546df49f5a6d09df78f569e4137ddb365a3e827)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/44e7288c01b9b125c7a5f97591ca26ffd90e3385
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Jiebin-Sun/percpu-Add-percpu_counter_add_local-and-percpu_counter_sub_local/20220910-053730
        git checkout 44e7288c01b9b125c7a5f97591ca26ffd90e3385
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon prepare

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from arch/hexagon/kernel/asm-offsets.c:12:
   In file included from include/linux/compat.h:17:
   In file included from include/linux/fs.h:33:
   In file included from include/linux/percpu-rwsem.h:7:
   In file included from include/linux/rcuwait.h:6:
   In file included from include/linux/sched/signal.h:10:
   In file included from include/linux/cred.h:17:
   In file included from include/linux/sched/user.h:7:
>> include/linux/percpu_counter.h:176:2: error: call to undeclared function 'percpu_counter_sub'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
           percpu_counter_sub(fbc, amount);
           ^
   include/linux/percpu_counter.h:176:2: note: did you mean 'percpu_counter_set'?
   include/linux/percpu_counter.h:136:20: note: 'percpu_counter_set' declared here
   static inline void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
                      ^
   include/linux/percpu_counter.h:229:20: error: static declaration of 'percpu_counter_sub' follows non-static declaration
   static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount)
                      ^
   include/linux/percpu_counter.h:176:2: note: previous implicit declaration is here
           percpu_counter_sub(fbc, amount);
           ^
   2 errors generated.
   make[2]: *** [scripts/Makefile.build:117: arch/hexagon/kernel/asm-offsets.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1206: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:222: __sub-make] Error 2
   make: Target 'prepare' not remade because of errors.


vim +/percpu_counter_sub +176 include/linux/percpu_counter.h

   171	
   172	/* no smp percpu_counter_sub_local is the same with percpu_counter_sub */
   173	static inline void
   174	percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
   175	{
 > 176		percpu_counter_sub(fbc, amount);
   177	}
   178	

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp


^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH v6 0/2] ipc/msg: mitigate the lock contention in ipc/msg
  2022-09-02 15:22 [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter Jiebin Sun
                   ` (5 preceding siblings ...)
  2022-09-09 20:36 ` [PATCH v5 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
@ 2022-09-13 19:25 ` Jiebin Sun
  2022-09-13 19:25   ` [PATCH v6 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
  2022-09-13 19:25   ` [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
  6 siblings, 2 replies; 40+ messages in thread
From: Jiebin Sun @ 2022-09-13 19:25 UTC (permalink / raw)
  To: akpm, vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, jiebin.sun

Hi,

Here are two patches to mitigate the lock contention in ipc/msg.

The 1st patch is to add the new interface percpu_counter_add_local and
percpu_counter_sub_local. The batch size in percpu_counter_add_batch
should be very large in the heavy-writing and rare-reading case. Add the
"_local" version, which will mostly do local adding, reducing the global
updating and mitigating lock contention on writes.

The 2nd patch is to use percpu_counter instead of atomic update in
ipc/msg. The msg_bytes and msg_hdrs atomic counters are frequently
updated when the IPC msg queue is in heavy use, causing heavy cache bouncing
and overhead. Changing them to percpu_counter greatly improves the
performance. Since there is one percpu struct per namespace, the additional
memory cost is minimal. Reading of the count is done in the msgctl call,
which is infrequent, so the need to sum up the counts from each CPU is
infrequent.

Changes in v6:
1. Revise the code comment of percpu_counter_add_local in patch 1/2.
2. Derive percpu_counter_sub_local from percpu_counter_add_local, rather
than from percpu_counter_add_batch for SMP and percpu_counter_sub for
non-SMP, to reduce code modification.

Changes in v5:
1. Use INT_MAX as the large batch size in percpu_counter_add_local and
percpu_counter_sub_local.
2. Use the latest kernel 6.0-rc4 as the baseline for performance test.
3. Move percpu_counter_add_local and percpu_counter_sub_local from
percpu_counter.c to percpu_counter.h.

Changes in v3:
1. Add a comment and changelog for the new function percpu_counter_add_local,
explaining who should use it and who shouldn't.

Changes in v2:
1. Separate the original patch into two patches.
2. Add error handling for percpu_counter_init.

The performance gain increases as the number of workload threads grows.
Performance gain: 3.99x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)


Regards
Jiebin



^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH v6 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local
  2022-09-13 19:25 ` [PATCH v6 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
@ 2022-09-13 19:25   ` Jiebin Sun
  2022-09-18 11:08     ` Manfred Spraul
  2022-09-13 19:25   ` [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
  1 sibling, 1 reply; 40+ messages in thread
From: Jiebin Sun @ 2022-09-13 19:25 UTC (permalink / raw)
  To: akpm, vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo,
	jiebin.sun, Tim Chen, kernel test robot

The batch size in percpu_counter_add_batch should be very large in
the heavy-writing and rare-reading case. Add the "_local" version,
which will mostly do local adding, reducing the global updating and
mitigating lock contention on writes.

Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Reported-by: kernel test robot <lkp@intel.com>
---
 include/linux/percpu_counter.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 01861eebed79..8ed5fba6d156 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -15,6 +15,9 @@
 #include <linux/types.h>
 #include <linux/gfp.h>
 
+/* percpu_counter batch for local add or sub */
+#define PERCPU_COUNTER_LOCAL_BATCH	INT_MAX
+
 #ifdef CONFIG_SMP
 
 struct percpu_counter {
@@ -56,6 +59,22 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 	percpu_counter_add_batch(fbc, amount, percpu_counter_batch);
 }
 
+/*
+ * With percpu_counter_add_local() and percpu_counter_sub_local(), counts
+ * are accumulated in local per cpu counter and not in fbc->count until
+ * local count overflows PERCPU_COUNTER_LOCAL_BATCH. This makes counter
+ * write efficient.
+ * But percpu_counter_sum(), instead of percpu_counter_read(), needs to be
+ * used to add up the counts from each CPU to account for all the local
+ * counts. So percpu_counter_add_local() and percpu_counter_sub_local()
+ * should be used when a counter is updated frequently and read rarely.
+ */
+static inline void
+percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+	percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH);
+}
+
 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
 {
 	s64 ret = __percpu_counter_sum(fbc);
@@ -138,6 +157,13 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 	preempt_enable();
 }
 
+/* non-SMP percpu_counter_add_local is the same with percpu_counter_add */
+static inline void
+percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
+{
+	percpu_counter_add(fbc, amount);
+}
+
 static inline void
 percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
@@ -193,4 +219,10 @@ static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount)
 	percpu_counter_add(fbc, -amount);
 }
 
+static inline void
+percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount)
+{
+	percpu_counter_add_local(fbc, -amount);
+}
+
 #endif /* _LINUX_PERCPU_COUNTER_H */
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-13 19:25 ` [PATCH v6 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
  2022-09-13 19:25   ` [PATCH v6 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
@ 2022-09-13 19:25   ` Jiebin Sun
  2022-09-18 12:53     ` Manfred Spraul
  1 sibling, 1 reply; 40+ messages in thread
From: Jiebin Sun @ 2022-09-13 19:25 UTC (permalink / raw)
  To: akpm, vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo,
	jiebin.sun, Tim Chen

The msg_bytes and msg_hdrs atomic counters are frequently
updated when the IPC msg queue is in heavy use, causing heavy
cache bouncing and overhead. Changing them to percpu_counter
greatly improves the performance. Since there is one percpu
struct per namespace, the additional memory cost is minimal.
Reading of the count is done in the msgctl call, which is
infrequent, so the need to sum up the counts from each CPU
is infrequent.

Apply the patch and test the pts/stress-ng-1.4.0
-- system v message passing (160 threads).

Score gain: 3.99x

CPU: ICX 8380 x 2 sockets
Core number: 40 x 2 physical cores
Benchmark: pts/stress-ng-1.4.0
-- system v message passing (160 threads)

Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 include/linux/ipc_namespace.h |  5 ++--
 ipc/msg.c                     | 44 ++++++++++++++++++++++++-----------
 ipc/namespace.c               |  5 +++-
 ipc/util.h                    |  4 ++--
 4 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index e3e8c8662b49..e8240cf2611a 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -11,6 +11,7 @@
 #include <linux/refcount.h>
 #include <linux/rhashtable-types.h>
 #include <linux/sysctl.h>
+#include <linux/percpu_counter.h>
 
 struct user_namespace;
 
@@ -36,8 +37,8 @@ struct ipc_namespace {
 	unsigned int	msg_ctlmax;
 	unsigned int	msg_ctlmnb;
 	unsigned int	msg_ctlmni;
-	atomic_t	msg_bytes;
-	atomic_t	msg_hdrs;
+	struct percpu_counter percpu_msg_bytes;
+	struct percpu_counter percpu_msg_hdrs;
 
 	size_t		shm_ctlmax;
 	size_t		shm_ctlall;
diff --git a/ipc/msg.c b/ipc/msg.c
index a0d05775af2c..f2bb4c193ecf 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,6 +39,7 @@
 #include <linux/nsproxy.h>
 #include <linux/ipc_namespace.h>
 #include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>
 
 #include <asm/current.h>
 #include <linux/uaccess.h>
@@ -285,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	rcu_read_unlock();
 
 	list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
-		atomic_dec(&ns->msg_hdrs);
+		percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
 		free_msg(msg);
 	}
-	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+	percpu_counter_sub_local(&ns->percpu_msg_bytes, msq->q_cbytes);
 	ipc_update_pid(&msq->q_lspid, NULL);
 	ipc_update_pid(&msq->q_lrpid, NULL);
 	ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
 	msginfo->msgssz = MSGSSZ;
 	msginfo->msgseg = MSGSEG;
 	down_read(&msg_ids(ns).rwsem);
-	if (cmd == MSG_INFO) {
+	if (cmd == MSG_INFO)
 		msginfo->msgpool = msg_ids(ns).in_use;
-		msginfo->msgmap = atomic_read(&ns->msg_hdrs);
-		msginfo->msgtql = atomic_read(&ns->msg_bytes);
+	max_idx = ipc_get_maxidx(&msg_ids(ns));
+	up_read(&msg_ids(ns).rwsem);
+	if (cmd == MSG_INFO) {
+		msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
+		msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
 	} else {
 		msginfo->msgmap = MSGMAP;
 		msginfo->msgpool = MSGPOOL;
 		msginfo->msgtql = MSGTQL;
 	}
-	max_idx = ipc_get_maxidx(&msg_ids(ns));
-	up_read(&msg_ids(ns).rwsem);
 	return (max_idx < 0) ? 0 : max_idx;
 }
 
@@ -935,8 +937,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		list_add_tail(&msg->m_list, &msq->q_messages);
 		msq->q_cbytes += msgsz;
 		msq->q_qnum++;
-		atomic_add(msgsz, &ns->msg_bytes);
-		atomic_inc(&ns->msg_hdrs);
+		percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
+		percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
 	}
 
 	err = 0;
@@ -1159,8 +1161,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
 			msq->q_rtime = ktime_get_real_seconds();
 			ipc_update_pid(&msq->q_lrpid, task_tgid(current));
 			msq->q_cbytes -= msg->m_ts;
-			atomic_sub(msg->m_ts, &ns->msg_bytes);
-			atomic_dec(&ns->msg_hdrs);
+			percpu_counter_sub_local(&ns->percpu_msg_bytes, msg->m_ts);
+			percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
 			ss_wakeup(msq, &wake_q, false);
 
 			goto out_unlock0;
@@ -1297,20 +1299,34 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
 }
 #endif
 
-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
 {
+	int ret;
+
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
 	ns->msg_ctlmni = MSGMNI;
 
-	atomic_set(&ns->msg_bytes, 0);
-	atomic_set(&ns->msg_hdrs, 0);
+	ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+	if (ret)
+		goto fail_msg_bytes;
+	ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+	if (ret)
+		goto fail_msg_hdrs;
 	ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+	return 0;
+
+	fail_msg_hdrs:
+		percpu_counter_destroy(&ns->percpu_msg_bytes);
+	fail_msg_bytes:
+		return ret;
 }
 
 #ifdef CONFIG_IPC_NS
 void msg_exit_ns(struct ipc_namespace *ns)
 {
+	percpu_counter_destroy(&ns->percpu_msg_bytes);
+	percpu_counter_destroy(&ns->percpu_msg_hdrs);
 	free_ipcs(ns, &msg_ids(ns), freeque);
 	idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
 	rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index e1fcaedba4fa..8316ea585733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,8 +66,11 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	if (!setup_ipc_sysctls(ns))
 		goto fail_mq;
 
+	err = msg_init_ns(ns);
+	if (err)
+		goto fail_put;
+
 	sem_init_ns(ns);
-	msg_init_ns(ns);
 	shm_init_ns(ns);
 
 	return ns;
diff --git a/ipc/util.h b/ipc/util.h
index 2dd7ce0416d8..1b0086c6346f 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -64,7 +64,7 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }
 
 #ifdef CONFIG_SYSVIPC
 void sem_init_ns(struct ipc_namespace *ns);
-void msg_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
 void shm_init_ns(struct ipc_namespace *ns);
 
 void sem_exit_ns(struct ipc_namespace *ns);
@@ -72,7 +72,7 @@ void msg_exit_ns(struct ipc_namespace *ns);
 void shm_exit_ns(struct ipc_namespace *ns);
 #else
 static inline void sem_init_ns(struct ipc_namespace *ns) { }
-static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline int msg_init_ns(struct ipc_namespace *ns) { return 0;}
 static inline void shm_init_ns(struct ipc_namespace *ns) { }
 
 static inline void sem_exit_ns(struct ipc_namespace *ns) { }
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH v6 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local
  2022-09-13 19:25   ` [PATCH v6 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
@ 2022-09-18 11:08     ` Manfred Spraul
  2022-09-20  6:01       ` Sun, Jiebin
  0 siblings, 1 reply; 40+ messages in thread
From: Manfred Spraul @ 2022-09-18 11:08 UTC (permalink / raw)
  To: Jiebin Sun, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo,
	Tim Chen, kernel test robot, 1vier1

[-- Attachment #1: Type: text/plain, Size: 3332 bytes --]

Hi Jiebin,

On 9/13/22 21:25, Jiebin Sun wrote:
>   
> +/*
> + * With percpu_counter_add_local() and percpu_counter_sub_local(), counts
> + * are accumulated in local per cpu counter and not in fbc->count until
> + * local count overflows PERCPU_COUNTER_LOCAL_BATCH. This makes counter
> + * write efficient.
> + * But percpu_counter_sum(), instead of percpu_counter_read(), needs to be
> + * used to add up the counts from each CPU to account for all the local
> + * counts. So percpu_counter_add_local() and percpu_counter_sub_local()
> + * should be used when a counter is updated frequently and read rarely.
> + */
> +static inline void
> +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
> +{
> +	percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH);
> +}
> +
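
For reference, the intended usage pattern would be roughly the following
(a sketch only; the nr_items counter and the example_* helpers are made
up for illustration):

#include <linux/percpu_counter.h>

/* a made-up example counter, e.g. embedded in a long-lived struct */
static struct percpu_counter nr_items;

static int example_init(void)
{
	return percpu_counter_init(&nr_items, 0, GFP_KERNEL);
}

/* hot path: frequent updates stay in the per-cpu counters */
static void example_update(long delta)
{
	if (delta > 0)
		percpu_counter_add_local(&nr_items, delta);
	else
		percpu_counter_sub_local(&nr_items, -delta);
}

/* rare read path: percpu_counter_sum() folds in the per-cpu deltas */
static s64 example_read(void)
{
	return percpu_counter_sum(&nr_items);
}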

Unrelated to your patch, and not relevant for ipc/msg as the functions 
are not called from interrupts, but:
Aren't there races with interrupts?

> *
> * This function is both preempt and irq safe. The former is due to 
> explicit
> * preemption disable. The latter is guaranteed by the fact that the 
> slow path
> * is explicitly protected by an irq-safe spinlock whereas the fast 
> patch uses
> * this_cpu_add which is irq-safe by definition. Hence there is no need 
> muck
> * with irq state before calling this one
> */
> void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, 
> s32 batch)
> {
>        s64 count;
>
>        preempt_disable();
>        count = __this_cpu_read(*fbc->counters) + amount;
>        if (abs(count) >= batch) {
>                unsigned long flags;
>                raw_spin_lock_irqsave(&fbc->lock, flags);
>                fbc->count += count;
>                __this_cpu_sub(*fbc->counters, count - amount);
>                raw_spin_unlock_irqrestore(&fbc->lock, flags);
>        } else {
>                this_cpu_add(*fbc->counters, amount);
>        }
>        preempt_enable();
> }
> EXPORT_SYMBOL(percpu_counter_add_batch);
>
>
Race 1:

start: __this_cpu_read(*fbc->counters) = INT_MAX-1.

Call: percpu_counter_add_batch(fbc, 1, INT_MAX);

Result:

count=INT_MAX;

if (abs(count) >= batch) { // branch taken

before the raw_spin_lock_irqsave():

Interrupt

Within interrupt:

    percpu_counter_add_batch(fbc, -2*(INT_MAX-1), INT_MAX)

    count=-(INT_MAX-1);

    branch not taken

    this_cpu_add() updates fbc->counters, new value is -(INT_MAX-1)

    exit interrupt

raw_spin_lock_irqsave()

__this_cpu_sub(*fbc->counters, count - amount)

will subtract INT_MAX-1 from *fbc->counters. But the value is already 
-(INT_MAX-1) -> underflow.


Race 2: (much simpler)

start: __this_cpu_read(*fbc->counters) = 0.

Call: percpu_counter_add_batch(fbc, INT_MAX-1, INT_MAX);

amount=INT_MAX-1;

- branch not taken.

before this_cpu_add(): interrupt

within the interrupt: call percpu_counter_add_batch(fbc, INT_MAX-1, 
INT_MAX)

    new value of *fbc->counters: INT_MAX-1.

    exit interrupt

outside interrupt:

this_cpu_add(*fbc->counters, amount);

<<< overflow.

Attached is an incomplete patch (untested).
If needed, I could check the whole file and add/move the required 
local_irq_save() calls.


--

     Manfred

[-- Attachment #2: 0001-lib-percpu_counter-RFC-potential-overflow-underflow.patch --]
[-- Type: text/x-patch, Size: 1893 bytes --]

From 6a1d2a4beb180241b63f9bf57454bbe031915dd1 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Sun, 18 Sep 2022 12:17:27 +0200
Subject: [PATCH] lib/percpu_counter: [RFC] potential overflow/underflow

If an interrupt happens between __this_cpu_read(*fbc->counters) and
this_cpu_add(*fbc->counters, amount), and that interrupt modifies
the per_cpu_counter, then the this_cpu_add() after the interrupt
returns may under/overflow.

Thus: Disable interrupts.

Note: The patch is incomplete; if the race is real, then
more functions than just percpu_counter_add_batch() need to be
updated.

Especially, the !CONFIG_SMP code looks wrong to me as well:
> static inline void
> percpu_counter_add(struct percpu_counter *fbc, s64 amount)
> {
>	preempt_disable();
>	fbc->count += amount;
>	preempt_enable();
> }
The update of fbc->count is not IRQ safe.
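
A sketch of how the !CONFIG_SMP variant could be made irq safe
(untested, not part of the diff below):

static inline void
percpu_counter_add(struct percpu_counter *fbc, s64 amount)
{
	unsigned long flags;

	/* protect the read-modify-write of fbc->count against interrupts */
	local_irq_save(flags);
	fbc->count += amount;
	local_irq_restore(flags);
}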

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
---
 lib/percpu_counter.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index ed610b75dc32..39de94d59b4f 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -82,18 +82,20 @@ EXPORT_SYMBOL(percpu_counter_set);
 void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 {
 	s64 count;
+	unsigned long flags;
 
 	preempt_disable();
+	local_irq_save(flags);
 	count = __this_cpu_read(*fbc->counters) + amount;
 	if (abs(count) >= batch) {
-		unsigned long flags;
-		raw_spin_lock_irqsave(&fbc->lock, flags);
+		raw_spin_lock(&fbc->lock);
 		fbc->count += count;
 		__this_cpu_sub(*fbc->counters, count - amount);
-		raw_spin_unlock_irqrestore(&fbc->lock, flags);
+		raw_spin_unlock(&fbc->lock);
 	} else {
 		this_cpu_add(*fbc->counters, amount);
 	}
+	local_irq_restore(flags);
 	preempt_enable();
 }
 EXPORT_SYMBOL(percpu_counter_add_batch);
-- 
2.37.2


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-13 19:25   ` [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
@ 2022-09-18 12:53     ` Manfred Spraul
  2022-09-20  2:36       ` Sun, Jiebin
  0 siblings, 1 reply; 40+ messages in thread
From: Manfred Spraul @ 2022-09-18 12:53 UTC (permalink / raw)
  To: Jiebin Sun, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, Tim Chen

Hi Jiebin,

On 9/13/22 21:25, Jiebin Sun wrote:
> The msg_bytes and msg_hdrs atomic counters are frequently
> updated when IPC msg queue is in heavy use, causing heavy
> cache bounce and overhead. Change them to percpu_counter
> greatly improve the performance. Since there is one percpu
> struct per namespace, additional memory cost is minimal.
> Reading of the count done in msgctl call, which is infrequent.
> So the need to sum up the counts in each CPU is infrequent.
>
> Apply the patch and test the pts/stress-ng-1.4.0
> -- system v message passing (160 threads).
>
> Score gain: 3.99x
>
> CPU: ICX 8380 x 2 sockets
> Core number: 40 x 2 physical cores
> Benchmark: pts/stress-ng-1.4.0
> -- system v message passing (160 threads)
>
> Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Reviewed-by: Manfred Spraul <manfred@colorfullif.com>
> @@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
>   	msginfo->msgssz = MSGSSZ;
>   	msginfo->msgseg = MSGSEG;
>   	down_read(&msg_ids(ns).rwsem);
> -	if (cmd == MSG_INFO) {
> +	if (cmd == MSG_INFO)
>   		msginfo->msgpool = msg_ids(ns).in_use;
> -		msginfo->msgmap = atomic_read(&ns->msg_hdrs);
> -		msginfo->msgtql = atomic_read(&ns->msg_bytes);
> +	max_idx = ipc_get_maxidx(&msg_ids(ns));
> +	up_read(&msg_ids(ns).rwsem);
> +	if (cmd == MSG_INFO) {
> +		msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
> +		msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);

Not caused by your change, but it just now becomes obvious:

msginfo->msgmap and ->msgtql are type int, i.e. signed 32-bit, and the 
actual counters are 64-bit.
This can overflow - and I think the code should handle this. Just clamp 
the values to INT_MAX.
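
E.g. something along these lines (untested sketch, on top of your change):

	if (cmd == MSG_INFO) {
		msginfo->msgmap = min(percpu_counter_sum(&ns->percpu_msg_hdrs),
				      (s64)INT_MAX);
		msginfo->msgtql = min(percpu_counter_sum(&ns->percpu_msg_bytes),
				      (s64)INT_MAX);
	}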

-- 

     Manfred




^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-18 12:53     ` Manfred Spraul
@ 2022-09-20  2:36       ` Sun, Jiebin
  2022-09-20  4:53         ` Manfred Spraul
  0 siblings, 1 reply; 40+ messages in thread
From: Sun, Jiebin @ 2022-09-20  2:36 UTC (permalink / raw)
  To: Manfred Spraul, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo,
	Tim Chen, jiebin.sun


On 9/18/2022 8:53 PM, Manfred Spraul wrote:
> Hi Jiebin,
>
> On 9/13/22 21:25, Jiebin Sun wrote:
>> The msg_bytes and msg_hdrs atomic counters are frequently
>> updated when IPC msg queue is in heavy use, causing heavy
>> cache bounce and overhead. Change them to percpu_counter
>> greatly improve the performance. Since there is one percpu
>> struct per namespace, additional memory cost is minimal.
>> Reading of the count done in msgctl call, which is infrequent.
>> So the need to sum up the counts in each CPU is infrequent.
>>
>> Apply the patch and test the pts/stress-ng-1.4.0
>> -- system v message passing (160 threads).
>>
>> Score gain: 3.99x
>>
>> CPU: ICX 8380 x 2 sockets
>> Core number: 40 x 2 physical cores
>> Benchmark: pts/stress-ng-1.4.0
>> -- system v message passing (160 threads)
>>
>> Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
>> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> Reviewed-by: Manfred Spraul <manfred@colorfullif.com>
>> @@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace 
>> *ns, int msqid,
>>       msginfo->msgssz = MSGSSZ;
>>       msginfo->msgseg = MSGSEG;
>>       down_read(&msg_ids(ns).rwsem);
>> -    if (cmd == MSG_INFO) {
>> +    if (cmd == MSG_INFO)
>>           msginfo->msgpool = msg_ids(ns).in_use;
>> -        msginfo->msgmap = atomic_read(&ns->msg_hdrs);
>> -        msginfo->msgtql = atomic_read(&ns->msg_bytes);
>> +    max_idx = ipc_get_maxidx(&msg_ids(ns));
>> +    up_read(&msg_ids(ns).rwsem);
>> +    if (cmd == MSG_INFO) {
>> +        msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
>> +        msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
>
> Not caused by your change, it just now becomes obvious:
>
> msginfo->msgmap and ->msgtql are type int, i.e. signed 32-bit, and the 
> actual counters are 64-bit.
> This can overflow - and I think the code should handle this. Just 
> clamp the values to INT_MAX.
>
Hi Manfred,

Thanks for your advice. But I'm not sure if we can completely fix the
overflow issue in ipc/msg by clamp(val, low, INT_MAX). If the value is
beyond the s32 range, we avoid the sign reversal, but still cannot
report the accurate value.



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-20  2:36       ` Sun, Jiebin
@ 2022-09-20  4:53         ` Manfred Spraul
  2022-09-20  5:50           ` Sun, Jiebin
  2022-09-20 15:08           ` [PATCH] ipc/msg: avoid negative value by overflow in msginfo Jiebin Sun
  0 siblings, 2 replies; 40+ messages in thread
From: Manfred Spraul @ 2022-09-20  4:53 UTC (permalink / raw)
  To: Sun, Jiebin, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, Tim Chen

On 9/20/22 04:36, Sun, Jiebin wrote:
>
> On 9/18/2022 8:53 PM, Manfred Spraul wrote:
>> Hi Jiebin,
>>
>> On 9/13/22 21:25, Jiebin Sun wrote:
>>> The msg_bytes and msg_hdrs atomic counters are frequently
>>> updated when IPC msg queue is in heavy use, causing heavy
>>> cache bounce and overhead. Change them to percpu_counter
>>> greatly improve the performance. Since there is one percpu
>>> struct per namespace, additional memory cost is minimal.
>>> Reading of the count done in msgctl call, which is infrequent.
>>> So the need to sum up the counts in each CPU is infrequent.
>>>
>>> Apply the patch and test the pts/stress-ng-1.4.0
>>> -- system v message passing (160 threads).
>>>
>>> Score gain: 3.99x
>>>
>>> CPU: ICX 8380 x 2 sockets
>>> Core number: 40 x 2 physical cores
>>> Benchmark: pts/stress-ng-1.4.0
>>> -- system v message passing (160 threads)
>>>
>>> Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
>>> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
>> Reviewed-by: Manfred Spraul <manfred@colorfullif.com>
>>> @@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace 
>>> *ns, int msqid,
>>>       msginfo->msgssz = MSGSSZ;
>>>       msginfo->msgseg = MSGSEG;
>>>       down_read(&msg_ids(ns).rwsem);
>>> -    if (cmd == MSG_INFO) {
>>> +    if (cmd == MSG_INFO)
>>>           msginfo->msgpool = msg_ids(ns).in_use;
>>> -        msginfo->msgmap = atomic_read(&ns->msg_hdrs);
>>> -        msginfo->msgtql = atomic_read(&ns->msg_bytes);
>>> +    max_idx = ipc_get_maxidx(&msg_ids(ns));
>>> +    up_read(&msg_ids(ns).rwsem);
>>> +    if (cmd == MSG_INFO) {
>>> +        msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
>>> +        msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
>>
>> Not caused by your change, it just now becomes obvious:
>>
>> msginfo->msgmap and ->msgtql are type int, i.e. signed 32-bit, and 
>> the actual counters are 64-bit.
>> This can overflow - and I think the code should handle this. Just 
>> clamp the values to INT_MAX.
>>
> Hi Manfred,
>
> Thanks for your advice. But I'm not sure if we could fix the overflow 
> issue in ipc/msg totally by
>
> clamp(val, low, INT_MAX). If the value is over s32, we might avoid the 
> reversal sign, but still could
>
> not get the accurate value.

I think just clamping it to INT_MAX is the best approach.
Reporting negative values is worse than clamping. If (and only if) there 
are real users that need to know the total amount of memory allocated 
for message queues in one namespace, then we could add a MSG_INFO64 
with long values. But I would not add that right now; I do not see a 
real use case where the value would be needed.

Any other opinions?

--

     Manfred



^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter
  2022-09-20  4:53         ` Manfred Spraul
@ 2022-09-20  5:50           ` Sun, Jiebin
  2022-09-20 15:08           ` [PATCH] ipc/msg: avoid negative value by overflow in msginfo Jiebin Sun
  1 sibling, 0 replies; 40+ messages in thread
From: Sun, Jiebin @ 2022-09-20  5:50 UTC (permalink / raw)
  To: Manfred Spraul, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo, Tim Chen


On 9/20/2022 12:53 PM, Manfred Spraul wrote:
> On 9/20/22 04:36, Sun, Jiebin wrote:
>>
>> On 9/18/2022 8:53 PM, Manfred Spraul wrote:
>>> Hi Jiebin,
>>>
>>> On 9/13/22 21:25, Jiebin Sun wrote:
>>>> The msg_bytes and msg_hdrs atomic counters are frequently
>>>> updated when IPC msg queue is in heavy use, causing heavy
>>>> cache bounce and overhead. Change them to percpu_counter
>>>> greatly improve the performance. Since there is one percpu
>>>> struct per namespace, additional memory cost is minimal.
>>>> Reading of the count done in msgctl call, which is infrequent.
>>>> So the need to sum up the counts in each CPU is infrequent.
>>>>
>>>> Apply the patch and test the pts/stress-ng-1.4.0
>>>> -- system v message passing (160 threads).
>>>>
>>>> Score gain: 3.99x
>>>>
>>>> CPU: ICX 8380 x 2 sockets
>>>> Core number: 40 x 2 physical cores
>>>> Benchmark: pts/stress-ng-1.4.0
>>>> -- system v message passing (160 threads)
>>>>
>>>> Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
>>>> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
>>> Reviewed-by: Manfred Spraul <manfred@colorfullif.com>
>>>> @@ -495,17 +496,18 @@ static int msgctl_info(struct ipc_namespace 
>>>> *ns, int msqid,
>>>>       msginfo->msgssz = MSGSSZ;
>>>>       msginfo->msgseg = MSGSEG;
>>>>       down_read(&msg_ids(ns).rwsem);
>>>> -    if (cmd == MSG_INFO) {
>>>> +    if (cmd == MSG_INFO)
>>>>           msginfo->msgpool = msg_ids(ns).in_use;
>>>> -        msginfo->msgmap = atomic_read(&ns->msg_hdrs);
>>>> -        msginfo->msgtql = atomic_read(&ns->msg_bytes);
>>>> +    max_idx = ipc_get_maxidx(&msg_ids(ns));
>>>> +    up_read(&msg_ids(ns).rwsem);
>>>> +    if (cmd == MSG_INFO) {
>>>> +        msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
>>>> +        msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
>>>
>>> Not caused by your change, it just now becomes obvious:
>>>
>>> msginfo->msgmap and ->msgtql are type int, i.e. signed 32-bit, and 
>>> the actual counters are 64-bit.
>>> This can overflow - and I think the code should handle this. Just 
>>> clamp the values to INT_MAX.
>>>
>> Hi Manfred,
>>
>> Thanks for your advice. But I'm not sure if we could fix the overflow 
>> issue in ipc/msg totally by
>>
>> clamp(val, low, INT_MAX). If the value is over s32, we might avoid 
>> the reversal sign, but still could
>>
>> not get the accurate value.
>
> I think just clamping it to INT_MAX is the best approach.
> Reporting negative values is worse than clamping. If (and only if) 
> there are real users that need to know the total amount of memory 
> allocated for messages queues in one namespace, then we could add a 
> MSG_INFO64 with long values. But I would not add that right now, I do 
> not see a real use case where the value would be needed.
>
> Any other opinions?
>
> -- 
>
>     Manfred
>
>
OK. I will work on it and send it out for review.


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v6 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local
  2022-09-18 11:08     ` Manfred Spraul
@ 2022-09-20  6:01       ` Sun, Jiebin
  0 siblings, 0 replies; 40+ messages in thread
From: Sun, Jiebin @ 2022-09-20  6:01 UTC (permalink / raw)
  To: Manfred Spraul, akpm, vasily.averin, shakeelb, dennis, tj, cl,
	ebiederm, legion, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo,
	Tim Chen, kernel test robot, 1vier1


On 9/18/2022 7:08 PM, Manfred Spraul wrote:
> Hi Jiebin,
>
> On 9/13/22 21:25, Jiebin Sun wrote:
>>   +/*
>> + * With percpu_counter_add_local() and percpu_counter_sub_local(), 
>> counts
>> + * are accumulated in local per cpu counter and not in fbc->count until
>> + * local count overflows PERCPU_COUNTER_LOCAL_BATCH. This makes counter
>> + * write efficient.
>> + * But percpu_counter_sum(), instead of percpu_counter_read(), needs 
>> to be
>> + * used to add up the counts from each CPU to account for all the local
>> + * counts. So percpu_counter_add_local() and percpu_counter_sub_local()
>> + * should be used when a counter is updated frequently and read rarely.
>> + */
>> +static inline void
>> +percpu_counter_add_local(struct percpu_counter *fbc, s64 amount)
>> +{
>> +    percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH);
>> +}
>> +
>
> Unrelated to your patch, and not relevant for ipc/msg as the functions 
> are not called from interrupts, but:
> Aren't there races with interrupts?
>
>> *
>> * This function is both preempt and irq safe. The former is due to 
>> explicit
>> * preemption disable. The latter is guaranteed by the fact that the 
>> slow path
>> * is explicitly protected by an irq-safe spinlock whereas the fast 
>> patch uses
>> * this_cpu_add which is irq-safe by definition. Hence there is no 
>> need muck
>> * with irq state before calling this one
>> */
>> void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, 
>> s32 batch)
>> {
>>        s64 count;
>>
>>        preempt_disable();
>>        count = __this_cpu_read(*fbc->counters) + amount;
>>        if (abs(count) >= batch) {
>>                unsigned long flags;
>>                raw_spin_lock_irqsave(&fbc->lock, flags);
>>                fbc->count += count;
>>                __this_cpu_sub(*fbc->counters, count - amount);
>>                raw_spin_unlock_irqrestore(&fbc->lock, flags);
>>        } else {
>>                this_cpu_add(*fbc->counters, amount);
>>        }
>>        preempt_enable();
>> }
>> EXPORT_SYMBOL(percpu_counter_add_batch);
>>
>>
> Race 1:
>
> start: __this_cpu_read(*fbc->counters) = INT_MAX-1.
>
> Call: per_cpu_counter_add_batch(fbc, 1, INT_MAX);
>
> Result:
>
> count=INT_MAX;
>
> if (abs(count) >= batch) { // branch taken
>
> before the raw_spin_lock_irqsave():
>
> Interrupt
>
> Within interrupt:
>
>    per_cpu_counter_add_batch(fbc, -2*(INT_MAX-1), INT_MAX)
>
>    count=-(INT_MAX-1);
>
>    branch not taken
>
>    this_cpu_add() updates fbc->counters, new value is -(INT_MAX-1)
>
>    exit interrupt
>
> raw_spin_lock_irqsave()
>
> __this_cpu_sub(*fbc->counters, count - amount)
>
> will substract INT_MAX-1 from *fbc->counters. But the value is already 
> -(INT_MAX-1) -> underflow.
>
>
> Race 2: (much simpler)
>
> start: __this_cpu_read(*fbc->counters) = 0.
>
> Call: per_cpu_counter_add_batch(fbc, INT_MAX-1, INT_MAX);
>
> amont=INT_MAX-1;
>
> - branch not taken.
>
> before this_cpu_add(): interrupt
>
> within the interrupt: call per_cpu_counter_add_batch(fbc, INT_MAX-1, 
> INT_MAX)
>
>    new value of *fbc->counters: INT_MAX-1.
>
>    exit interrupt
>
> outside interrupt:
>
> this_cpu_add(*fbc->counters, amount);
>
> <<< overflow.
>
> Attached is an incomplete patch (untested).
> If needed, I could check the whole file and add/move the required 
> local_irq_save() calls.
>
>
> -- 
>
>     Manfred

The interrupt-protection patch for the real race case looks good to me. Thanks.




^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH] ipc/msg: avoid negative value by overflow in msginfo
  2022-09-20  4:53         ` Manfred Spraul
  2022-09-20  5:50           ` Sun, Jiebin
@ 2022-09-20 15:08           ` Jiebin Sun
  1 sibling, 0 replies; 40+ messages in thread
From: Jiebin Sun @ 2022-09-20 15:08 UTC (permalink / raw)
  To: akpm, vasily.averin, shakeelb, dennis, tj, cl, ebiederm, legion,
	manfred, alexander.mikhalitsyn, linux-mm, linux-kernel
  Cc: tim.c.chen, feng.tang, ying.huang, tianyou.li, wangyang.guo,
	jiebin.sun, Manfred Spraul, Tim Chen

The 32-bit values in struct msginfo could become negative if they are
filled from the signed 64-bit percpu counters. Clamping them to INT_MAX
avoids reporting negative values caused by overflow.

Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
Reviewed-by: Manfred Spraul <manfred@colorfullif.com>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 ipc/msg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ipc/msg.c b/ipc/msg.c
index f2bb4c193ecf..65f437e28c9b 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -501,8 +501,8 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
 	max_idx = ipc_get_maxidx(&msg_ids(ns));
 	up_read(&msg_ids(ns).rwsem);
 	if (cmd == MSG_INFO) {
-		msginfo->msgmap = percpu_counter_sum(&ns->percpu_msg_hdrs);
-		msginfo->msgtql = percpu_counter_sum(&ns->percpu_msg_bytes);
+		msginfo->msgmap = min(percpu_counter_sum(&ns->percpu_msg_hdrs), INT_MAX);
+		msginfo->msgtql = min(percpu_counter_sum(&ns->percpu_msg_bytes), INT_MAX);
 	} else {
 		msginfo->msgmap = MSGMAP;
 		msginfo->msgpool = MSGPOOL;
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2022-09-20  6:48 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-02 15:22 [PATCH] ipc/msg.c: mitigate the lock contention with percpu counter Jiebin Sun
2022-09-02 16:06 ` Andrew Morton
2022-09-05 11:54   ` Sun, Jiebin
2022-09-02 16:27 ` Shakeel Butt
2022-09-05 12:02   ` Sun, Jiebin
2022-09-06 18:44   ` Tim Chen
2022-09-07  9:39     ` Sun, Jiebin
2022-09-07 20:43       ` Andrew Morton
2022-09-07 17:25   ` [PATCH v4] ipc/msg: " Jiebin Sun
2022-09-07 16:01     ` Tim Chen
2022-09-07 21:34       ` Andrew Morton
2022-09-07 22:10         ` Tim Chen
2022-09-08  8:25         ` Sun, Jiebin
2022-09-08 15:38           ` Andrew Morton
2022-09-08 16:15             ` Dennis Zhou
2022-09-03 19:35 ` [PATCH] ipc/msg.c: " Manfred Spraul
2022-09-05 12:12   ` Sun, Jiebin
     [not found] ` <20220905193516.846647-1-jiebin.sun@intel.com>
     [not found]   ` <20220905193516.846647-3-jiebin.sun@intel.com>
2022-09-05 19:31     ` [PATCH v2 1/2] percpu: Add percpu_counter_add_local Shakeel Butt
2022-09-06  8:41       ` Sun, Jiebin
2022-09-05 19:35   ` [PATCH v2 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
2022-09-06 16:54 ` [PATCH v3 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
2022-09-06 16:54   ` [PATCH v3 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
2022-09-09 20:36 ` [PATCH v5 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
2022-09-09 20:36   ` [PATCH v5 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
2022-09-09 16:37     ` Tim Chen
2022-09-10  1:37     ` kernel test robot
2022-09-10  8:15     ` kernel test robot
2022-09-10  8:26     ` kernel test robot
2022-09-09 20:36   ` [PATCH v5 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
2022-09-09 16:11     ` Tim Chen
2022-09-13 19:25 ` [PATCH v6 0/2] ipc/msg: mitigate the lock contention in ipc/msg Jiebin Sun
2022-09-13 19:25   ` [PATCH v6 1/2] percpu: Add percpu_counter_add_local and percpu_counter_sub_local Jiebin Sun
2022-09-18 11:08     ` Manfred Spraul
2022-09-20  6:01       ` Sun, Jiebin
2022-09-13 19:25   ` [PATCH v6 2/2] ipc/msg: mitigate the lock contention with percpu counter Jiebin Sun
2022-09-18 12:53     ` Manfred Spraul
2022-09-20  2:36       ` Sun, Jiebin
2022-09-20  4:53         ` Manfred Spraul
2022-09-20  5:50           ` Sun, Jiebin
2022-09-20 15:08           ` [PATCH] ipc/msg: avoid negative value by overflow in msginfo Jiebin Sun
