RCU Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH rcu-dev 1/2] rcuperf: Add ability to increase object allocation size
@ 2020-03-06  1:16 Joel Fernandes (Google)
  2020-03-06  1:16 ` [PATCH rcu-dev 2/2] rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching Joel Fernandes (Google)
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Joel Fernandes (Google) @ 2020-03-06  1:16 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google),
	Davidlohr Bueso, Josh Triplett, Lai Jiangshan, Mathieu Desnoyers,
	Paul E. McKenney, rcu, Steven Rostedt, urezki

This allows us to increase memory pressure dynamically using a new
rcuperf boot command line parameter called 'kfree_mult'.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

---
 kernel/rcu/rcuperf.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index a4a8d097d84d9..16dd1e6b7c09f 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -88,6 +88,7 @@ torture_param(bool, shutdown, RCUPERF_SHUTDOWN,
 torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
 torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() perf test?");
+torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
 
 static char *perf_type = "rcu";
 module_param(perf_type, charp, 0444);
@@ -635,7 +636,7 @@ kfree_perf_thread(void *arg)
 		}
 
 		for (i = 0; i < kfree_alloc_num; i++) {
-			alloc_ptr = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
+			alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
 			if (!alloc_ptr)
 				return -ENOMEM;
 
@@ -722,6 +723,8 @@ kfree_perf_init(void)
 		schedule_timeout_uninterruptible(1);
 	}
 
+	pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
+
 	kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
 			       GFP_KERNEL);
 	if (kfree_reader_tasks == NULL) {
-- 
2.25.0.265.gbab2e86ba0-goog


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH rcu-dev 2/2] rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching
  2020-03-06  1:16 [PATCH rcu-dev 1/2] rcuperf: Add ability to increase object allocation size Joel Fernandes (Google)
@ 2020-03-06  1:16 ` Joel Fernandes (Google)
  2020-03-06  1:37 ` [PATCH rcu-dev 1/2] rcuperf: Add ability to increase object allocation size Paul E. McKenney
  2020-03-09  3:26 ` kbuild test robot
  2 siblings, 0 replies; 4+ messages in thread
From: Joel Fernandes (Google) @ 2020-03-06  1:16 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google),
	urezki, Davidlohr Bueso, Josh Triplett, Lai Jiangshan,
	Mathieu Desnoyers, Paul E. McKenney, rcu, Steven Rostedt

To improve kfree() performance, we recently added batching, which
dramatically brings down the number of grace periods while giving us
the ability to use kfree_bulk() for efficient kfree'ing.

However, this has increased the likelihood of an OOM condition under a
heavy kfree_rcu() flood on small-memory systems. This patch introduces a
shrinker which starts grace periods right away if the system is under
memory pressure due to the existence of objects that have still not
started a grace period.

With this patch, I do not observe an OOM anymore on a system with 512MB
RAM and 8 CPUs, with the following rcuperf options:

rcuperf.kfree_loops=20000 rcuperf.kfree_alloc_num=8000
rcuperf.kfree_rcu_test=1 rcuperf.kfree_mult=2

Otherwise it easily OOMs with the above parameters.

NOTE:
1. On systems with no memory pressure, the patch has no effect as intended.
2. In the future, we can use this same mechanism to defer grace periods
   even further, by relying on shrinkers carefully.

Cc: urezki@gmail.com
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 kernel/rcu/tree.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d3f52c30efb0c..2e0f66f04360e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2823,6 +2823,8 @@ struct kfree_rcu_cpu {
 	struct delayed_work monitor_work;
 	bool monitor_todo;
 	bool initialized;
+	// Number of objects for which GP not started
+	int count;
 };
 
 static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
@@ -2936,6 +2938,8 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
 				krcp->head = NULL;
 			}
 
+			krcp->count = 0;
+
 			/*
 			 * One work is per one batch, so there are two "free channels",
 			 * "bhead_free" and "head_free" the batch can handle. It can be
@@ -3072,6 +3076,8 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 		krcp->head = head;
 	}
 
+	krcp->count++;
+
 	// Set timer to drain after KFREE_DRAIN_JIFFIES.
 	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
 	    !krcp->monitor_todo) {
@@ -3086,6 +3092,58 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
+static unsigned long
+kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+	int cpu;
+	unsigned long flags, count = 0;
+
+	/* Snapshot count of all CPUs */
+	for_each_online_cpu(cpu) {
+		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+		spin_lock_irqsave(&krcp->lock, flags);
+		count += krcp->count;
+		spin_unlock_irqrestore(&krcp->lock, flags);
+	}
+
+	return count;
+}
+
+static unsigned long
+kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+	int cpu, freed = 0;
+	unsigned long flags;
+
+	for_each_online_cpu(cpu) {
+		int count;
+		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+		count = krcp->count;
+		spin_lock_irqsave(&krcp->lock, flags);
+		if (krcp->monitor_todo)
+			kfree_rcu_drain_unlock(krcp, flags);
+		else
+			spin_unlock_irqrestore(&krcp->lock, flags);
+
+		sc->nr_to_scan -= count;
+		freed += count;
+
+		if (sc->nr_to_scan <= 0)
+			break;
+	}
+
+	return freed;
+}
+
+static struct shrinker kfree_rcu_shrinker = {
+	.count_objects = kfree_rcu_shrink_count,
+	.scan_objects = kfree_rcu_shrink_scan,
+	.batch = 0,
+	.seeks = DEFAULT_SEEKS,
+};
+
 void __init kfree_rcu_scheduler_running(void)
 {
 	int cpu;
@@ -4007,6 +4065,8 @@ static void __init kfree_rcu_batch_init(void)
 		INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
 		krcp->initialized = true;
 	}
+	if (register_shrinker(&kfree_rcu_shrinker))
+		pr_err("Failed to register kfree_rcu() shrinker!\n");
 }
 
 void __init rcu_init(void)
-- 
2.25.0.265.gbab2e86ba0-goog


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH rcu-dev 1/2] rcuperf: Add ability to increase object allocation size
  2020-03-06  1:16 [PATCH rcu-dev 1/2] rcuperf: Add ability to increase object allocation size Joel Fernandes (Google)
  2020-03-06  1:16 ` [PATCH rcu-dev 2/2] rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching Joel Fernandes (Google)
@ 2020-03-06  1:37 ` Paul E. McKenney
  2020-03-09  3:26 ` kbuild test robot
  2 siblings, 0 replies; 4+ messages in thread
From: Paul E. McKenney @ 2020-03-06  1:37 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: linux-kernel, Davidlohr Bueso, Josh Triplett, Lai Jiangshan,
	Mathieu Desnoyers, rcu, Steven Rostedt, urezki

On Thu, Mar 05, 2020 at 08:16:25PM -0500, Joel Fernandes (Google) wrote:
> This allows us to increase memory pressure dynamically using a new
> rcuperf boot command line parameter called 'rcumult'.
> 
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

Looks plausible to me at a very quick first glance.  Uladzislau, thoughts?

							Thanx, Paul

> ---
>  kernel/rcu/rcuperf.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
> index a4a8d097d84d9..16dd1e6b7c09f 100644
> --- a/kernel/rcu/rcuperf.c
> +++ b/kernel/rcu/rcuperf.c
> @@ -88,6 +88,7 @@ torture_param(bool, shutdown, RCUPERF_SHUTDOWN,
>  torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
>  torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
>  torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() perf test?");
> +torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
>  
>  static char *perf_type = "rcu";
>  module_param(perf_type, charp, 0444);
> @@ -635,7 +636,7 @@ kfree_perf_thread(void *arg)
>  		}
>  
>  		for (i = 0; i < kfree_alloc_num; i++) {
> -			alloc_ptr = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
> +			alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
>  			if (!alloc_ptr)
>  				return -ENOMEM;
>  
> @@ -722,6 +723,8 @@ kfree_perf_init(void)
>  		schedule_timeout_uninterruptible(1);
>  	}
>  
> +	pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
> +
>  	kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
>  			       GFP_KERNEL);
>  	if (kfree_reader_tasks == NULL) {
> -- 
> 2.25.0.265.gbab2e86ba0-goog
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH rcu-dev 1/2] rcuperf: Add ability to increase object allocation size
  2020-03-06  1:16 [PATCH rcu-dev 1/2] rcuperf: Add ability to increase object allocation size Joel Fernandes (Google)
  2020-03-06  1:16 ` [PATCH rcu-dev 2/2] rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching Joel Fernandes (Google)
  2020-03-06  1:37 ` [PATCH rcu-dev 1/2] rcuperf: Add ability to increase object allocation size Paul E. McKenney
@ 2020-03-09  3:26 ` kbuild test robot
  2 siblings, 0 replies; 4+ messages in thread
From: kbuild test robot @ 2020-03-09  3:26 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: kbuild-all, linux-kernel@vger.kernel.org, Joel Fernandes ,
	Davidlohr Bueso, Josh Triplett, Lai Jiangshan, Mathieu Desnoyers,
	Paul E. McKenney, rcu, Steven Rostedt, urezki

[-- Attachment #1: Type: text/plain, Size: 3132 bytes --]

Hi Joel,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on rcu/dev]
[also build test WARNING on rcu/rcu/next linus/master v5.6-rc5 next-20200306]
[cannot apply to linux/master]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:    https://github.com/0day-ci/linux/commits/Joel-Fernandes-Google/rcuperf-Add-ability-to-increase-object-allocation-size/20200306-110336
base:   https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
config: i386-randconfig-a001-20200308 (attached as .config)
compiler: gcc-4.9 (Debian 4.9.2-10+deb8u1) 4.9.2
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from include/linux/kernel.h:15:0,
                    from kernel/rcu/rcuperf.c:13:
   kernel/rcu/rcuperf.c: In function 'kfree_perf_init':
>> kernel/rcu/rcuperf.c:726:65: warning: format '%lu' expects argument of type 'long unsigned int', but argument 2 has type 'unsigned int' [-Wformat=]
     pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
                                                                    ^
   include/linux/printk.h:300:35: note: in definition of macro 'pr_alert'
     printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
                                      ^

vim +726 kernel/rcu/rcuperf.c

   708	
   709	static int __init
   710	kfree_perf_init(void)
   711	{
   712		long i;
   713		int firsterr = 0;
   714	
   715		kfree_nrealthreads = compute_real(kfree_nthreads);
   716		/* Start up the kthreads. */
   717		if (shutdown) {
   718			init_waitqueue_head(&shutdown_wq);
   719			firsterr = torture_create_kthread(kfree_perf_shutdown, NULL,
   720							  shutdown_task);
   721			if (firsterr)
   722				goto unwind;
   723			schedule_timeout_uninterruptible(1);
   724		}
   725	
 > 726		pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
   727	
   728		kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
   729				       GFP_KERNEL);
   730		if (kfree_reader_tasks == NULL) {
   731			firsterr = -ENOMEM;
   732			goto unwind;
   733		}
   734	
   735		for (i = 0; i < kfree_nrealthreads; i++) {
   736			firsterr = torture_create_kthread(kfree_perf_thread, (void *)i,
   737							  kfree_reader_tasks[i]);
   738			if (firsterr)
   739				goto unwind;
   740		}
   741	
   742		while (atomic_read(&n_kfree_perf_thread_started) < kfree_nrealthreads)
   743			schedule_timeout_uninterruptible(1);
   744	
   745		torture_init_end();
   746		return 0;
   747	
   748	unwind:
   749		torture_init_end();
   750		kfree_perf_cleanup();
   751		return firsterr;
   752	}
   753	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 33776 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, back to index

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-06  1:16 [PATCH rcu-dev 1/2] rcuperf: Add ability to increase object allocation size Joel Fernandes (Google)
2020-03-06  1:16 ` [PATCH rcu-dev 2/2] rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching Joel Fernandes (Google)
2020-03-06  1:37 ` [PATCH rcu-dev 1/2] rcuperf: Add ability to increase object allocation size Paul E. McKenney
2020-03-09  3:26 ` kbuild test robot

RCU Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/rcu/0 rcu/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 rcu rcu/ https://lore.kernel.org/rcu \
		rcu@vger.kernel.org
	public-inbox-index rcu

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.rcu


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git