rcu.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH linus/master 1/2] rcuperf: Add ability to increase object allocation size
@ 2020-03-05 22:13 Joel Fernandes (Google)
  2020-03-05 22:13 ` [PATCH linus/master 2/2] rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching Joel Fernandes (Google)
  2020-03-06  2:17 ` [PATCH linus/master 1/2] rcuperf: Add ability to increase object allocation size kbuild test robot
  0 siblings, 2 replies; 5+ messages in thread
From: Joel Fernandes (Google) @ 2020-03-05 22:13 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google),
	Davidlohr Bueso, Josh Triplett, Lai Jiangshan, Mathieu Desnoyers,
	Paul E. McKenney, rcu, Steven Rostedt, urezki

This allows us to increase memory pressure dynamically using a new
rcuperf boot command line parameter called 'kfree_mult'.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

---
 kernel/rcu/rcuperf.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index da94b89cd5310..36f0ed75c7cf3 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -87,6 +87,7 @@ torture_param(bool, shutdown, RCUPERF_SHUTDOWN,
 torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
 torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() perf test?");
+torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
 
 static char *perf_type = "rcu";
 module_param(perf_type, charp, 0444);
@@ -627,7 +628,7 @@ kfree_perf_thread(void *arg)
 
 	do {
 		for (i = 0; i < kfree_alloc_num; i++) {
-			alloc_ptr = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
+			alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
 			if (!alloc_ptr)
 				return -ENOMEM;
 
@@ -712,6 +713,8 @@ kfree_perf_init(void)
 		schedule_timeout_uninterruptible(1);
 	}
 
+	pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
+
 	kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
 			       GFP_KERNEL);
 	if (kfree_reader_tasks == NULL) {
-- 
2.25.0.265.gbab2e86ba0-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH linus/master 2/2] rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching
  2020-03-05 22:13 [PATCH linus/master 1/2] rcuperf: Add ability to increase object allocation size Joel Fernandes (Google)
@ 2020-03-05 22:13 ` Joel Fernandes (Google)
  2020-03-05 22:17   ` Joel Fernandes
  2020-03-06  2:17 ` [PATCH linus/master 1/2] rcuperf: Add ability to increase object allocation size kbuild test robot
  1 sibling, 1 reply; 5+ messages in thread
From: Joel Fernandes (Google) @ 2020-03-05 22:13 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joel Fernandes (Google),
	urezki, Davidlohr Bueso, Josh Triplett, Lai Jiangshan,
	Mathieu Desnoyers, Paul E. McKenney, rcu, Steven Rostedt

To improve kfree() performance, we recently added batching of kfree_rcu()
requests, which dramatically brings down the number of grace periods
while giving us the ability to use kfree_bulk() for efficient kfree'ing.

However, this has increased the likelihood of an OOM condition under a
heavy kfree_rcu() flood on small-memory systems. This patch introduces a
shrinker which starts grace periods right away if the system is under
memory pressure due to the existence of objects that have still not
started a grace period.

With this patch, I do not observe an OOM anymore on a system with 512MB
RAM and 8 CPUs, with the following rcuperf options:

rcuperf.kfree_loops=20000 rcuperf.kfree_alloc_num=8000
rcuperf.kfree_rcu_test=1 rcuperf.kfree_mult=2

NOTE:
On systems with no memory pressure, the patch has no effect as intended.

Cc: urezki@gmail.com
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>

---
 kernel/rcu/tree.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d91c9156fab2e..28ec35e15529d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2723,6 +2723,8 @@ struct kfree_rcu_cpu {
 	struct delayed_work monitor_work;
 	bool monitor_todo;
 	bool initialized;
+	// Number of objects for which GP not started
+	int count;
 };
 
 static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
@@ -2791,6 +2793,7 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
 
 	krwp->head_free = krcp->head;
 	krcp->head = NULL;
+	krcp->count = 0;
 	INIT_RCU_WORK(&krwp->rcu_work, kfree_rcu_work);
 	queue_rcu_work(system_wq, &krwp->rcu_work);
 	return true;
@@ -2864,6 +2867,7 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 	head->func = func;
 	head->next = krcp->head;
 	krcp->head = head;
+	krcp->count++;
 
 	// Set timer to drain after KFREE_DRAIN_JIFFIES.
 	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
@@ -2879,6 +2883,58 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
+static unsigned long
+kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+	int cpu;
+	unsigned long flags, count = 0;
+
+	/* Snapshot count of all CPUs */
+	for_each_online_cpu(cpu) {
+		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+		spin_lock_irqsave(&krcp->lock, flags);
+		count += krcp->count;
+		spin_unlock_irqrestore(&krcp->lock, flags);
+	}
+
+	return count;
+}
+
+static unsigned long
+kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+	int cpu, freed = 0;
+	unsigned long flags;
+
+	for_each_online_cpu(cpu) {
+		int count;
+		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+		count = krcp->count;
+		spin_lock_irqsave(&krcp->lock, flags);
+		if (krcp->monitor_todo)
+			kfree_rcu_drain_unlock(krcp, flags);
+		else
+			spin_unlock_irqrestore(&krcp->lock, flags);
+
+		sc->nr_to_scan -= count;
+		freed += count;
+
+		if (sc->nr_to_scan <= 0)
+			break;
+	}
+
+	return freed;
+}
+
+static struct shrinker kfree_rcu_shrinker = {
+	.count_objects = kfree_rcu_shrink_count,
+	.scan_objects = kfree_rcu_shrink_scan,
+	.batch = 0,
+	.seeks = DEFAULT_SEEKS,
+};
+
 void __init kfree_rcu_scheduler_running(void)
 {
 	int cpu;
@@ -3774,6 +3830,8 @@ static void __init kfree_rcu_batch_init(void)
 		INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
 		krcp->initialized = true;
 	}
+	if (register_shrinker(&kfree_rcu_shrinker))
+		pr_err("Failed to register kfree_rcu() shrinker!\n");
 }
 
 void __init rcu_init(void)
-- 
2.25.0.265.gbab2e86ba0-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH linus/master 2/2] rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching
  2020-03-05 22:13 ` [PATCH linus/master 2/2] rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching Joel Fernandes (Google)
@ 2020-03-05 22:17   ` Joel Fernandes
  2020-03-05 22:25     ` Joel Fernandes
  0 siblings, 1 reply; 5+ messages in thread
From: Joel Fernandes @ 2020-03-05 22:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: urezki, Davidlohr Bueso, Josh Triplett, Lai Jiangshan,
	Mathieu Desnoyers, Paul E. McKenney, rcu, Steven Rostedt

On Thu, Mar 05, 2020 at 05:13:23PM -0500, Joel Fernandes (Google) wrote:
> To reduce grace periods and improve kfree() performance, we have done
> batching recently dramatically bringing down the number of grace periods
> while giving us the ability to use kfree_bulk() for efficient kfree'ing.
> 
> However, this has increased the likelihood of OOM condition under heavy
> kfree_rcu() flood on small memory systems. This patch introduces a
> shrinker which starts grace periods right away if the system is under
> memory pressure due to existence of objects that have still not started
> a grace period.
> 
> With this patch, I do not observe an OOM anymore on a system with 512MB
> RAM and 8 CPUs, with the following rcuperf options:
> 
> rcuperf.kfree_loops=20000 rcuperf.kfree_alloc_num=8000
> rcuperf.kfree_rcu_test=1 rcuperf.kfree_mult=2

Paul,
I may have to rebase this patch on top of Vlad's kfree_bulk() work. But let
us discuss the patch first; I can rebase and repost it once it looks OK to
you. (The kfree_bulk() work should not affect the patch.)

thanks,

 - Joel


> 
> NOTE:
> On systems with no memory pressure, the patch has no effect as intended.
> 
> Cc: urezki@gmail.com
> Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
> 
> ---
>  kernel/rcu/tree.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 58 insertions(+)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index d91c9156fab2e..28ec35e15529d 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -2723,6 +2723,8 @@ struct kfree_rcu_cpu {
>  	struct delayed_work monitor_work;
>  	bool monitor_todo;
>  	bool initialized;
> +	// Number of objects for which GP not started
> +	int count;
>  };
>  
>  static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
> @@ -2791,6 +2793,7 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
>  
>  	krwp->head_free = krcp->head;
>  	krcp->head = NULL;
> +	krcp->count = 0;
>  	INIT_RCU_WORK(&krwp->rcu_work, kfree_rcu_work);
>  	queue_rcu_work(system_wq, &krwp->rcu_work);
>  	return true;
> @@ -2864,6 +2867,7 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
>  	head->func = func;
>  	head->next = krcp->head;
>  	krcp->head = head;
> +	krcp->count++;
>  
>  	// Set timer to drain after KFREE_DRAIN_JIFFIES.
>  	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
> @@ -2879,6 +2883,58 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
>  }
>  EXPORT_SYMBOL_GPL(kfree_call_rcu);
>  
> +static unsigned long
> +kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> +	int cpu;
> +	unsigned long flags, count = 0;
> +
> +	/* Snapshot count of all CPUs */
> +	for_each_online_cpu(cpu) {
> +		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
> +
> +		spin_lock_irqsave(&krcp->lock, flags);
> +		count += krcp->count;
> +		spin_unlock_irqrestore(&krcp->lock, flags);
> +	}
> +
> +	return count;
> +}
> +
> +static unsigned long
> +kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> +	int cpu, freed = 0;
> +	unsigned long flags;
> +
> +	for_each_online_cpu(cpu) {
> +		int count;
> +		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
> +
> +		count = krcp->count;
> +		spin_lock_irqsave(&krcp->lock, flags);
> +		if (krcp->monitor_todo)
> +			kfree_rcu_drain_unlock(krcp, flags);
> +		else
> +			spin_unlock_irqrestore(&krcp->lock, flags);
> +
> +		sc->nr_to_scan -= count;
> +		freed += count;
> +
> +		if (sc->nr_to_scan <= 0)
> +			break;
> +	}
> +
> +	return freed;
> +}
> +
> +static struct shrinker kfree_rcu_shrinker = {
> +	.count_objects = kfree_rcu_shrink_count,
> +	.scan_objects = kfree_rcu_shrink_scan,
> +	.batch = 0,
> +	.seeks = DEFAULT_SEEKS,
> +};
> +
>  void __init kfree_rcu_scheduler_running(void)
>  {
>  	int cpu;
> @@ -3774,6 +3830,8 @@ static void __init kfree_rcu_batch_init(void)
>  		INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
>  		krcp->initialized = true;
>  	}
> +	if (register_shrinker(&kfree_rcu_shrinker))
> +		pr_err("Failed to register kfree_rcu() shrinker!\n");
>  }
>  
>  void __init rcu_init(void)
> -- 
> 2.25.0.265.gbab2e86ba0-goog
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH linus/master 2/2] rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching
  2020-03-05 22:17   ` Joel Fernandes
@ 2020-03-05 22:25     ` Joel Fernandes
  0 siblings, 0 replies; 5+ messages in thread
From: Joel Fernandes @ 2020-03-05 22:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: urezki, Davidlohr Bueso, Josh Triplett, Lai Jiangshan,
	Mathieu Desnoyers, Paul E. McKenney, rcu, Steven Rostedt

On Thu, Mar 05, 2020 at 05:17:53PM -0500, Joel Fernandes wrote:
> On Thu, Mar 05, 2020 at 05:13:23PM -0500, Joel Fernandes (Google) wrote:
> > To reduce grace periods and improve kfree() performance, we have done
> > batching recently dramatically bringing down the number of grace periods
> > while giving us the ability to use kfree_bulk() for efficient kfree'ing.
> > 
> > However, this has increased the likelihood of OOM condition under heavy
> > kfree_rcu() flood on small memory systems. This patch introduces a
> > shrinker which starts grace periods right away if the system is under
> > memory pressure due to existence of objects that have still not started
> > a grace period.
> > 
> > With this patch, I do not observe an OOM anymore on a system with 512MB
> > RAM and 8 CPUs, with the following rcuperf options:
> > 
> > rcuperf.kfree_loops=20000 rcuperf.kfree_alloc_num=8000
> > rcuperf.kfree_rcu_test=1 rcuperf.kfree_mult=2
> 
> Paul,
> I may have to rebase this patch on top of Vlad's kfree_bulk() work. But let
> us discuss patch and I can rebase it and repost it once patch looks Ok to
> you. (The kfree_bulk() work should not affect the patch).

BTW, we can also use this scheme in the future to keep garbage uncollected
until there is memory pressure. That way you defer grace periods for longer,
similar to the paper [1], until the MM layer thinks the party is over. For
one, I am not too confident about the shrinker's ability to handle transient
memory spikes. If I remember correctly, the shrinker is best-effort.

But one step at a time :)

thanks,

 - Joel

[1] https://dl.acm.org/doi/10.1145/3190508.3190522


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH linus/master 1/2] rcuperf: Add ability to increase object allocation size
  2020-03-05 22:13 [PATCH linus/master 1/2] rcuperf: Add ability to increase object allocation size Joel Fernandes (Google)
  2020-03-05 22:13 ` [PATCH linus/master 2/2] rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching Joel Fernandes (Google)
@ 2020-03-06  2:17 ` kbuild test robot
  1 sibling, 0 replies; 5+ messages in thread
From: kbuild test robot @ 2020-03-06  2:17 UTC (permalink / raw)
  To: Joel Fernandes (Google)
  Cc: kbuild-all, linux-kernel@vger.kernel.org, Joel Fernandes ,
	Davidlohr Bueso, Josh Triplett, Lai Jiangshan, Mathieu Desnoyers,
	Paul E. McKenney, rcu, Steven Rostedt, urezki

[-- Attachment #1: Type: text/plain, Size: 4681 bytes --]

Hi Joel,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on linus/master]

url:    https://github.com/0day-ci/linux/commits/Joel-Fernandes-Google/rcuperf-Add-ability-to-increase-object-allocation-size/20200306-064829
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 9f65ed5fe41ce08ed1cb1f6a950f9ec694c142ad
config: c6x-allyesconfig (attached as .config)
compiler: c6x-elf-gcc (GCC) 7.5.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.5.0 make.cross ARCH=c6x 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from include/linux/printk.h:7:0,
                    from include/linux/kernel.h:15,
                    from kernel//rcu/rcuperf.c:13:
   kernel//rcu/rcuperf.c: In function 'kfree_perf_init':
   include/linux/kern_levels.h:5:18: warning: format '%lu' expects argument of type 'long unsigned int', but argument 2 has type 'unsigned int' [-Wformat=]
    #define KERN_SOH "\001"  /* ASCII Start Of Header */
                     ^
   include/linux/kern_levels.h:9:20: note: in expansion of macro 'KERN_SOH'
    #define KERN_ALERT KERN_SOH "1" /* action must be taken immediately */
                       ^~~~~~~~
>> include/linux/printk.h:300:9: note: in expansion of macro 'KERN_ALERT'
     printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
            ^~~~~~~~~~
>> kernel//rcu/rcuperf.c:716:2: note: in expansion of macro 'pr_alert'
     pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
     ^~~~~~~~
   kernel//rcu/rcuperf.c:716:32: note: format string is defined here
     pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
                                 ~~^
                                 %u
--
   In file included from include/linux/printk.h:7:0,
                    from include/linux/kernel.h:15,
                    from kernel/rcu/rcuperf.c:13:
   kernel/rcu/rcuperf.c: In function 'kfree_perf_init':
   include/linux/kern_levels.h:5:18: warning: format '%lu' expects argument of type 'long unsigned int', but argument 2 has type 'unsigned int' [-Wformat=]
    #define KERN_SOH "\001"  /* ASCII Start Of Header */
                     ^
   include/linux/kern_levels.h:9:20: note: in expansion of macro 'KERN_SOH'
    #define KERN_ALERT KERN_SOH "1" /* action must be taken immediately */
                       ^~~~~~~~
>> include/linux/printk.h:300:9: note: in expansion of macro 'KERN_ALERT'
     printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
            ^~~~~~~~~~
   kernel/rcu/rcuperf.c:716:2: note: in expansion of macro 'pr_alert'
     pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
     ^~~~~~~~
   kernel/rcu/rcuperf.c:716:32: note: format string is defined here
     pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
                                 ~~^
                                 %u

vim +/pr_alert +716 kernel//rcu/rcuperf.c

   698	
   699	static int __init
   700	kfree_perf_init(void)
   701	{
   702		long i;
   703		int firsterr = 0;
   704	
   705		kfree_nrealthreads = compute_real(kfree_nthreads);
   706		/* Start up the kthreads. */
   707		if (shutdown) {
   708			init_waitqueue_head(&shutdown_wq);
   709			firsterr = torture_create_kthread(kfree_perf_shutdown, NULL,
   710							  shutdown_task);
   711			if (firsterr)
   712				goto unwind;
   713			schedule_timeout_uninterruptible(1);
   714		}
   715	
 > 716		pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
   717	
   718		kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
   719				       GFP_KERNEL);
   720		if (kfree_reader_tasks == NULL) {
   721			firsterr = -ENOMEM;
   722			goto unwind;
   723		}
   724	
   725		for (i = 0; i < kfree_nrealthreads; i++) {
   726			firsterr = torture_create_kthread(kfree_perf_thread, (void *)i,
   727							  kfree_reader_tasks[i]);
   728			if (firsterr)
   729				goto unwind;
   730		}
   731	
   732		while (atomic_read(&n_kfree_perf_thread_started) < kfree_nrealthreads)
   733			schedule_timeout_uninterruptible(1);
   734	
   735		torture_init_end();
   736		return 0;
   737	
   738	unwind:
   739		torture_init_end();
   740		kfree_perf_cleanup();
   741		return firsterr;
   742	}
   743	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 51605 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-03-06  2:17 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-05 22:13 [PATCH linus/master 1/2] rcuperf: Add ability to increase object allocation size Joel Fernandes (Google)
2020-03-05 22:13 ` [PATCH linus/master 2/2] rcu/tree: Add a shrinker to prevent OOM due to kfree_rcu() batching Joel Fernandes (Google)
2020-03-05 22:17   ` Joel Fernandes
2020-03-05 22:25     ` Joel Fernandes
2020-03-06  2:17 ` [PATCH linus/master 1/2] rcuperf: Add ability to increase object allocation size kbuild test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).