linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5] kvfree_rcu: Release page cache under memory pressure
@ 2021-02-11 12:10 qiang.zhang
  2021-02-12 15:45 ` Uladzislau Rezki
  0 siblings, 1 reply; 2+ messages in thread
From: qiang.zhang @ 2021-02-11 12:10 UTC (permalink / raw)
  To: urezki, paulmck; +Cc: joel, rcu, linux-kernel

From: Zqiang <qiang.zhang@windriver.com>

Add free per-cpu existing krcp's page cache operation in shrink callback
function, and also during shrink period, simple delay schedule fill page
work, to avoid refill page while free krcp page cache.

Signed-off-by: Zqiang <qiang.zhang@windriver.com>
Co-developed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 v1->v4:
 During the test a page shrinker is pretty active, because of low memory
 condition. callback drains it whereas kvfree_rcu() part refill it right
 away making kind of vicious circle.
 Through Vlad Rezki suggestion, to avoid this, schedule a periodic delayed
 work with HZ, and it's easy to do that.
 v4->v5:
 change commit message and use xchg replace WRITE_ONCE()

 kernel/rcu/tree.c | 49 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 8 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c1ae1e52f638..f1fba23f5036 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3139,7 +3139,7 @@ struct kfree_rcu_cpu {
 	bool initialized;
 	int count;
 
-	struct work_struct page_cache_work;
+	struct delayed_work page_cache_work;
 	atomic_t work_in_progress;
 	struct hrtimer hrtimer;
 
@@ -3395,7 +3395,7 @@ schedule_page_work_fn(struct hrtimer *t)
 	struct kfree_rcu_cpu *krcp =
 		container_of(t, struct kfree_rcu_cpu, hrtimer);
 
-	queue_work(system_highpri_wq, &krcp->page_cache_work);
+	queue_delayed_work(system_highpri_wq, &krcp->page_cache_work, 0);
 	return HRTIMER_NORESTART;
 }
 
@@ -3404,7 +3404,7 @@ static void fill_page_cache_func(struct work_struct *work)
 	struct kvfree_rcu_bulk_data *bnode;
 	struct kfree_rcu_cpu *krcp =
 		container_of(work, struct kfree_rcu_cpu,
-			page_cache_work);
+			page_cache_work.work);
 	unsigned long flags;
 	bool pushed;
 	int i;
@@ -3428,15 +3428,21 @@ static void fill_page_cache_func(struct work_struct *work)
 	atomic_set(&krcp->work_in_progress, 0);
 }
 
+static atomic_t backoff_page_cache_fill = ATOMIC_INIT(0);
+
 static void
 run_page_cache_worker(struct kfree_rcu_cpu *krcp)
 {
 	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
 			!atomic_xchg(&krcp->work_in_progress, 1)) {
-		hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC,
-			HRTIMER_MODE_REL);
-		krcp->hrtimer.function = schedule_page_work_fn;
-		hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
+		if (atomic_xchg(&backoff_page_cache_fill, 0)) {
+			queue_delayed_work(system_highpri_wq, &krcp->page_cache_work, HZ);
+		} else {
+			hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC,
+				HRTIMER_MODE_REL);
+			krcp->hrtimer.function = schedule_page_work_fn;
+			hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
+		}
 	}
 }
 
@@ -3571,19 +3577,44 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(kvfree_call_rcu);
 
+static int free_krc_page_cache(struct kfree_rcu_cpu *krcp)
+{
+	unsigned long flags;
+	struct llist_node *page_list, *pos, *n;
+	int freed = 0;
+
+	raw_spin_lock_irqsave(&krcp->lock, flags);
+	page_list = llist_del_all(&krcp->bkvcache);
+	krcp->nr_bkv_objs = 0;
+	raw_spin_unlock_irqrestore(&krcp->lock, flags);
+
+	llist_for_each_safe(pos, n, page_list) {
+		free_page((unsigned long)pos);
+		freed++;
+	}
+
+	return freed;
+}
+
 static unsigned long
 kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
 	int cpu;
 	unsigned long count = 0;
+	unsigned long flags;
 
 	/* Snapshot count of all CPUs */
 	for_each_possible_cpu(cpu) {
 		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
 
 		count += READ_ONCE(krcp->count);
+
+		raw_spin_lock_irqsave(&krcp->lock, flags);
+		count += krcp->nr_bkv_objs;
+		raw_spin_unlock_irqrestore(&krcp->lock, flags);
 	}
 
+	atomic_set(&backoff_page_cache_fill, 1);
 	return count;
 }
 
@@ -3598,6 +3629,8 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
 
 		count = krcp->count;
+		count += free_krc_page_cache(krcp);
+
 		raw_spin_lock_irqsave(&krcp->lock, flags);
 		if (krcp->monitor_todo)
 			kfree_rcu_drain_unlock(krcp, flags);
@@ -4574,7 +4607,7 @@ static void __init kfree_rcu_batch_init(void)
 		}
 
 		INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
-		INIT_WORK(&krcp->page_cache_work, fill_page_cache_func);
+		INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func);
 		krcp->initialized = true;
 	}
 	if (register_shrinker(&kfree_rcu_shrinker))
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v5] kvfree_rcu: Release page cache under memory pressure
  2021-02-11 12:10 [PATCH v5] kvfree_rcu: Release page cache under memory pressure qiang.zhang
@ 2021-02-12 15:45 ` Uladzislau Rezki
  0 siblings, 0 replies; 2+ messages in thread
From: Uladzislau Rezki @ 2021-02-12 15:45 UTC (permalink / raw)
  To: qiang.zhang; +Cc: urezki, paulmck, joel, rcu, linux-kernel

> From: Zqiang <qiang.zhang@windriver.com>
> 
> Add free per-cpu existing krcp's page cache operation in shrink callback
> function, and also during shrink period, simple delay schedule fill page
> work, to avoid refill page while free krcp page cache.
> 
> Signed-off-by: Zqiang <qiang.zhang@windriver.com>
> Co-developed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> ---
>  v1->v4:
>  During the test a page shrinker is pretty active, because of low memory
>  condition. callback drains it whereas kvfree_rcu() part refill it right
>  away making kind of vicious circle.
>  Through Vlad Rezki suggestion, to avoid this, schedule a periodic delayed
>  work with HZ, and it's easy to do that.
>  v4->v5:
>  change commit message and use xchg replace WRITE_ONCE()
> 
>  kernel/rcu/tree.c | 49 +++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 41 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index c1ae1e52f638..f1fba23f5036 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -3139,7 +3139,7 @@ struct kfree_rcu_cpu {
>  	bool initialized;
>  	int count;
>  
> -	struct work_struct page_cache_work;
> +	struct delayed_work page_cache_work;
>  	atomic_t work_in_progress;
>  	struct hrtimer hrtimer;
>  
> @@ -3395,7 +3395,7 @@ schedule_page_work_fn(struct hrtimer *t)
>  	struct kfree_rcu_cpu *krcp =
>  		container_of(t, struct kfree_rcu_cpu, hrtimer);
>  
> -	queue_work(system_highpri_wq, &krcp->page_cache_work);
> +	queue_delayed_work(system_highpri_wq, &krcp->page_cache_work, 0);
>  	return HRTIMER_NORESTART;
>  }
>  
> @@ -3404,7 +3404,7 @@ static void fill_page_cache_func(struct work_struct *work)
>  	struct kvfree_rcu_bulk_data *bnode;
>  	struct kfree_rcu_cpu *krcp =
>  		container_of(work, struct kfree_rcu_cpu,
> -			page_cache_work);
> +			page_cache_work.work);
>  	unsigned long flags;
>  	bool pushed;
>  	int i;
> @@ -3428,15 +3428,21 @@ static void fill_page_cache_func(struct work_struct *work)
>  	atomic_set(&krcp->work_in_progress, 0);
>  }
>  
> +static atomic_t backoff_page_cache_fill = ATOMIC_INIT(0);
> +
Should we initialize a static atomic_t? It is zero by default.

>  static void
>  run_page_cache_worker(struct kfree_rcu_cpu *krcp)
>  {
>  	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
>  			!atomic_xchg(&krcp->work_in_progress, 1)) {
> -		hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC,
> -			HRTIMER_MODE_REL);
> -		krcp->hrtimer.function = schedule_page_work_fn;
> -		hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
> +		if (atomic_xchg(&backoff_page_cache_fill, 0)) {
> +			queue_delayed_work(system_highpri_wq, &krcp->page_cache_work, HZ);
system_wq? It is not so critical, anyway the job is rearmed with 1 second interval.

> +		} else {
> +			hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC,
> +				HRTIMER_MODE_REL);
> +			krcp->hrtimer.function = schedule_page_work_fn;
> +			hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
> +		}
>  	}
>  }
>  
> @@ -3571,19 +3577,44 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
>  }
>  EXPORT_SYMBOL_GPL(kvfree_call_rcu);
>  
> +static int free_krc_page_cache(struct kfree_rcu_cpu *krcp)
> +{
> +	unsigned long flags;
> +	struct llist_node *page_list, *pos, *n;
> +	int freed = 0;
> +
> +	raw_spin_lock_irqsave(&krcp->lock, flags);
> +	page_list = llist_del_all(&krcp->bkvcache);
> +	krcp->nr_bkv_objs = 0;
> +	raw_spin_unlock_irqrestore(&krcp->lock, flags);
> +
> +	llist_for_each_safe(pos, n, page_list) {
> +		free_page((unsigned long)pos);
> +		freed++;
> +	}
> +
> +	return freed;
> +}
> +
>  static unsigned long
>  kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
>  {
>  	int cpu;
>  	unsigned long count = 0;
> +	unsigned long flags;
>  
>  	/* Snapshot count of all CPUs */
>  	for_each_possible_cpu(cpu) {
>  		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
>  
>  		count += READ_ONCE(krcp->count);
> +
> +		raw_spin_lock_irqsave(&krcp->lock, flags);
> +		count += krcp->nr_bkv_objs;
> +		raw_spin_unlock_irqrestore(&krcp->lock, flags);
>  	}
>  
> +	atomic_set(&backoff_page_cache_fill, 1);
>  	return count;
>  }
>  
> @@ -3598,6 +3629,8 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
>  		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
>  
>  		count = krcp->count;
> +		count += free_krc_page_cache(krcp);
> +
>  		raw_spin_lock_irqsave(&krcp->lock, flags);
>  		if (krcp->monitor_todo)
>  			kfree_rcu_drain_unlock(krcp, flags);
> @@ -4574,7 +4607,7 @@ static void __init kfree_rcu_batch_init(void)
>  		}
>  
>  		INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
> -		INIT_WORK(&krcp->page_cache_work, fill_page_cache_func);
> +		INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func);
>  		krcp->initialized = true;
>  	}
>  	if (register_shrinker(&kfree_rcu_shrinker))
> -- 
> 2.25.1
> 
I have reviewed and tested that patch. Even though it can not be applied
cleanly on the latest Paul "dev" branch feel free to use:

Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Tested-by: Uladzislau Rezki (Sony) <urezki@gmail.com>

Also i placed some small nits, because it should be rebased on latest dev.
As for commit message i guess Paul will help :)

Thank you!

--
Vlad Rezki

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-02-12 15:46 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-11 12:10 [PATCH v5] kvfree_rcu: Release page cache under memory pressure qiang.zhang
2021-02-12 15:45 ` Uladzislau Rezki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).