linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH 1/4] mm: reduce the impaction of page reporing worker
@ 2020-04-12  9:08 liliangleo
  2020-04-13 14:59 ` Alexander Duyck
  0 siblings, 1 reply; 3+ messages in thread
From: liliangleo @ 2020-04-12  9:08 UTC (permalink / raw)
  To: Alexander Duyck, Mel Gorman, linux-mm, linux-kernel,
	Andrea Arcangeli, Dan Williams, Dave Hansen, David Hildenbrand,
	Michal Hocko, Andrew Morton, Alex Williamson

When scanning the free list, 'page_reporting_cycle' may hold the
zone->lock for a long time when there are no reported pages in the
free list. Setting PAGE_REPORTING_MIN_ORDER to a lower order will
make this issue worse.

Two ways were used to reduce the impact:
   1. Release the zone lock periodically
   2. Yield the CPU voluntarily if needed.

Signed-off-by: liliangleo <liliangleo@didiglobal.com>
---
 mm/page_reporting.c | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/mm/page_reporting.c b/mm/page_reporting.c
index 3bbd471cfc81..3a7084e508e1 100644
--- a/mm/page_reporting.c
+++ b/mm/page_reporting.c
@@ -6,11 +6,14 @@
 #include <linux/export.h>
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
+#include <linux/sched.h>
 
 #include "page_reporting.h"
 #include "internal.h"
 
 #define PAGE_REPORTING_DELAY	(2 * HZ)
+#define MAX_SCAN_NUM 1024
+
 static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly;
 
 enum {
@@ -115,7 +118,7 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
 	unsigned int page_len = PAGE_SIZE << order;
 	struct page *page, *next;
 	long budget;
-	int err = 0;
+	int err = 0, scan_cnt = 0;
 
 	/*
 	 * Perform early check, if free area is empty there is
@@ -145,8 +148,14 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
 	/* loop through free list adding unreported pages to sg list */
 	list_for_each_entry_safe(page, next, list, lru) {
 		/* We are going to skip over the reported pages. */
-		if (PageReported(page))
+		if (PageReported(page)) {
+			if (++scan_cnt >= MAX_SCAN_NUM) {
+				err = scan_cnt;
+				break;
+			}
 			continue;
+		}
+
 
 		/*
 		 * If we fully consumed our budget then update our
@@ -219,6 +228,26 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
 	return err;
 }
 
+static int
+reporting_order_type(struct page_reporting_dev_info *prdev, struct zone *zone,
+		     unsigned int order, unsigned int mt,
+		     struct scatterlist *sgl, unsigned int *offset)
+{
+	int ret = 0;
+	unsigned long total = 0;
+
+	might_sleep();
+	do {
+		cond_resched();
+		ret = page_reporting_cycle(prdev, zone, order, mt,
+					   sgl, offset);
+		if (ret > 0)
+			total += ret;
+	} while (ret > 0 && total < zone->free_area[order].nr_free);
+
+	return ret;
+}
+
 static int
 page_reporting_process_zone(struct page_reporting_dev_info *prdev,
 			    struct scatterlist *sgl, struct zone *zone)
@@ -245,7 +274,7 @@ page_reporting_process_zone(struct page_reporting_dev_info *prdev,
 			if (is_migrate_isolate(mt))
 				continue;
 
-			err = page_reporting_cycle(prdev, zone, order, mt,
+			err = reporting_order_type(prdev, zone, order, mt,
 						   sgl, &offset);
 			if (err)
 				return err;
-- 
2.14.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [RFC PATCH 1/4] mm: reduce the impaction of page reporing worker
  2020-04-12  9:08 [RFC PATCH 1/4] mm: reduce the impaction of page reporing worker liliangleo
@ 2020-04-13 14:59 ` Alexander Duyck
  2020-04-16  1:12   ` Liang Li
  0 siblings, 1 reply; 3+ messages in thread
From: Alexander Duyck @ 2020-04-13 14:59 UTC (permalink / raw)
  To: Mel Gorman, linux-mm, linux-kernel, Andrea Arcangeli,
	Dan Williams, Dave Hansen, David Hildenbrand, Michal Hocko,
	Andrew Morton, Alex Williamson

On 4/12/2020 2:08 AM, liliangleo wrote:
> When scaning the free list, 'page_reporting_cycle' may hold the
> zone->lock for a long time when there are no reported page in the
> free list. Setting PAGE_REPORTING_MIN_ORDER to a lower oder will
> make this issue worse.
> 
> Two ways were used to reduce the impact:
>     1. Release zone lock periodicly
>     2. Yield cpu voluntarily if needed.
> 
> Signed-off-by: liliangleo <liliangleo@didiglobal.com>

One of the reasons why I had limited this to no lower than pageblock 
order was to keep the number of pages we would have to walk in each 
list on the smaller side.

Also the lock ends up being released every time we report a batch of 
pages. It might make more sense to look at calling cond_resched after a 
batch has been submitted rather than try to introduce a new loop around 
page_reporting_cycle.

> ---
>   mm/page_reporting.c | 35 ++++++++++++++++++++++++++++++++---
>   1 file changed, 32 insertions(+), 3 deletions(-)
> 
> diff --git a/mm/page_reporting.c b/mm/page_reporting.c
> index 3bbd471cfc81..3a7084e508e1 100644
> --- a/mm/page_reporting.c
> +++ b/mm/page_reporting.c
> @@ -6,11 +6,14 @@
>   #include <linux/export.h>
>   #include <linux/delay.h>
>   #include <linux/scatterlist.h>
> +#include <linux/sched.h>
>   
>   #include "page_reporting.h"
>   #include "internal.h"
>   
>   #define PAGE_REPORTING_DELAY	(2 * HZ)
> +#define MAX_SCAN_NUM 1024
> +
>   static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly;
>   
>   enum {
> @@ -115,7 +118,7 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
>   	unsigned int page_len = PAGE_SIZE << order;
>   	struct page *page, *next;
>   	long budget;
> -	int err = 0;
> +	int err = 0, scan_cnt = 0;
>   
>   	/*
>   	 * Perform early check, if free area is empty there is
> @@ -145,8 +148,14 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
>   	/* loop through free list adding unreported pages to sg list */
>   	list_for_each_entry_safe(page, next, list, lru) {
>   		/* We are going to skip over the reported pages. */
> -		if (PageReported(page))
> +		if (PageReported(page)) {
> +			if (++scan_cnt >= MAX_SCAN_NUM) {
> +				err = scan_cnt;
> +				break;
> +			}
>   			continue;
> +		}
> +
>   
>   		/*
>   		 * If we fully consumed our budget then update our

Why add yet another loop variable, why not just move our budget test to 
before the PageReported check and then increase the value?

> @@ -219,6 +228,26 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
>   	return err;
>   }
>   
> +static int
> +reporting_order_type(struct page_reporting_dev_info *prdev, struct zone *zone,
> +		     unsigned int order, unsigned int mt,
> +		     struct scatterlist *sgl, unsigned int *offset)
> +{
> +	int ret = 0;
> +	unsigned long total = 0;
> +
> +	might_sleep();
> +	do {
> +		cond_resched();
> +		ret = page_reporting_cycle(prdev, zone, order, mt,
> +					   sgl, offset);
> +		if (ret > 0)
> +			total += ret;
> +	} while (ret > 0 && total < zone->free_area[order].nr_free);
> +
> +	return ret;
> +}
> +

The idea behind page reporting is it is supposed to happen while the 
system is idle. As such we don't need to be in a hurry. I would get rid 
of the loop and just let the natural placing take over so that we are 
only processing something like 1/8 of the nr_free with each pass.

>   static int
>   page_reporting_process_zone(struct page_reporting_dev_info *prdev,
>   			    struct scatterlist *sgl, struct zone *zone)
> @@ -245,7 +274,7 @@ page_reporting_process_zone(struct page_reporting_dev_info *prdev,
>   			if (is_migrate_isolate(mt))
>   				continue;
>   
> -			err = page_reporting_cycle(prdev, zone, order, mt,
> +			err = reporting_order_type(prdev, zone, order, mt,
>   						   sgl, &offset);
>   			if (err)
>   				return err;
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [RFC PATCH 1/4] mm: reduce the impaction of page reporing worker
  2020-04-13 14:59 ` Alexander Duyck
@ 2020-04-16  1:12   ` Liang Li
  0 siblings, 0 replies; 3+ messages in thread
From: Liang Li @ 2020-04-16  1:12 UTC (permalink / raw)
  To: Alexander Duyck, liliang.opensource, liliang324
  Cc: Mel Gorman, linux-mm, linux-kernel, Andrea Arcangeli,
	Dan Williams, Dave Hansen, David Hildenbrand, Michal Hocko,
	Andrew Morton, Alex Williamson

On Mon, Apr 13, 2020 at 10:59 PM Alexander Duyck
<alexander.h.duyck@linux.intel.com> wrote:
>
> On 4/12/2020 2:08 AM, liliangleo wrote:
> > When scaning the free list, 'page_reporting_cycle' may hold the
> > zone->lock for a long time when there are no reported page in the
> > free list. Setting PAGE_REPORTING_MIN_ORDER to a lower oder will
> > make this issue worse.
> >
> > Two ways were used to reduce the impact:
> >     1. Release zone lock periodicly
> >     2. Yield cpu voluntarily if needed.
> >
> > Signed-off-by: liliangleo <liliangleo@didiglobal.com>
>
> One of the reasons why I had limited this to no lower than pageblock
> order was in order to keep number of pages we would have to walk in each
> list on the smaller side.
>
> Also the lock ends up being released every time we report a batch of
> pages. It might make more sense to look at calling cond_resched after a
> batch as been submitted rather than try to introduce a new loop around
> page_reporting_cycle.
>

Hi Alexander,

My original intention is to prevent 'page_reporting_cycle' from holding
the zone lock for too long when scanning the free list while there are
very few pages that need to be reported.
For the PG_zero use case, it's better for users to decide the page order;
if the order is set to a low order, the impact will be much more serious.
Just calling cond_resched after batch submission is not enough; that's
the reason why I added cond_resched.

> >   static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly;
> >
> >   enum {
> > @@ -115,7 +118,7 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
> >       unsigned int page_len = PAGE_SIZE << order;
> >       struct page *page, *next;
> >       long budget;
> > -     int err = 0;
> > +     int err = 0, scan_cnt = 0;
> >
> >       /*
> >        * Perform early check, if free area is empty there is
> > @@ -145,8 +148,14 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
> >       /* loop through free list adding unreported pages to sg list */
> >       list_for_each_entry_safe(page, next, list, lru) {
> >               /* We are going to skip over the reported pages. */
> > -             if (PageReported(page))
> > +             if (PageReported(page)) {
> > +                     if (++scan_cnt >= MAX_SCAN_NUM) {
> > +                             err = scan_cnt;
> > +                             break;
> > +                     }
> >                       continue;
> > +             }
> > +
> >
> >               /*
> >                * If we fully consumed our budget then update our
>
> Why add yet another loopvariable, why not just move our budget test to
> before the PageReported check and then increase the value?
>

The code can be refined; I just don't want to break the budget stuff.

Thanks for your feedback and your work.

Liang

> > @@ -219,6 +228,26 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
> >       return err;
> >   }
> >
> > +static int
> > +reporting_order_type(struct page_reporting_dev_info *prdev, struct zone *zone,
> > +                  unsigned int order, unsigned int mt,
> > +                  struct scatterlist *sgl, unsigned int *offset)
> > +{
> > +     int ret = 0;
> > +     unsigned long total = 0;
> > +
> > +     might_sleep();
> > +     do {
> > +             cond_resched();
> > +             ret = page_reporting_cycle(prdev, zone, order, mt,
> > +                                        sgl, offset);
> > +             if (ret > 0)
> > +                     total += ret;
> > +     } while (ret > 0 && total < zone->free_area[order].nr_free);
> > +
> > +     return ret;
> > +}
> > +
>
> The idea behind page reporting is it is supposed to happen while the
> system is idle. As such we don't need to be in a hurry. I would get rid
> of the loop and just let the natural placing take over so that we are
> only processing something like 1/8 of the nr_free with each pass.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-04-16  1:12 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-12  9:08 [RFC PATCH 1/4] mm: reduce the impaction of page reporing worker liliangleo
2020-04-13 14:59 ` Alexander Duyck
2020-04-16  1:12   ` Liang Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).