linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption
@ 2014-06-13  4:36 Chen Yucong
  2014-06-13  4:40 ` Andrew Morton
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Chen Yucong @ 2014-06-13  4:36 UTC (permalink / raw)
  To: akpm; +Cc: mgorman, hannes, mhocko, riel, linux-mm, linux-kernel, Chen Yucong

shrink_page_list() has too many arguments -- it has already reached ten.
Some of those arguments and temporary variables introduce an extra 80 bytes
on the stack. This patch wraps five parameters into shrink_result and removes
some temporary variables, thus making the relevant functions consume less
stack space.

Before mm/vmscan.c is changed:
   text    data     bss     dec     hex filename
6876698  957224  966656 8800578  864942 vmlinux-3.15

After mm/vmscan.c is changed:
   text    data     bss     dec     hex filename
6876506  957224  966656 8800386  864882 vmlinux-3.15


scripts/checkstack.pl can be used for checking the change of the target function stack.

Before mm/vmscan.c is changed:

0xffffffff810af103 shrink_inactive_list []:		152
0xffffffff810af43d shrink_inactive_list []:		152
-------------------------------------------------------------
0xffffffff810aede8 reclaim_clean_pages_from_list []:	184
0xffffffff810aeef8 reclaim_clean_pages_from_list []:	184
-------------------------------------------------------------
0xffffffff810ae582 shrink_page_list []:			232
0xffffffff810aedb5 shrink_page_list []:			232

After mm/vmscan.c is changed:

0xffffffff810af078 shrink_inactive_list []:		120
0xffffffff810af36d shrink_inactive_list []:		120
-------------------------------------------------------------
0xffffffff810aed6c reclaim_clean_pages_from_list []:	152
0xffffffff810aee68 reclaim_clean_pages_from_list []:	152
--------------------------------------------------------------------------------------
0xffffffff810ae586 shrink_page_list []:			184   ---> sub    $0xb8,%rsp
0xffffffff810aed36 shrink_page_list []:			184   ---> add    $0xb8,%rsp

Via the above figures, we can find that the difference value of the stack is 32 for
shrink_inactive_list and reclaim_clean_pages_from_list, and this value is 48(232-184)
for shrink_page_list. From the hierarchy of functions called, the total difference
value is 80(32+48) for this change.

Changes since v1: https://lkml.org/lkml/2014/6/12/159
     * Rename arg_container to shrink_result
     * Change the way of initializing the shrink_result object.

Signed-off-by: Chen Yucong <slaoub@gmail.com>
---
 mm/vmscan.c |   62 ++++++++++++++++++++++++++---------------------------------
 1 file changed, 27 insertions(+), 35 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index a8ffe4e..3f28e39 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -791,28 +791,31 @@ static void page_check_dirty_writeback(struct page *page,
 }
 
 /*
+ * Callers pass a prezeroed shrink_result into the shrink functions to gather
+ * statistics about how many pages of particular states were processed
+ */
+struct shrink_result {
+	unsigned long nr_dirty;
+	unsigned long nr_unqueued_dirty;
+	unsigned long nr_congested;
+	unsigned long nr_writeback;
+	unsigned long nr_immediate;
+};
+
+/*
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
 				      struct zone *zone,
 				      struct scan_control *sc,
 				      enum ttu_flags ttu_flags,
-				      unsigned long *ret_nr_dirty,
-				      unsigned long *ret_nr_unqueued_dirty,
-				      unsigned long *ret_nr_congested,
-				      unsigned long *ret_nr_writeback,
-				      unsigned long *ret_nr_immediate,
+				      struct shrink_result *sr,
 				      bool force_reclaim)
 {
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(free_pages);
 	int pgactivate = 0;
-	unsigned long nr_unqueued_dirty = 0;
-	unsigned long nr_dirty = 0;
-	unsigned long nr_congested = 0;
 	unsigned long nr_reclaimed = 0;
-	unsigned long nr_writeback = 0;
-	unsigned long nr_immediate = 0;
 
 	cond_resched();
 
@@ -858,10 +861,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 */
 		page_check_dirty_writeback(page, &dirty, &writeback);
 		if (dirty || writeback)
-			nr_dirty++;
+			sr->nr_dirty++;
 
 		if (dirty && !writeback)
-			nr_unqueued_dirty++;
+			sr->nr_unqueued_dirty++;
 
 		/*
 		 * Treat this page as congested if the underlying BDI is or if
@@ -872,7 +875,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		mapping = page_mapping(page);
 		if ((mapping && bdi_write_congested(mapping->backing_dev_info)) ||
 		    (writeback && PageReclaim(page)))
-			nr_congested++;
+			sr->nr_congested++;
 
 		/*
 		 * If a page at the tail of the LRU is under writeback, there
@@ -916,7 +919,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			if (current_is_kswapd() &&
 			    PageReclaim(page) &&
 			    zone_is_reclaim_writeback(zone)) {
-				nr_immediate++;
+				sr->nr_immediate++;
 				goto keep_locked;
 
 			/* Case 2 above */
@@ -934,7 +937,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				 * and it's also appropriate in global reclaim.
 				 */
 				SetPageReclaim(page);
-				nr_writeback++;
+				sr->nr_writeback++;
 
 				goto keep_locked;
 
@@ -1132,11 +1135,6 @@ keep:
 	list_splice(&ret_pages, page_list);
 	count_vm_events(PGACTIVATE, pgactivate);
 	mem_cgroup_uncharge_end();
-	*ret_nr_dirty += nr_dirty;
-	*ret_nr_congested += nr_congested;
-	*ret_nr_unqueued_dirty += nr_unqueued_dirty;
-	*ret_nr_writeback += nr_writeback;
-	*ret_nr_immediate += nr_immediate;
 	return nr_reclaimed;
 }
 
@@ -1148,7 +1146,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 		.priority = DEF_PRIORITY,
 		.may_unmap = 1,
 	};
-	unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
+	unsigned long ret;
+	struct shrink_result dummy = { };
 	struct page *page, *next;
 	LIST_HEAD(clean_pages);
 
@@ -1161,8 +1160,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 	}
 
 	ret = shrink_page_list(&clean_pages, zone, &sc,
-			TTU_UNMAP|TTU_IGNORE_ACCESS,
-			&dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
+			TTU_UNMAP|TTU_IGNORE_ACCESS, &dummy, true);
 	list_splice(&clean_pages, page_list);
 	mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
 	return ret;
@@ -1469,11 +1467,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	unsigned long nr_scanned;
 	unsigned long nr_reclaimed = 0;
 	unsigned long nr_taken;
-	unsigned long nr_dirty = 0;
-	unsigned long nr_congested = 0;
-	unsigned long nr_unqueued_dirty = 0;
-	unsigned long nr_writeback = 0;
-	unsigned long nr_immediate = 0;
+	struct shrink_result sr = { };
 	isolate_mode_t isolate_mode = 0;
 	int file = is_file_lru(lru);
 	struct zone *zone = lruvec_zone(lruvec);
@@ -1515,9 +1509,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 		return 0;
 
 	nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP,
-				&nr_dirty, &nr_unqueued_dirty, &nr_congested,
-				&nr_writeback, &nr_immediate,
-				false);
+					&sr, false);
 
 	spin_lock_irq(&zone->lru_lock);
 
@@ -1554,7 +1546,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	 * of pages under pages flagged for immediate reclaim and stall if any
 	 * are encountered in the nr_immediate check below.
 	 */
-	if (nr_writeback && nr_writeback == nr_taken)
+	if (sr.nr_writeback && sr.nr_writeback == nr_taken)
 		zone_set_flag(zone, ZONE_WRITEBACK);
 
 	/*
@@ -1566,7 +1558,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 		 * Tag a zone as congested if all the dirty pages scanned were
 		 * backed by a congested BDI and wait_iff_congested will stall.
 		 */
-		if (nr_dirty && nr_dirty == nr_congested)
+		if (sr.nr_dirty && sr.nr_dirty == sr.nr_congested)
 			zone_set_flag(zone, ZONE_CONGESTED);
 
 		/*
@@ -1576,7 +1568,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 		 * pages from reclaim context. It will forcibly stall in the
 		 * next check.
 		 */
-		if (nr_unqueued_dirty == nr_taken)
+		if (sr.nr_unqueued_dirty == nr_taken)
 			zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY);
 
 		/*
@@ -1585,7 +1577,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 		 * implies that pages are cycling through the LRU faster than
 		 * they are written so also forcibly stall.
 		 */
-		if ((nr_unqueued_dirty == nr_taken || nr_immediate) &&
+		if ((sr.nr_unqueued_dirty == nr_taken || sr.nr_immediate) &&
 		    current_may_throttle())
 			congestion_wait(BLK_RW_ASYNC, HZ/10);
 	}
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption
  2014-06-13  4:36 [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption Chen Yucong
@ 2014-06-13  4:40 ` Andrew Morton
  2014-06-13  5:21   ` Chen Yucong
  2014-06-13  4:52 ` Konstantin Khlebnikov
  2014-06-13  5:10 ` Johannes Weiner
  2 siblings, 1 reply; 9+ messages in thread
From: Andrew Morton @ 2014-06-13  4:40 UTC (permalink / raw)
  To: Chen Yucong; +Cc: mgorman, hannes, mhocko, riel, linux-mm, linux-kernel

On Fri, 13 Jun 2014 12:36:31 +0800 Chen Yucong <slaoub@gmail.com> wrote:

> @@ -1148,7 +1146,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
>  		.priority = DEF_PRIORITY,
>  		.may_unmap = 1,
>  	};
> -	unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
> +	unsigned long ret;
> +	struct shrink_result dummy = { };

You didn't like the idea of making this static?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption
  2014-06-13  4:36 [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption Chen Yucong
  2014-06-13  4:40 ` Andrew Morton
@ 2014-06-13  4:52 ` Konstantin Khlebnikov
  2014-06-13  5:21   ` Johannes Weiner
  2014-06-13  5:10 ` Johannes Weiner
  2 siblings, 1 reply; 9+ messages in thread
From: Konstantin Khlebnikov @ 2014-06-13  4:52 UTC (permalink / raw)
  To: Chen Yucong
  Cc: Andrew Morton, Mel Gorman, Johannes Weiner, mhocko, Rik van Riel,
	linux-mm, Linux Kernel Mailing List

On Fri, Jun 13, 2014 at 8:36 AM, Chen Yucong <slaoub@gmail.com> wrote:
> shrink_page_list() has too many arguments that have already reached ten.
> Some of those arguments and temporary variables introduces extra 80 bytes
> on the stack. This patch wraps five parameters into shrink_result and removes
> some temporary variables, thus making the relative functions to consume fewer
> stack space.

I think it's better to put them into struct scan_control.
Reset them before calling shrinker or take a snapshot to get delta.

>
> Before mm/vmscan.c is changed:
>    text    data     bss     dec     hex filename
> 6876698  957224  966656 8800578  864942 vmlinux-3.15
>
> After mm/vmscan.c is changed:
>    text    data     bss     dec     hex filename
> 6876506  957224  966656 8800386  864882 vmlinux-3.15
>
>
> scripts/checkstack.pl can be used for checking the change of the target function stack.
>
> Before mm/vmscan.c is changed:
>
> 0xffffffff810af103 shrink_inactive_list []:             152
> 0xffffffff810af43d shrink_inactive_list []:             152
> -------------------------------------------------------------
> 0xffffffff810aede8 reclaim_clean_pages_from_list []:    184
> 0xffffffff810aeef8 reclaim_clean_pages_from_list []:    184
> -------------------------------------------------------------
> 0xffffffff810ae582 shrink_page_list []:                 232
> 0xffffffff810aedb5 shrink_page_list []:                 232
>
> After mm/vmscan.c is changed::
>
> 0xffffffff810af078 shrink_inactive_list []:             120
> 0xffffffff810af36d shrink_inactive_list []:             120
> -------------------------------------------------------------
> 0xffffffff810aed6c reclaim_clean_pages_from_list []:    152
> 0xffffffff810aee68 reclaim_clean_pages_from_list []:    152
> --------------------------------------------------------------------------------------
> 0xffffffff810ae586 shrink_page_list []:                 184   ---> sub    $0xb8,%rsp
> 0xffffffff810aed36 shrink_page_list []:                 184   ---> add    $0xb8,%rsp
>
> Via the above figures, we can find that the difference value of the stack is 32 for
> shrink_inactive_list and reclaim_clean_pages_from_list, and this value is 48(232-184)
> for shrink_page_list. From the hierarchy of functions called, the total difference
> value is 80(32+48) for this change.
>
> Changes since v1: https://lkml.org/lkml/2014/6/12/159
>      * Rename arg_container to shrink_result
>      * Change the the way of initializing shrink_result object.
>
> Signed-off-by: Chen Yucong <slaoub@gmail.com>
> ---
>  mm/vmscan.c |   62 ++++++++++++++++++++++++++---------------------------------
>  1 file changed, 27 insertions(+), 35 deletions(-)
>
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index a8ffe4e..3f28e39 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -791,28 +791,31 @@ static void page_check_dirty_writeback(struct page *page,
>  }
>
>  /*
> + * Callers pass a prezeroed shrink_result into the shrink functions to gather
> + * statistics about how many pages of particular states were processed
> + */
> +struct shrink_result {
> +       unsigned long nr_dirty;
> +       unsigned long nr_unqueued_dirty;
> +       unsigned long nr_congested;
> +       unsigned long nr_writeback;
> +       unsigned long nr_immediate;
> +};
> +
> +/*
>   * shrink_page_list() returns the number of reclaimed pages
>   */
>  static unsigned long shrink_page_list(struct list_head *page_list,
>                                       struct zone *zone,
>                                       struct scan_control *sc,
>                                       enum ttu_flags ttu_flags,
> -                                     unsigned long *ret_nr_dirty,
> -                                     unsigned long *ret_nr_unqueued_dirty,
> -                                     unsigned long *ret_nr_congested,
> -                                     unsigned long *ret_nr_writeback,
> -                                     unsigned long *ret_nr_immediate,
> +                                     struct shrink_result *sr,
>                                       bool force_reclaim)
>  {
>         LIST_HEAD(ret_pages);
>         LIST_HEAD(free_pages);
>         int pgactivate = 0;
> -       unsigned long nr_unqueued_dirty = 0;
> -       unsigned long nr_dirty = 0;
> -       unsigned long nr_congested = 0;
>         unsigned long nr_reclaimed = 0;
> -       unsigned long nr_writeback = 0;
> -       unsigned long nr_immediate = 0;
>
>         cond_resched();
>
> @@ -858,10 +861,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
>                  */
>                 page_check_dirty_writeback(page, &dirty, &writeback);
>                 if (dirty || writeback)
> -                       nr_dirty++;
> +                       sr->nr_dirty++;
>
>                 if (dirty && !writeback)
> -                       nr_unqueued_dirty++;
> +                       sr->nr_unqueued_dirty++;
>
>                 /*
>                  * Treat this page as congested if the underlying BDI is or if
> @@ -872,7 +875,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
>                 mapping = page_mapping(page);
>                 if ((mapping && bdi_write_congested(mapping->backing_dev_info)) ||
>                     (writeback && PageReclaim(page)))
> -                       nr_congested++;
> +                       sr->nr_congested++;
>
>                 /*
>                  * If a page at the tail of the LRU is under writeback, there
> @@ -916,7 +919,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
>                         if (current_is_kswapd() &&
>                             PageReclaim(page) &&
>                             zone_is_reclaim_writeback(zone)) {
> -                               nr_immediate++;
> +                               sr->nr_immediate++;
>                                 goto keep_locked;
>
>                         /* Case 2 above */
> @@ -934,7 +937,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
>                                  * and it's also appropriate in global reclaim.
>                                  */
>                                 SetPageReclaim(page);
> -                               nr_writeback++;
> +                               sr->nr_writeback++;
>
>                                 goto keep_locked;
>
> @@ -1132,11 +1135,6 @@ keep:
>         list_splice(&ret_pages, page_list);
>         count_vm_events(PGACTIVATE, pgactivate);
>         mem_cgroup_uncharge_end();
> -       *ret_nr_dirty += nr_dirty;
> -       *ret_nr_congested += nr_congested;
> -       *ret_nr_unqueued_dirty += nr_unqueued_dirty;
> -       *ret_nr_writeback += nr_writeback;
> -       *ret_nr_immediate += nr_immediate;
>         return nr_reclaimed;
>  }
>
> @@ -1148,7 +1146,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
>                 .priority = DEF_PRIORITY,
>                 .may_unmap = 1,
>         };
> -       unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
> +       unsigned long ret;
> +       struct shrink_result dummy = { };
>         struct page *page, *next;
>         LIST_HEAD(clean_pages);
>
> @@ -1161,8 +1160,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
>         }
>
>         ret = shrink_page_list(&clean_pages, zone, &sc,
> -                       TTU_UNMAP|TTU_IGNORE_ACCESS,
> -                       &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
> +                       TTU_UNMAP|TTU_IGNORE_ACCESS, &dummy, true);
>         list_splice(&clean_pages, page_list);
>         mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret);
>         return ret;
> @@ -1469,11 +1467,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
>         unsigned long nr_scanned;
>         unsigned long nr_reclaimed = 0;
>         unsigned long nr_taken;
> -       unsigned long nr_dirty = 0;
> -       unsigned long nr_congested = 0;
> -       unsigned long nr_unqueued_dirty = 0;
> -       unsigned long nr_writeback = 0;
> -       unsigned long nr_immediate = 0;
> +       struct shrink_result sr = { };
>         isolate_mode_t isolate_mode = 0;
>         int file = is_file_lru(lru);
>         struct zone *zone = lruvec_zone(lruvec);
> @@ -1515,9 +1509,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
>                 return 0;
>
>         nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP,
> -                               &nr_dirty, &nr_unqueued_dirty, &nr_congested,
> -                               &nr_writeback, &nr_immediate,
> -                               false);
> +                                       &sr, false);
>
>         spin_lock_irq(&zone->lru_lock);
>
> @@ -1554,7 +1546,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
>          * of pages under pages flagged for immediate reclaim and stall if any
>          * are encountered in the nr_immediate check below.
>          */
> -       if (nr_writeback && nr_writeback == nr_taken)
> +       if (sr.nr_writeback && sr.nr_writeback == nr_taken)
>                 zone_set_flag(zone, ZONE_WRITEBACK);
>
>         /*
> @@ -1566,7 +1558,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
>                  * Tag a zone as congested if all the dirty pages scanned were
>                  * backed by a congested BDI and wait_iff_congested will stall.
>                  */
> -               if (nr_dirty && nr_dirty == nr_congested)
> +               if (sr.nr_dirty && sr.nr_dirty == sr.nr_congested)
>                         zone_set_flag(zone, ZONE_CONGESTED);
>
>                 /*
> @@ -1576,7 +1568,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
>                  * pages from reclaim context. It will forcibly stall in the
>                  * next check.
>                  */
> -               if (nr_unqueued_dirty == nr_taken)
> +               if (sr.nr_unqueued_dirty == nr_taken)
>                         zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY);
>
>                 /*
> @@ -1585,7 +1577,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
>                  * implies that pages are cycling through the LRU faster than
>                  * they are written so also forcibly stall.
>                  */
> -               if ((nr_unqueued_dirty == nr_taken || nr_immediate) &&
> +               if ((sr.nr_unqueued_dirty == nr_taken || sr.nr_immediate) &&
>                     current_may_throttle())
>                         congestion_wait(BLK_RW_ASYNC, HZ/10);
>         }
> --
> 1.7.10.4
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption
  2014-06-13  4:36 [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption Chen Yucong
  2014-06-13  4:40 ` Andrew Morton
  2014-06-13  4:52 ` Konstantin Khlebnikov
@ 2014-06-13  5:10 ` Johannes Weiner
  2 siblings, 0 replies; 9+ messages in thread
From: Johannes Weiner @ 2014-06-13  5:10 UTC (permalink / raw)
  To: Chen Yucong; +Cc: akpm, mgorman, mhocko, riel, linux-mm, linux-kernel

On Fri, Jun 13, 2014 at 12:36:31PM +0800, Chen Yucong wrote:
> shrink_page_list() has too many arguments that have already reached ten.
> Some of those arguments and temporary variables introduces extra 80 bytes
> on the stack. This patch wraps five parameters into shrink_result and removes
> some temporary variables, thus making the relative functions to consume fewer
> stack space.
> 
> Before mm/vmscan.c is changed:
>    text    data     bss     dec     hex filename
> 6876698  957224  966656 8800578  864942 vmlinux-3.15
> 
> After mm/vmscan.c is changed:
>    text    data     bss     dec     hex filename
> 6876506  957224  966656 8800386  864882 vmlinux-3.15
> 
> 
> scripts/checkstack.pl can be used for checking the change of the target function stack.
> 
> Before mm/vmscan.c is changed:
> 
> 0xffffffff810af103 shrink_inactive_list []:		152
> 0xffffffff810af43d shrink_inactive_list []:		152
> -------------------------------------------------------------
> 0xffffffff810aede8 reclaim_clean_pages_from_list []:	184
> 0xffffffff810aeef8 reclaim_clean_pages_from_list []:	184
> -------------------------------------------------------------
> 0xffffffff810ae582 shrink_page_list []:			232
> 0xffffffff810aedb5 shrink_page_list []:			232
> 
> After mm/vmscan.c is changed::
> 
> 0xffffffff810af078 shrink_inactive_list []:		120
> 0xffffffff810af36d shrink_inactive_list []:		120
> -------------------------------------------------------------
> 0xffffffff810aed6c reclaim_clean_pages_from_list []:	152
> 0xffffffff810aee68 reclaim_clean_pages_from_list []:	152
> --------------------------------------------------------------------------------------
> 0xffffffff810ae586 shrink_page_list []:			184   ---> sub    $0xb8,%rsp
> 0xffffffff810aed36 shrink_page_list []:			184   ---> add    $0xb8,%rsp
> 
> Via the above figures, we can find that the difference value of the stack is 32 for
> shrink_inactive_list and reclaim_clean_pages_from_list, and this value is 48(232-184)
> for shrink_page_list. From the hierarchy of functions called, the total difference
> value is 80(32+48) for this change.

We just increased the stack size by 8k.  I'm not saying that we
shouldn't work on our stack footprint, but is this really worth it?
It doesn't make that code easier to follow exactly.

> Changes since v1: https://lkml.org/lkml/2014/6/12/159
>      * Rename arg_container to shrink_result
>      * Change the the way of initializing shrink_result object.
> 
> Signed-off-by: Chen Yucong <slaoub@gmail.com>
> ---
>  mm/vmscan.c |   62 ++++++++++++++++++++++++++---------------------------------
>  1 file changed, 27 insertions(+), 35 deletions(-)
> 
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index a8ffe4e..3f28e39 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -791,28 +791,31 @@ static void page_check_dirty_writeback(struct page *page,
>  }
>  
>  /*
> + * Callers pass a prezeroed shrink_result into the shrink functions to gather
> + * statistics about how many pages of particular states were processed
> + */
> +struct shrink_result {
> +	unsigned long nr_dirty;
> +	unsigned long nr_unqueued_dirty;
> +	unsigned long nr_congested;
> +	unsigned long nr_writeback;
> +	unsigned long nr_immediate;
> +};

This exclusively contains statistics on the writeback states of the
scanned pages.  struct writeback_stats?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption
  2014-06-13  4:40 ` Andrew Morton
@ 2014-06-13  5:21   ` Chen Yucong
  2014-06-13 16:28     ` Johannes Weiner
  0 siblings, 1 reply; 9+ messages in thread
From: Chen Yucong @ 2014-06-13  5:21 UTC (permalink / raw)
  To: Andrew Morton; +Cc: mgorman, hannes, mhocko, riel, linux-mm, linux-kernel

On Thu, 2014-06-12 at 21:40 -0700, Andrew Morton wrote:
> On Fri, 13 Jun 2014 12:36:31 +0800 Chen Yucong <slaoub@gmail.com> wrote:
> 
> > @@ -1148,7 +1146,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
> >  		.priority = DEF_PRIORITY,
> >  		.may_unmap = 1,
> >  	};
> > -	unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
> > +	unsigned long ret;
> > +	struct shrink_result dummy = { };
> 
> You didn't like the idea of making this static?
Sorry! That was my oversight.
If we make dummy static, it can help us save more stack space.

without change:  
0xffffffff810aede8 reclaim_clean_pages_from_list []:	184
0xffffffff810aeef8 reclaim_clean_pages_from_list []:	184

with change: struct shrink_result dummy = {};
0xffffffff810aed6c reclaim_clean_pages_from_list []:	152
0xffffffff810aee68 reclaim_clean_pages_from_list []:	152

with change: static struct shrink_result dummy ={};
0xffffffff810aed69 reclaim_clean_pages_from_list []:	120
0xffffffff810aee4d reclaim_clean_pages_from_list []:	120

thx!
cyc


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption
  2014-06-13  4:52 ` Konstantin Khlebnikov
@ 2014-06-13  5:21   ` Johannes Weiner
  2014-06-13 10:21     ` Konstantin Khlebnikov
  0 siblings, 1 reply; 9+ messages in thread
From: Johannes Weiner @ 2014-06-13  5:21 UTC (permalink / raw)
  To: Konstantin Khlebnikov
  Cc: Chen Yucong, Andrew Morton, Mel Gorman, mhocko, Rik van Riel,
	linux-mm, Linux Kernel Mailing List

On Fri, Jun 13, 2014 at 08:52:22AM +0400, Konstantin Khlebnikov wrote:
> On Fri, Jun 13, 2014 at 8:36 AM, Chen Yucong <slaoub@gmail.com> wrote:
> > shrink_page_list() has too many arguments that have already reached ten.
> > Some of those arguments and temporary variables introduces extra 80 bytes
> > on the stack. This patch wraps five parameters into shrink_result and removes
> > some temporary variables, thus making the relative functions to consume fewer
> > stack space.
> 
> I think it's better to put them into struct scan_control.
> Reset them before calling shrinker or take a snapshot to get delta.

scan_control applies to the whole reclaim invocation*, it would be
confusing as hell to have things in there that only apply to certain
sublevels.  Please don't do that.

If you on the other hand take snapshots and accumulate them over the
whole run, it might actually make sense to move sc->nr_scanned and
sc->nr_reclaimed into shrink_results instead.  But I'm not sure it's
worth the extra snapshotting code, given that we don't actually need
the accumulated numbers at the outer levels right now.

* sc->swappiness being the recent exception, I'll send a fix for that.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption
  2014-06-13  5:21   ` Johannes Weiner
@ 2014-06-13 10:21     ` Konstantin Khlebnikov
  0 siblings, 0 replies; 9+ messages in thread
From: Konstantin Khlebnikov @ 2014-06-13 10:21 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Chen Yucong, Andrew Morton, Mel Gorman, mhocko, Rik van Riel,
	linux-mm, Linux Kernel Mailing List

On Fri, Jun 13, 2014 at 9:21 AM, Johannes Weiner <hannes@cmpxchg.org> wrote:
> On Fri, Jun 13, 2014 at 08:52:22AM +0400, Konstantin Khlebnikov wrote:
>> On Fri, Jun 13, 2014 at 8:36 AM, Chen Yucong <slaoub@gmail.com> wrote:
>> > shrink_page_list() has too many arguments that have already reached ten.
>> > Some of those arguments and temporary variables introduces extra 80 bytes
>> > on the stack. This patch wraps five parameters into shrink_result and removes
>> > some temporary variables, thus making the relative functions to consume fewer
>> > stack space.
>>
>> I think it's better to put them into struct scan_control.
>> Reset them before calling shrinker or take a snapshot to get delta.
>
> scan_control applies to the whole reclaim invocation*, it would be
> confusing as hell to have things in there that only apply to certain
> sublevels.  Please don't do that.

scan_control is an internal private structure, and the reclaimer is small and
simple enough to hold the whole state there.
For me it's easier to track the state of a single structure which is alive
during the whole invocation
than several smaller structures, especially if some of them disappear
from time to time.

If it would be easier for you -- shrink_result might be embedded as
sub-structure.

>
> If you on the other hand take snapshots and accumulate them over the
> whole run, it might actually make sense to move sc->nr_scanned and
> sc->nr_reclaimed into shrink_results instead.  But I'm not sure it's
> worth the extra snapshotting code, given that we don't actually need
> the accumulated numbers at the outer levels right now.
>
> * sc->swappiness being the recent exception, I'll send a fix for that.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption
  2014-06-13  5:21   ` Chen Yucong
@ 2014-06-13 16:28     ` Johannes Weiner
  2014-06-14  3:04       ` Chen Yucong
  0 siblings, 1 reply; 9+ messages in thread
From: Johannes Weiner @ 2014-06-13 16:28 UTC (permalink / raw)
  To: Chen Yucong; +Cc: Andrew Morton, mgorman, mhocko, riel, linux-mm, linux-kernel

On Fri, Jun 13, 2014 at 01:21:15PM +0800, Chen Yucong wrote:
> On Thu, 2014-06-12 at 21:40 -0700, Andrew Morton wrote:
> > On Fri, 13 Jun 2014 12:36:31 +0800 Chen Yucong <slaoub@gmail.com> wrote:
> > 
> > > @@ -1148,7 +1146,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
> > >  		.priority = DEF_PRIORITY,
> > >  		.may_unmap = 1,
> > >  	};
> > > -	unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
> > > +	unsigned long ret;
> > > +	struct shrink_result dummy = { };
> > 
> > You didn't like the idea of making this static?
> Sorry! It's my negligence.
> If we make dummy static, it can help us save more stack.
> 
> without change:  
> 0xffffffff810aede8 reclaim_clean_pages_from_list []:	184
> 0xffffffff810aeef8 reclaim_clean_pages_from_list []:	184
> 
> with change: struct shrink_result dummy = {};
> 0xffffffff810aed6c reclaim_clean_pages_from_list []:	152
> 0xffffffff810aee68 reclaim_clean_pages_from_list []:	152
> 
> with change: static struct shrink_result dummy ={};
> 0xffffffff810aed69 reclaim_clean_pages_from_list []:	120
> 0xffffffff810aee4d reclaim_clean_pages_from_list []:	120

FWIW, I copied bloat-o-meter and hacked up a quick comparison tool
that you can feed two outputs of checkstack.pl for a whole vmlinux and
it shows you the delta.

The output for your patch (with the static dummy) looks like this:

+0/-240 -240
shrink_inactive_list                         136     112     -24
shrink_page_list                             208     160     -48
reclaim_clean_pages_from_list                168       -    -168

(The stack footprint for reclaim_clean_pages_from_list is actually 96
after your patch, but checkstack.pl skips frames under 100)

---
#!/usr/bin/python
#
# Based on bloat-o-meter 

import sys
import re

# Require exactly two positional arguments: the old and new checkstack dumps.
if len(sys.argv) != 3:
    print("usage: {} file1 file2".format(sys.argv[0]))
    sys.exit(1)

def getsizes(filename):
    """Parse a checkstack.pl dump into a {function_name: stack_size} dict.

    Each useful line looks like:
        0xffffffff810af103 shrink_inactive_list []:    152
    Lines that do not match this shape, or whose size field is not an
    integer, are silently skipped (the old code raised a bare Exception on
    the first unparseable line, aborting the whole comparison).
    If a function appears more than once, the last size seen wins.
    """
    line_re = re.compile(r'(0x.*) (.*) (.*):[ \t]*(.*)')
    sym = {}
    # 'with' guarantees the file handle is closed (the original leaked it).
    with open(filename) as fh:
        for line in fh:
            m = line_re.search(line)
            if m is None:
                continue  # e.g. blank or heading lines
            addr, name, src, size = m.groups()
            try:
                sym[name] = int(size)
            except ValueError:
                continue  # size field wasn't a plain integer
    return sym

# Load the per-function stack sizes from both checkstack.pl dumps.
old = getsizes(sys.argv[1])
new = getsizes(sys.argv[2])

grown = 0     # total bytes gained: new-only symbols plus positive deltas
shrunk = 0    # total bytes lost: removed symbols plus negative deltas
changes = []  # (size_change, name) pairs, shown largest-first below

common = set(old) & set(new)

# Symbols that disappeared count fully against the old total.
for name in old:
    if name not in common:
        shrunk += old[name]
        changes.append((-old[name], name))

# Brand-new symbols count fully toward the new total.
for name in new:
    if name not in common:
        grown += new[name]
        changes.append((new[name], name))

# Symbols present in both dumps contribute only their difference.
for name in common:
    change = new.get(name, 0) - old.get(name, 0)
    if change > 0:
        grown += change
    elif change < 0:
        shrunk -= change
    changes.append((change, name))

# All tuples are unique (one entry per name), so a single descending sort
# orders identically to the ascending-sort-then-reverse idiom.
changes.sort(reverse=True)

print("+%d/-%d %+d" % (grown, shrunk, grown - shrunk))
for change, name in changes:
    if change:
        print("%-40s %7s %7s %+7d" % (name, old.get(name, "-"), new.get(name, "-"), change))

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption
  2014-06-13 16:28     ` Johannes Weiner
@ 2014-06-14  3:04       ` Chen Yucong
  0 siblings, 0 replies; 9+ messages in thread
From: Chen Yucong @ 2014-06-14  3:04 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Andrew Morton, mgorman, mhocko, riel, linux-mm, linux-kernel

On Fri, 2014-06-13 at 12:28 -0400, Johannes Weiner wrote:
> On Fri, Jun 13, 2014 at 01:21:15PM +0800, Chen Yucong wrote:
> > On Thu, 2014-06-12 at 21:40 -0700, Andrew Morton wrote:
> > > On Fri, 13 Jun 2014 12:36:31 +0800 Chen Yucong <slaoub@gmail.com> wrote:
> > > 
> > > > @@ -1148,7 +1146,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
> > > >  		.priority = DEF_PRIORITY,
> > > >  		.may_unmap = 1,
> > > >  	};
> > > > -	unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
> > > > +	unsigned long ret;
> > > > +	struct shrink_result dummy = { };
> > > 
> > > You didn't like the idea of making this static?
> > Sorry! It's my negligence.
> > If we make dummy static, it can help us save more stack.
> > 
> > without change:  
> > 0xffffffff810aede8 reclaim_clean_pages_from_list []:	184
> > 0xffffffff810aeef8 reclaim_clean_pages_from_list []:	184
> > 
> > with change: struct shrink_result dummy = {};
> > 0xffffffff810aed6c reclaim_clean_pages_from_list []:	152
> > 0xffffffff810aee68 reclaim_clean_pages_from_list []:	152
> > 
> > with change: static struct shrink_result dummy ={};
> > 0xffffffff810aed69 reclaim_clean_pages_from_list []:	120
> > 0xffffffff810aee4d reclaim_clean_pages_from_list []:	120
> 
> FWIW, I copied bloat-o-meter and hacked up a quick comparison tool
> that you can feed two outputs of checkstack.pl for a whole vmlinux and
> it shows you the delta.
> 
> The output for your patch (with the static dummy) looks like this:
> 
> +0/-240 -240
> shrink_inactive_list                         136     112     -24
> shrink_page_list                             208     160     -48
> reclaim_clean_pages_from_list                168       -    -168
> 
> (The stack footprint for reclaim_clean_pages_from_list is actually 96
> after your patch, but checkstack.pl skips frames under 100)
> 
Thanks very much for your comparison tool. Its output is more concise.

thx!
cyc

gcc version 4.7.3 (Gentoo 4.7.3-r1 p1.4, pie-0.5.5)
kernel version 3.15(stable)
Intel(R) Core(TM)2 Duo CPU     T5670  @ 1.80GHz

The output for this patch (with the static dummy) is:

+0/-144 -144
shrink_inactive_list                         152     120     -32
shrink_page_list                             232     184     -48
reclaim_clean_pages_from_list                184     120     -64

-------
gcc version 4.7.2 (Debian 4.7.2-5)
kernel version 3.15(stable)
Intel(R) Core(TM) i5-2320 CPU @ 3.00GHz

The output for this patch (with the static dummy) is:

shrink_inactive_list                         136     120     -16
shrink_page_list                             216     168     -48
reclaim_clean_pages_from_list                184     120     -64



^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2014-06-14  3:06 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-13  4:36 [PATCH v2] mm/vmscan.c: wrap five parameters into shrink_result for reducing the stack consumption Chen Yucong
2014-06-13  4:40 ` Andrew Morton
2014-06-13  5:21   ` Chen Yucong
2014-06-13 16:28     ` Johannes Weiner
2014-06-14  3:04       ` Chen Yucong
2014-06-13  4:52 ` Konstantin Khlebnikov
2014-06-13  5:21   ` Johannes Weiner
2014-06-13 10:21     ` Konstantin Khlebnikov
2014-06-13  5:10 ` Johannes Weiner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).