From: Shaohua Li <shaohua.li@intel.com>
To: "linux-mm@kvack.org" <linux-mm@kvack.org>
Cc: Andrew Morton <akpm@linux-foundation.org>, Mel Gorman <mel@csn.ul.ie>
Subject: [RFC]pagealloc: compensate a task for direct page reclaim
Date: Thu, 16 Sep 2010 19:26:36 +0800
Message-ID: <1284636396.1726.5.camel@shli-laptop>

A task that enters direct page reclaim frees some memory, but sometimes it
still can't get a free page afterwards, because other tasks grab the freed
pages first (this is quite common in a multi-task workload in my test). This
adds extra latency for the reclaiming task and is unfair: the task has already
paid the reclaim penalty, so we'd better give it some compensation. If a task
frees pages during direct page reclaim, we cache one of the freed pages for it,
so the task gets a page right away. Only order-0 allocations are handled,
because it's hard to cache an order > 0 page.
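
In short, the intended flow is the following (a rough sketch only; the real
code is in the patch below):

	/* Sketch only: simplified from __alloc_pages_direct_reclaim() below. */
	reclaim_state.cached_page = (order == 0) ? &page : NULL;
	try_to_free_pages(zonelist, order, gfp_mask, nodemask);
	if (page) {
		/*
		 * free_page_list() stashed one reclaimed page for us instead
		 * of returning it to the buddy allocator, so no other task
		 * can race us for it.
		 */
		prepare_cached_page(page, gfp_mask);
		return page;
	}
	/* Otherwise fall back to get_page_from_freelist() as before. */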

Below is trace output from tasks that freed some pages in try_to_free_pages()
but whose subsequent get_page_from_freelist() during direct page reclaim still
failed to get a page (a sketch of the debugging hook assumed to have produced
these lines follows the trace).

<...>-809   [004]   730.218991: __alloc_pages_nodemask: progress 147, order 0, pid 809, comm mmap_test
<...>-806   [001]   730.237969: __alloc_pages_nodemask: progress 147, order 0, pid 806, comm mmap_test
<...>-810   [005]   730.237971: __alloc_pages_nodemask: progress 147, order 0, pid 810, comm mmap_test
<...>-809   [004]   730.237972: __alloc_pages_nodemask: progress 147, order 0, pid 809, comm mmap_test
<...>-811   [006]   730.241409: __alloc_pages_nodemask: progress 147, order 0, pid 811, comm mmap_test
<...>-809   [004]   730.241412: __alloc_pages_nodemask: progress 147, order 0, pid 809, comm mmap_test
<...>-812   [007]   730.241435: __alloc_pages_nodemask: progress 147, order 0, pid 812, comm mmap_test
<...>-809   [004]   730.245036: __alloc_pages_nodemask: progress 147, order 0, pid 809, comm mmap_test
<...>-809   [004]   730.260360: __alloc_pages_nodemask: progress 147, order 0, pid 809, comm mmap_test
<...>-805   [000]   730.260362: __alloc_pages_nodemask: progress 147, order 0, pid 805, comm mmap_test
<...>-811   [006]   730.263877: __alloc_pages_nodemask: progress 147, order 0, pid 811, comm mmap_test
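
The trace lines above are not produced by this patch itself; a minimal sketch
of the kind of debugging trace_printk() that could generate them (its exact
placement in the reclaim slow path and the format string are assumptions
inferred from the output) is:

	/*
	 * Debugging-only sketch, not part of this patch: report tasks that
	 * made reclaim progress but still failed to allocate a page.
	 */
	if (!page && *did_some_progress)
		trace_printk("progress %lu, order %u, pid %d, comm %s\n",
			     *did_some_progress, order,
			     current->pid, current->comm);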

Signed-off-by: Shaohua Li <shaohua.li@intel.com>
---
 include/linux/swap.h |    1 +
 mm/page_alloc.c      |   23 +++++++++++++++++++++++
 mm/vmscan.c          |   10 ++++++++++
 3 files changed, 34 insertions(+)

Index: linux/include/linux/swap.h
===================================================================
--- linux.orig/include/linux/swap.h	2010-09-16 11:01:56.000000000 +0800
+++ linux/include/linux/swap.h	2010-09-16 11:03:07.000000000 +0800
@@ -109,6 +109,7 @@ typedef struct {
  */
 struct reclaim_state {
 	unsigned long reclaimed_slab;
+	struct page **cached_page;
 };
 
 #ifdef __KERNEL__
Index: linux/mm/page_alloc.c
===================================================================
--- linux.orig/mm/page_alloc.c	2010-09-16 11:01:56.000000000 +0800
+++ linux/mm/page_alloc.c	2010-09-16 16:51:12.000000000 +0800
@@ -1837,6 +1837,21 @@ __alloc_pages_direct_compact(gfp_t gfp_m
 }
 #endif /* CONFIG_COMPACTION */
 
+static void prepare_cached_page(struct page *page, gfp_t gfp_mask)
+{
+	int wasMlocked = __TestClearPageMlocked(page);
+	unsigned long flags;
+
+	if (!free_pages_prepare(page, 0))
+		return;
+
+	local_irq_save(flags);
+	if (unlikely(wasMlocked))
+		free_page_mlock(page);
+	local_irq_restore(flags);
+	prep_new_page(page, 0, gfp_mask);
+}
+
 /* The really slow allocator path where we enter direct reclaim */
 static inline struct page *
 __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
@@ -1856,6 +1871,10 @@ __alloc_pages_direct_reclaim(gfp_t gfp_m
 	p->flags |= PF_MEMALLOC;
 	lockdep_set_current_reclaim_state(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
+	if (order == 0)
+		reclaim_state.cached_page = &page;
+	else
+		reclaim_state.cached_page = NULL;
 	p->reclaim_state = &reclaim_state;
 
 	*did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask);
@@ -1864,6 +1883,10 @@ __alloc_pages_direct_reclaim(gfp_t gfp_m
 	lockdep_clear_current_reclaim_state();
 	p->flags &= ~PF_MEMALLOC;
 
+	if (page) {
+		prepare_cached_page(page, gfp_mask);
+		return page;
+	}
 	cond_resched();
 
 	if (unlikely(!(*did_some_progress)))
Index: linux/mm/vmscan.c
===================================================================
--- linux.orig/mm/vmscan.c	2010-09-16 11:01:56.000000000 +0800
+++ linux/mm/vmscan.c	2010-09-16 11:03:07.000000000 +0800
@@ -626,9 +626,17 @@ static noinline_for_stack void free_page
 {
 	struct pagevec freed_pvec;
 	struct page *page, *tmp;
+	struct reclaim_state *reclaim_state = current->reclaim_state;
 
 	pagevec_init(&freed_pvec, 1);
 
+	if (!list_empty(free_pages) && reclaim_state &&
+			reclaim_state->cached_page) {
+		page = list_entry(free_pages->next, struct page, lru);
+		list_del(&page->lru);
+		*reclaim_state->cached_page = page;
+	}
+
 	list_for_each_entry_safe(page, tmp, free_pages, lru) {
 		list_del(&page->lru);
 		if (!pagevec_add(&freed_pvec, page)) {
@@ -2467,6 +2475,7 @@ unsigned long shrink_all_memory(unsigned
 	p->flags |= PF_MEMALLOC;
 	lockdep_set_current_reclaim_state(sc.gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
+	reclaim_state.cached_page = NULL;
 	p->reclaim_state = &reclaim_state;
 
 	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
@@ -2655,6 +2664,7 @@ static int __zone_reclaim(struct zone *z
 	p->flags |= PF_MEMALLOC | PF_SWAPWRITE;
 	lockdep_set_current_reclaim_state(gfp_mask);
 	reclaim_state.reclaimed_slab = 0;
+	reclaim_state.cached_page = NULL;
 	p->reclaim_state = &reclaim_state;
 
 	if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {


