Date: Thu, 9 Dec 2010 20:10:33 +0100
From: Andrea Arcangeli
To: Mel Gorman
Cc: linux-mm@kvack.org, Linus Torvalds, Andrew Morton,
    linux-kernel@vger.kernel.org, Marcelo Tosatti, Adam Litke, Avi Kivity,
    Hugh Dickins, Rik van Riel, Dave Hansen, Benjamin Herrenschmidt,
    Ingo Molnar, Mike Travis, KAMEZAWA Hiroyuki, Christoph Lameter,
    Chris Wright, bpicco@redhat.com, KOSAKI Motohiro, Balbir Singh,
    "Michael S. Tsirkin", Peter Zijlstra, Johannes Weiner,
    Daisuke Nishimura, Chris Mason, Borislav Petkov
Subject: Re: [PATCH 61 of 66] use compaction for GFP_ATOMIC order > 0
Message-ID: <20101209191033.GK19131@random.random>
In-Reply-To: <20101118163124.GG8135@csn.ul.ie>
References: <20101118163124.GG8135@csn.ul.ie>

On Thu, Nov 18, 2010 at 04:31:24PM +0000, Mel Gorman wrote:
> I don't think this is related to THP although I see what you're doing.
> It should be handled on its own. I'd also wonder if some of the tg3
> failures are due to MIGRATE_RESERVE not being set properly when
> min_free_kbytes is automatically resized.

The failures also happened on older kernels, except they wouldn't show up
in the kernel logs, but you're right that we may do better with the
migrate reserve enabled on huge systems too (it seems hugeadm
--set-recommended-min_free_kbytes was doing a little more than its kernel
counterpart on large systems with tons of RAM; now fixed).

My status as far as this patch is concerned:

=========
Subject: use compaction for all allocation orders

From: Andrea Arcangeli

It makes no sense not to enable compaction for small-order pages too: we
don't want to end up with failing order-2 allocations while order-9
allocations are handled gracefully.

Signed-off-by: Andrea Arcangeli
---
diff --git a/mm/compaction.c b/mm/compaction.c
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -476,7 +495,7 @@ unsigned long try_to_compact_pages(struc
 	 * made because an assumption is made that the page allocator can satisfy
 	 * the "cheaper" orders without taking special steps
 	 */
-	if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io)
+	if (!order || !may_enter_fs || !may_perform_io)
		return rc;
 
 	count_vm_event(COMPACTSTALL);
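
As an illustration of what the one-liner above changes (a user-space
sketch only, not kernel code; PAGE_ALLOC_COSTLY_ORDER is hard-coded here
to its upstream value of 3, and in the kernel may_enter_fs/may_perform_io
are derived from the gfp_mask):

/* Compile with: cc -std=c99 -Wall costly_order_sketch.c */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_ALLOC_COSTLY_ORDER 3

/* Old gate: direct compaction only ran for "costly" orders (> 3). */
static bool old_may_compact(unsigned int order, bool may_enter_fs,
			    bool may_perform_io)
{
	return order > PAGE_ALLOC_COSTLY_ORDER && may_enter_fs && may_perform_io;
}

/* New gate: any order > 0 allocation may trigger direct compaction. */
static bool new_may_compact(unsigned int order, bool may_enter_fs,
			    bool may_perform_io)
{
	return order > 0 && may_enter_fs && may_perform_io;
}

int main(void)
{
	for (unsigned int order = 0; order <= 9; order++)
		printf("order %u: old=%d new=%d\n", order,
		       old_may_compact(order, true, true),
		       new_may_compact(order, true, true));
	return 0;
}

With the old gate, order 1-3 allocations could never use direct
compaction and had to rely on reclaim alone; with the new gate only
order-0 allocations skip it. GFP_ATOMIC allocations still cannot compact
directly (no fs/io allowed), which is what the kswapd patch below
addresses.
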
===========
Subject: use compaction in kswapd for GFP_ATOMIC order > 0

From: Andrea Arcangeli

This takes advantage of memory compaction to properly generate pages of
order > 0 when regular page reclaim fails, the priority level becomes
more severe and the proper watermarks still aren't reached.

Signed-off-by: Andrea Arcangeli
---
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -11,6 +11,9 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	3
 
+#define COMPACT_MODE_DIRECT_RECLAIM	0
+#define COMPACT_MODE_KSWAPD		1
+
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -20,6 +23,9 @@ extern int sysctl_extfrag_handler(struct
 			void __user *buffer, size_t *length, loff_t *ppos);
 
 extern int fragmentation_index(struct zone *zone, unsigned int order);
+extern unsigned long compact_zone_order(struct zone *zone,
+					int order, gfp_t gfp_mask,
+					int compact_mode);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask);
 
@@ -59,6 +65,13 @@ static inline unsigned long try_to_compa
 	return COMPACT_CONTINUE;
 }
 
+static inline unsigned long compact_zone_order(struct zone *zone,
+					       int order, gfp_t gfp_mask,
+					       int compact_mode)
+{
+	return COMPACT_CONTINUE;
+}
+
 static inline void defer_compaction(struct zone *zone)
 {
 }
diff --git a/mm/compaction.c b/mm/compaction.c
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -38,6 +38,8 @@ struct compact_control {
 	unsigned int order;		/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
+
+	int compact_mode;
 };
 
 static unsigned long release_freepages(struct list_head *freelist)
@@ -357,10 +359,10 @@ static void update_nr_listpages(struct c
 }
 
 static int compact_finished(struct zone *zone,
-			struct compact_control *cc)
+			    struct compact_control *cc)
 {
 	unsigned int order;
-	unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
+	unsigned long watermark;
 
 	if (fatal_signal_pending(current))
 		return COMPACT_PARTIAL;
@@ -370,12 +372,27 @@ static int compact_finished(struct zone
 		return COMPACT_COMPLETE;
 
 	/* Compaction run is not finished if the watermark is not met */
+	if (cc->compact_mode != COMPACT_MODE_KSWAPD)
+		watermark = low_wmark_pages(zone);
+	else
+		watermark = high_wmark_pages(zone);
+	watermark += (1 << cc->order);
+
 	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
 		return COMPACT_CONTINUE;
 
 	if (cc->order == -1)
 		return COMPACT_CONTINUE;
 
+	/*
+	 * Generating only one page of the right order is not enough
+	 * for kswapd, we must continue until we're above the high
+	 * watermark as a pool for high order GFP_ATOMIC allocations
+	 * too.
+	 */
+	if (cc->compact_mode == COMPACT_MODE_KSWAPD)
+		return COMPACT_CONTINUE;
+
 	/* Direct compactor: Is a suitable page free? */
 	for (order = cc->order; order < MAX_ORDER; order++) {
 		/* Job done if page is free of the right migratetype */
@@ -433,8 +450,9 @@ static int compact_zone(struct zone *zon
 	return ret;
 }
 
-static unsigned long compact_zone_order(struct zone *zone,
-					int order, gfp_t gfp_mask)
+unsigned long compact_zone_order(struct zone *zone,
+				 int order, gfp_t gfp_mask,
+				 int compact_mode)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -442,6 +460,7 @@ static unsigned long compact_zone_order(
 		.order = order,
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
+		.compact_mode = compact_mode,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -517,7 +536,8 @@ unsigned long try_to_compact_pages(struc
 			break;
 		}
 
-		status = compact_zone_order(zone, order, gfp_mask);
+		status = compact_zone_order(zone, order, gfp_mask,
+					    COMPACT_MODE_DIRECT_RECLAIM);
 		rc = max(status, rc);
 
 		if (zone_watermark_ok(zone, order, watermark, 0, 0))
@@ -547,6 +567,7 @@ static int compact_node(int nid)
 			.nr_freepages = 0,
 			.nr_migratepages = 0,
 			.order = -1,
+			.compact_mode = COMPACT_MODE_DIRECT_RECLAIM,
 		};
 
 		zone = &pgdat->node_zones[zoneid];
diff --git a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -40,6 +40,7 @@
 #include
 #include
 #include
+#include <linux/compaction.h>
 #include
 #include
@@ -2148,6 +2149,7 @@ loop_again:
 		 * cause too much scanning of the lower zones.
 		 */
 		for (i = 0; i <= end_zone; i++) {
+			int compaction;
 			struct zone *zone = pgdat->node_zones + i;
 			int nr_slab;
@@ -2177,9 +2179,26 @@ loop_again:
 						lru_pages);
 			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
+
+			compaction = 0;
+			if (order &&
+			    zone_watermark_ok(zone, 0,
+					      high_wmark_pages(zone),
+					      end_zone, 0) &&
+			    !zone_watermark_ok(zone, order,
+					       high_wmark_pages(zone),
+					       end_zone, 0)) {
+				compact_zone_order(zone,
+						   order,
+						   sc.gfp_mask,
+						   COMPACT_MODE_KSWAPD);
+				compaction = 1;
+			}
+
 			if (zone->all_unreclaimable)
 				continue;
-			if (nr_slab == 0 && !zone_reclaimable(zone))
+			if (!compaction && nr_slab == 0 &&
+			    !zone_reclaimable(zone))
 				zone->all_unreclaimable = 1;
 			/*
 			 * If we've done a decent amount of scanning and
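
To spell out the kswapd side in plain form (again a user-space sketch
with simplified stand-ins for struct zone and zone_watermark_ok(), not
the kernel code): kswapd only calls into compaction when a zone already
meets its order-0 high watermark but still fails the order-N check, i.e.
when the problem is fragmentation rather than a shortage of free pages:

#include <stdbool.h>
#include <stdio.h>

/* Crude stand-in for struct zone: just the fields the sketch needs. */
struct zone_state {
	unsigned long free_pages;	  /* free pages in the zone */
	unsigned int  largest_free_order; /* largest free block available */
	unsigned long high_wmark;	  /* high watermark, in pages */
};

/*
 * Crude stand-in for zone_watermark_ok(): the real one walks the
 * per-order free counters and applies lowmem reserves; "enough free
 * pages plus one block of the requested order" is enough to show the idea.
 */
static bool watermark_ok(const struct zone_state *z, unsigned int order)
{
	return z->free_pages >= z->high_wmark + (1UL << order) &&
	       z->largest_free_order >= order;
}

/* The condition added to the kswapd loop above, in isolation. */
static bool kswapd_should_compact(const struct zone_state *z, unsigned int order)
{
	return order > 0 && watermark_ok(z, 0) && !watermark_ok(z, order);
}

int main(void)
{
	/* Plenty of free memory, but nothing bigger than order 1: fragmented. */
	struct zone_state z = {
		.free_pages = 4096,
		.largest_free_order = 1,
		.high_wmark = 1024,
	};

	printf("compact for order 3? %d\n", kswapd_should_compact(&z, 3));
	return 0;
}

Combined with the COMPACT_MODE_KSWAPD check in compact_finished(), kswapd
then keeps compacting until the zone is back above the high watermark at
the requested order, leaving a pool of order > 0 pages for GFP_ATOMIC
allocations that cannot run direct compaction themselves.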