linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@osdl.org>
To: mel@csn.ul.ie (Mel Gorman)
Cc: lhms-devel@lists.sourceforge.net, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, Dave Hansen <haveblue@us.ibm.com>,
	Andy Whitcroft <apw@shadowen.org>,
	Linus Torvalds <torvalds@osdl.org>
Subject: Re: [PATCH] BUG: gfp_zone() not mapping zone modifiers correctly and bad ordering of fallback lists
Date: Fri, 13 Jan 2006 12:16:52 -0800	[thread overview]
Message-ID: <20060113121652.114941a3.akpm@osdl.org> (raw)
In-Reply-To: <20060113155026.GA4811@skynet.ie>

mel@csn.ul.ie (Mel Gorman) wrote:
>
> Hi Andrew,
> 
> This patch is divided into two parts and addresses a bug in how zone
> fallback lists are calculated and how __GFP_* zone modifiers are mapped to
> their equivilant ZONE_* type. It applies to 2.6.15-mm3 and has been tested
> on x86 and ppc64. It has been reported by Yasunori Goto that it boots on
> ia64. Details as follows;

This stuff is nasty.  It'd be nice to split the patch into two (always)
(please).

> build_zonelists() attempts to be smart, and uses highest_zone() so that it
> doesn't attempt to call build_zonelists_node() for empty zones.  However,
> build_zonelists_node() is smart enough to do the right thing by itself and
> build_zonelists() already has the zone index that highest_zone() is meant
> to provide. So, remove the unnecessary function highest_zone().

Dave, Andy: could you please have a think about the fallback list thing?

> The helper function gfp_zone() assumes that the bits used in the zone modifier
> of a GFP flag maps directory on to their ZONE_* equivalent and just applies a
> mask. However, the bits do not map directly and the wrong fallback lists can
> be used. If unluckly, the system can go OOM when plenty of suitable memory
> is available. This patch redefines the __GFP_ zone modifier flags to allow
> a simple mapping to their equivilant ZONE_ type.

Linus, you had a look at this a few weeks ago and I think you had
objections to the gfp-modifiers-are-bitmasks approach?

> Signed-off-by: Mel Gorman <mel@csn.ul.ie>
> diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.15-mm3-clean/include/linux/gfp.h linux-2.6.15-mm3-001_fallbacks/include/linux/gfp.h
> --- linux-2.6.15-mm3-clean/include/linux/gfp.h	2006-01-11 14:24:18.000000000 +0000
> +++ linux-2.6.15-mm3-001_fallbacks/include/linux/gfp.h	2006-01-13 09:18:09.000000000 +0000
> @@ -11,15 +11,19 @@ struct vm_area_struct;
>  /*
>   * GFP bitmasks..
>   */
> -/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
> -#define __GFP_DMA	((__force gfp_t)0x01u)
> -#define __GFP_HIGHMEM	((__force gfp_t)0x02u)
> +/*
> + * Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) 
> + * The values here are mapped to their equivilant ZONE_* type by gfp_zone
> + * which makes assumptions on the value of these bits.
> + */
> +#define __GFP_DMA	((__force gfp_t)0x02u)
> +#define __GFP_HIGHMEM	((__force gfp_t)0x01u)
>  #ifdef CONFIG_DMA_IS_DMA32
> -#define __GFP_DMA32	((__force gfp_t)0x01)	/* ZONE_DMA is ZONE_DMA32 */
> +#define __GFP_DMA32	((__force gfp_t)0x02u)	/* ZONE_DMA is ZONE_DMA32 */
>  #elif BITS_PER_LONG < 64
> -#define __GFP_DMA32	((__force gfp_t)0x00)	/* ZONE_NORMAL is ZONE_DMA32 */
> +#define __GFP_DMA32	((__force gfp_t)0x00u)	/* ZONE_NORMAL is ZONE_DMA32 */
>  #else
> -#define __GFP_DMA32	((__force gfp_t)0x04)	/* Has own ZONE_DMA32 */
> +#define __GFP_DMA32	((__force gfp_t)0x03u)	/* Has own ZONE_DMA32 */
>  #endif
>  
>  /*
> @@ -75,10 +79,15 @@ struct vm_area_struct;
>  #define GFP_DMA32	__GFP_DMA32
>  
>  
> +/**
> + * GFP zone modifiers need to be mapped to their equivilant ZONE_ value defined
> + * in linux/mmzone.h to determine what fallback list should be used for the
> + * allocation.
> + */
>  static inline int gfp_zone(gfp_t gfp)
>  {
> -	int zone = GFP_ZONEMASK & (__force int) gfp;
> -	BUG_ON(zone >= GFP_ZONETYPES);
> +	int zone = ((__force int) gfp & GFP_ZONEMASK) ^ 0x2;
> +	BUG_ON(zone >= MAX_NR_ZONES);
>  	return zone;
>  }
>  
> diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.15-mm3-clean/mm/page_alloc.c linux-2.6.15-mm3-001_fallbacks/mm/page_alloc.c
> --- linux-2.6.15-mm3-clean/mm/page_alloc.c	2006-01-11 14:24:18.000000000 +0000
> +++ linux-2.6.15-mm3-001_fallbacks/mm/page_alloc.c	2006-01-13 09:10:17.000000000 +0000
> @@ -1577,18 +1577,6 @@ static int __init build_zonelists_node(p
>  	return nr_zones;
>  }
>  
> -static inline int highest_zone(int zone_bits)
> -{
> -	int res = ZONE_NORMAL;
> -	if (zone_bits & (__force int)__GFP_HIGHMEM)
> -		res = ZONE_HIGHMEM;
> -	if (zone_bits & (__force int)__GFP_DMA32)
> -		res = ZONE_DMA32;
> -	if (zone_bits & (__force int)__GFP_DMA)
> -		res = ZONE_DMA;
> -	return res;
> -}
> -
>  #ifdef CONFIG_NUMA
>  #define MAX_NODE_LOAD (num_online_nodes())
>  static int __initdata node_load[MAX_NUMNODES];
> @@ -1690,13 +1678,11 @@ static void __init build_zonelists(pg_da
>  			node_load[node] += load;
>  		prev_node = node;
>  		load--;
> -		for (i = 0; i < GFP_ZONETYPES; i++) {
> +		for (i = 0; i < MAX_NR_ZONES; i++) {
>  			zonelist = pgdat->node_zonelists + i;
>  			for (j = 0; zonelist->zones[j] != NULL; j++);
>  
> -			k = highest_zone(i);
> -
> -	 		j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
> +	 		j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
>  			zonelist->zones[j] = NULL;
>  		}
>  	}
> @@ -1706,17 +1692,16 @@ static void __init build_zonelists(pg_da
>  
>  static void __init build_zonelists(pg_data_t *pgdat)
>  {
> -	int i, j, k, node, local_node;
> +	int i, j, node, local_node;
>  
>  	local_node = pgdat->node_id;
> -	for (i = 0; i < GFP_ZONETYPES; i++) {
> +	for (i = 0; i < MAX_NR_ZONES; i++) {
>  		struct zonelist *zonelist;
>  
>  		zonelist = pgdat->node_zonelists + i;
>  
>  		j = 0;
> -		k = highest_zone(i);
> - 		j = build_zonelists_node(pgdat, zonelist, j, k);
> + 		j = build_zonelists_node(pgdat, zonelist, j, i);
>   		/*
>   		 * Now we build the zonelist so that it contains the zones
>   		 * of all the other nodes.
> @@ -1728,12 +1713,12 @@ static void __init build_zonelists(pg_da
>  		for (node = local_node + 1; node < MAX_NUMNODES; node++) {
>  			if (!node_online(node))
>  				continue;
> -			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
> +			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
>  		}
>  		for (node = 0; node < local_node; node++) {
>  			if (!node_online(node))
>  				continue;
> -			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
> +			j = build_zonelists_node(NODE_DATA(node), zonelist, j, i);
>  		}
>  
>  		zonelist->zones[j] = NULL;

  parent reply	other threads:[~2006-01-13 20:19 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-01-13 15:50 [PATCH] BUG: gfp_zone() not mapping zone modifiers correctly and bad ordering of fallback lists Mel Gorman
2006-01-13 18:13 ` Mika Penttilä
2006-01-15 14:00   ` Mel Gorman
2006-01-13 20:16 ` Andrew Morton [this message]
2006-01-14  2:24   ` Dave Hansen
2006-01-14 18:29     ` Mel Gorman
2006-01-19 21:52     ` [PATCH 1/2] GFP_ZONETYPES add commentry on how to calculate Andy Whitcroft
2006-01-19 21:52     ` [PATCH 2/2] GFP_ZONETYPES calculate from GFP_ZONEMASK Andy Whitcroft
2006-01-16 15:57   ` [PATCH] BUG: gfp_zone() not mapping zone modifiers correctly and bad ordering of fallback lists Andy Whitcroft

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060113121652.114941a3.akpm@osdl.org \
    --to=akpm@osdl.org \
    --cc=apw@shadowen.org \
    --cc=haveblue@us.ibm.com \
    --cc=lhms-devel@lists.sourceforge.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).