zoned-2.3.27-E0

Message ID Pine.LNX.4.10.9911121502120.7240-200000@chiara.csoma.elte.hu
State New, archived
Headers show
Series
  • zoned-2.3.27-E0
Related show

Commit Message

Ingo Molnar Nov. 12, 1999, 2:02 p.m. UTC
Stephen noticed that 2.3.27 doesn't boot on <=16MB boxes due to the zoned
allocator changes. The attached patch should fix this. Unfortunately I
found no way to prevent introducing the runtime 'nr_zones' variable.

-- mingo

Comments

Stephen C. Tweedie Nov. 12, 1999, 5:33 p.m. UTC | #1
Hi,

On Fri, 12 Nov 1999 15:02:31 +0100 (CET), Ingo Molnar
<mingo@chiara.csoma.elte.hu> said:

> Stephen noticed that 2.3.27 doesn't boot on <=16MB boxes due to the zoned
> allocator changes. The attached patch should fix this. Unfortunately I
> found no way to prevent introducing the runtime 'nr_zones' variable.

A quick special-case check on zones known to be empty would allow you to
maintain performance even if you have zones which will never have any
pages in them on a given machine.

You need this anyway --- Alan pointed out that it is a significant hit
on benchmarks if, during normal running, one zone fills up and you start
falling back routinely to a lower zone.

--Stephen

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/
Ingo Molnar Nov. 12, 1999, 6:49 p.m. UTC | #2
On Fri, 12 Nov 1999, Stephen C. Tweedie wrote:

> > Stephen noticed that 2.3.27 doesn't boot on <=16MB boxes due to the zoned
> > allocator changes. The attached patch should fix this. Unfortunately I
> > found no way to prevent introducing the runtime 'nr_zones' variable.
> 
> A quick special-case check on zones known to be empty would allow you to
> maintain performance even if you have zones which will never have any
> pages in them on a given machine.

Yes, I first did something like this, but it's just as slow in the end.
(Well, there is just an academic slowdown anyway.)

> You need this anyway --- Alan pointed out that it is a significant hit
> on benchmarks if, during normal running, one zone fills up and you
> start falling back routinely to a lower zone.

At that point we are wasting much more time already walking page tables in
kswapd and try_to_free_pages to free RAM.

And we'd have to get the spinlock to rely on zone->free_pages, and for any
non-page-sized allocation request zone->free_pages is not authoritative.

-- mingo


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/

Patch

--- linux/mm/page_alloc.c.orig	Fri Nov 12 01:34:46 1999
+++ linux/mm/page_alloc.c	Fri Nov 12 02:27:22 1999
@@ -51,11 +51,13 @@ 
 
 #ifdef CONFIG_HIGHMEM
 # define ZONE_HIGHMEM		2
-# define NR_ZONES		3
+# define MAX_NR_ZONES		3
 #else
-# define NR_ZONES		2
+# define MAX_NR_ZONES		2
 #endif
 
+int nr_zones = MAX_NR_ZONES;
+
 typedef struct zone_struct {
 	spinlock_t lock;
 	unsigned long offset;
@@ -68,7 +70,7 @@ 
 	char * name;
 } zone_t;
 
-static zone_t zones[NR_ZONES] =
+static zone_t zones[MAX_NR_ZONES] =
 	{
 		{ name: "DMA" },
 		{ name: "Normal" },
@@ -120,12 +122,12 @@ 
 	/*
 	 * Which zone is this page belonging to.
 	 *
-	 * (NR_ZONES is low, and we do not want (yet) to introduce
+	 * (nr_zones is low, and we do not want (yet) to introduce
 	 * put page->zone, it increases the size of mem_map[]
 	 * unnecesserily. This small loop is basically equivalent
 	 * to the previous #ifdef jungle, speed-wise.)
 	 */
-	i = NR_ZONES-1;
+	i = nr_zones-1;
 	zone = zones + i;
 	for ( ; i >= 0; i--, zone--)
 		if (map_nr >= zone->offset)
@@ -345,7 +347,7 @@ 
 
 static inline zone_t * gfp_mask_to_zone (int gfp_mask)
 {
-	zone_t *zone;
+	zone_t *zone, *limit;
 
 #if CONFIG_HIGHMEM
 	if (gfp_mask & __GFP_HIGHMEM)
@@ -356,6 +358,9 @@ 
 			zone = zones + ZONE_DMA;
 		else
 			zone = zones + ZONE_NORMAL;
+	limit = zones + nr_zones-1;
+	if (zone > limit)
+		zone = limit;
 	return zone;
 }
 
@@ -383,7 +388,7 @@ 
 	zone_t *zone;
 
 	sum = 0;
-	for (zone = zones; zone < zones+NR_ZONES; zone++)
+	for (zone = zones; zone < zones+nr_zones; zone++)
 		sum += zone->free_pages;
 	return sum;
 }
@@ -429,7 +434,7 @@ 
 		freepages.low,
 		freepages.high);
 
-	for (type = 0; type < NR_ZONES; type++) {
+	for (type = 0; type < nr_zones; type++) {
 		zone_t *zone = zones + type;
  		unsigned long total = 0;
 
@@ -466,12 +471,12 @@ 
 void __init free_area_init(unsigned int *zones_size)
 {
 	mem_map_t * p;
-	unsigned long i, j;
+	int i, j;
 	unsigned long map_size;
 	unsigned int totalpages, offset;
 
 	totalpages = 0;
-	for (i = 0; i < NR_ZONES; i++)
+	for (i = 0; i < nr_zones; i++)
 		totalpages += zones_size[i];
 	printk("totalpages: %08x\n", totalpages);
 
@@ -514,12 +519,18 @@ 
 	}
 
 	offset = 0;	
-	for (j = 0; j < NR_ZONES; j++) {
+	for (j = 0; j < MAX_NR_ZONES; j++) {
 		zone_t *zone = zones + j;
 		unsigned long mask = -1;
 		unsigned long size;
 
 		size = zones_size[j];
+		printk("zone %d, size %08lx\n", j, size);
+		if (!size) {
+			printk("setting nr_zones to %d\n", nr_zones);
+			nr_zones = j;
+			break;
+		}
 		zone->size = size;
 		zone->offset = offset;
 		zone->pages_low = freepages.low;
--- linux/arch/i386/mm/init.c.orig	Fri Nov 12 01:42:51 1999
+++ linux/arch/i386/mm/init.c	Fri Nov 12 01:45:14 1999
@@ -448,12 +448,18 @@ 
 	kmap_init();
 #endif
 	{
-		unsigned int zones_size[3];
+		unsigned int max_dma_pfn;
+		unsigned int zones_size[3] = { 0, 0, 0 };
 
-		zones_size[0] = virt_to_phys((char *)MAX_DMA_ADDRESS)
-					 >> PAGE_SHIFT;
-		zones_size[1] = max_low_pfn - zones_size[0];
-		zones_size[2] = highend_pfn - zones_size[0] - zones_size[1];
+ 		max_dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS)>>PAGE_SHIFT;
+		if (max_low_pfn < max_dma_pfn)
+			zones_size[0] = max_low_pfn;
+		else {
+			zones_size[0] = max_dma_pfn;
+			zones_size[1] = max_low_pfn - zones_size[0];
+			zones_size[2] = highend_pfn - zones_size[0]
+							 - zones_size[1];
+		}
 
 		free_area_init(zones_size);
 	}