* [RFC][patch] mm: single pcp lists
@ 2006-02-22 14:32 Nick Piggin
  2006-02-22 14:33 ` [patch] mm: pcp stats Nick Piggin
  2006-02-22 22:20 ` [RFC][patch] mm: single pcp lists Joel Schopp
  0 siblings, 2 replies; 4+ messages in thread
From: Nick Piggin @ 2006-02-22 14:32 UTC (permalink / raw)
  To: Linux Kernel Mailing List, Linux Memory Management List

Having a hot and a cold pcp list means that:

- cold pages are overlooked when a hot page is needed but none is available.
- when the hot list spills over, it does not top up the cold list even if
  that list is low.

Use a single pcp list to solve both of these problems, but still disallow
cold page allocations from taking hot pages.
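
To make the policy concrete, here is a small standalone userspace sketch
(not kernel code; the fake_page/pset names are made up for illustration,
and the batch refill/spill paths are left out). Hot frees go to the head of
the single list, cold frees to the tail, hot allocations take from the
head, cold allocations take from the tail, and cold_count tracks how many
pages at the tail end are cold:

#include <stdio.h>

struct fake_page {
	struct fake_page *next, *prev;
	int id;
};

struct pset {
	struct fake_page head;	/* sentinel: head.next is hottest, head.prev is coldest */
	int count;		/* pages on the list */
	int cold_count;		/* cold pages at the tail end */
};

static void pset_init(struct pset *p)
{
	p->head.next = p->head.prev = &p->head;
	p->count = p->cold_count = 0;
}

static void free_page_pcp(struct pset *p, struct fake_page *pg, int cold)
{
	/* hot pages are inserted at the head, cold pages at the tail */
	struct fake_page *at = cold ? p->head.prev : &p->head;

	pg->prev = at;
	pg->next = at->next;
	at->next->prev = pg;
	at->next = pg;
	p->count++;
	if (cold)
		p->cold_count++;
}

static struct fake_page *alloc_page_pcp(struct pset *p, int cold)
{
	struct fake_page *pg;

	if (!p->count)
		return NULL;	/* caller would refill via rmqueue_bulk() */
	pg = cold ? p->head.prev : p->head.next;
	pg->prev->next = pg->next;
	pg->next->prev = pg->prev;
	p->count--;
	if (cold && p->cold_count)
		p->cold_count--;
	else if (!cold && p->cold_count > p->count)
		p->cold_count = p->count;	/* hot alloc ate into the cold end */
	return pg;
}

int main(void)
{
	struct pset p;
	struct fake_page pages[4];
	int i;

	pset_init(&p);
	for (i = 0; i < 4; i++) {
		pages[i].id = i;
		free_page_pcp(&p, &pages[i], i >= 2);	/* 0,1 hot; 2,3 cold */
	}
	printf("hot alloc  -> page %d\n", alloc_page_pcp(&p, 0)->id);	/* 1 */
	printf("cold alloc -> page %d\n", alloc_page_pcp(&p, 1)->id);	/* 3 */
	return 0;
}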

Index: linux-2.6/include/linux/mmzone.h
===================================================================
--- linux-2.6.orig/include/linux/mmzone.h
+++ linux-2.6/include/linux/mmzone.h
@@ -44,15 +44,13 @@ struct zone_padding {
 #define ZONE_PADDING(name)
 #endif
 
-struct per_cpu_pages {
+struct per_cpu_pageset {
+	struct list_head list;	/* the list of pages */
 	int count;		/* number of pages in the list */
+	int cold_count;		/* number of cold pages in the list */
 	int high;		/* high watermark, emptying needed */
 	int batch;		/* chunk size for buddy add/remove */
-	struct list_head list;	/* the list of pages */
-};
 
-struct per_cpu_pageset {
-	struct per_cpu_pages pcp[2];	/* 0: hot.  1: cold */
 #ifdef CONFIG_NUMA
 	unsigned long numa_hit;		/* allocated in intended node */
 	unsigned long numa_miss;	/* allocated in non intended node */
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -598,27 +598,24 @@ static int rmqueue_bulk(struct zone *zon
 void drain_remote_pages(void)
 {
 	struct zone *zone;
-	int i;
 	unsigned long flags;
 
-	local_irq_save(flags);
 	for_each_zone(zone) {
-		struct per_cpu_pageset *pset;
-
 		/* Do not drain local pagesets */
 		if (zone->zone_pgdat->node_id == numa_node_id())
 			continue;
 
-		pset = zone_pcp(zone, smp_processor_id());
-		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
-			struct per_cpu_pages *pcp;
-
-			pcp = &pset->pcp[i];
-			free_pages_bulk(zone, pcp->count, &pcp->list, 0);
-			pcp->count = 0;
+		local_irq_save(flags);
+		if (zone->zone_pgdat->node_id != numa_node_id()) {
+			struct per_cpu_pageset *pset;
+
+			pset = zone_pcp(zone, smp_processor_id());
+			free_pages_bulk(zone, pset->count, &pset->list, 0);
+			pset->cold_count = 0;
+			pset->count = 0;
 		}
+		local_irq_restore(flags);
 	}
-	local_irq_restore(flags);
 }
 #endif
 
@@ -627,21 +624,16 @@ static void __drain_pages(unsigned int c
 {
 	unsigned long flags;
 	struct zone *zone;
-	int i;
 
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;
 
 		pset = zone_pcp(zone, cpu);
-		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
-			struct per_cpu_pages *pcp;
-
-			pcp = &pset->pcp[i];
-			local_irq_save(flags);
-			free_pages_bulk(zone, pcp->count, &pcp->list, 0);
-			pcp->count = 0;
-			local_irq_restore(flags);
-		}
+		local_irq_save(flags);
+		free_pages_bulk(zone, pset->count, &pset->list, 0);
+		pset->cold_count = 0;
+		pset->count = 0;
+		local_irq_restore(flags);
 	}
 }
 #endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
@@ -713,7 +705,7 @@ static void zone_statistics(struct zonel
 static void fastcall free_hot_cold_page(struct page *page, int cold)
 {
 	struct zone *zone = page_zone(page);
-	struct per_cpu_pages *pcp;
+	struct per_cpu_pageset *pset;
 	unsigned long flags;
 
 	arch_free_page(page, 0);
@@ -725,14 +717,22 @@ static void fastcall free_hot_cold_page(
 
 	kernel_map_pages(page, 1, 0);
 
-	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+	pset = zone_pcp(zone, get_cpu());
 	local_irq_save(flags);
 	__inc_page_state(pgfree);
-	list_add(&page->lru, &pcp->list);
-	pcp->count++;
-	if (pcp->count >= pcp->high) {
-		free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
-		pcp->count -= pcp->batch;
+	pset->count++;
+	if (cold) {
+		pset->cold_count++;
+		list_add_tail(&page->lru, &pset->list);
+	} else {
+		list_add(&page->lru, &pset->list);
+	}
+
+	if (pset->count > pset->high) {
+		int count = pset->batch;
+		free_pages_bulk(zone, count, &pset->list, 0);
+		pset->cold_count -= min(count, pset->cold_count);
+		pset->count -= count;
 	}
 	local_irq_restore(flags);
 	put_cpu();
@@ -782,19 +782,30 @@ static struct page *buffered_rmqueue(str
 again:
 	cpu  = get_cpu();
 	if (likely(order == 0)) {
-		struct per_cpu_pages *pcp;
+		struct per_cpu_pageset *pset;
 
-		pcp = &zone_pcp(zone, cpu)->pcp[cold];
+		pset = zone_pcp(zone, cpu);
 		local_irq_save(flags);
-		if (!pcp->count) {
-			pcp->count += rmqueue_bulk(zone, 0,
-						pcp->batch, &pcp->list);
-			if (unlikely(!pcp->count))
+		if (!pset->count || (cold && !pset->cold_count &&
+				pset->count <= pset->high - (pset->high>>2))) {
+			int count;
+			count = rmqueue_bulk(zone, 0, pset->batch, &pset->list);
+			if (unlikely(!count))
 				goto failed;
+			pset->count += count;
+			pset->cold_count += count;
+		}
+
+		pset->count--;
+		if (cold) {
+			page = list_entry(pset->list.prev, struct page, lru);
+			if (pset->cold_count)
+				pset->cold_count--;
+		} else {
+			page = list_entry(pset->list.next, struct page, lru);
+			pset->cold_count = min(pset->cold_count, pset->count);
 		}
-		page = list_entry(pcp->list.next, struct page, lru);
 		list_del(&page->lru);
-		pcp->count--;
 	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
@@ -1385,7 +1396,7 @@ void si_meminfo_node(struct sysinfo *val
 void show_free_areas(void)
 {
 	struct page_state ps;
-	int cpu, temperature;
+	int cpu;
 	unsigned long active;
 	unsigned long inactive;
 	unsigned long free;
@@ -1402,17 +1413,11 @@ void show_free_areas(void)
 			printk("\n");
 
 		for_each_online_cpu(cpu) {
-			struct per_cpu_pageset *pageset;
-
-			pageset = zone_pcp(zone, cpu);
+			struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
 
-			for (temperature = 0; temperature < 2; temperature++)
-				printk("cpu %d %s: high %d, batch %d used:%d\n",
-					cpu,
-					temperature ? "cold" : "hot",
-					pageset->pcp[temperature].high,
-					pageset->pcp[temperature].batch,
-					pageset->pcp[temperature].count);
+			printk("cpu %d: high %d, batch %d, pages %d, cold %d\n",
+				cpu, pset->high, pset->batch,
+				pset->count, pset->cold_count);
 		}
 	}
 
@@ -1845,23 +1850,14 @@ static int __cpuinit zone_batchsize(stru
 	return batch;
 }
 
-inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+static inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 {
-	struct per_cpu_pages *pcp;
-
 	memset(p, 0, sizeof(*p));
-
-	pcp = &p->pcp[0];		/* hot */
-	pcp->count = 0;
-	pcp->high = 6 * batch;
-	pcp->batch = max(1UL, 1 * batch);
-	INIT_LIST_HEAD(&pcp->list);
-
-	pcp = &p->pcp[1];		/* cold*/
-	pcp->count = 0;
-	pcp->high = 2 * batch;
-	pcp->batch = max(1UL, batch/2);
-	INIT_LIST_HEAD(&pcp->list);
+	p->count = 0;
+	p->cold_count = 0;
+	p->high = 6 * batch;
+	p->batch = max(1UL, 1 * batch);
+	INIT_LIST_HEAD(&p->list);
 }
 
 /*
@@ -1869,16 +1865,13 @@ inline void setup_pageset(struct per_cpu
  * to the value high for the pageset p.
  */
 
-static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+static void setup_pagelist_highmark(struct per_cpu_pageset *pset,
 				unsigned long high)
 {
-	struct per_cpu_pages *pcp;
-
-	pcp = &p->pcp[0]; /* hot list */
-	pcp->high = high;
-	pcp->batch = max(1UL, high/4);
-	if ((high/4) > (PAGE_SHIFT * 8))
-		pcp->batch = PAGE_SHIFT * 8;
+	pset->high = high;
+	pset->batch = max(1UL, high/4);
+	if (pset->batch > PAGE_SHIFT * 8)
+		pset->batch = PAGE_SHIFT * 8;
 }
 
 
@@ -2259,27 +2252,15 @@ static int zoneinfo_show(struct seq_file
 			   ")"
 			   "\n  pagesets");
 		for_each_online_cpu(i) {
-			struct per_cpu_pageset *pageset;
-			int j;
+			struct per_cpu_pageset *pset;
 
-			pageset = zone_pcp(zone, i);
-			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-				if (pageset->pcp[j].count)
-					break;
-			}
-			if (j == ARRAY_SIZE(pageset->pcp))
-				continue;
-			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-				seq_printf(m,
-					   "\n    cpu: %i pcp: %i"
-					   "\n              count: %i"
-					   "\n              high:  %i"
-					   "\n              batch: %i",
-					   i, j,
-					   pageset->pcp[j].count,
-					   pageset->pcp[j].high,
-					   pageset->pcp[j].batch);
-			}
+			pset = zone_pcp(zone, i);
+			seq_printf(m,
+				   "\n    cpu: %i, pcp"
+				   "\n              count: %i"
+				   "\n              high:  %i"
+				   "\n              batch: %i",
+				   i, pset->count, pset->high, pset->batch);
 #ifdef CONFIG_NUMA
 			seq_printf(m,
 				   "\n            numa_hit:       %lu"
@@ -2288,12 +2269,12 @@ static int zoneinfo_show(struct seq_file
 				   "\n            interleave_hit: %lu"
 				   "\n            local_node:     %lu"
 				   "\n            other_node:     %lu",
-				   pageset->numa_hit,
-				   pageset->numa_miss,
-				   pageset->numa_foreign,
-				   pageset->interleave_hit,
-				   pageset->local_node,
-				   pageset->other_node);
+				   pset->numa_hit,
+				   pset->numa_miss,
+				   pset->numa_foreign,
+				   pset->interleave_hit,
+				   pset->local_node,
+				   pset->other_node);
 #endif
 		}
 		seq_printf(m,

* [patch] mm: pcp stats
  2006-02-22 14:32 [RFC][patch] mm: single pcp lists Nick Piggin
@ 2006-02-22 14:33 ` Nick Piggin
  2006-02-22 22:20 ` [RFC][patch] mm: single pcp lists Joel Schopp
  1 sibling, 0 replies; 4+ messages in thread
From: Nick Piggin @ 2006-02-22 14:33 UTC (permalink / raw)
  To: Linux Kernel Mailing List, Linux Memory Management List

The following patch adds some stats about pageset activity.
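
Since the counters are added to vmstat_text below, they show up in
/proc/vmstat once the patch is applied. As a rough illustration (not part
of the patch, and only an estimate because fills and spills happen in whole
batches), a userspace sketch that reads them back and reports what fraction
of pcp allocations were refilled from the buddy allocator rather than
served by recycled frees:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char name[64];
	unsigned long val, pcpalloc = 0, pcpfree = 0, pcpfill = 0, pcpspill = 0;
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("/proc/vmstat");
		return 1;
	}
	/* /proc/vmstat is "name value" pairs, one per line */
	while (fscanf(f, "%63s %lu", name, &val) == 2) {
		if (!strcmp(name, "pcpalloc"))
			pcpalloc = val;
		else if (!strcmp(name, "pcpfree"))
			pcpfree = val;
		else if (!strcmp(name, "pcpfill"))
			pcpfill = val;
		else if (!strcmp(name, "pcpspill"))
			pcpspill = val;
	}
	fclose(f);

	printf("pcpalloc %lu, pcpfree %lu, pcpfill %lu, pcpspill %lu\n",
	       pcpalloc, pcpfree, pcpfill, pcpspill);
	if (pcpalloc)
		printf("refilled from buddy: %.1f%% of pcp allocations\n",
		       100.0 * pcpfill / pcpalloc);
	return 0;
}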

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -720,6 +720,7 @@ static void fastcall free_hot_cold_page(
 	pset = zone_pcp(zone, get_cpu());
 	local_irq_save(flags);
 	__inc_page_state(pgfree);
+	__inc_page_state(pcpfree);
 	pset->count++;
 	if (cold) {
 		pset->cold_count++;
@@ -733,6 +734,7 @@ static void fastcall free_hot_cold_page(
 		free_pages_bulk(zone, count, &pset->list, 0);
 		pset->cold_count -= min(count, pset->cold_count);
 		pset->count -= count;
+		__mod_page_state(pcpspill, count);
 	}
 	local_irq_restore(flags);
 	put_cpu();
@@ -794,8 +796,10 @@ again:
 				goto failed;
 			pset->count += count;
 			pset->cold_count += count;
+			__mod_page_state(pcpfill, count);
 		}
 
+		__inc_page_state(pcpalloc);
 		pset->count--;
 		if (cold) {
 			page = list_entry(pset->list.prev, struct page, lru);
@@ -2319,6 +2323,10 @@ static char *vmstat_text[] = {
 	"pgalloc_dma",
 
 	"pgfree",
+	"pcpalloc",
+	"pcpfree",
+	"pcpfill",
+	"pcpspill",
 	"pgactivate",
 	"pgdeactivate",
 
Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -117,6 +117,10 @@ struct page_state {
 	unsigned long pgalloc_dma;
 
 	unsigned long pgfree;		/* page freeings */
+	unsigned long pcpalloc;		/* pages allocated from pcp lists */
+	unsigned long pcpfree;		/* pages freed into pcp lists */
+	unsigned long pcpfill;		/* pages allocated into pcp lists */
+	unsigned long pcpspill;		/* pages freed from pcp lists */
 	unsigned long pgactivate;	/* pages moved inactive->active */
 	unsigned long pgdeactivate;	/* pages moved active->inactive */
 

* Re: [RFC][patch] mm: single pcp lists
  2006-02-22 14:32 [RFC][patch] mm: single pcp lists Nick Piggin
  2006-02-22 14:33 ` [patch] mm: pcp stats Nick Piggin
@ 2006-02-22 22:20 ` Joel Schopp
  2006-02-23  4:28   ` Nick Piggin
  1 sibling, 1 reply; 4+ messages in thread
From: Joel Schopp @ 2006-02-22 22:20 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Linux Kernel Mailing List, Linux Memory Management List

> -struct per_cpu_pages {
> +struct per_cpu_pageset {
> +	struct list_head list;	/* the list of pages */
>  	int count;		/* number of pages in the list */
> +	int cold_count;		/* number of cold pages in the list */
>  	int high;		/* high watermark, emptying needed */
>  	int batch;		/* chunk size for buddy add/remove */
> -	struct list_head list;	/* the list of pages */
> -};

Any particular reason to move the list_head to the front?

* Re: [RFC][patch] mm: single pcp lists
  2006-02-22 22:20 ` [RFC][patch] mm: single pcp lists Joel Schopp
@ 2006-02-23  4:28   ` Nick Piggin
  0 siblings, 0 replies; 4+ messages in thread
From: Nick Piggin @ 2006-02-23  4:28 UTC (permalink / raw)
  To: Joel Schopp
  Cc: Nick Piggin, Linux Kernel Mailing List, Linux Memory Management List

Joel Schopp wrote:
>> -struct per_cpu_pages {
>> +struct per_cpu_pageset {
>> +    struct list_head list;    /* the list of pages */
>>      int count;        /* number of pages in the list */
>> +    int cold_count;        /* number of cold pages in the list */
>>      int high;        /* high watermark, emptying needed */
>>      int batch;        /* chunk size for buddy add/remove */
>> -    struct list_head list;    /* the list of pages */
>> -};
> 
> 
> Any particular reason to move the list_head to the front?
> 

Nothing particular. I think it was for alignment at one stage
before cold_count was added.
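
For what it's worth, a quick userspace check (illustrative only: it assumes
64-bit pointers and a two-pointer list_head, and ignores the NUMA fields)
suggests the ordering makes no size or padding difference with the current
fields, which fits the "nothing particular" answer:

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

struct pset_list_first {
	struct list_head list;
	int count, cold_count, high, batch;
};

struct pset_list_last {
	int count, cold_count, high, batch;
	struct list_head list;
};

int main(void)
{
	printf("list first: size %zu, list at offset %zu\n",
	       sizeof(struct pset_list_first),
	       offsetof(struct pset_list_first, list));
	printf("list last:  size %zu, list at offset %zu\n",
	       sizeof(struct pset_list_last),
	       offsetof(struct pset_list_last, list));
	return 0;
}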

-- 
SUSE Labs, Novell Inc.
