linux-kernel.vger.kernel.org archive mirror
* [patch 0/12] mm: optimisations
@ 2005-11-21 11:37 Nick Piggin
  2005-11-21 11:41 ` [patch 11/12] mm: page_alloc cleanups Nick Piggin
                   ` (13 more replies)
  0 siblings, 14 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 11:37 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

The following patchset against 2.6.15-rc2 contains optimisations to the
mm subsystem, mainly the page allocator. Single-threaded, write-fault-based
allocation performance is improved by ~5% on a G5 with an SMP kernel, and by
~7% on a P4 Xeon with an SMP kernel (this measurement includes the full fault
path, page copy, unmapping, and page freeing, so the actual kernel allocator
improvement should be larger).

Thanks to feedback from Christoph, Andi, and Bob Picco.

This patchset is cut down to include just straight optimisations and no
behavioural changes.

Nick

-- 
SUSE Labs, Novell Inc.



^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 11/12] mm: page_alloc cleanups
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
@ 2005-11-21 11:41 ` Nick Piggin
  2005-11-21 12:00 ` [patch 7/12] mm: bad_page opt Nick Piggin
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 11:41 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

Small cleanups that do not change generated code with the gccs
I've tested with.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -438,8 +438,7 @@ void __free_pages_ok(struct page *page, 
  *
  * -- wli
  */
-static inline struct page *
-expand(struct zone *zone, struct page *page,
+static inline void expand(struct zone *zone, struct page *page,
  	int low, int high, struct free_area *area)
 {
 	unsigned long size = 1 << high;
@@ -453,7 +452,6 @@ expand(struct zone *zone, struct page *p
 		area->nr_free++;
 		set_page_order(&page[size], high);
 	}
-	return page;
 }
 
 /*
@@ -505,7 +503,8 @@ static struct page *__rmqueue(struct zon
 		rmv_page_order(page);
 		area->nr_free--;
 		zone->free_pages -= 1UL << order;
-		return expand(zone, page, order, current_order, area);
+		expand(zone, page, order, current_order, area);
+		return page;
 	}
 
 	return NULL;
@@ -520,19 +519,16 @@ static int rmqueue_bulk(struct zone *zon
 			unsigned long count, struct list_head *list)
 {
 	int i;
-	int allocated = 0;
-	struct page *page;
 	
 	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
-		page = __rmqueue(zone, order);
-		if (page == NULL)
+		struct page *page = __rmqueue(zone, order);
+		if (unlikely(page == NULL))
 			break;
-		allocated++;
 		list_add_tail(&page->lru, list);
 	}
 	spin_unlock(&zone->lock);
-	return allocated;
+	return i;
 }
 
 #ifdef CONFIG_NUMA

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 7/12] mm: bad_page opt
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
  2005-11-21 11:41 ` [patch 11/12] mm: page_alloc cleanups Nick Piggin
@ 2005-11-21 12:00 ` Nick Piggin
  2005-11-21 12:00 ` [patch 8/12] mm: remove pcp low Nick Piggin
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 12:00 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

Cut down size slightly by not passing bad_page the function name (it can
be determined from dump_stack()), and cut down the number of printks in
bad_page.

Also, cut down some branching in the destroy_compound_page path.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -130,16 +130,15 @@ static inline int bad_range(struct zone 
 }
 #endif
 
-static void bad_page(const char *function, struct page *page)
+static void bad_page(struct page *page)
 {
-	printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
-		function, current->comm, page);
-	printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
-		(int)(2*sizeof(unsigned long)), (unsigned long)page->flags,
-		page->mapping, page_mapcount(page), page_count(page));
-	printk(KERN_EMERG "Backtrace:\n");
+	printk(KERN_EMERG "Bad page state in process '%s'\n"
+		"page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n"
+		"Trying to fix it up, but a reboot is needed\n",
+		current->comm, page, (int)(2*sizeof(unsigned long)),
+		(unsigned long)page->flags, page->mapping,
+		page_mapcount(page), page_count(page));
 	dump_stack();
-	printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n");
 	page->flags &= ~(1 << PG_lru	|
 			1 << PG_private |
 			1 << PG_locked	|
@@ -197,19 +196,15 @@ static void destroy_compound_page(struct
 	int i;
 	int nr_pages = 1 << order;
 
-	if (!PageCompound(page))
-		return;
-
-	if (page[1].index != order)
-		bad_page(__FUNCTION__, page);
+	if (unlikely(page[1].index != order))
+		bad_page(page);
 
 	for (i = 0; i < nr_pages; i++) {
 		struct page *p = page + i;
 
-		if (!PageCompound(p))
-			bad_page(__FUNCTION__, page);
-		if (page_private(p) != (unsigned long)page)
-			bad_page(__FUNCTION__, page);
+		if (unlikely(!PageCompound(p) |
+				(page_private(p) != (unsigned long)page)))
+			bad_page(page);
 		ClearPageCompound(p);
 	}
 }
@@ -320,7 +315,7 @@ static inline void __free_pages_bulk (st
 	unsigned long page_idx;
 	int order_size = 1 << order;
 
-	if (unlikely(order))
+	if (unlikely(PageCompound(page)))
 		destroy_compound_page(page, order);
 
 	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
@@ -352,7 +347,7 @@ static inline void __free_pages_bulk (st
 	zone->free_area[order].nr_free++;
 }
 
-static inline void free_pages_check(const char *function, struct page *page)
+static inline void free_pages_check(struct page *page)
 {
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
@@ -367,7 +362,7 @@ static inline void free_pages_check(cons
 			1 << PG_swapcache |
 			1 << PG_writeback |
 			1 << PG_reserved ))))
-		bad_page(function, page);
+		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
 }
@@ -421,7 +416,7 @@ void __free_pages_ok(struct page *page, 
 #endif
 
 	for (i = 0 ; i < (1 << order) ; ++i)
-		free_pages_check(__FUNCTION__, page + i);
+		free_pages_check(page + i);
 	list_add(&page->lru, &list);
 	kernel_map_pages(page, 1<<order, 0);
 	local_irq_save(flags);
@@ -481,7 +476,7 @@ static void prep_new_page(struct page *p
 			1 << PG_swapcache |
 			1 << PG_writeback |
 			1 << PG_reserved ))))
-		bad_page(__FUNCTION__, page);
+		bad_page(page);
 
 	page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
 			1 << PG_referenced | 1 << PG_arch_1 |
@@ -677,7 +672,7 @@ static void fastcall free_hot_cold_page(
 	inc_page_state(pgfree);
 	if (PageAnon(page))
 		page->mapping = NULL;
-	free_pages_check(__FUNCTION__, page);
+	free_pages_check(page);
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 	local_irq_save(flags);
 	list_add(&page->lru, &pcp->list);

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 8/12] mm: remove pcp low
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
  2005-11-21 11:41 ` [patch 11/12] mm: page_alloc cleanups Nick Piggin
  2005-11-21 12:00 ` [patch 7/12] mm: bad_page opt Nick Piggin
@ 2005-11-21 12:00 ` Nick Piggin
  2005-11-21 12:07 ` [patch 9/12] mm: page_state opt Nick Piggin
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 12:00 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

struct per_cpu_pages.low is useless. Remove it.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/include/linux/mmzone.h
===================================================================
--- linux-2.6.orig/include/linux/mmzone.h
+++ linux-2.6/include/linux/mmzone.h
@@ -46,7 +46,6 @@ struct zone_padding {
 
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
-	int low;		/* low watermark, refill needed */
 	int high;		/* high watermark, emptying needed */
 	int batch;		/* chunk size for buddy add/remove */
 	struct list_head list;	/* the list of pages */
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -719,7 +719,7 @@ buffered_rmqueue(struct zone *zone, int 
 
 		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 		local_irq_save(flags);
-		if (pcp->count <= pcp->low)
+		if (!pcp->count)
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
 		if (likely(pcp->count)) {
@@ -1314,10 +1314,9 @@ void show_free_areas(void)
 			pageset = zone_pcp(zone, cpu);
 
 			for (temperature = 0; temperature < 2; temperature++)
-				printk("cpu %d %s: low %d, high %d, batch %d used:%d\n",
+				printk("cpu %d %s: high %d, batch %d used:%d\n",
 					cpu,
 					temperature ? "cold" : "hot",
-					pageset->pcp[temperature].low,
 					pageset->pcp[temperature].high,
 					pageset->pcp[temperature].batch,
 					pageset->pcp[temperature].count);
@@ -1761,14 +1760,12 @@ inline void setup_pageset(struct per_cpu
 
 	pcp = &p->pcp[0];		/* hot */
 	pcp->count = 0;
-	pcp->low = 0;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
 	INIT_LIST_HEAD(&pcp->list);
 
 	pcp = &p->pcp[1];		/* cold*/
 	pcp->count = 0;
-	pcp->low = 0;
 	pcp->high = 2 * batch;
 	pcp->batch = max(1UL, batch/2);
 	INIT_LIST_HEAD(&pcp->list);
@@ -2164,12 +2161,10 @@ static int zoneinfo_show(struct seq_file
 				seq_printf(m,
 					   "\n    cpu: %i pcp: %i"
 					   "\n              count: %i"
-					   "\n              low:   %i"
 					   "\n              high:  %i"
 					   "\n              batch: %i",
 					   i, j,
 					   pageset->pcp[j].count,
-					   pageset->pcp[j].low,
 					   pageset->pcp[j].high,
 					   pageset->pcp[j].batch);
 			}

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 9/12] mm: page_state opt
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
                   ` (2 preceding siblings ...)
  2005-11-21 12:00 ` [patch 8/12] mm: remove pcp low Nick Piggin
@ 2005-11-21 12:07 ` Nick Piggin
  2005-11-22  7:54   ` Andrew Morton
  2005-11-21 12:08 ` [patch 12/12] mm: rmap opt Nick Piggin
                   ` (9 subsequent siblings)
  13 siblings, 1 reply; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 12:07 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

Optimise page_state manipulations by introducing interrupt-unsafe accessors
to page_state fields. Callers must provide their own locking (either by
disabling interrupts or by never updating from interrupt context).

Switch over the hot callsites that can easily be moved inside interrupts-off
sections.
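
To make the locking contract concrete, a rough usage sketch (illustrative
only, with a made-up caller name; the diff below does the real conversions):

	static void example_account_free(struct page *page, unsigned int order)
	{
		unsigned long flags;

		local_irq_save(flags);
		/* interrupts are off, so the cheap accessor is safe here */
		__mod_page_state(pgfree, 1 << order);
		/* ... do the actual freeing under the same irq-off section ... */
		local_irq_restore(flags);
	}

The plain mod_page_state()/inc_page_state() forms keep doing their own
local_irq_save()/restore() and remain safe to call from any context.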

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -136,31 +136,52 @@ struct page_state {
 extern void get_page_state(struct page_state *ret);
 extern void get_page_state_node(struct page_state *ret, int node);
 extern void get_full_page_state(struct page_state *ret);
-extern unsigned long __read_page_state(unsigned long offset);
-extern void __mod_page_state(unsigned long offset, unsigned long delta);
+extern unsigned long read_page_state_offset(unsigned long offset);
+extern void mod_page_state_offset(unsigned long offset, unsigned long delta);
+extern void __mod_page_state_offset(unsigned long offset, unsigned long delta);
 
 #define read_page_state(member) \
-	__read_page_state(offsetof(struct page_state, member))
+	read_page_state_offset(offsetof(struct page_state, member))
 
 #define mod_page_state(member, delta)	\
-	__mod_page_state(offsetof(struct page_state, member), (delta))
+	mod_page_state_offset(offsetof(struct page_state, member), (delta))
 
-#define inc_page_state(member)	mod_page_state(member, 1UL)
-#define dec_page_state(member)	mod_page_state(member, 0UL - 1)
-#define add_page_state(member,delta) mod_page_state(member, (delta))
-#define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta))
-
-#define mod_page_state_zone(zone, member, delta)				\
-	do {									\
-		unsigned offset;						\
-		if (is_highmem(zone))						\
-			offset = offsetof(struct page_state, member##_high);	\
-		else if (is_normal(zone))					\
-			offset = offsetof(struct page_state, member##_normal);	\
-		else								\
-			offset = offsetof(struct page_state, member##_dma);	\
-		__mod_page_state(offset, (delta));				\
-	} while (0)
+#define __mod_page_state(member, delta)	\
+	__mod_page_state_offset(offsetof(struct page_state, member), (delta))
+
+#define inc_page_state(member)		mod_page_state(member, 1UL)
+#define dec_page_state(member)		mod_page_state(member, 0UL - 1)
+#define add_page_state(member,delta)	mod_page_state(member, (delta))
+#define sub_page_state(member,delta)	mod_page_state(member, 0UL - (delta))
+
+#define __inc_page_state(member)	__mod_page_state(member, 1UL)
+#define __dec_page_state(member)	__mod_page_state(member, 0UL - 1)
+#define __add_page_state(member,delta)	__mod_page_state(member, (delta))
+#define __sub_page_state(member,delta)	__mod_page_state(member, 0UL - (delta))
+
+#define page_state(member) (*__page_state(offsetof(struct page_state, member)))
+
+#define state_zone_offset(zone, member)					\
+({									\
+	unsigned offset;						\
+	if (is_highmem(zone))						\
+		offset = offsetof(struct page_state, member##_high);	\
+	else if (is_normal(zone))					\
+		offset = offsetof(struct page_state, member##_normal);	\
+	else								\
+		offset = offsetof(struct page_state, member##_dma);	\
+	offset;								\
+})
+
+#define __mod_page_state_zone(zone, member, delta)			\
+ do {									\
+	__mod_page_state_offset(state_zone_offset(zone, member), (delta)); \
+ } while (0)
+
+#define mod_page_state_zone(zone, member, delta)			\
+ do {									\
+	mod_page_state_offset(state_zone_offset(zone, member), (delta)); \
+ } while (0)
 
 /*
  * Manipulation of page state flags
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -407,8 +407,6 @@ void __free_pages_ok(struct page *page, 
 
 	arch_free_page(page, order);
 
-	mod_page_state(pgfree, 1 << order);
-
 #ifndef CONFIG_MMU
 	if (order > 0)
 		for (i = 1 ; i < (1 << order) ; ++i)
@@ -420,6 +418,7 @@ void __free_pages_ok(struct page *page, 
 	list_add(&page->lru, &list);
 	kernel_map_pages(page, 1<<order, 0);
 	local_irq_save(flags);
+	__mod_page_state(pgfree, 1 << order);
 	free_pages_bulk(page_zone(page), 1, &list, order);
 	local_irq_restore(flags);
 }
@@ -630,18 +629,14 @@ void drain_local_pages(void)
 }
 #endif /* CONFIG_PM */
 
-static void zone_statistics(struct zonelist *zonelist, struct zone *z)
+static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
 {
 #ifdef CONFIG_NUMA
-	unsigned long flags;
-	int cpu;
 	pg_data_t *pg = z->zone_pgdat;
 	pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
 	struct per_cpu_pageset *p;
 
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-	p = zone_pcp(z,cpu);
+	p = zone_pcp(z, cpu);
 	if (pg == orig) {
 		p->numa_hit++;
 	} else {
@@ -652,7 +647,6 @@ static void zone_statistics(struct zonel
 		p->local_node++;
 	else
 		p->other_node++;
-	local_irq_restore(flags);
 #endif
 }
 
@@ -669,12 +663,12 @@ static void fastcall free_hot_cold_page(
 	arch_free_page(page, 0);
 
 	kernel_map_pages(page, 1, 0);
-	inc_page_state(pgfree);
 	if (PageAnon(page))
 		page->mapping = NULL;
 	free_pages_check(page);
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 	local_irq_save(flags);
+	__inc_page_state(pgfree);
 	list_add(&page->lru, &pcp->list);
 	pcp->count++;
 	if (pcp->count >= pcp->high)
@@ -707,46 +701,55 @@ static inline void prep_zero_page(struct
  * we cheat by calling it from here, in the order > 0 path.  Saves a branch
  * or two.
  */
-static struct page *
-buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
+static struct page *buffered_rmqueue(struct zonelist *zonelist,
+			struct zone *zone, int order, gfp_t gfp_flags)
 {
 	unsigned long flags;
-	struct page *page = NULL;
+	struct page *page;
 	int cold = !!(gfp_flags & __GFP_COLD);
+	int cpu = get_cpu();
 
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
 
-		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+		pcp = &zone_pcp(zone, cpu)->pcp[cold];
 		local_irq_save(flags);
-		if (!pcp->count)
+		if (!pcp->count) {
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
-		if (likely(pcp->count)) {
-			page = list_entry(pcp->list.next, struct page, lru);
-			list_del(&page->lru);
-			pcp->count--;
+			if (unlikely(!pcp->count))
+				goto failed;
 		}
-		local_irq_restore(flags);
-		put_cpu();
+		page = list_entry(pcp->list.next, struct page, lru);
+		list_del(&page->lru);
+		pcp->count--;
 	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
-		spin_unlock_irqrestore(&zone->lock, flags);
+		spin_unlock(&zone->lock);
+		if (!page)
+			goto failed;
 	}
 
-	if (page != NULL) {
-		BUG_ON(bad_range(zone, page));
-		mod_page_state_zone(zone, pgalloc, 1 << order);
-		prep_new_page(page, order);
+	__mod_page_state_zone(zone, pgalloc, 1 << order);
+	zone_statistics(zonelist, zone, cpu);
+	local_irq_restore(flags);
+	put_cpu();
 
-		if (gfp_flags & __GFP_ZERO)
-			prep_zero_page(page, order, gfp_flags);
+	BUG_ON(bad_range(zone, page));
+	prep_new_page(page, order);
 
-		if (order && (gfp_flags & __GFP_COMP))
-			prep_compound_page(page, order);
-	}
+	if (gfp_flags & __GFP_ZERO)
+		prep_zero_page(page, order, gfp_flags);
+
+	if (order && (gfp_flags & __GFP_COMP))
+		prep_compound_page(page, order);
 	return page;
+
+failed:
+	local_irq_restore(flags);
+	put_cpu();
+	return NULL;
 }
 
 #define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
@@ -812,9 +815,8 @@ get_page_from_freelist(gfp_t gfp_mask, u
 				continue;
 		}
 
-		page = buffered_rmqueue(*z, order, gfp_mask);
+		page = buffered_rmqueue(zonelist, *z, order, gfp_mask);
 		if (page) {
-			zone_statistics(zonelist, *z);
 			break;
 		}
 	} while (*(++z) != NULL);
@@ -1191,7 +1193,7 @@ void get_full_page_state(struct page_sta
 	__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
 }
 
-unsigned long __read_page_state(unsigned long offset)
+unsigned long read_page_state_offset(unsigned long offset)
 {
 	unsigned long ret = 0;
 	int cpu;
@@ -1205,18 +1207,26 @@ unsigned long __read_page_state(unsigned
 	return ret;
 }
 
-void __mod_page_state(unsigned long offset, unsigned long delta)
+void __mod_page_state_offset(unsigned long offset, unsigned long delta)
+{
+	void *ptr;
+
+	ptr = &__get_cpu_var(page_states);
+	*(unsigned long *)(ptr + offset) += delta;
+}
+EXPORT_SYMBOL(__mod_page_state_offset);
+
+void mod_page_state_offset(unsigned long offset, unsigned long delta)
 {
 	unsigned long flags;
-	void* ptr;
+	void *ptr;
 
-	local_irq_save(flags);
 	ptr = &__get_cpu_var(page_states);
-	*(unsigned long*)(ptr + offset) += delta;
+	local_irq_save(flags);
+	*(unsigned long *)(ptr + offset) += delta;
 	local_irq_restore(flags);
 }
-
-EXPORT_SYMBOL(__mod_page_state);
+EXPORT_SYMBOL(mod_page_state_offset);
 
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free, struct pglist_data *pgdat)
Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c
+++ linux-2.6/mm/vmscan.c
@@ -641,17 +641,18 @@ static void shrink_cache(struct zone *zo
 			goto done;
 
 		max_scan -= nr_scan;
-		if (current_is_kswapd())
-			mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
-		else
-			mod_page_state_zone(zone, pgscan_direct, nr_scan);
 		nr_freed = shrink_list(&page_list, sc);
-		if (current_is_kswapd())
-			mod_page_state(kswapd_steal, nr_freed);
-		mod_page_state_zone(zone, pgsteal, nr_freed);
 		sc->nr_to_reclaim -= nr_freed;
 
-		spin_lock_irq(&zone->lru_lock);
+		local_irq_disable();
+		if (current_is_kswapd()) {
+			__mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
+			__mod_page_state(kswapd_steal, nr_freed);
+		} else
+			__mod_page_state_zone(zone, pgscan_direct, nr_scan);
+		__mod_page_state_zone(zone, pgsteal, nr_freed);
+
+		spin_lock(&zone->lru_lock);
 		/*
 		 * Put back any unfreeable pages.
 		 */
@@ -813,11 +814,13 @@ refill_inactive_zone(struct zone *zone, 
 		}
 	}
 	zone->nr_active += pgmoved;
-	spin_unlock_irq(&zone->lru_lock);
-	pagevec_release(&pvec);
+	spin_unlock(&zone->lru_lock);
+
+	__mod_page_state_zone(zone, pgrefill, pgscanned);
+	__mod_page_state(pgdeactivate, pgdeactivate);
+	local_irq_enable();
 
-	mod_page_state_zone(zone, pgrefill, pgscanned);
-	mod_page_state(pgdeactivate, pgdeactivate);
+	pagevec_release(&pvec);
 }
 
 /*
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -459,7 +459,11 @@ void page_add_anon_rmap(struct page *pag
 
 		page->index = linear_page_index(vma, address);
 
-		inc_page_state(nr_mapped);
+		/*
+		 * nr_mapped state can be updated without turning off
+		 * interrupts because it is not modified via interrupt.
+		 */
+		__inc_page_state(nr_mapped);
 	}
 	/* else checking page index and mapping is racy */
 }
@@ -476,7 +480,7 @@ void page_add_file_rmap(struct page *pag
 	BUG_ON(!pfn_valid(page_to_pfn(page)));
 
 	if (atomic_inc_and_test(&page->_mapcount))
-		inc_page_state(nr_mapped);
+		__inc_page_state(nr_mapped);
 }
 
 /**
@@ -500,7 +504,7 @@ void page_remove_rmap(struct page *page)
 		 */
 		if (page_test_and_clear_dirty(page))
 			set_page_dirty(page);
-		dec_page_state(nr_mapped);
+		__dec_page_state(nr_mapped);
 	}
 }
 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 12/12] mm: rmap opt
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
                   ` (3 preceding siblings ...)
  2005-11-21 12:07 ` [patch 9/12] mm: page_state opt Nick Piggin
@ 2005-11-21 12:08 ` Nick Piggin
  2005-11-22 19:19   ` Hugh Dickins
  2005-11-21 13:07 ` [patch 2/12] mm: pagealloc opt Nick Piggin
                   ` (8 subsequent siblings)
  13 siblings, 1 reply; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 12:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

Optimise rmap functions by minimising atomic operations when
we know there will be no concurrent modifications.
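
The idea in a nutshell (sketch only; the real changes are in the diff below):
a page that has just been allocated in the fault path is not yet visible to
any other CPU, so its rmap state can be set up without atomic
read-modify-write operations.

	/* existing path: the page may already be mapped elsewhere */
	if (atomic_inc_and_test(&page->_mapcount))
		__page_set_anon_rmap(page, vma, address);

	/* brand-new page: nothing else can race with us, so a plain
	 * atomic_set() is enough (_mapcount starts at -1) */
	atomic_set(&page->_mapcount, 0);
	__page_set_anon_rmap(page, vma, address);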

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/include/linux/rmap.h
===================================================================
--- linux-2.6.orig/include/linux/rmap.h
+++ linux-2.6/include/linux/rmap.h
@@ -71,6 +71,7 @@ void __anon_vma_link(struct vm_area_stru
  * rmap interfaces called when adding or removing pte of page
  */
 void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
+void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
 void page_remove_rmap(struct page *);
 
Index: linux-2.6/mm/memory.c
===================================================================
--- linux-2.6.orig/mm/memory.c
+++ linux-2.6/mm/memory.c
@@ -1339,7 +1339,7 @@ static int do_wp_page(struct mm_struct *
 		update_mmu_cache(vma, address, entry);
 		lazy_mmu_prot_update(entry);
 		lru_cache_add_active(new_page);
-		page_add_anon_rmap(new_page, vma, address);
+		page_add_new_anon_rmap(new_page, vma, address);
 
 		/* Free the old page.. */
 		new_page = old_page;
@@ -1792,8 +1792,7 @@ static int do_anonymous_page(struct mm_s
 			goto release;
 		inc_mm_counter(mm, anon_rss);
 		lru_cache_add_active(page);
-		SetPageReferenced(page);
-		page_add_anon_rmap(page, vma, address);
+		page_add_new_anon_rmap(page, vma, address);
 	} else {
 		/* Map the ZERO_PAGE - vm_page_prot is readonly */
 		page = ZERO_PAGE(address);
@@ -1923,7 +1922,7 @@ retry:
 		if (anon) {
 			inc_mm_counter(mm, anon_rss);
 			lru_cache_add_active(new_page);
-			page_add_anon_rmap(new_page, vma, address);
+			page_add_new_anon_rmap(new_page, vma, address);
 		} else if (!(vma->vm_flags & VM_RESERVED)) {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
Index: linux-2.6/mm/rmap.c
===================================================================
--- linux-2.6.orig/mm/rmap.c
+++ linux-2.6/mm/rmap.c
@@ -440,6 +440,26 @@ int page_referenced(struct page *page, i
 }
 
 /**
+ * page_set_anon_rmap - setup new anonymous rmap
+ * @page:	the page to add the mapping to
+ * @vma:	the vm area in which the mapping is added
+ * @address:	the user virtual address mapped
+ */
+static void __page_set_anon_rmap(struct page *page,
+	struct vm_area_struct *vma, unsigned long address)
+{
+	struct anon_vma *anon_vma = vma->anon_vma;
+
+	BUG_ON(!anon_vma);
+	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+	page->mapping = (struct address_space *) anon_vma;
+
+	page->index = linear_page_index(vma, address);
+
+	__inc_page_state(nr_mapped);
+}
+
+/**
  * page_add_anon_rmap - add pte mapping to an anonymous page
  * @page:	the page to add the mapping to
  * @vma:	the vm area in which the mapping is added
@@ -450,25 +470,28 @@ int page_referenced(struct page *page, i
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
-	if (atomic_inc_and_test(&page->_mapcount)) {
-		struct anon_vma *anon_vma = vma->anon_vma;
-
-		BUG_ON(!anon_vma);
-		anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
-		page->mapping = (struct address_space *) anon_vma;
-
-		page->index = linear_page_index(vma, address);
-
-		/*
-		 * nr_mapped state can be updated without turning off
-		 * interrupts because it is not modified via interrupt.
-		 */
-		__inc_page_state(nr_mapped);
-	}
+	if (atomic_inc_and_test(&page->_mapcount))
+		__page_set_anon_rmap(page, vma, address);
 	/* else checking page index and mapping is racy */
 }
 
 /**
+ * page_add_new_anon_rmap - add pte mapping to a new anonymous page
+ * @page:	the page to add the mapping to
+ * @vma:	the vm area in which the mapping is added
+ * @address:	the user virtual address mapped
+ *
+ * same as page_add_anon_rmap but must only be called on *new* pages.
+ */
+void page_add_new_anon_rmap(struct page *page,
+	struct vm_area_struct *vma, unsigned long address)
+{
+	atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
+	__page_set_anon_rmap(page, vma, address);
+}
+
+
+/**
  * page_add_file_rmap - add pte mapping to a file page
  * @page: the page to add the mapping to
  *
@@ -491,21 +514,28 @@ void page_add_file_rmap(struct page *pag
  */
 void page_remove_rmap(struct page *page)
 {
-	if (atomic_add_negative(-1, &page->_mapcount)) {
+	int fast = (page_mapcount(page) == 1) &
+			PageAnon(page) & (!PageSwapCache(page));
+
+	/* fast page may become SwapCache here, but nothing new will map it. */
+	if (fast)
+		reset_page_mapcount(page);
+	else if (atomic_add_negative(-1, &page->_mapcount))
 		BUG_ON(page_mapcount(page) < 0);
-		/*
-		 * It would be tidy to reset the PageAnon mapping here,
-		 * but that might overwrite a racing page_add_anon_rmap
-		 * which increments mapcount after us but sets mapping
-		 * before us: so leave the reset to free_hot_cold_page,
-		 * and remember that it's only reliable while mapped.
-		 * Leaving it set also helps swapoff to reinstate ptes
-		 * faster for those pages still in swapcache.
-		 */
 		if (page_test_and_clear_dirty(page))
 			set_page_dirty(page);
-		__dec_page_state(nr_mapped);
-	}
+	else
+		return; /* non zero mapcount */
+	/*
+	 * It would be tidy to reset the PageAnon mapping here,
+	 * but that might overwrite a racing page_add_anon_rmap
+	 * which increments mapcount after us but sets mapping
+	 * before us: so leave the reset to free_hot_cold_page,
+	 * and remember that it's only reliable while mapped.
+	 * Leaving it set also helps swapoff to reinstate ptes
+	 * faster for those pages still in swapcache.
+	 */
+	__dec_page_state(nr_mapped);
 }
 
 /*
Index: linux-2.6/include/linux/page-flags.h
===================================================================
--- linux-2.6.orig/include/linux/page-flags.h
+++ linux-2.6/include/linux/page-flags.h
@@ -203,6 +203,7 @@ extern void __mod_page_state_offset(unsi
 
 #define PageReferenced(page)	test_bit(PG_referenced, &(page)->flags)
 #define SetPageReferenced(page)	set_bit(PG_referenced, &(page)->flags)
+#define __SetPageReferenced(page)	__set_bit(PG_referenced, &(page)->flags)
 #define ClearPageReferenced(page)	clear_bit(PG_referenced, &(page)->flags)
 #define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 2/12] mm: pagealloc opt
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
                   ` (4 preceding siblings ...)
  2005-11-21 12:08 ` [patch 12/12] mm: rmap opt Nick Piggin
@ 2005-11-21 13:07 ` Nick Piggin
  2005-11-21 13:08 ` [patch 4/12] mm: set_page_refs opt Nick Piggin
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 13:07 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

Slightly optimise some page allocation and freeing functions by
taking advantage of knowing whether or not interrupts are disabled.
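
The pattern throughout (a sketch with hypothetical helper names): interrupt
disabling moves out to the outermost caller, so the inner function can take
the zone lock with plain spin_lock() instead of spin_lock_irqsave().

	static void example_inner(struct zone *zone)
	{
		/* caller guarantees interrupts are already disabled */
		spin_lock(&zone->lock);
		/* ... hand pages back to the buddy allocator ... */
		spin_unlock(&zone->lock);
	}

	static void example_outer(struct zone *zone)
	{
		unsigned long flags;

		local_irq_save(flags);
		example_inner(zone);
		local_irq_restore(flags);
	}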

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -375,11 +375,10 @@ static int
 free_pages_bulk(struct zone *zone, int count,
 		struct list_head *list, unsigned int order)
 {
-	unsigned long flags;
 	struct page *page = NULL;
 	int ret = 0;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	spin_lock(&zone->lock);
 	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
 	while (!list_empty(list) && count--) {
@@ -389,12 +388,13 @@ free_pages_bulk(struct zone *zone, int c
 		__free_pages_bulk(page, zone, order);
 		ret++;
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 	return ret;
 }
 
 void __free_pages_ok(struct page *page, unsigned int order)
 {
+	unsigned long flags;
 	LIST_HEAD(list);
 	int i;
 
@@ -412,7 +412,9 @@ void __free_pages_ok(struct page *page, 
 		free_pages_check(__FUNCTION__, page + i);
 	list_add(&page->lru, &list);
 	kernel_map_pages(page, 1<<order, 0);
+	local_irq_save(flags);
 	free_pages_bulk(page_zone(page), 1, &list, order);
+	local_irq_restore(flags);
 }
 
 
@@ -528,12 +530,11 @@ static struct page *__rmqueue(struct zon
 static int rmqueue_bulk(struct zone *zone, unsigned int order, 
 			unsigned long count, struct list_head *list)
 {
-	unsigned long flags;
 	int i;
 	int allocated = 0;
 	struct page *page;
 	
-	spin_lock_irqsave(&zone->lock, flags);
+	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
 		page = __rmqueue(zone, order);
 		if (page == NULL)
@@ -541,7 +542,7 @@ static int rmqueue_bulk(struct zone *zon
 		allocated++;
 		list_add_tail(&page->lru, list);
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 	return allocated;
 }
 
@@ -578,6 +579,7 @@ void drain_remote_pages(void)
 #if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
 static void __drain_pages(unsigned int cpu)
 {
+	unsigned long flags;
 	struct zone *zone;
 	int i;
 
@@ -589,8 +591,10 @@ static void __drain_pages(unsigned int c
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
+			local_irq_save(flags);
 			pcp->count -= free_pages_bulk(zone, pcp->count,
 						&pcp->list, 0);
+			local_irq_restore(flags);
 		}
 	}
 }
@@ -728,7 +732,7 @@ buffered_rmqueue(struct zone *zone, int 
 		if (pcp->count <= pcp->low)
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
-		if (pcp->count) {
+		if (likely(pcp->count)) {
 			page = list_entry(pcp->list.next, struct page, lru);
 			list_del(&page->lru);
 			pcp->count--;

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 4/12] mm: set_page_refs opt
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
                   ` (5 preceding siblings ...)
  2005-11-21 13:07 ` [patch 2/12] mm: pagealloc opt Nick Piggin
@ 2005-11-21 13:08 ` Nick Piggin
  2005-11-21 13:23 ` [patch 1/12] mm: free_pages_and_swap_cache opt Nick Piggin
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 13:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

Inline set_page_refs.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -450,23 +450,6 @@ expand(struct zone *zone, struct page *p
 	return page;
 }
 
-void set_page_refs(struct page *page, int order)
-{
-#ifdef CONFIG_MMU
-	set_page_count(page, 1);
-#else
-	int i;
-
-	/*
-	 * We need to reference all the pages for this order, otherwise if
-	 * anyone accesses one of the pages with (get/put) it will be freed.
-	 * - eg: access_process_vm()
-	 */
-	for (i = 0; i < (1 << order); i++)
-		set_page_count(page + i, 1);
-#endif /* CONFIG_MMU */
-}
-
 /*
  * This page is about to be returned from the page allocator
  */
Index: linux-2.6/mm/internal.h
===================================================================
--- linux-2.6.orig/mm/internal.h
+++ linux-2.6/mm/internal.h
@@ -9,5 +9,20 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-/* page_alloc.c */
-extern void set_page_refs(struct page *page, int order);
+static inline void set_page_refs(struct page *page, int order)
+{
+#ifdef CONFIG_MMU
+	set_page_count(page, 1);
+#else
+	int i;
+
+	/*
+	 * We need to reference all the pages for this order, otherwise if
+	 * anyone accesses one of the pages with (get/put) it will be freed.
+	 * - eg: access_process_vm()
+	 */
+	for (i = 0; i < (1 << order); i++)
+		set_page_count(page + i, 1);
+#endif /* CONFIG_MMU */
+}
+

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 1/12] mm: free_pages_and_swap_cache opt
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
                   ` (6 preceding siblings ...)
  2005-11-21 13:08 ` [patch 4/12] mm: set_page_refs opt Nick Piggin
@ 2005-11-21 13:23 ` Nick Piggin
  2005-11-21 13:25 ` [patch 5/12] mm: microopt conditions Nick Piggin
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 13:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

Minor optimization (though it doesn't help in the PREEMPT case, severely
constrained by small ZAP_BLOCK_SIZE).  free_pages_and_swap_cache works in
chunks of 16, calling release_pages which works in chunks of PAGEVEC_SIZE.
But PAGEVEC_SIZE was dropped from 16 to 14 in 2.6.10, so we're now doing
more spin_lock_irq'ing than necessary: use PAGEVEC_SIZE throughout.

Signed-off-by: Hugh Dickins <hugh@veritas.com>

Index: linux-2.6/mm/swap_state.c
===================================================================
--- linux-2.6.orig/mm/swap_state.c
+++ linux-2.6/mm/swap_state.c
@@ -14,6 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/backing-dev.h>
+#include <linux/pagevec.h>
 
 #include <asm/pgtable.h>
 
@@ -272,12 +273,11 @@ void free_page_and_swap_cache(struct pag
  */
 void free_pages_and_swap_cache(struct page **pages, int nr)
 {
-	int chunk = 16;
 	struct page **pagep = pages;
 
 	lru_add_drain();
 	while (nr) {
-		int todo = min(chunk, nr);
+		int todo = min(nr, PAGEVEC_SIZE);
 		int i;
 
 		for (i = 0; i < todo; i++)

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 5/12] mm: microopt conditions
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
                   ` (7 preceding siblings ...)
  2005-11-21 13:23 ` [patch 1/12] mm: free_pages_and_swap_cache opt Nick Piggin
@ 2005-11-21 13:25 ` Nick Piggin
  2005-11-21 13:27 ` [patch 10/12] mm: page_state fixes Nick Piggin
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 13:25 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

Micro-optimise some conditionals where we don't need lazy evaluation.
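
Illustration of the transformation (the diff below does the same to the real
checks): when every operand is a cheap, side-effect-free test, the
short-circuiting of || only adds branches, so the tests are combined with
bitwise | and branched on once.

	/* before: up to three conditional branches */
	if (page_mapcount(page) ||
		page->mapping != NULL ||
		page_count(page) != 0)
		bad_page(function, page);

	/* after: evaluate everything, test the OR'ed result once */
	if (unlikely(page_mapcount(page) |
		(page->mapping != NULL)  |
		(page_count(page) != 0)))
		bad_page(function, page);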

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -342,9 +342,9 @@ static inline void __free_pages_bulk (st
 
 static inline void free_pages_check(const char *function, struct page *page)
 {
-	if (	page_mapcount(page) ||
-		page->mapping != NULL ||
-		page_count(page) != 0 ||
+	if (unlikely(page_mapcount(page) |
+		(page->mapping != NULL)  |
+		(page_count(page) != 0)  |
 		(page->flags & (
 			1 << PG_lru	|
 			1 << PG_private |
@@ -354,7 +354,7 @@ static inline void free_pages_check(cons
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved )))
+			1 << PG_reserved ))))
 		bad_page(function, page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
@@ -455,9 +455,9 @@ expand(struct zone *zone, struct page *p
  */
 static void prep_new_page(struct page *page, int order)
 {
-	if (	page_mapcount(page) ||
-		page->mapping != NULL ||
-		page_count(page) != 0 ||
+	if (unlikely(page_mapcount(page) |
+		(page->mapping != NULL)  |
+		(page_count(page) != 0)  |
 		(page->flags & (
 			1 << PG_lru	|
 			1 << PG_private	|
@@ -468,7 +468,7 @@ static void prep_new_page(struct page *p
 			1 << PG_slab    |
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved )))
+			1 << PG_reserved ))))
 		bad_page(__FUNCTION__, page);
 
 	page->flags &= ~(1 << PG_uptodate | 1 << PG_error |

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 10/12] mm: page_state fixes
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
                   ` (8 preceding siblings ...)
  2005-11-21 13:25 ` [patch 5/12] mm: microopt conditions Nick Piggin
@ 2005-11-21 13:27 ` Nick Piggin
  2005-11-21 14:29 ` [patch 0/12] mm: optimisations Paul Jackson
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 13:27 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

read_page_state and __get_page_state only traverse online CPUs, which will
cause results to fluctuate when CPUs are plugged in or out.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -1140,12 +1140,11 @@ EXPORT_SYMBOL(nr_pagecache);
 DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
 #endif
 
-void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
+static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
 {
 	int cpu = 0;
 
 	memset(ret, 0, sizeof(*ret));
-	cpus_and(*cpumask, *cpumask, cpu_online_map);
 
 	cpu = first_cpu(*cpumask);
 	while (cpu < NR_CPUS) {
@@ -1198,7 +1197,7 @@ unsigned long read_page_state_offset(uns
 	unsigned long ret = 0;
 	int cpu;
 
-	for_each_online_cpu(cpu) {
+	for_each_cpu(cpu) {
 		unsigned long in;
 
 		in = (unsigned long)&per_cpu(page_states, cpu) + offset;

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 0/12] mm: optimisations
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
                   ` (9 preceding siblings ...)
  2005-11-21 13:27 ` [patch 10/12] mm: page_state fixes Nick Piggin
@ 2005-11-21 14:29 ` Paul Jackson
  2005-11-22  0:26   ` Nick Piggin
  2005-11-21 15:10 ` [patch 3/12] mm: release opt Nick Piggin
                   ` (2 subsequent siblings)
  13 siblings, 1 reply; 24+ messages in thread
From: Paul Jackson @ 2005-11-21 14:29 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-kernel, nickpiggin, akpm

Welcome to sendpatchset - cool.

(Nick is now using sendpatchset:
    http://www.speakeasy.org/~pj99/sgi/sendpatchset
 to send these patches, so they are inline.)

Now ... if you use sendpatchset to send the entire set at once, and
(even fancier) number them "01/12" instead of "1/12", we will see these
patches in the order you numbered them, instead of in the order you
apparently sent them: 11, 7, 8, 9, 12, 4, 2, 1, 5, 10 (with 3 and 6
still outstanding).

Your single sendpatchset control file would include lines something
like:

    subj [patch 00/12] mm: optimisations
    file optimisations.txt

    subj [patch 01/12] mm: free_pages_and_swap_cache opt
    file patches/free_pages_and_swap_cache-opt

    subj [patch 02/12] mm: pagealloc opt
    file patches/pagealloc-opt

    subj [patch 03/12] mm: something-or-other
    file patches/something-or-other

    subj [patch 04/12] mm: set_page_refs opt
    file patches/set_page_refs-opt

    ...


-- 
                  I won't rest till it's the best ...
                  Programmer, Linux Scalability
                  Paul Jackson <pj@sgi.com> 1.925.600.0401

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 3/12] mm: release opt
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
                   ` (10 preceding siblings ...)
  2005-11-21 14:29 ` [patch 0/12] mm: optimisations Paul Jackson
@ 2005-11-21 15:10 ` Nick Piggin
  2005-11-21 15:12 ` [patch 6/12] mm: remove bad_range Nick Piggin
  2005-11-22  8:06 ` [patch 0/12] mm: optimisations Andrew Morton
  13 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 15:10 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

Optimise some pagevec functions by not reenabling irqs while
switching lru locks.
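
The locking pattern, simplified from the diff below (example_release is a
made-up name): interrupts are turned off once when the first zone's lru_lock
is taken and stay off while hopping between zones, instead of a full
spin_unlock_irq()/spin_lock_irq() pair on every zone switch.

	static void example_release(struct page **pages, int nr)
	{
		struct zone *zone = NULL;
		int i;

		for (i = 0; i < nr; i++) {
			struct zone *pagezone = page_zone(pages[i]);

			if (pagezone != zone) {
				if (!zone)
					local_irq_disable(); /* first zone: irqs off once */
				else
					spin_unlock(&zone->lru_lock);
				zone = pagezone;
				spin_lock(&zone->lru_lock);
			}
			/* ... operate on pages[i] under zone->lru_lock ... */
		}
		if (zone)
			spin_unlock_irq(&zone->lru_lock);
	}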

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/mm/swap.c
===================================================================
--- linux-2.6.orig/mm/swap.c
+++ linux-2.6/mm/swap.c
@@ -220,10 +220,13 @@ void release_pages(struct page **pages, 
 
 		pagezone = page_zone(page);
 		if (pagezone != zone) {
-			if (zone)
-				spin_unlock_irq(&zone->lru_lock);
+			spin_lock_prefetch(&pagezone->lru_lock);
+			if (!zone)
+				local_irq_disable();
+			else
+				spin_unlock(&zone->lru_lock);
 			zone = pagezone;
-			spin_lock_irq(&zone->lru_lock);
+			spin_lock(&zone->lru_lock);
 		}
 		if (TestClearPageLRU(page))
 			del_page_from_lru(zone, page);
@@ -297,10 +300,12 @@ void __pagevec_lru_add(struct pagevec *p
 		struct zone *pagezone = page_zone(page);
 
 		if (pagezone != zone) {
+			if (!zone)
+				local_irq_disable();
 			if (zone)
-				spin_unlock_irq(&zone->lru_lock);
+				spin_unlock(&zone->lru_lock);
 			zone = pagezone;
-			spin_lock_irq(&zone->lru_lock);
+			spin_lock(&zone->lru_lock);
 		}
 		if (TestSetPageLRU(page))
 			BUG();
@@ -324,10 +329,12 @@ void __pagevec_lru_add_active(struct pag
 		struct zone *pagezone = page_zone(page);
 
 		if (pagezone != zone) {
+			if (!zone)
+				local_irq_disable();
 			if (zone)
-				spin_unlock_irq(&zone->lru_lock);
+				spin_unlock(&zone->lru_lock);
 			zone = pagezone;
-			spin_lock_irq(&zone->lru_lock);
+			spin_lock(&zone->lru_lock);
 		}
 		if (TestSetPageLRU(page))
 			BUG();

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [patch 6/12] mm: remove bad_range
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
                   ` (11 preceding siblings ...)
  2005-11-21 15:10 ` [patch 3/12] mm: release opt Nick Piggin
@ 2005-11-21 15:12 ` Nick Piggin
  2005-11-22 12:32   ` Dave Hansen
  2005-11-22  8:06 ` [patch 0/12] mm: optimisations Andrew Morton
  13 siblings, 1 reply; 24+ messages in thread
From: Nick Piggin @ 2005-11-21 15:12 UTC (permalink / raw)
  To: linux-kernel; +Cc: Nick Piggin, Andrew Morton

bad_range is supposed to be a temporary check. It would be a pity to throw
it out. Make it depend on CONFIG_DEBUG_VM instead.

CONFIG_HOLES_IN_ZONE systems were relying on this to check pfn_valid in
the page allocator. Add that to page_is_buddy instead.

Signed-off-by: Nick Piggin <npiggin@suse.de>

Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c
+++ linux-2.6/mm/page_alloc.c
@@ -81,6 +81,7 @@ int min_free_kbytes = 1024;
 unsigned long __initdata nr_kernel_pages;
 unsigned long __initdata nr_all_pages;
 
+#ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
 	int ret = 0;
@@ -122,6 +123,13 @@ static int bad_range(struct zone *zone, 
 	return 0;
 }
 
+#else
+static inline int bad_range(struct zone *zone, struct page *page)
+{
+	return 0;
+}
+#endif
+
 static void bad_page(const char *function, struct page *page)
 {
 	printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
@@ -261,14 +269,20 @@ __find_combined_index(unsigned long page
 /*
  * This function checks whether a page is free && is the buddy
  * we can do coalesce a page and its buddy if
- * (a) the buddy is free &&
- * (b) the buddy is on the buddy system &&
- * (c) a page and its buddy have the same order.
+ * (a) the buddy is not in a hole &&
+ * (b) the buddy is free &&
+ * (c) the buddy is on the buddy system &&
+ * (d) a page and its buddy have the same order.
  * for recording page's order, we use page_private(page) and PG_private.
  *
  */
 static inline int page_is_buddy(struct page *page, int order)
 {
+#ifdef CONFIG_HOLES_IN_ZONE
+	if (!pfn_valid(page_to_pfn(page)))
+		return 0;
+#endif
+
        if (PagePrivate(page)           &&
            (page_order(page) == order) &&
             page_count(page) == 0)
@@ -320,17 +334,15 @@ static inline void __free_pages_bulk (st
 		struct free_area *area;
 		struct page *buddy;
 
-		combined_idx = __find_combined_index(page_idx, order);
 		buddy = __page_find_buddy(page, page_idx, order);
-
-		if (bad_range(zone, buddy))
-			break;
 		if (!page_is_buddy(buddy, order))
 			break;		/* Move the buddy up one level. */
+
 		list_del(&buddy->lru);
 		area = zone->free_area + order;
 		area->nr_free--;
 		rmv_page_order(buddy);
+		combined_idx = __find_combined_index(page_idx, order);
 		page = page + (combined_idx - page_idx);
 		page_idx = combined_idx;
 		order++;
Index: linux-2.6/lib/Kconfig.debug
===================================================================
--- linux-2.6.orig/lib/Kconfig.debug
+++ linux-2.6/lib/Kconfig.debug
@@ -172,7 +172,8 @@ config DEBUG_VM
 	bool "Debug VM"
 	depends on DEBUG_KERNEL
 	help
-	  Enable this to debug the virtual-memory system.
+	  Enable this to turn on extended checks in the virtual-memory system
+          that may impact performance.
 
 	  If unsure, say N.
 

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 0/12] mm: optimisations
  2005-11-21 14:29 ` [patch 0/12] mm: optimisations Paul Jackson
@ 2005-11-22  0:26   ` Nick Piggin
  0 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-22  0:26 UTC (permalink / raw)
  To: Paul Jackson; +Cc: linux-kernel, akpm

Paul Jackson wrote:
> Welcome to sendpatchset - cool.
> 
> (Nick is now using sendpatchset:
>     http://www.speakeasy.org/~pj99/sgi/sendpatchset
>  to send these patches, so they are inline.)
> 
> Now ... if you use sendpatchset to send the entire set at once, and
> (even fancier) number them "01/12" instead of "1/12", we will see these
> patches in the order you numbered them, instead of in the order you
> apparently sent them: 11, 7, 8, 9, 12, 4, 2, 1, 5, 10 (with 3 and 6
> still outstanding).
> 

Yeah, they were, but it seems the smtp server's had a failure
and had to retry them.

Not the fault of your script though - it's nice. Thanks.

-- 
SUSE Labs, Novell Inc.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 9/12] mm: page_state opt
  2005-11-21 12:07 ` [patch 9/12] mm: page_state opt Nick Piggin
@ 2005-11-22  7:54   ` Andrew Morton
  2005-11-22  9:04     ` Nick Piggin
  0 siblings, 1 reply; 24+ messages in thread
From: Andrew Morton @ 2005-11-22  7:54 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-kernel, nickpiggin

Nick Piggin <nickpiggin@yahoo.com.au> wrote:
>
> -#define mod_page_state_zone(zone, member, delta)				\
>  -	do {									\
>  -		unsigned offset;						\
>  -		if (is_highmem(zone))						\
>  -			offset = offsetof(struct page_state, member##_high);	\
>  -		else if (is_normal(zone))					\
>  -			offset = offsetof(struct page_state, member##_normal);	\
>  -		else								\
>  -			offset = offsetof(struct page_state, member##_dma);	\
>  -		__mod_page_state(offset, (delta));				\
>  -	} while (0)

I suppose this needs updating to know about the dma32 zone.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 0/12] mm: optimisations
  2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
                   ` (12 preceding siblings ...)
  2005-11-21 15:12 ` [patch 6/12] mm: remove bad_range Nick Piggin
@ 2005-11-22  8:06 ` Andrew Morton
  2005-11-22 10:51   ` Nick Piggin
  13 siblings, 1 reply; 24+ messages in thread
From: Andrew Morton @ 2005-11-22  8:06 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-kernel, nickpiggin

Nick Piggin <nickpiggin@yahoo.com.au> wrote:
>
> The following patchset against 2.6.15-rc2 contains optimisations to the
>  mm subsystem, mainly the page allocator.

All look sane to me - I merged the ones which applied, randomly dropped the
rest - there are still a large number of mm/ changes pending, a number of
which don't generate a lot of enthusiasm, frankly.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 9/12] mm: page_state opt
  2005-11-22  9:04     ` Nick Piggin
@ 2005-11-22  8:08       ` Andrew Morton
  0 siblings, 0 replies; 24+ messages in thread
From: Andrew Morton @ 2005-11-22  8:08 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-kernel

Nick Piggin <nickpiggin@yahoo.com.au> wrote:
>
> Andrew Morton wrote:
> > Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> > 
> >>-#define mod_page_state_zone(zone, member, delta)				\
> >> -	do {									\
> >> -		unsigned offset;						\
> >> -		if (is_highmem(zone))						\
> >> -			offset = offsetof(struct page_state, member##_high);	\
> >> -		else if (is_normal(zone))					\
> >> -			offset = offsetof(struct page_state, member##_normal);	\
> >> -		else								\
> >> -			offset = offsetof(struct page_state, member##_dma);	\
> >> -		__mod_page_state(offset, (delta));				\
> >> -	} while (0)
> > 
> > 
> > I suppose this needs updating to know about the dma32 zone.
> > 
> 
> Ah I didn't realise DMA32 is in the tree now. I think you're right.

It means adding a new field to /proc/vmstat of course.  Presumably someone
uses that file occasionally.

> I'll rebase this patchset when such an update is made. If you'd like
> I could look at doing said DMA32 update for you?

Sometime..

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 9/12] mm: page_state opt
  2005-11-22  7:54   ` Andrew Morton
@ 2005-11-22  9:04     ` Nick Piggin
  2005-11-22  8:08       ` Andrew Morton
  0 siblings, 1 reply; 24+ messages in thread
From: Nick Piggin @ 2005-11-22  9:04 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

Andrew Morton wrote:
> Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> 
>>-#define mod_page_state_zone(zone, member, delta)				\
>> -	do {									\
>> -		unsigned offset;						\
>> -		if (is_highmem(zone))						\
>> -			offset = offsetof(struct page_state, member##_high);	\
>> -		else if (is_normal(zone))					\
>> -			offset = offsetof(struct page_state, member##_normal);	\
>> -		else								\
>> -			offset = offsetof(struct page_state, member##_dma);	\
>> -		__mod_page_state(offset, (delta));				\
>> -	} while (0)
> 
> 
> I suppose this needs updating to know about the dma32 zone.
> 

Ah I didn't realise DMA32 is in the tree now. I think you're right.

I'll rebase this patchset when such an update is made. If you'd like
I could look at doing said DMA32 update for you?

-- 
SUSE Labs, Novell Inc.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 0/12] mm: optimisations
  2005-11-22  8:06 ` [patch 0/12] mm: optimisations Andrew Morton
@ 2005-11-22 10:51   ` Nick Piggin
  0 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-22 10:51 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

Andrew Morton wrote:
> Nick Piggin <nickpiggin@yahoo.com.au> wrote:
> 
>>The following patchset against 2.6.15-rc2 contains optimisations to the
>> mm subsystem, mainly the page allocator.
> 
> 
> All look sane to me - I merged the ones which applied, randomly dropped the
> rest

Thanks, I'll resynch the remaining patches and send them in a bit.

-- 
SUSE Labs, Novell Inc.


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 6/12] mm: remove bad_range
  2005-11-21 15:12 ` [patch 6/12] mm: remove bad_range Nick Piggin
@ 2005-11-22 12:32   ` Dave Hansen
  2005-11-23  2:09     ` Nick Piggin
  0 siblings, 1 reply; 24+ messages in thread
From: Dave Hansen @ 2005-11-22 12:32 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Linux Kernel Mailing List, Andrew Morton

On Mon, 2005-11-21 at 10:12 -0500, Nick Piggin wrote:
> bad_range is supposed to be a temporary check. It would be a pity to throw
> it out. Make it depend on CONFIG_DEBUG_VM instead.
> 
> CONFIG_HOLES_IN_ZONE systems were relying on this to check pfn_valid in
> the page allocator. Add that to page_is_buddy instead.
> 
> Signed-off-by: Nick Piggin <npiggin@suse.de>
> 
> Index: linux-2.6/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.orig/mm/page_alloc.c
> +++ linux-2.6/mm/page_alloc.c
> @@ -81,6 +81,7 @@ int min_free_kbytes = 1024;
>  unsigned long __initdata nr_kernel_pages;
>  unsigned long __initdata nr_all_pages;
>  
> +#ifdef CONFIG_DEBUG_VM
>  static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
>  {
>  	int ret = 0;
> @@ -122,6 +123,13 @@ static int bad_range(struct zone *zone, 
>  	return 0;
>  }
>  
> +#else
> +static inline int bad_range(struct zone *zone, struct page *page)
> +{
> +	return 0;
> +}
> +#endif
> +
>  static void bad_page(const char *function, struct page *page)
...
>  static inline int page_is_buddy(struct page *page, int order)
>  {
> +#ifdef CONFIG_HOLES_IN_ZONE
> +	if (!pfn_valid(page_to_pfn(page)))
> +		return 0;
> +#endif
> +
>         if (PagePrivate(page)           &&
>             (page_order(page) == order) &&
>              page_count(page) == 0)
> @@ -320,17 +334,15 @@ static inline void __free_pages_bulk (st
>  		struct free_area *area;
>  		struct page *buddy;
>  
> -		combined_idx = __find_combined_index(page_idx, order);
>  		buddy = __page_find_buddy(page, page_idx, order);
> -
> -		if (bad_range(zone, buddy))
> -			break;
>  		if (!page_is_buddy(buddy, order))
>  			break;		/* Move the buddy up one level. */

I also seem to remember a case where this bad_range() check was useful
for zones that don't have their boundaries aligned on a MAX_ORDER
boundary.  Would this change break such a zone?  Do we care?

-- Dave


^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 12/12] mm: rmap opt
  2005-11-21 12:08 ` [patch 12/12] mm: rmap opt Nick Piggin
@ 2005-11-22 19:19   ` Hugh Dickins
  2005-11-23  0:07     ` Nick Piggin
  0 siblings, 1 reply; 24+ messages in thread
From: Hugh Dickins @ 2005-11-22 19:19 UTC (permalink / raw)
  To: Nick Piggin; +Cc: linux-kernel, Andrew Morton

On Mon, 21 Nov 2005, Nick Piggin wrote:

> Optimise rmap functions by minimising atomic operations when
> we know there will be no concurrent modifications.

It's not quite right yet.  A few minor points first:

You ought to convert the page_add_anon_rmap in fs/exec.c to
page_add_new_anon_rmap: that won't give a huge leap in performance,
but it will save someone coming along later and wondering why that
particular one isn't "new_".
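
(For concreteness, that's the page_add_anon_rmap() call in
install_arg_page(); from memory the hunk would be no more than the
following -- treat it as a sketch, not a ready-made patch:

	-	page_add_anon_rmap(page, vma, address);
	+	page_add_new_anon_rmap(page, vma, address);

The page there is freshly allocated and not yet visible to any other
mapper, which is exactly the case the "new_" variant is for.)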

The mod to page-flags.h at the end: nowhere is __SetPageReferenced
used, just cut the page-flags.h change out of your patch.

Perhaps that was at one time a half-way house to removing the
SetPageReferenced from do_anonymous_page: I support you in that
removal (I've several times argued that if it's needed there, then
it's also needed in several other like places which lack it; and I
think you concluded that it's just not needed); but you ought at least
to confess to that in the change comments, if it's not a separate patch.

I've spent longest staring at page_remove_rmap.  Here's how it looks:

void page_remove_rmap(struct page *page)
{
	int fast = (page_mapcount(page) == 1) &
			PageAnon(page) & (!PageSwapCache(page));

	/* fast page may become SwapCache here, but nothing new will map it. */
	if (fast)
		reset_page_mapcount(page);
	else if (atomic_add_negative(-1, &page->_mapcount))
		BUG_ON(page_mapcount(page) < 0);
		if (page_test_and_clear_dirty(page))
			set_page_dirty(page);
	else
		return; /* non zero mapcount */
/* [comment snipped for these purposes] */
	__dec_page_state(nr_mapped);
}

Well, C doesn't yet allow indentation to take the place of braces:
I think you'll find your /proc/meminfo Mapped goes up and up, since
only on s390 will page_test_and_clear_dirty ever say yes.
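
Presumably the grouping you intended was this (just the same logic with
the braces put back -- a sketch, not a tested fix):

	void page_remove_rmap(struct page *page)
	{
		int fast = (page_mapcount(page) == 1) &
				PageAnon(page) & (!PageSwapCache(page));

		/* fast page may become SwapCache here, but nothing new will map it. */
		if (fast) {
			reset_page_mapcount(page);
		} else if (atomic_add_negative(-1, &page->_mapcount)) {
			BUG_ON(page_mapcount(page) < 0);
			if (page_test_and_clear_dirty(page))
				set_page_dirty(page);
		} else {
			return; /* non zero mapcount */
		}
		/* [comment snipped for these purposes] */
		__dec_page_state(nr_mapped);
	}

As posted, the dangling "else return" binds to the
page_test_and_clear_dirty() test, so on everything except s390 the
function returns before it ever reaches __dec_page_state(nr_mapped) --
hence Mapped only ever going up.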

That "fast" condition.  I believe it's right, and I can see that in the
common case it will avoid the atomic -1.  Yet it seems so desperate, and
is just begging for a hole to be found in the logic (I thought I'd found
one, but was forgetting I'd rearranged do_swap_page to remove from swap
cache when full _after_ its page_add_anon_rmap).

It also made me wonder whether barriers are needed between the different
tests: somehow I think not, but can't put into words how I think it is
protected.  Were the "&"s instead of "&&"s a significant part of the
optimization, or an accident?

The page_remove_rmap part is rather ugly, but if going to those lengths
to avoid the atomic -1 is really a win on the majority of machines we
need to be fastest on (is that the case?), then okay.

Hugh

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 12/12] mm: rmap opt
  2005-11-22 19:19   ` Hugh Dickins
@ 2005-11-23  0:07     ` Nick Piggin
  0 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-23  0:07 UTC (permalink / raw)
  To: Hugh Dickins; +Cc: linux-kernel, Andrew Morton

Hugh Dickins wrote:
> On Mon, 21 Nov 2005, Nick Piggin wrote:
> 
> 
>>Optimise rmap functions by minimising atomic operations when
>>we know there will be no concurrent modifications.
> 
> 
> It's not quite right yet.  A few minor points first:
> 

Thanks for looking at it.

> You ought to convert the page_add_anon_rmap in fs/exec.c to
> page_add_new_anon_rmap: that won't give a huge leap in performance,
> but it will save someone coming along later and wondering why that
> particular one isn't "new_".
> 

Yep, you mentioned that before but I must have lost the hunk.

> The mod to page-flags.h at the end: nowhere is __SetPageReferenced
> used, just cut the page-flags.h change out of your patch.
> 
> Perhaps that was at one time a half-way house to removing the
> SetPageReferenced from do_anonymous_page: I support you in that
> removal (I've several times argued that if it's needed there, then
> it's also needed in several other like places which lack it; and I
> think you concluded that it's just not needed); but you ought at least
> to confess to that in the change comments, if it's not a separate patch.
> 

You're right. I'll split that and fix the page-flags.h.

> I've spent longest staring at page_remove_rmap.  Here's how it looks:
> 
> void page_remove_rmap(struct page *page)
> {
> 	int fast = (page_mapcount(page) == 1) &
> 			PageAnon(page) & (!PageSwapCache(page));
> 
> 	/* fast page may become SwapCache here, but nothing new will map it. */
> 	if (fast)
> 		reset_page_mapcount(page);
> 	else if (atomic_add_negative(-1, &page->_mapcount))
> 		BUG_ON(page_mapcount(page) < 0);
> 		if (page_test_and_clear_dirty(page))
> 			set_page_dirty(page);
> 	else
> 		return; /* non zero mapcount */
> /* [comment snipped for these purposes] */
> 	__dec_page_state(nr_mapped);
> }
> 
> Well, C doesn't yet allow indentation to take the place of braces:
> I think you'll find your /proc/meminfo Mapped goes up and up, since
> only on s390 will page_test_and_clear_dirty ever say yes.
> 

Thanks. It is fairly obscure, and possibly has memory ordering problems.
The conditional jumps and icache footprint also grow, so it isn't as
clear a win as the page_add_new_anon_rmap changes. I'll drop this part
for the moment.

Nick

-- 
SUSE Labs, Novell Inc.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [patch 6/12] mm: remove bad_range
  2005-11-22 12:32   ` Dave Hansen
@ 2005-11-23  2:09     ` Nick Piggin
  0 siblings, 0 replies; 24+ messages in thread
From: Nick Piggin @ 2005-11-23  2:09 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Linux Kernel Mailing List, Andrew Morton, Linux Memory Management

Dave Hansen wrote:

> 
> I also seem to remember a case where this bad_range() check was useful
> for zones that don't have their boundaries aligned on a MAX_ORDER
> boundary.  Would this change break such a zone?  Do we care?
> 

Hmm, I guess that would be covered by the:

         if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages)
                 return 1;
         if (page_to_pfn(page) < zone->zone_start_pfn)
                 return 1;

checks in bad_range. ISTR some "warning: zone not aligned, kernel
*will* crash" message got printed in that case. I always thought
that zones were supposed to be MAX_ORDER aligned, but I can see how
that restriction might be relaxed with these checks in place.

This commit introduced the change:
http://www.kernel.org/git/?p=linux/kernel/git/torvalds/old-2.6-bkcvs.git;a=commitdiff;h=d60c9dbc4589766ef5fe88f082052ccd4ecaea59

I think this basically says that architectures that care need to define
CONFIG_HOLES_IN_ZONE and handle this in pfn_valid.
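
Purely as an illustration (the names below are made up, not taken from
any real architecture), such a pfn_valid() could simply consult a
bitmap of present pfns built from the firmware memory map at boot:

	/* hypothetical arch with holes punched inside its zones */
	extern unsigned long *arch_present_pfn_map;
	extern unsigned long arch_max_pfn;

	static inline int pfn_valid(unsigned long pfn)
	{
		if (pfn >= arch_max_pfn)
			return 0;
		return test_bit(pfn, arch_present_pfn_map);
	}

and then the pfn_valid() check the patch adds to page_is_buddy() is
what stops the buddy walk from touching a struct page that doesn't
exist.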

Unless this is a very common requirement and such a solution would have
too much performance cost? Anyone?

-- 
SUSE Labs, Novell Inc.

^ permalink raw reply	[flat|nested] 24+ messages in thread

Thread overview: 24+ messages
2005-11-21 11:37 [patch 0/12] mm: optimisations Nick Piggin
2005-11-21 11:41 ` [patch 11/12] mm: page_alloc cleanups Nick Piggin
2005-11-21 12:00 ` [patch 7/12] mm: bad_page opt Nick Piggin
2005-11-21 12:00 ` [patch 8/12] mm: remove pcp low Nick Piggin
2005-11-21 12:07 ` [patch 9/12] mm: page_state opt Nick Piggin
2005-11-22  7:54   ` Andrew Morton
2005-11-22  9:04     ` Nick Piggin
2005-11-22  8:08       ` Andrew Morton
2005-11-21 12:08 ` [patch 12/12] mm: rmap opt Nick Piggin
2005-11-22 19:19   ` Hugh Dickins
2005-11-23  0:07     ` Nick Piggin
2005-11-21 13:07 ` [patch 2/12] mm: pagealloc opt Nick Piggin
2005-11-21 13:08 ` [patch 4/12] mm: set_page_refs opt Nick Piggin
2005-11-21 13:23 ` [patch 1/12] mm: free_pages_and_swap_cache opt Nick Piggin
2005-11-21 13:25 ` [patch 5/12] mm: microopt conditions Nick Piggin
2005-11-21 13:27 ` [patch 10/12] mm: page_state fixes Nick Piggin
2005-11-21 14:29 ` [patch 0/12] mm: optimisations Paul Jackson
2005-11-22  0:26   ` Nick Piggin
2005-11-21 15:10 ` [patch 3/12] mm: release opt Nick Piggin
2005-11-21 15:12 ` [patch 6/12] mm: remove bad_range Nick Piggin
2005-11-22 12:32   ` Dave Hansen
2005-11-23  2:09     ` Nick Piggin
2005-11-22  8:06 ` [patch 0/12] mm: optimisations Andrew Morton
2005-11-22 10:51   ` Nick Piggin
