linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] use local_t for page statistics
@ 2006-01-06 21:53 Benjamin LaHaise
  2006-01-07  0:33 ` Andrew Morton
  0 siblings, 1 reply; 13+ messages in thread
From: Benjamin LaHaise @ 2006-01-06 21:53 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel, linux-mm

The patch below converts the mm page_states counters to use local_t.  
mod_page_state shows up in a few profiles on x86 and x86-64 due to the 
disable/enable interrupts operations touching the flags register.  On 
both my laptop (Pentium M) and P4 test box this results in about 10 
additional /bin/bash -c exit 0 executions per second (P4 went from ~759/s 
to ~771/s).  Tested on x86 and x86-64.  Oh, also add a pgcow statistic 
for the number of COW page faults.

Signed-off-by: Benjamin LaHaise <benjamin.c.lahaise@intel.com>

diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 9db3242..293ba9a 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -59,11 +59,11 @@ void show_mem(void)
 	printk(KERN_INFO "%d pages swap cached\n", cached);
 
 	get_page_state(&ps);
-	printk(KERN_INFO "%lu pages dirty\n", ps.nr_dirty);
-	printk(KERN_INFO "%lu pages writeback\n", ps.nr_writeback);
-	printk(KERN_INFO "%lu pages mapped\n", ps.nr_mapped);
-	printk(KERN_INFO "%lu pages slab\n", ps.nr_slab);
-	printk(KERN_INFO "%lu pages pagetables\n", ps.nr_page_table_pages);
+	printk(KERN_INFO "%lu pages dirty\n", local_read(&ps.nr_dirty));
+	printk(KERN_INFO "%lu pages writeback\n", local_read(&ps.nr_writeback));
+	printk(KERN_INFO "%lu pages mapped\n", local_read(&ps.nr_mapped));
+	printk(KERN_INFO "%lu pages slab\n", local_read(&ps.nr_slab));
+	printk(KERN_INFO "%lu pages pagetables\n", local_read(&ps.nr_page_table_pages));
 }
 
 /*
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 16c513a..5ae1ead 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -81,10 +81,10 @@ static ssize_t node_read_meminfo(struct 
 		       nid, K(i.freehigh),
 		       nid, K(i.totalram - i.totalhigh),
 		       nid, K(i.freeram - i.freehigh),
-		       nid, K(ps.nr_dirty),
-		       nid, K(ps.nr_writeback),
-		       nid, K(ps.nr_mapped),
-		       nid, K(ps.nr_slab));
+		       nid, K(local_read(&ps.nr_dirty)),
+		       nid, K(local_read(&ps.nr_writeback)),
+		       nid, K(local_read(&ps.nr_mapped)),
+		       nid, K(local_read(&ps.nr_slab)));
 	n += hugetlb_report_node_meminfo(nid, buf + n);
 	return n;
 }
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5b6b0b6..e76b11f 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -135,7 +135,7 @@ static int meminfo_read_proc(char *page,
 /*
  * display in kilobytes.
  */
-#define K(x) ((x) << (PAGE_SHIFT - 10))
+#define K(x) ((unsigned long)(x) << (PAGE_SHIFT - 10))
 	si_meminfo(&i);
 	si_swapinfo(&i);
 	committed = atomic_read(&vm_committed_space);
@@ -188,13 +188,13 @@ static int meminfo_read_proc(char *page,
 		K(i.freeram-i.freehigh),
 		K(i.totalswap),
 		K(i.freeswap),
-		K(ps.nr_dirty),
-		K(ps.nr_writeback),
-		K(ps.nr_mapped),
-		K(ps.nr_slab),
+		K(local_read(&ps.nr_dirty)),
+		K(local_read(&ps.nr_writeback)),
+		K(local_read(&ps.nr_mapped)),
+		K(local_read(&ps.nr_slab)),
 		K(allowed),
 		K(committed),
-		K(ps.nr_page_table_pages),
+		K(local_read(&ps.nr_page_table_pages)),
 		(unsigned long)VMALLOC_TOTAL >> 10,
 		vmi.used >> 10,
 		vmi.largest_chunk >> 10
diff --git a/include/asm-x86_64/local.h b/include/asm-x86_64/local.h
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d52999c..1d95d05 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -8,6 +8,7 @@
 #include <linux/percpu.h>
 #include <linux/cache.h>
 #include <asm/pgtable.h>
+#include <asm/local.h>
 
 /*
  * Various page->flags bits:
@@ -77,7 +78,7 @@
 #define PG_uncached		19	/* Page has been mapped as uncached */
 
 /*
- * Global page accounting.  One instance per CPU.  Only unsigned longs are
+ * Global page accounting.  One instance per CPU.  Only local_ts are
  * allowed.
  *
  * - Fields can be modified with xxx_page_state and xxx_page_state_zone at
@@ -90,65 +91,66 @@
  * In this case, the field should be commented here.
  */
 struct page_state {
-	unsigned long nr_dirty;		/* Dirty writeable pages */
-	unsigned long nr_writeback;	/* Pages under writeback */
-	unsigned long nr_unstable;	/* NFS unstable pages */
-	unsigned long nr_page_table_pages;/* Pages used for pagetables */
-	unsigned long nr_mapped;	/* mapped into pagetables.
+	local_t nr_dirty;		/* Dirty writeable pages */
+	local_t nr_writeback;		/* Pages under writeback */
+	local_t nr_unstable;		/* NFS unstable pages */
+	local_t nr_page_table_pages;	/* Pages used for pagetables */
+	local_t nr_mapped;		/* mapped into pagetables.
 					 * only modified from process context */
-	unsigned long nr_slab;		/* In slab */
+	local_t nr_slab;		/* In slab */
 #define GET_PAGE_STATE_LAST nr_slab
 
 	/*
 	 * The below are zeroed by get_page_state().  Use get_full_page_state()
 	 * to add up all these.
 	 */
-	unsigned long pgpgin;		/* Disk reads */
-	unsigned long pgpgout;		/* Disk writes */
-	unsigned long pswpin;		/* swap reads */
-	unsigned long pswpout;		/* swap writes */
-
-	unsigned long pgalloc_high;	/* page allocations */
-	unsigned long pgalloc_normal;
-	unsigned long pgalloc_dma32;
-	unsigned long pgalloc_dma;
-
-	unsigned long pgfree;		/* page freeings */
-	unsigned long pgactivate;	/* pages moved inactive->active */
-	unsigned long pgdeactivate;	/* pages moved active->inactive */
-
-	unsigned long pgfault;		/* faults (major+minor) */
-	unsigned long pgmajfault;	/* faults (major only) */
-
-	unsigned long pgrefill_high;	/* inspected in refill_inactive_zone */
-	unsigned long pgrefill_normal;
-	unsigned long pgrefill_dma32;
-	unsigned long pgrefill_dma;
-
-	unsigned long pgsteal_high;	/* total highmem pages reclaimed */
-	unsigned long pgsteal_normal;
-	unsigned long pgsteal_dma32;
-	unsigned long pgsteal_dma;
-
-	unsigned long pgscan_kswapd_high;/* total highmem pages scanned */
-	unsigned long pgscan_kswapd_normal;
-	unsigned long pgscan_kswapd_dma32;
-	unsigned long pgscan_kswapd_dma;
-
-	unsigned long pgscan_direct_high;/* total highmem pages scanned */
-	unsigned long pgscan_direct_normal;
-	unsigned long pgscan_direct_dma32;
-	unsigned long pgscan_direct_dma;
-
-	unsigned long pginodesteal;	/* pages reclaimed via inode freeing */
-	unsigned long slabs_scanned;	/* slab objects scanned */
-	unsigned long kswapd_steal;	/* pages reclaimed by kswapd */
-	unsigned long kswapd_inodesteal;/* reclaimed via kswapd inode freeing */
-	unsigned long pageoutrun;	/* kswapd's calls to page reclaim */
-	unsigned long allocstall;	/* direct reclaim calls */
-
-	unsigned long pgrotated;	/* pages rotated to tail of the LRU */
-	unsigned long nr_bounce;	/* pages for bounce buffers */
+	local_t pgpgin;			/* Disk reads */
+	local_t pgpgout;		/* Disk writes */
+	local_t pswpin;			/* swap reads */
+	local_t pswpout;		/* swap writes */
+
+	local_t pgalloc_high;		/* page allocations */
+	local_t pgalloc_normal;
+	local_t pgalloc_dma32;
+	local_t pgalloc_dma;
+
+	local_t pgfree;			/* page freeings */
+	local_t pgactivate;		/* pages moved inactive->active */
+	local_t pgdeactivate;		/* pages moved active->inactive */
+
+	local_t pgfault;		/* faults (major+minor) */
+	local_t pgmajfault;		/* faults (major only) */
+
+	local_t pgrefill_high;		/* inspected in refill_inactive_zone */
+	local_t pgrefill_normal;
+	local_t pgrefill_dma32;
+	local_t pgrefill_dma;
+
+	local_t pgsteal_high;		/* total highmem pages reclaimed */
+	local_t pgsteal_normal;
+	local_t pgsteal_dma32;
+	local_t pgsteal_dma;
+
+	local_t pgscan_kswapd_high;	/* total highmem pages scanned */
+	local_t pgscan_kswapd_normal;
+	local_t pgscan_kswapd_dma32;
+	local_t pgscan_kswapd_dma;
+
+	local_t pgscan_direct_high;	/* total highmem pages scanned */
+	local_t pgscan_direct_normal;
+	local_t pgscan_direct_dma32;
+	local_t pgscan_direct_dma;
+
+	local_t pginodesteal;		/* pages reclaimed via inode freeing */
+	local_t slabs_scanned;		/* slab objects scanned */
+	local_t kswapd_steal;		/* pages reclaimed by kswapd */
+	local_t kswapd_inodesteal;	/* reclaimed via kswapd inode freeing */
+	local_t pageoutrun;		/* kswapd's calls to page reclaim */
+	local_t allocstall;		/* direct reclaim calls */
+
+	local_t pgrotated;		/* pages rotated to tail of the LRU */
+	local_t nr_bounce;		/* pages for bounce buffers */
+	local_t pgcow;			/* COW page copies */
 };
 
 extern void get_page_state(struct page_state *ret);
diff --git a/mm/memory.c b/mm/memory.c
index 7197f9b..db3faea 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1410,6 +1410,7 @@ static inline void cow_user_page(struct 
 		return;
 		
 	}
+	inc_page_state(pgcow);
 	copy_user_highpage(dst, src, va);
 }
 
@@ -2067,6 +2068,7 @@ retry:
 		page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 		if (!page)
 			goto oom;
+		inc_page_state(pgcow);
 		copy_user_highpage(page, new_page, address);
 		page_cache_release(new_page);
 		new_page = page;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fd47494..6f47e27 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1191,7 +1191,7 @@ static void show_node(struct zone *zone)
  * The result is unavoidably approximate - it can change
  * during and after execution of this function.
  */
-static DEFINE_PER_CPU(struct page_state, page_states) = {0};
+static DEFINE_PER_CPU(struct page_state, page_states);
 
 atomic_t nr_pagecache = ATOMIC_INIT(0);
 EXPORT_SYMBOL(nr_pagecache);
@@ -1207,18 +1207,19 @@ static void __get_page_state(struct page
 
 	cpu = first_cpu(*cpumask);
 	while (cpu < NR_CPUS) {
-		unsigned long *in, *out, off;
+		local_t *in, *out;
+		int off;
 
-		in = (unsigned long *)&per_cpu(page_states, cpu);
+		in = (local_t *)&per_cpu(page_states, cpu);
 
 		cpu = next_cpu(cpu, *cpumask);
 
 		if (cpu < NR_CPUS)
 			prefetch(&per_cpu(page_states, cpu));
 
-		out = (unsigned long *)ret;
+		out = (local_t *)ret;
 		for (off = 0; off < nr; off++)
-			*out++ += *in++;
+			local_add(local_read(in++), out++);
 	}
 }
 
@@ -1228,7 +1229,7 @@ void get_page_state_node(struct page_sta
 	cpumask_t mask = node_to_cpumask(node);
 
 	nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
-	nr /= sizeof(unsigned long);
+	nr /= sizeof(local_t);
 
 	__get_page_state(ret, nr+1, &mask);
 }
@@ -1239,7 +1240,7 @@ void get_page_state(struct page_state *r
 	cpumask_t mask = CPU_MASK_ALL;
 
 	nr = offsetof(struct page_state, GET_PAGE_STATE_LAST);
-	nr /= sizeof(unsigned long);
+	nr /= sizeof(local_t);
 
 	__get_page_state(ret, nr + 1, &mask);
 }
@@ -1248,7 +1249,7 @@ void get_full_page_state(struct page_sta
 {
 	cpumask_t mask = CPU_MASK_ALL;
 
-	__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
+	__get_page_state(ret, sizeof(*ret) / sizeof(local_t), &mask);
 }
 
 unsigned long read_page_state_offset(unsigned long offset)
@@ -1257,10 +1258,10 @@ unsigned long read_page_state_offset(uns
 	int cpu;
 
 	for_each_cpu(cpu) {
-		unsigned long in;
+		local_t *in;
 
-		in = (unsigned long)&per_cpu(page_states, cpu) + offset;
-		ret += *((unsigned long *)in);
+		in = (void *)&per_cpu(page_states, cpu) + offset;
+		ret += local_read(in);
 	}
 	return ret;
 }
@@ -1270,19 +1271,16 @@ void __mod_page_state_offset(unsigned lo
 	void *ptr;
 
 	ptr = &__get_cpu_var(page_states);
-	*(unsigned long *)(ptr + offset) += delta;
+	local_add(delta, (local_t *)(ptr + offset));
 }
 EXPORT_SYMBOL(__mod_page_state_offset);
 
 void mod_page_state_offset(unsigned long offset, unsigned long delta)
 {
-	unsigned long flags;
 	void *ptr;
 
-	local_irq_save(flags);
 	ptr = &__get_cpu_var(page_states);
-	*(unsigned long *)(ptr + offset) += delta;
-	local_irq_restore(flags);
+	local_add(delta, (local_t *)(ptr + offset));
 }
 EXPORT_SYMBOL(mod_page_state_offset);
 
@@ -1402,13 +1400,13 @@ void show_free_areas(void)
 		"unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n",
 		active,
 		inactive,
-		ps.nr_dirty,
-		ps.nr_writeback,
-		ps.nr_unstable,
+		(unsigned long)local_read(&ps.nr_dirty),
+		(unsigned long)local_read(&ps.nr_writeback),
+		(unsigned long)local_read(&ps.nr_unstable),
 		nr_free_pages(),
-		ps.nr_slab,
-		ps.nr_mapped,
-		ps.nr_page_table_pages);
+		(unsigned long)local_read(&ps.nr_slab),
+		(unsigned long)local_read(&ps.nr_mapped),
+		(unsigned long)local_read(&ps.nr_page_table_pages));
 
 	for_each_zone(zone) {
 		int i;
@@ -2320,6 +2318,7 @@ static char *vmstat_text[] = {
 
 	"pgrotated",
 	"nr_bounce",
+	"pgcow",
 };
 
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
@@ -2334,9 +2333,10 @@ static void *vmstat_start(struct seq_fil
 	if (!ps)
 		return ERR_PTR(-ENOMEM);
 	get_full_page_state(ps);
-	ps->pgpgin /= 2;		/* sectors -> kbytes */
-	ps->pgpgout /= 2;
-	return (unsigned long *)ps + *pos;
+	/* sectors -> kbytes */
+	local_set(&ps->pgpgin, local_read(&ps->pgpgin) / 2);
+	local_set(&ps->pgpgout, local_read(&ps->pgpgout) / 2);
+	return (local_t *)ps + *pos;
 }
 
 static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos)
@@ -2344,15 +2344,16 @@ static void *vmstat_next(struct seq_file
 	(*pos)++;
 	if (*pos >= ARRAY_SIZE(vmstat_text))
 		return NULL;
-	return (unsigned long *)m->private + *pos;
+	return (local_t *)m->private + *pos;
 }
 
 static int vmstat_show(struct seq_file *m, void *arg)
 {
-	unsigned long *l = arg;
-	unsigned long off = l - (unsigned long *)m->private;
+	local_t *l = arg;
+	unsigned long off = l - (local_t *)m->private;
 
-	seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
+	seq_printf(m, "%s %lu\n", vmstat_text[off],
+		   (unsigned long)local_read(l));
 	return 0;
 }
 
@@ -2377,7 +2378,7 @@ static int page_alloc_cpu_notify(struct 
 {
 	int cpu = (unsigned long)hcpu;
 	long *count;
-	unsigned long *src, *dest;
+	local_t *src, *dest;
 
 	if (action == CPU_DEAD) {
 		int i;
@@ -2388,18 +2389,17 @@ static int page_alloc_cpu_notify(struct 
 		*count = 0;
 		local_irq_disable();
 		__drain_pages(cpu);
+		local_irq_enable();
 
 		/* Add dead cpu's page_states to our own. */
-		dest = (unsigned long *)&__get_cpu_var(page_states);
-		src = (unsigned long *)&per_cpu(page_states, cpu);
+		dest = (local_t *)&__get_cpu_var(page_states);
+		src = (local_t *)&per_cpu(page_states, cpu);
 
-		for (i = 0; i < sizeof(struct page_state)/sizeof(unsigned long);
+		for (i = 0; i < sizeof(struct page_state)/sizeof(local_t);
 				i++) {
-			dest[i] += src[i];
-			src[i] = 0;
+			local_add(local_read(src + i), dest + i);
+			local_set(src + i, 0);
 		}
-
-		local_irq_enable();
 	}
 	return NOTIFY_OK;
 }

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] use local_t for page statistics
  2006-01-06 21:53 [PATCH] use local_t for page statistics Benjamin LaHaise
@ 2006-01-07  0:33 ` Andrew Morton
  2006-01-07  1:00   ` Marcelo Tosatti
                     ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Andrew Morton @ 2006-01-07  0:33 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: linux-kernel, linux-mm, Nick Piggin

Benjamin LaHaise <bcrl@kvack.org> wrote:
>
> The patch below converts the mm page_states counters to use local_t.  
> mod_page_state shows up in a few profiles on x86 and x86-64 due to the 
> disable/enable interrupts operations touching the flags register.  On 
> both my laptop (Pentium M) and P4 test box this results in about 10 
> additional /bin/bash -c exit 0 executions per second (P4 went from ~759/s 
> to ~771/s).  Tested on x86 and x86-64.  Oh, also add a pgcow statistic 
> for the number of COW page faults.

Bah.  I think this is a better approach than the just-merged
mm-page_state-opt.patch, so I should revert that patch first?


From: Nick Piggin <nickpiggin@yahoo.com.au>

Optimise page_state manipulations by introducing interrupt unsafe accessors
to page_state fields.  Callers must provide their own locking (either
disable interrupts or not update from interrupt context).

Switch over the hot callsites that can easily be moved under interrupts off
sections.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 include/linux/page-flags.h |   43 ++++++++++++----
 mm/page_alloc.c            |   89 +++++++++++++++++++----------------
 mm/rmap.c                  |   10 ++-
 mm/vmscan.c                |   27 +++++-----
 4 files changed, 104 insertions(+), 65 deletions(-)

diff -puN include/linux/page-flags.h~mm-page_state-opt include/linux/page-flags.h
--- devel/include/linux/page-flags.h~mm-page_state-opt	2006-01-06 00:04:15.000000000 -0800
+++ devel-akpm/include/linux/page-flags.h	2006-01-06 00:06:48.000000000 -0800
@@ -144,22 +144,33 @@ struct page_state {
 extern void get_page_state(struct page_state *ret);
 extern void get_page_state_node(struct page_state *ret, int node);
 extern void get_full_page_state(struct page_state *ret);
-extern unsigned long __read_page_state(unsigned long offset);
-extern void __mod_page_state(unsigned long offset, unsigned long delta);
+extern unsigned long read_page_state_offset(unsigned long offset);
+extern void mod_page_state_offset(unsigned long offset, unsigned long delta);
+extern void __mod_page_state_offset(unsigned long offset, unsigned long delta);
 
 #define read_page_state(member) \
-	__read_page_state(offsetof(struct page_state, member))
+	read_page_state_offset(offsetof(struct page_state, member))
 
 #define mod_page_state(member, delta)	\
-	__mod_page_state(offsetof(struct page_state, member), (delta))
+	mod_page_state_offset(offsetof(struct page_state, member), (delta))
 
-#define inc_page_state(member)	mod_page_state(member, 1UL)
-#define dec_page_state(member)	mod_page_state(member, 0UL - 1)
-#define add_page_state(member,delta) mod_page_state(member, (delta))
-#define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta))
+#define __mod_page_state(member, delta)	\
+	__mod_page_state_offset(offsetof(struct page_state, member), (delta))
 
-#define mod_page_state_zone(zone, member, delta)			\
- do {									\
+#define inc_page_state(member)		mod_page_state(member, 1UL)
+#define dec_page_state(member)		mod_page_state(member, 0UL - 1)
+#define add_page_state(member,delta)	mod_page_state(member, (delta))
+#define sub_page_state(member,delta)	mod_page_state(member, 0UL - (delta))
+
+#define __inc_page_state(member)	__mod_page_state(member, 1UL)
+#define __dec_page_state(member)	__mod_page_state(member, 0UL - 1)
+#define __add_page_state(member,delta)	__mod_page_state(member, (delta))
+#define __sub_page_state(member,delta)	__mod_page_state(member, 0UL - (delta))
+
+#define page_state(member) (*__page_state(offsetof(struct page_state, member)))
+
+#define state_zone_offset(zone, member)					\
+({									\
 	unsigned offset;						\
 	if (is_highmem(zone))						\
 		offset = offsetof(struct page_state, member##_high);	\
@@ -169,7 +180,17 @@ extern void __mod_page_state(unsigned lo
 		offset = offsetof(struct page_state, member##_dma32);	\
 	else								\
 		offset = offsetof(struct page_state, member##_dma);	\
-	__mod_page_state(offset, (delta));				\
+	offset;								\
+})
+
+#define __mod_page_state_zone(zone, member, delta)			\
+ do {									\
+	__mod_page_state_offset(state_zone_offset(zone, member), (delta)); \
+ } while (0)
+
+#define mod_page_state_zone(zone, member, delta)			\
+ do {									\
+	mod_page_state_offset(state_zone_offset(zone, member), (delta)); \
  } while (0)
 
 /*
diff -puN mm/page_alloc.c~mm-page_state-opt mm/page_alloc.c
--- devel/mm/page_alloc.c~mm-page_state-opt	2006-01-06 00:04:15.000000000 -0800
+++ devel-akpm/mm/page_alloc.c	2006-01-06 00:04:54.000000000 -0800
@@ -424,9 +424,9 @@ void __free_pages_ok(struct page *page, 
 		return;
 
 	list_add(&page->lru, &list);
-	mod_page_state(pgfree, 1 << order);
 	kernel_map_pages(page, 1<<order, 0);
 	local_irq_save(flags);
+	__mod_page_state(pgfree, 1 << order);
 	free_pages_bulk(page_zone(page), 1, &list, order);
 	local_irq_restore(flags);
 }
@@ -674,18 +674,14 @@ void drain_local_pages(void)
 }
 #endif /* CONFIG_PM */
 
-static void zone_statistics(struct zonelist *zonelist, struct zone *z)
+static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
 {
 #ifdef CONFIG_NUMA
-	unsigned long flags;
-	int cpu;
 	pg_data_t *pg = z->zone_pgdat;
 	pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
 	struct per_cpu_pageset *p;
 
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-	p = zone_pcp(z,cpu);
+	p = zone_pcp(z, cpu);
 	if (pg == orig) {
 		p->numa_hit++;
 	} else {
@@ -696,7 +692,6 @@ static void zone_statistics(struct zonel
 		p->local_node++;
 	else
 		p->other_node++;
-	local_irq_restore(flags);
 #endif
 }
 
@@ -716,11 +711,11 @@ static void fastcall free_hot_cold_page(
 	if (free_pages_check(page))
 		return;
 
-	inc_page_state(pgfree);
 	kernel_map_pages(page, 1, 0);
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 	local_irq_save(flags);
+	__inc_page_state(pgfree);
 	list_add(&page->lru, &pcp->list);
 	pcp->count++;
 	if (pcp->count >= pcp->high)
@@ -753,49 +748,58 @@ static inline void prep_zero_page(struct
  * we cheat by calling it from here, in the order > 0 path.  Saves a branch
  * or two.
  */
-static struct page *
-buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
+static struct page *buffered_rmqueue(struct zonelist *zonelist,
+			struct zone *zone, int order, gfp_t gfp_flags)
 {
 	unsigned long flags;
 	struct page *page;
 	int cold = !!(gfp_flags & __GFP_COLD);
+	int cpu;
 
 again:
+	cpu  = get_cpu();
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
 
-		page = NULL;
-		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+		pcp = &zone_pcp(zone, cpu)->pcp[cold];
 		local_irq_save(flags);
-		if (!pcp->count)
+		if (!pcp->count) {
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
-		if (likely(pcp->count)) {
-			page = list_entry(pcp->list.next, struct page, lru);
-			list_del(&page->lru);
-			pcp->count--;
+			if (unlikely(!pcp->count))
+				goto failed;
 		}
-		local_irq_restore(flags);
-		put_cpu();
+		page = list_entry(pcp->list.next, struct page, lru);
+		list_del(&page->lru);
+		pcp->count--;
 	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
-		spin_unlock_irqrestore(&zone->lock, flags);
+		spin_unlock(&zone->lock);
+		if (!page)
+			goto failed;
 	}
 
-	if (page != NULL) {
-		BUG_ON(bad_range(zone, page));
-		mod_page_state_zone(zone, pgalloc, 1 << order);
-		if (prep_new_page(page, order))
-			goto again;
+	__mod_page_state_zone(zone, pgalloc, 1 << order);
+	zone_statistics(zonelist, zone, cpu);
+	local_irq_restore(flags);
+	put_cpu();
 
-		if (gfp_flags & __GFP_ZERO)
-			prep_zero_page(page, order, gfp_flags);
+	BUG_ON(bad_range(zone, page));
+	if (prep_new_page(page, order))
+		goto again;
 
-		if (order && (gfp_flags & __GFP_COMP))
-			prep_compound_page(page, order);
-	}
+	if (gfp_flags & __GFP_ZERO)
+		prep_zero_page(page, order, gfp_flags);
+
+	if (order && (gfp_flags & __GFP_COMP))
+		prep_compound_page(page, order);
 	return page;
+
+failed:
+	local_irq_restore(flags);
+	put_cpu();
+	return NULL;
 }
 
 #define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
@@ -871,9 +875,8 @@ get_page_from_freelist(gfp_t gfp_mask, u
 				continue;
 		}
 
-		page = buffered_rmqueue(*z, order, gfp_mask);
+		page = buffered_rmqueue(zonelist, *z, order, gfp_mask);
 		if (page) {
-			zone_statistics(zonelist, *z);
 			break;
 		}
 	} while (*(++z) != NULL);
@@ -1249,7 +1252,7 @@ void get_full_page_state(struct page_sta
 	__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
 }
 
-unsigned long __read_page_state(unsigned long offset)
+unsigned long read_page_state_offset(unsigned long offset)
 {
 	unsigned long ret = 0;
 	int cpu;
@@ -1263,18 +1266,26 @@ unsigned long __read_page_state(unsigned
 	return ret;
 }
 
-void __mod_page_state(unsigned long offset, unsigned long delta)
+void __mod_page_state_offset(unsigned long offset, unsigned long delta)
+{
+	void *ptr;
+
+	ptr = &__get_cpu_var(page_states);
+	*(unsigned long *)(ptr + offset) += delta;
+}
+EXPORT_SYMBOL(__mod_page_state_offset);
+
+void mod_page_state_offset(unsigned long offset, unsigned long delta)
 {
 	unsigned long flags;
-	void* ptr;
+	void *ptr;
 
 	local_irq_save(flags);
 	ptr = &__get_cpu_var(page_states);
-	*(unsigned long*)(ptr + offset) += delta;
+	*(unsigned long *)(ptr + offset) += delta;
 	local_irq_restore(flags);
 }
-
-EXPORT_SYMBOL(__mod_page_state);
+EXPORT_SYMBOL(mod_page_state_offset);
 
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free, struct pglist_data *pgdat)
diff -puN mm/rmap.c~mm-page_state-opt mm/rmap.c
--- devel/mm/rmap.c~mm-page_state-opt	2006-01-06 00:04:15.000000000 -0800
+++ devel-akpm/mm/rmap.c	2006-01-06 00:04:15.000000000 -0800
@@ -451,7 +451,11 @@ static void __page_set_anon_rmap(struct 
 
 	page->index = linear_page_index(vma, address);
 
-	inc_page_state(nr_mapped);
+	/*
+	 * nr_mapped state can be updated without turning off
+	 * interrupts because it is not modified via interrupt.
+	 */
+	__inc_page_state(nr_mapped);
 }
 
 /**
@@ -498,7 +502,7 @@ void page_add_file_rmap(struct page *pag
 	BUG_ON(!pfn_valid(page_to_pfn(page)));
 
 	if (atomic_inc_and_test(&page->_mapcount))
-		inc_page_state(nr_mapped);
+		__inc_page_state(nr_mapped);
 }
 
 /**
@@ -522,7 +526,7 @@ void page_remove_rmap(struct page *page)
 		 */
 		if (page_test_and_clear_dirty(page))
 			set_page_dirty(page);
-		dec_page_state(nr_mapped);
+		__dec_page_state(nr_mapped);
 	}
 }
 
diff -puN mm/vmscan.c~mm-page_state-opt mm/vmscan.c
--- devel/mm/vmscan.c~mm-page_state-opt	2006-01-06 00:04:15.000000000 -0800
+++ devel-akpm/mm/vmscan.c	2006-01-06 00:04:15.000000000 -0800
@@ -645,16 +645,17 @@ static void shrink_cache(struct zone *zo
 			goto done;
 
 		max_scan -= nr_scan;
-		if (current_is_kswapd())
-			mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
-		else
-			mod_page_state_zone(zone, pgscan_direct, nr_scan);
 		nr_freed = shrink_list(&page_list, sc);
-		if (current_is_kswapd())
-			mod_page_state(kswapd_steal, nr_freed);
-		mod_page_state_zone(zone, pgsteal, nr_freed);
 
-		spin_lock_irq(&zone->lru_lock);
+		local_irq_disable();
+		if (current_is_kswapd()) {
+			__mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
+			__mod_page_state(kswapd_steal, nr_freed);
+		} else
+			__mod_page_state_zone(zone, pgscan_direct, nr_scan);
+		__mod_page_state_zone(zone, pgsteal, nr_freed);
+
+		spin_lock(&zone->lru_lock);
 		/*
 		 * Put back any unfreeable pages.
 		 */
@@ -816,11 +817,13 @@ refill_inactive_zone(struct zone *zone, 
 		}
 	}
 	zone->nr_active += pgmoved;
-	spin_unlock_irq(&zone->lru_lock);
-	pagevec_release(&pvec);
+	spin_unlock(&zone->lru_lock);
+
+	__mod_page_state_zone(zone, pgrefill, pgscanned);
+	__mod_page_state(pgdeactivate, pgdeactivate);
+	local_irq_enable();
 
-	mod_page_state_zone(zone, pgrefill, pgscanned);
-	mod_page_state(pgdeactivate, pgdeactivate);
+	pagevec_release(&pvec);
 }
 
 /*
_


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] use local_t for page statistics
  2006-01-07  0:33 ` Andrew Morton
@ 2006-01-07  1:00   ` Marcelo Tosatti
  2006-01-07  2:52   ` Nick Piggin
  2006-01-09 18:26   ` Benjamin LaHaise
  2 siblings, 0 replies; 13+ messages in thread
From: Marcelo Tosatti @ 2006-01-07  1:00 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Benjamin LaHaise, linux-kernel, linux-mm, Nick Piggin

On Fri, Jan 06, 2006 at 04:33:13PM -0800, Andrew Morton wrote:
> Benjamin LaHaise <bcrl@kvack.org> wrote:
> >
> > The patch below converts the mm page_states counters to use local_t.  
> > mod_page_state shows up in a few profiles on x86 and x86-64 due to the 
> > disable/enable interrupts operations touching the flags register.  On 
> > both my laptop (Pentium M) and P4 test box this results in about 10 
> > additional /bin/bash -c exit 0 executions per second (P4 went from ~759/s 
> > to ~771/s).  Tested on x86 and x86-64.  Oh, also add a pgcow statistic 
> > for the number of COW page faults.
> 
> Bah.  I think this is a better approach than the just-merged
> mm-page_state-opt.patch, so I should revert that patch first?

Don't think so - local_t operations are performed atomically, which is
not required for most hotpath page statistics operations since proper
locks are already held.

What is wanted for these cases are simple inc/dec (non-atomic)
instructions, which is what Nick's patch does by introducing
__mod_page_state.

Ben, have you tested mm-page_state-opt.patch? It should get rid of
most "flags" save/restore on stack.



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] use local_t for page statistics
  2006-01-07  0:33 ` Andrew Morton
  2006-01-07  1:00   ` Marcelo Tosatti
@ 2006-01-07  2:52   ` Nick Piggin
  2006-01-07  3:01     ` Andi Kleen
  2006-01-07  3:07     ` Andrew Morton
  2006-01-09 18:26   ` Benjamin LaHaise
  2 siblings, 2 replies; 13+ messages in thread
From: Nick Piggin @ 2006-01-07  2:52 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Benjamin LaHaise, linux-kernel, linux-mm

Andrew Morton wrote:
> Benjamin LaHaise <bcrl@kvack.org> wrote:
> 
>>The patch below converts the mm page_states counters to use local_t.  
>>mod_page_state shows up in a few profiles on x86 and x86-64 due to the 
>>disable/enable interrupts operations touching the flags register.  On 
>>both my laptop (Pentium M) and P4 test box this results in about 10 
>>additional /bin/bash -c exit 0 executions per second (P4 went from ~759/s 
>>to ~771/s).  Tested on x86 and x86-64.  Oh, also add a pgcow statistic 
>>for the number of COW page faults.
> 
> 
> Bah.  I think this is a better approach than the just-merged
> mm-page_state-opt.patch, so I should revert that patch first?
> 

No. On many load/store architectures there is no good way to do local_t,
so something like ppc32 or ia64 just uses all atomic operations for
local_t, and ppc64 uses 3 counters per-cpu thus tripling the cache
footprint.

Nick

-- 
SUSE Labs, Novell Inc.

Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] use local_t for page statistics
  2006-01-07  2:52   ` Nick Piggin
@ 2006-01-07  3:01     ` Andi Kleen
  2006-01-07  3:19       ` Nick Piggin
  2006-01-09 20:54       ` Christoph Lameter
  2006-01-07  3:07     ` Andrew Morton
  1 sibling, 2 replies; 13+ messages in thread
From: Andi Kleen @ 2006-01-07  3:01 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Andrew Morton, Benjamin LaHaise, linux-kernel, linux-mm

On Saturday 07 January 2006 03:52, Nick Piggin wrote:

> No. On many load/store architectures there is no good way to do local_t,
> so something like ppc32 or ia64 just uses all atomic operations for

well, they're just broken and need to be fixed to not do that.

Also I bet with some tricks a seqlock like setup could be made to work.

> local_t, and ppc64 uses 3 counters per-cpu thus tripling the cache
> footprint.

and ppc64 has big caches so this also shouldn't be a problem.

-Andi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] use local_t for page statistics
  2006-01-07  2:52   ` Nick Piggin
  2006-01-07  3:01     ` Andi Kleen
@ 2006-01-07  3:07     ` Andrew Morton
  1 sibling, 0 replies; 13+ messages in thread
From: Andrew Morton @ 2006-01-07  3:07 UTC (permalink / raw)
  To: Nick Piggin; +Cc: bcrl, linux-kernel, linux-mm

Nick Piggin <nickpiggin@yahoo.com.au> wrote:
>
> Andrew Morton wrote:
> > Benjamin LaHaise <bcrl@kvack.org> wrote:
> > 
> >>The patch below converts the mm page_states counters to use local_t.  
> >>mod_page_state shows up in a few profiles on x86 and x86-64 due to the 
> >>disable/enable interrupts operations touching the flags register.  On 
> >>both my laptop (Pentium M) and P4 test box this results in about 10 
> >>additional /bin/bash -c exit 0 executions per second (P4 went from ~759/s 
> >>to ~771/s).  Tested on x86 and x86-64.  Oh, also add a pgcow statistic 
> >>for the number of COW page faults.
> > 
> > 
> > Bah.  I think this is a better approach than the just-merged
> > mm-page_state-opt.patch, so I should revert that patch first?
> > 
> 
> No. On many load/store architectures there is no good way to do local_t,
> so something like ppc32 or ia64 just uses all atomic operations for
> local_t, and ppc64 uses 3 counters per-cpu thus tripling the cache
> footprint.
> 

Yes, local_t seems a bit half-assed at present.  And a bit broken with
interrupt nesting.

Surely 64-bit architectures would be better off using atomic64_t rather
than that v[3] monstrosity.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] use local_t for page statistics
  2006-01-07  3:01     ` Andi Kleen
@ 2006-01-07  3:19       ` Nick Piggin
  2006-01-07  3:25         ` Andi Kleen
  2006-01-09 20:54       ` Christoph Lameter
  1 sibling, 1 reply; 13+ messages in thread
From: Nick Piggin @ 2006-01-07  3:19 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Andrew Morton, Benjamin LaHaise, linux-kernel, linux-mm

Andi Kleen wrote:
> On Saturday 07 January 2006 03:52, Nick Piggin wrote:
> 
> 
>>No. On many load/store architectures there is no good way to do local_t,
>>so something like ppc32 or ia64 just uses all atomic operations for
> 
> 
> well, they're just broken and need to be fixed to not do that.
> 

How?

> Also I bet with some tricks a seqlock like setup could be made to work.
> 

I asked you how before. If you can come up with a way then it indeed
might be a good solution... The problem I see with seqlock is that it
is only fast in the read path. That path is not the issue here.

> 
>>local_t, and ppc64 uses 3 counters per-cpu thus tripling the cache
>>footprint.
> 
> 
> and ppc64 has big caches so this also shouldn't be a problem.
> 

Well it is even less of a problem for them now, by about 1/3.

Performance-wise there is really no benefit for even i386 or x86-64
to move to local_t now either so I don't see what the fuss is about.

-- 
SUSE Labs, Novell Inc.

Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] use local_t for page statistics
  2006-01-07  3:19       ` Nick Piggin
@ 2006-01-07  3:25         ` Andi Kleen
  2006-01-07  3:48           ` Nick Piggin
  0 siblings, 1 reply; 13+ messages in thread
From: Andi Kleen @ 2006-01-07  3:25 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Andrew Morton, Benjamin LaHaise, linux-kernel, linux-mm

On Saturday 07 January 2006 04:19, Nick Piggin wrote:
> Andi Kleen wrote:
> > On Saturday 07 January 2006 03:52, Nick Piggin wrote:
> > 
> > 
> >>No. On many load/store architectures there is no good way to do local_t,
> >>so something like ppc32 or ia64 just uses all atomic operations for
> > 
> > 
> > well, they're just broken and need to be fixed to not do that.
> > 
> 
> How?

If anything use the 3x duplicated data setup, not atomic operations.

> 
> > Also I bet with some tricks a seqlock like setup could be made to work.
> > 
> 
> I asked you how before. If you can come up with a way then it indeed
> might be a good solution... 

I'll try to work something up.

> The problem I see with seqlock is that it 
> is only fast in the read path. That path is not the issue here.

The common case - not getting interrupted would be fast.

> > 
> >>local_t, and ppc64 uses 3 counters per-cpu thus tripling the cache
> >>footprint.
> > 
> > 
> > and ppc64 has big caches so this also shouldn't be a problem.
> > 
> 
> Well it is even less of a problem for them now, by about 1/3.
> 
> Performance-wise there is really no benefit for even i386 or x86-64
> to move to local_t now either so I don't see what the fuss is about.

Actually P4 doesn't like CLI/STI. For AMD and P-M it's not that much an issue,
but NetBurst really doesn't like it.

-Andi


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] use local_t for page statistics
  2006-01-07  3:25         ` Andi Kleen
@ 2006-01-07  3:48           ` Nick Piggin
  2006-01-07  4:03             ` Andi Kleen
  0 siblings, 1 reply; 13+ messages in thread
From: Nick Piggin @ 2006-01-07  3:48 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Andrew Morton, Benjamin LaHaise, linux-kernel, linux-mm

Andi Kleen wrote:
> On Saturday 07 January 2006 04:19, Nick Piggin wrote:
> 
>>Andi Kleen wrote:
>>
>>>On Saturday 07 January 2006 03:52, Nick Piggin wrote:
>>>
>>>
>>>
>>>>No. On many load/store architectures there is no good way to do local_t,
>>>>so something like ppc32 or ia64 just uses all atomic operations for
>>>
>>>
>>>well, they're just broken and need to be fixed to not do that.
>>>
>>
>>How?
> 
> 
> If anything use the 3x duplicated data setup, not atomic operations.
> 

At a 3x cache footprint cost? (and probably more than 3x for icache, though
I haven't checked) And I think hardware trends are against us. (Also, does
it have race issues with nested interrupts that Andrew noticed?)

> 
>>>Also I bet with some tricks a seqlock like setup could be made to work.
>>>
>>
>>I asked you how before. If you can come up with a way then it indeed
>>might be a good solution... 
> 
> 
> I'll try to work something up.
> 

Cool, I'd be interested to see.

> 
>>The problem I see with seqlock is that it 
>>is only fast in the read path. That path is not the issue here.
> 
> 
> The common case - not getting interrupted would be fast.
> 

The problem is that you can never do the final store without risking a
race with an interrupt. Because it is not a read-path.

The closest thing I can see to a seqlock would be ll/sc operations, at
which point you're back to atomic ops.

> 
>>>>local_t, and ppc64 uses 3 counters per-cpu thus tripling the cache
>>>>footprint.
>>>
>>>
>>>and ppc64 has big caches so this also shouldn't be a problem.
>>>
>>
>>Well it is even less of a problem for them now, by about 1/3.
>>
>>Performance-wise there is really no benefit for even i386 or x86-64
>>to move to local_t now either so I don't see what the fuss is about.
> 
> 
> Actually P4 doesn't like CLI/STI. For AMD and P-M it's not that much an issue,
> but NetBurst really doesn't like it.
> 

Yes, it was worth over a second of real time and ~ 7% total kernel
time on kbuild on a P4.

(git: a74609fafa2e5cc31d558012abaaa55ec9ad9da4)

AMD and PM I didn't test but the improvement might still be noticeable,
if much smaller.

-- 
SUSE Labs, Novell Inc.

Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] use local_t for page statistics
  2006-01-07  3:48           ` Nick Piggin
@ 2006-01-07  4:03             ` Andi Kleen
  0 siblings, 0 replies; 13+ messages in thread
From: Andi Kleen @ 2006-01-07  4:03 UTC (permalink / raw)
  To: Nick Piggin; +Cc: Andrew Morton, Benjamin LaHaise, linux-kernel, linux-mm

On Saturday 07 January 2006 04:48, Nick Piggin wrote:

> 
> At a 3x cache footprint cost? (and probably more than 3x for icache, though
> I haven't checked) And I think hardware trends are against us. (Also, does
> it have race issues with nested interrupts that Andrew noticed?)

Well the alternative would be to just let them turn off interrupts.
If that's cheap for them that's fine too. And would be equivalent
to what the current high level code does.

If you worry about icache footprint it can be even done out of line.

-Andi

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] use local_t for page statistics
  2006-01-07  0:33 ` Andrew Morton
  2006-01-07  1:00   ` Marcelo Tosatti
  2006-01-07  2:52   ` Nick Piggin
@ 2006-01-09 18:26   ` Benjamin LaHaise
  2006-01-09 20:52     ` Christoph Lameter
  2 siblings, 1 reply; 13+ messages in thread
From: Benjamin LaHaise @ 2006-01-09 18:26 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, linux-mm, Nick Piggin

On Fri, Jan 06, 2006 at 04:33:13PM -0800, Andrew Morton wrote:
> Bah.  I think this is a better approach than the just-merged
> mm-page_state-opt.patch, so I should revert that patch first?

After going over things, I think that I'll redo my patch on top of that 
one, as it means that architectures that can optimize away the save/restore 
of irq flags will be able to benefit from that.  Maybe after all this is 
said and done we can clean things up sufficiently to be able to inline the 
inc/dec where it is simple enough to do so.

		-ben
-- 
"You know, I've seen some crystals do some pretty trippy shit, man."
Don't Email: <dont@kvack.org>.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] use local_t for page statistics
  2006-01-09 18:26   ` Benjamin LaHaise
@ 2006-01-09 20:52     ` Christoph Lameter
  0 siblings, 0 replies; 13+ messages in thread
From: Christoph Lameter @ 2006-01-09 20:52 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: Andrew Morton, linux-kernel, linux-mm, Nick Piggin

On Mon, 9 Jan 2006, Benjamin LaHaise wrote:

> On Fri, Jan 06, 2006 at 04:33:13PM -0800, Andrew Morton wrote:
> > Bah.  I think this is a better approach than the just-merged
> > mm-page_state-opt.patch, so I should revert that patch first?
> 
> After going over things, I think that I'll redo my patch on top of that 
> one, as it means that architectures that can optimize away the save/restore 
> of irq flags will be able to benefit from that.  Maybe after all this is 
> said and done we can clean things up sufficiently to be able to inline the 
> inc/dec where it is simple enough to do so.

Could you also have a look at my zone counters and event counter 
patchsets? This will split up the page statistics into important 
counters that we use (which are then per zone) and the ones that are
only displayed via proc). It would really help to implement numa 
awareness in the VM if we could have per zone vm counters. The zone 
reclaim patchset would already benefit from it.

Zoned counters:

http://marc.theaimsgroup.com/?l=linux-kernel&m=113511649910826&w=2

Fast event counters:

http://marc.theaimsgroup.com/?l=linux-mm&m=113512324804851&w=2

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] use local_t for page statistics
  2006-01-07  3:01     ` Andi Kleen
  2006-01-07  3:19       ` Nick Piggin
@ 2006-01-09 20:54       ` Christoph Lameter
  1 sibling, 0 replies; 13+ messages in thread
From: Christoph Lameter @ 2006-01-09 20:54 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Nick Piggin, Andrew Morton, Benjamin LaHaise, linux-kernel, linux-mm

On Sat, 7 Jan 2006, Andi Kleen wrote:

> On Saturday 07 January 2006 03:52, Nick Piggin wrote:
> 
> > No. On many load/store architectures there is no good way to do local_t,
> > so something like ppc32 or ia64 just uses all atomic operations for
> 
> well, they're just broken and need to be fixed to not do that.

I tried to use local_t on ia64 for page statistics and have to agree with 
Nick. local_t has highly arch specific semantics.


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2006-01-09 20:54 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-01-06 21:53 [PATCH] use local_t for page statistics Benjamin LaHaise
2006-01-07  0:33 ` Andrew Morton
2006-01-07  1:00   ` Marcelo Tosatti
2006-01-07  2:52   ` Nick Piggin
2006-01-07  3:01     ` Andi Kleen
2006-01-07  3:19       ` Nick Piggin
2006-01-07  3:25         ` Andi Kleen
2006-01-07  3:48           ` Nick Piggin
2006-01-07  4:03             ` Andi Kleen
2006-01-09 20:54       ` Christoph Lameter
2006-01-07  3:07     ` Andrew Morton
2006-01-09 18:26   ` Benjamin LaHaise
2006-01-09 20:52     ` Christoph Lameter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).