linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] Simplify node/zone portion of page->flags
@ 2003-11-22  1:14 Matthew Dobson
  2003-11-23 22:40 ` Andrew Morton
  0 siblings, 1 reply; 4+ messages in thread
From: Matthew Dobson @ 2003-11-22  1:14 UTC (permalink / raw)
  To: linux-kernel, linux-mm, mbligh, Jesse Barnes, Andrew Morton

[-- Attachment #1: Type: text/plain, Size: 986 bytes --]

Currently we keep track of a pages node & zone in the top 8 bits (on 
32-bit arches, 10 bits on 64-bit arches) of page->flags.  We typically 
do: node_num * MAX_NR_ZONES + zone_num = 'nodezone'.  It's non-trivial 
to break this 'nodezone' back into node and zone numbers.  This patch 
modifies the way we compute the index to be: (node_num << ZONE_SHIFT) | 
zone_num.  This makes it trivial to recover either the node or zone 
number with a simple bitshift.  There are many places in the kernel 
where we do things like: page_zone(page)->zone_pgdat->node_id to 
determine the node a page belongs to.  With this patch we save several 
pointer dereferences, and it boils down to shifting some bits.

Comments/criticism requested.

[mcd@arrakis current]$ diffstat nodezone.patch
  include/linux/mm.h     |   21 ++++++++++++++++-----
  include/linux/mmzone.h |   15 ++++++++++++---
  mm/page_alloc.c        |    6 +++---
  3 files changed, 31 insertions(+), 11 deletions(-)

Cheers!

-Matt

[-- Attachment #2: nodezone.patch --]
[-- Type: text/plain, Size: 4229 bytes --]

diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.0-test9-bk23/include/linux/mm.h linux-2.6.0-test9-nodezone_shift/include/linux/mm.h
--- linux-2.6.0-test9-bk23/include/linux/mm.h	Sat Oct 25 11:42:50 2003
+++ linux-2.6.0-test9-nodezone_shift/include/linux/mm.h	Tue Nov 18 13:46:47 2003
@@ -323,20 +323,31 @@ static inline void put_page(struct page 
  * The zone field is never updated after free_area_init_core()
  * sets it, so none of the operations on it need to be atomic.
  */
-#define ZONE_SHIFT (BITS_PER_LONG - 8)
+#define NODEZONE_SHIFT (BITS_PER_LONG - MAX_NODES_SHIFT - MAX_ZONES_SHIFT)
+#define NODEZONE(node, zone)	((node << ZONES_SHIFT) | zone)
+
+static inline unsigned long page_zonenum(struct page *page)
+{
+	return (page->flags >> NODEZONE_SHIFT) & (~(~0UL << ZONES_SHIFT));
+}
+
+static inline unsigned long page_nodenum(struct page *page)
+{
+	return (page->flags >> NODEZONE_SHIFT + ZONES_SHIFT);
+}
 
 struct zone;
 extern struct zone *zone_table[];
 
 static inline struct zone *page_zone(struct page *page)
 {
-	return zone_table[page->flags >> ZONE_SHIFT];
+	return zone_table[page->flags >> NODEZONE_SHIFT];
 }
 
-static inline void set_page_zone(struct page *page, unsigned long zone_num)
+static inline void set_page_zone(struct page *page, unsigned long nodezone_num)
 {
-	page->flags &= ~(~0UL << ZONE_SHIFT);
-	page->flags |= zone_num << ZONE_SHIFT;
+	page->flags &= ~(~0UL << NODEZONE_SHIFT);
+	page->flags |= nodezone_num << NODEZONE_SHIFT;
 }
 
 #ifndef CONFIG_DISCONTIGMEM
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.0-test9-bk23/include/linux/mmzone.h linux-2.6.0-test9-nodezone_shift/include/linux/mmzone.h
--- linux-2.6.0-test9-bk23/include/linux/mmzone.h	Sat Oct 25 11:43:49 2003
+++ linux-2.6.0-test9-nodezone_shift/include/linux/mmzone.h	Tue Nov 18 14:34:34 2003
@@ -159,8 +159,10 @@ struct zone {
 #define ZONE_DMA		0
 #define ZONE_NORMAL		1
 #define ZONE_HIGHMEM		2
-#define MAX_NR_ZONES		3
-#define GFP_ZONEMASK	0x03
+
+#define MAX_NR_ZONES		3	/* Sync this with ZONES_SHIFT */
+#define ZONES_SHIFT		2	/* = ceil(log2(MAX_NR_ZONES)) */
+#define GFP_ZONEMASK		0x03
 
 /*
  * One allocation request operates on a zonelist. A zonelist
@@ -310,7 +312,7 @@ extern struct pglist_data contig_page_da
 
 #if BITS_PER_LONG == 32
 /*
- * with 32 bit flags field, page->zone is currently 8 bits.
+ * with 32 bit page->flags field, we reserve 8 bits for node/zone info.
  * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes.
  */
 #define MAX_NODES_SHIFT		6
@@ -327,6 +329,13 @@ extern struct pglist_data contig_page_da
 #error NODES_SHIFT > MAX_NODES_SHIFT
 #endif
 
+/* There are currently 3 zones: DMA, Normal & Highmem, thus we need 2 bits */
+#define MAX_ZONES_SHIFT		2
+
+#if ZONES_SHIFT > MAX_ZONES_SHIFT
+#error ZONES_SHIFT > MAX_ZONES_SHIFT
+#endif
+
 extern DECLARE_BITMAP(node_online_map, MAX_NUMNODES);
 extern DECLARE_BITMAP(memblk_online_map, MAX_NR_MEMBLKS);
 
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.0-test9-bk23/mm/page_alloc.c linux-2.6.0-test9-nodezone_shift/mm/page_alloc.c
--- linux-2.6.0-test9-bk23/mm/page_alloc.c	Sat Oct 25 11:42:53 2003
+++ linux-2.6.0-test9-nodezone_shift/mm/page_alloc.c	Tue Nov 18 13:48:05 2003
@@ -50,7 +50,7 @@ EXPORT_SYMBOL(nr_swap_pages);
  * Used by page_zone() to look up the address of the struct zone whose
  * id is encoded in the upper bits of page->flags
  */
-struct zone *zone_table[MAX_NR_ZONES*MAX_NUMNODES];
+struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)];
 EXPORT_SYMBOL(zone_table);
 
 static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
@@ -1210,7 +1210,7 @@ void __init memmap_init_zone(struct page
 	struct page *page;
 
 	for (page = start; page < (start + size); page++) {
-		set_page_zone(page, nid * MAX_NR_ZONES + zone);
+		set_page_zone(page, NODEZONE(nid, zone));
 		set_page_count(page, 0);
 		SetPageReserved(page);
 		INIT_LIST_HEAD(&page->list);
@@ -1251,7 +1251,7 @@ static void __init free_area_init_core(s
 		unsigned long size, realsize;
 		unsigned long batch;
 
-		zone_table[nid * MAX_NR_ZONES + j] = zone;
+		zone_table[NODEZONE(nid, j)] = zone;
 		realsize = size = zones_size[j];
 		if (zholes_size)
 			realsize -= zholes_size[j];

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] Simplify node/zone portion of page->flags
  2003-11-22  1:14 [RFC] Simplify node/zone portion of page->flags Matthew Dobson
@ 2003-11-23 22:40 ` Andrew Morton
  2003-11-23 22:49   ` Jesse Barnes
  0 siblings, 1 reply; 4+ messages in thread
From: Andrew Morton @ 2003-11-23 22:40 UTC (permalink / raw)
  To: colpatch; +Cc: linux-kernel, linux-mm, mbligh, jbarnes

Matthew Dobson <colpatch@us.ibm.com> wrote:
>
> Currently we keep track of a pages node & zone in the top 8 bits (on 
>  32-bit arches, 10 bits on 64-bit arches) of page->flags.  We typically 
>  do: node_num * MAX_NR_ZONES + zone_num = 'nodezone'.  It's non-trivial 
>  to break this 'nodezone' back into node and zone numbers.  This patch 
>  modifies the way we compute the index to be: (node_num << ZONE_SHIFT) | 
>  zone_num.  This makes it trivial to recover either the node or zone 
>  number with a simple bitshift.  There are many places in the kernel 
>  where we do things like: page_zone(page)->zone_pgdat->node_id to 
>  determine the node a page belongs to.  With this patch we save several 
>  pointer dereferences, and it boils down to shifting some bits.

This rather conflicts with the patch from Jesse which I have.  Can you guys
work that out and let me know when you're done?


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] Simplify node/zone portion of page->flags
  2003-11-23 22:40 ` Andrew Morton
@ 2003-11-23 22:49   ` Jesse Barnes
  2003-11-24 23:31     ` Jesse Barnes
  0 siblings, 1 reply; 4+ messages in thread
From: Jesse Barnes @ 2003-11-23 22:49 UTC (permalink / raw)
  To: Andrew Morton; +Cc: colpatch, linux-kernel, linux-mm, mbligh

On Sun, Nov 23, 2003 at 02:40:52PM -0800, Andrew Morton wrote:
> >  zone_num.  This makes it trivial to recover either the node or zone 
> >  number with a simple bitshift.  There are many places in the kernel 
> >  where we do things like: page_zone(page)->zone_pgdat->node_id to 
> >  determine the node a page belongs to.  With this patch we save several 
> >  pointer dereferences, and it boils down to shifting some bits.
> 
> This rather conflicts with the patch from Jesse which I have.  Can you guys
> work that out and let me know when you're done?

I like Matt's patch, but haven't tested it yet.  I'll try it out on
Monday.

Jesse

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] Simplify node/zone portion of page->flags
  2003-11-23 22:49   ` Jesse Barnes
@ 2003-11-24 23:31     ` Jesse Barnes
  0 siblings, 0 replies; 4+ messages in thread
From: Jesse Barnes @ 2003-11-24 23:31 UTC (permalink / raw)
  To: Andrew Morton, colpatch, linux-kernel, linux-mm, mbligh

On Sun, Nov 23, 2003 at 02:49:03PM -0800, Jesse Barnes wrote:
> On Sun, Nov 23, 2003 at 02:40:52PM -0800, Andrew Morton wrote:
> > >  zone_num.  This makes it trivial to recover either the node or zone 
> > >  number with a simple bitshift.  There are many places in the kernel 
> > >  where we do things like: page_zone(page)->zone_pgdat->node_id to 
> > >  determine the node a page belongs to.  With this patch we save several 
> > >  pointer dereferences, and it boils down to shifting some bits.
> > 
> > This rather conflicts with the patch from Jesse which I have.  Can you guys
> > work that out and let me know when you're done?
> 
> I like Matt's patch, but haven't tested it yet.  I'll try it out on
> Monday.

Matt, this looks ok (at least it boots on my test system).  I wasn't
able test it on a very large system though, but it looks functionally
identical to the patch in Andrew's tree.  Btw, you probably want to put
() around NODEZONE_SHIFT + ZONES_SHIFT in page_nodenum() otherwise gcc
will complain loudly.

Jesse

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2003-11-24 23:32 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-11-22  1:14 [RFC] Simplify node/zone portion of page->flags Matthew Dobson
2003-11-23 22:40 ` Andrew Morton
2003-11-23 22:49   ` Jesse Barnes
2003-11-24 23:31     ` Jesse Barnes

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).