linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Lameter <cl@linux.com>
To: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: David Rientjes <rientjes@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: linux-kernel@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Subject: [slubllv7 05/17] mm: Rearrange struct page
Date: Wed, 01 Jun 2011 12:25:48 -0500	[thread overview]
Message-ID: <20110601172614.725685218@linux.com> (raw)
In-Reply-To: 20110601172543.437240675@linux.com

[-- Attachment #1: resort_struct_page --]
[-- Type: text/plain, Size: 4950 bytes --]

We need to be able to use cmpxchg_double on the freelist and object count
field in struct page. Rearrange the fields in struct page according to
doubleword entities so that the freelist pointer comes before the counters.
Do the rearranging with a future in mind where we use more doubleword
atomics to avoid locking of updates to flags/mapping or lru pointers.

Create another union to allow access to counters in struct page as a
single unsigned long value.

The doublewords must be properly aligned for cmpxchg_double to work.
Sadly this increases the size of page struct by one word on some architectures.
But as a resultpage structs are now cacheline aligned on x86_64.

Signed-off-by: Christoph Lameter <cl@linux.com>

---
 include/linux/mm_types.h |   89 +++++++++++++++++++++++++++++++----------------
 1 file changed, 60 insertions(+), 29 deletions(-)

Index: linux-2.6/include/linux/mm_types.h
===================================================================
--- linux-2.6.orig/include/linux/mm_types.h	2011-05-31 09:46:41.912987862 -0500
+++ linux-2.6/include/linux/mm_types.h	2011-05-31 09:46:44.282987846 -0500
@@ -30,52 +30,74 @@ struct address_space;
  * moment. Note that we have no way to track which tasks are using
  * a page, though if it is a pagecache page, rmap structures can tell us
  * who is mapping it.
+ *
+ * The objects in struct page are organized in double word blocks in
+ * order to allows us to use atomic double word operations on portions
+ * of struct page. That is currently only used by slub but the arrangement
+ * allows the use of atomic double word operations on the flags/mapping
+ * and lru list pointers also.
  */
 struct page {
+	/* First double word block */
 	unsigned long flags;		/* Atomic flags, some possibly
 					 * updated asynchronously */
-	atomic_t _count;		/* Usage count, see below. */
+	struct address_space *mapping;	/* If low bit clear, points to
+					 * inode address_space, or NULL.
+					 * If page mapped as anonymous
+					 * memory, low bit is set, and
+					 * it points to anon_vma object:
+					 * see PAGE_MAPPING_ANON below.
+					 */
+	/* Second double word */
 	union {
-		atomic_t _mapcount;	/* Count of ptes mapped in mms,
-					 * to show when page is mapped
-					 * & limit reverse map searches.
+		struct {
+			pgoff_t index;		/* Our offset within mapping. */
+			atomic_t _mapcount;	/* Count of ptes mapped in mms,
+							 * to show when page is mapped
+							 * & limit reverse map searches.
+							 */
+			atomic_t _count;		/* Usage count, see below. */
+		};
+
+		struct {			/* SLUB cmpxchg_double area */
+			void *freelist;
+			union {
+				unsigned long counters;
+				struct {
+					unsigned inuse:16;
+					unsigned objects:15;
+					unsigned frozen:1;
+					/*
+					 * Kernel may make use of this field even when slub
+					 * uses the rest of the double word!
 					 */
-		struct {		/* SLUB */
-			unsigned inuse:16;
-			unsigned objects:15;
-			unsigned frozen:1;
+					atomic_t _count;
+				};
+			};
 		};
 	};
+
+	/* Third double word block */
+	struct list_head lru;		/* Pageout list, eg. active_list
+					 * protected by zone->lru_lock !
+					 */
+
+	/* Remainder is not double word aligned */
 	union {
-	    struct {
-		unsigned long private;		/* Mapping-private opaque data:
+	 	unsigned long private;		/* Mapping-private opaque data:
 					 	 * usually used for buffer_heads
 						 * if PagePrivate set; used for
 						 * swp_entry_t if PageSwapCache;
 						 * indicates order in the buddy
 						 * system if PG_buddy is set.
 						 */
-		struct address_space *mapping;	/* If low bit clear, points to
-						 * inode address_space, or NULL.
-						 * If page mapped as anonymous
-						 * memory, low bit is set, and
-						 * it points to anon_vma object:
-						 * see PAGE_MAPPING_ANON below.
-						 */
-	    };
 #if USE_SPLIT_PTLOCKS
-	    spinlock_t ptl;
+		spinlock_t ptl;
 #endif
-	    struct kmem_cache *slab;	/* SLUB: Pointer to slab */
-	    struct page *first_page;	/* Compound tail pages */
+		struct kmem_cache *slab;	/* SLUB: Pointer to slab */
+		struct page *first_page;	/* Compound tail pages */
 	};
-	union {
-		pgoff_t index;		/* Our offset within mapping. */
-		void *freelist;		/* SLUB: freelist req. slab lock */
-	};
-	struct list_head lru;		/* Pageout list, eg. active_list
-					 * protected by zone->lru_lock !
-					 */
+
 	/*
 	 * On machines where all RAM is mapped into kernel address space,
 	 * we can simply calculate the virtual address. On machines with
@@ -101,7 +123,16 @@ struct page {
 	 */
 	void *shadow;
 #endif
-};
+}
+/*
+ * If another subsystem starts using the double word pairing for atomic
+ * operations on struct page then it must change the #if to ensure
+ * proper alignment of the page struct.
+ */
+#if defined(CONFIG_SLUB) && defined(CONFIG_CMPXCHG_LOCAL)
+	__attribute__((__aligned__(2*sizeof(unsigned long))))
+#endif
+;
 
 typedef unsigned long __nocast vm_flags_t;
 


  parent reply	other threads:[~2011-06-01 17:29 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-01 17:25 [slubllv7 00/17] SLUB: Lockless freelists for objects V7 Christoph Lameter
2011-06-01 17:25 ` [slubllv7 01/17] slub: Push irq disable into allocate_slab() Christoph Lameter
2011-06-01 17:25 ` [slubllv7 02/17] slub: Do not use frozen page flag but a bit in the page counters Christoph Lameter
2011-06-01 17:25 ` [slubllv7 03/17] slub: Move page->frozen handling near where the page->freelist handling occurs Christoph Lameter
2011-06-01 17:25 ` [slubllv7 04/17] x86: Add support for cmpxchg_double Christoph Lameter
2011-06-09  9:53   ` Pekka Enberg
2011-06-10 15:17     ` Christoph Lameter
2011-06-11  9:50       ` Pekka Enberg
2011-06-11 17:02         ` Christoph Lameter
2011-06-14  5:49           ` Pekka Enberg
2011-06-14  8:04             ` Ingo Molnar
2011-06-14 14:04               ` Christoph Lameter
2011-06-14 15:05                 ` H. Peter Anvin
2011-06-15  8:55   ` Tejun Heo
2011-06-15 14:26     ` Christoph Lameter
2011-06-15 16:39       ` Tejun Heo
2011-06-15 17:19         ` Christoph Lameter
2011-06-25 23:49   ` [tip:x86/atomic] " tip-bot for Christoph Lameter
2011-06-01 17:25 ` Christoph Lameter [this message]
2011-06-09  9:57   ` [slubllv7 05/17] mm: Rearrange struct page Pekka Enberg
2011-06-09 16:45     ` Andrew Morton
2011-06-09 17:03       ` [PATCH] checkpatch: Add a "prefer __aligned" check Joe Perches
2011-06-01 17:25 ` [slubllv7 06/17] slub: Add cmpxchg_double_slab() Christoph Lameter
2011-07-11 19:55   ` Eric Dumazet
2011-07-12 15:59     ` Christoph Lameter
2011-07-12 16:06       ` Eric Dumazet
2011-07-12 16:47         ` Christoph Lameter
2011-07-12 18:40           ` H. Peter Anvin
2011-07-12 18:53             ` Christoph Lameter
2011-07-12 20:40               ` H. Peter Anvin
2011-06-01 17:25 ` [slubllv7 07/17] slub: explicit list_lock taking Christoph Lameter
2011-06-01 17:25 ` [slubllv7 08/17] slub: Pass kmem_cache struct to lock and freeze slab Christoph Lameter
2011-06-01 17:25 ` [slubllv7 09/17] slub: Rework allocator fastpaths Christoph Lameter
2011-06-01 17:25 ` [slubllv7 10/17] slub: Invert locking and avoid slab lock Christoph Lameter
2011-06-01 17:25 ` [slubllv7 11/17] slub: Disable interrupts in free_debug processing Christoph Lameter
2011-06-01 17:25 ` [slubllv7 12/17] slub: Avoid disabling interrupts in free slowpath Christoph Lameter
2011-06-01 17:25 ` [slubllv7 13/17] slub: Get rid of the another_slab label Christoph Lameter
2011-06-01 17:25 ` [slubllv7 14/17] slub: Add statistics for the case that the current slab does not match the node Christoph Lameter
2011-06-01 17:25 ` [slubllv7 15/17] slub: fast release on full slab Christoph Lameter
2011-06-01 17:25 ` [slubllv7 16/17] slub: Not necessary to check for empty slab on load_freelist Christoph Lameter
2011-06-01 17:26 ` [slubllv7 17/17] slub: slabinfo update for cmpxchg handling Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110601172614.725685218@linux.com \
    --to=cl@linux.com \
    --cc=penberg@cs.helsinki.fi \
    --cc=rientjes@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).