All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrea Arcangeli <aarcange@redhat.com>
To: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	Adam Litke <agl@us.ibm.com>, Avi Kivity <avi@redhat.com>,
	Izik Eidus <ieidus@redhat.com>,
	Hugh Dickins <hugh.dickins@tiscali.co.uk>,
	Nick Piggin <npiggin@suse.de>, Rik van Riel <riel@redhat.com>,
	Mel Gorman <mel@csn.ul.ie>, Dave Hansen <dave@linux.vnet.ibm.com>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Ingo Molnar <mingo@elte.hu>, Mike Travis <travis@sgi.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Christoph Lameter <cl@linux-foundation.org>,
	Chris Wright <chrisw@sous-sol.org>,
	bpicco@redhat.com,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	Balbir Singh <balbir@linux.vnet.ibm.com>,
	Arnd Bergmann <arnd@arndb.de>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>,
	Chris Mason <chris.mason@oracle.com>
Subject: [PATCH 41 of 67] remove PG_buddy
Date: Thu, 08 Apr 2010 03:51:24 +0200	[thread overview]
Message-ID: <3816571b9b04d1e36cf5.1270691484@v2.random> (raw)
In-Reply-To: <patchbomb.1270691443@v2.random>

From: Andrea Arcangeli <aarcange@redhat.com>

PG_buddy can be converted to _mapcount == -2. That way PG_compound_lock can be
added to page->flags without overflowing it (the sparse section bits take up
more room) with CONFIG_X86_PAE=y and CONFIG_X86_PAT=y. This also requires moving
the memory hotplug code from _mapcount to lru.next to avoid any risk of
clashes. We can't use lru.next as the replacement for PG_buddy, but memory
hotplug can use lru.next even more easily than it used _mapcount.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---

diff --git a/fs/proc/page.c b/fs/proc/page.c
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -116,15 +116,17 @@ u64 stable_page_flags(struct page *page)
 	if (PageHuge(page))
 		u |= 1 << KPF_HUGE;
 
+	/*
+	 * Caveats on high order pages: page->_mapcount will only be set
+	 * to -2 on the head page; SLUB/SLQB do the same for PG_slab;
+	 * SLOB won't set PG_slab at all on compound pages.
+	 */
+	if (PageBuddy(page))
+		u |= 1 << KPF_BUDDY;
+
 	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked);
 
-	/*
-	 * Caveats on high order pages:
-	 * PG_buddy will only be set on the head page; SLUB/SLQB do the same
-	 * for PG_slab; SLOB won't set PG_slab at all on compound pages.
-	 */
 	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab);
-	u |= kpf_copy_bit(k, KPF_BUDDY,		PG_buddy);
 
 	u |= kpf_copy_bit(k, KPF_ERROR,		PG_error);
 	u |= kpf_copy_bit(k, KPF_DIRTY,		PG_dirty);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -13,12 +13,16 @@ struct mem_section;
 #ifdef CONFIG_MEMORY_HOTPLUG
 
 /*
- * Types for free bootmem.
- * The normal smallest mapcount is -1. Here is smaller value than it.
+ * Types for free bootmem stored in page->lru.next. These have to be in
+ * some random range in unsigned long space for debugging purposes.
  */
-#define SECTION_INFO		(-1 - 1)
-#define MIX_SECTION_INFO	(-1 - 2)
-#define NODE_INFO		(-1 - 3)
+enum {
+	MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12,
+	SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
+	MIX_SECTION_INFO,
+	NODE_INFO,
+	MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
+};
 
 /*
  * pgdat resizing functions
diff --git a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -358,6 +358,27 @@ static inline void init_page_count(struc
 	atomic_set(&page->_count, 1);
 }
 
+/*
+ * PageBuddy() indicates that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ */
+static inline int PageBuddy(struct page *page)
+{
+	return atomic_read(&page->_mapcount) == -2;
+}
+
+static inline void __SetPageBuddy(struct page *page)
+{
+	VM_BUG_ON(atomic_read(&page->_mapcount) != -1);
+	atomic_set(&page->_mapcount, -2);
+}
+
+static inline void __ClearPageBuddy(struct page *page)
+{
+	VM_BUG_ON(!PageBuddy(page));
+	atomic_set(&page->_mapcount, -1);
+}
+
 void put_page(struct page *page);
 void put_pages_list(struct list_head *pages);
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -48,9 +48,6 @@
  * struct page (these bits with information) are always mapped into kernel
  * address space...
  *
- * PG_buddy is set to indicate that the page is free and in the buddy system
- * (see mm/page_alloc.c).
- *
  * PG_hwpoison indicates that a page got corrupted in hardware and contains
  * data with incorrect ECC bits that triggered a machine check. Accessing is
  * not safe since it may cause another machine check. Don't touch!
@@ -96,7 +93,6 @@ enum pageflags {
 	PG_swapcache,		/* Swap page: swp_entry_t in private */
 	PG_mappedtodisk,	/* Has blocks allocated on-disk */
 	PG_reclaim,		/* To be reclaimed asap */
-	PG_buddy,		/* Page is free, on buddy lists */
 	PG_swapbacked,		/* Page is backed by RAM/swap */
 	PG_unevictable,		/* Page is "unevictable"  */
 #ifdef CONFIG_MMU
@@ -235,7 +231,6 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTC
  * risky: they bypass page accounting.
  */
 TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
-__PAGEFLAG(Buddy, buddy)
 PAGEFLAG(MappedToDisk, mappedtodisk)
 
 /* PG_readahead is only used for file reads; PG_reclaim is only for writes */
@@ -430,7 +425,7 @@ static inline void ClearPageCompound(str
 #define PAGE_FLAGS_CHECK_AT_FREE \
 	(1 << PG_lru	 | 1 << PG_locked    | \
 	 1 << PG_private | 1 << PG_private_2 | \
-	 1 << PG_buddy	 | 1 << PG_writeback | 1 << PG_reserved | \
+	 1 << PG_writeback | 1 << PG_reserved | \
 	 1 << PG_slab	 | 1 << PG_swapcache | 1 << PG_active | \
 	 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
 	 __PG_COMPOUND_LOCK)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -65,9 +65,10 @@ static void release_memory_resource(stru
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
-static void get_page_bootmem(unsigned long info,  struct page *page, int type)
+static void get_page_bootmem(unsigned long info,  struct page *page,
+			     unsigned long type)
 {
-	atomic_set(&page->_mapcount, type);
+	page->lru.next = (struct list_head *) type;
 	SetPagePrivate(page);
 	set_page_private(page, info);
 	atomic_inc(&page->_count);
@@ -77,15 +78,16 @@ static void get_page_bootmem(unsigned lo
  * so use __ref to tell modpost not to generate a warning */
 void __ref put_page_bootmem(struct page *page)
 {
-	int type;
+	unsigned long type;
 
-	type = atomic_read(&page->_mapcount);
-	BUG_ON(type >= -1);
+	type = (unsigned long) page->lru.next;
+	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
+	       type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
 
 	if (atomic_dec_return(&page->_count) == 1) {
 		ClearPagePrivate(page);
 		set_page_private(page, 0);
-		reset_page_mapcount(page);
+		INIT_LIST_HEAD(&page->lru);
 		__free_pages_bootmem(page, 0);
 	}
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -426,8 +426,8 @@ __find_combined_index(unsigned long page
  * (c) a page and its buddy have the same order &&
  * (d) a page and its buddy are in the same zone.
  *
- * For recording whether a page is in the buddy system, we use PG_buddy.
- * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
+ * For recording whether a page is in the buddy system, we set ->_mapcount -2.
+ * Setting, clearing, and testing _mapcount -2 is serialized by zone->lock.
  *
  * For recording page's order, we use page_private(page).
  */
@@ -460,7 +460,7 @@ static inline int page_is_buddy(struct p
  * as necessary, plus some accounting needed to play nicely with other
  * parts of the VM system.
  * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_buddy. Page's
+ * free pages of length of (1 << order) and marked with _mapcount -2. Page's
  * order is recorded in page_private(page) field.
  * So when we are allocating or freeing one, we can derive the state of the
  * other.  That is, if we allocate a small block, and both were   
@@ -5214,7 +5214,6 @@ static struct trace_print_flags pageflag
 	{1UL << PG_swapcache,		"swapcache"	},
 	{1UL << PG_mappedtodisk,	"mappedtodisk"	},
 	{1UL << PG_reclaim,		"reclaim"	},
-	{1UL << PG_buddy,		"buddy"		},
 	{1UL << PG_swapbacked,		"swapbacked"	},
 	{1UL << PG_unevictable,		"unevictable"	},
 #ifdef CONFIG_MMU
diff --git a/mm/sparse.c b/mm/sparse.c
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -668,10 +668,10 @@ static void __kfree_section_memmap(struc
 static void free_map_bootmem(struct page *page, unsigned long nr_pages)
 {
 	unsigned long maps_section_nr, removing_section_nr, i;
-	int magic;
+	unsigned long magic;
 
 	for (i = 0; i < nr_pages; i++, page++) {
-		magic = atomic_read(&page->_mapcount);
+		magic = (unsigned long) page->lru.next;
 
 		BUG_ON(magic == NODE_INFO);
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2010-04-08  2:57 UTC|newest]

Thread overview: 95+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-04-08  1:50 [PATCH 00 of 67] Transparent Hugepage Support #18 Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 01 of 67] define MADV_HUGEPAGE Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 02 of 67] compound_lock Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 03 of 67] alter compound get_page/put_page Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 04 of 67] update futex compound knowledge Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 05 of 67] fix bad_page to show the real reason the page is bad Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 06 of 67] clear compound mapping Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 07 of 67] add native_set_pmd_at Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 08 of 67] add pmd paravirt ops Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 09 of 67] no paravirt version of pmd ops Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 10 of 67] export maybe_mkwrite Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 11 of 67] comment reminder in destroy_compound_page Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 12 of 67] config_transparent_hugepage Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 13 of 67] special pmd_trans_* functions Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 14 of 67] add pmd mangling generic functions Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 15 of 67] add pmd mangling functions to x86 Andrea Arcangeli
2010-04-08  1:50 ` [PATCH 16 of 67] bail out gup_fast on splitting pmd Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 17 of 67] pte alloc trans splitting Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 18 of 67] add pmd mmu_notifier helpers Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 19 of 67] clear page compound Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 20 of 67] add pmd_huge_pte to mm_struct Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 21 of 67] This fixes some minor issues that bugged me while going over the code: Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 22 of 67] Split out functions to handle hugetlb ranges, pte ranges and unmapped Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 23 of 67] Instead of passing a start address and a number of pages into the helper Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 24 of 67] Do page table walks with the well-known nested loops we use in several Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 25 of 67] split_huge_page_mm/vma Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 26 of 67] split_huge_page paging Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 27 of 67] clear_copy_huge_page Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 28 of 67] kvm mmu transparent hugepage support Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 29 of 67] _GFP_NO_KSWAPD Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 30 of 67] don't alloc harder for gfp nomemalloc even if nowait Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 31 of 67] transparent hugepage core Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 32 of 67] verify pmd_trans_huge isn't leaking Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 33 of 67] madvise(MADV_HUGEPAGE) Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 34 of 67] pmd_trans_huge migrate bugcheck Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 35 of 67] memcg compound Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 36 of 67] memcg huge memory Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 37 of 67] transparent hugepage vmstat Andrea Arcangeli
2010-04-08 11:53   ` Avi Kivity
2010-04-08  1:51 ` [PATCH 38 of 67] khugepaged Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 39 of 67] don't leave orhpaned swap cache after ksm merging Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 40 of 67] skip transhuge pages in ksm for now Andrea Arcangeli
2010-04-08  1:51 ` Andrea Arcangeli [this message]
2010-04-08  1:51 ` [PATCH 42 of 67] add x86 32bit support Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 43 of 67] mincore transparent hugepage support Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 44 of 67] add pmd_modify Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 45 of 67] mprotect: pass vma down to page table walkers Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 46 of 67] mprotect: transparent huge page support Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 47 of 67] set recommended min free kbytes Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 48 of 67] remove lumpy_reclaim Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 49 of 67] Take a reference to the anon_vma before migrating Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 50 of 67] Do not try to migrate unmapped anonymous pages Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 51 of 67] Share the anon_vma ref counts between KSM and page migration Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 52 of 67] Allow CONFIG_MIGRATION to be set without CONFIG_NUMA or memory hot-remove Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 53 of 67] Export unusable free space index via /proc/unusable_index Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 54 of 67] Export fragmentation index via /proc/extfrag_index Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 55 of 67] Move definition for LRU isolation modes to a header Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 56 of 67] Memory compaction core Andrea Arcangeli
2010-04-08 16:18   ` Johannes Weiner
2010-04-08 16:46     ` Andrea Arcangeli
2010-04-08 17:09       ` Andrea Arcangeli
2010-04-08 17:14         ` Andrea Arcangeli
2010-04-08 17:56           ` Johannes Weiner
2010-04-08 17:58             ` Andrea Arcangeli
2010-04-08 18:48               ` Johannes Weiner
2010-04-08 21:23                 ` Andrea Arcangeli
2010-04-08 21:32                   ` Andrea Arcangeli
2010-04-09 10:51                   ` Mel Gorman
2010-04-09 15:37                     ` Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 57 of 67] Add /proc trigger for memory compaction Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 58 of 67] Add /sys trigger for per-node " Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 59 of 67] Direct compact when a high-order allocation fails Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 60 of 67] Add a tunable that decides when memory should be compacted and when it should be reclaimed Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 61 of 67] Allow the migration of PageSwapCache pages Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 62 of 67] do not display compaction-related stats when !CONFIG_COMPACTION Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 63 of 67] disable migreate_prep() Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 64 of 67] page buddy can go away before reading page_order Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 65 of 67] select CONFIG_COMPACTION if TRANSPARENT_HUGEPAGE enabled Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 66 of 67] enable direct defrag Andrea Arcangeli
2010-04-08  1:51 ` [PATCH 67 of 67] memcg fix prepare migration Andrea Arcangeli
2010-04-08  3:57   ` Daisuke Nishimura
2010-04-13  1:29     ` Andrew Morton
2010-04-09  8:13   ` KAMEZAWA Hiroyuki
2010-04-08  9:39 ` [PATCH 00 of 67] Transparent Hugepage Support #18 Avi Kivity
2010-04-08 11:44   ` Avi Kivity
2010-04-08 15:23     ` Andrea Arcangeli
2010-04-08 15:27       ` Avi Kivity
2010-04-08 16:02         ` Andrea Arcangeli
2010-04-08 15:32       ` Christoph Lameter
2010-04-08 23:17         ` Andrea Arcangeli
2010-04-09  8:45     ` Avi Kivity
2010-04-09 15:50       ` Andrea Arcangeli
2010-04-09 17:44         ` Avi Kivity
2010-04-09  2:05 ` Transparent Hugepage Support #19 Andrea Arcangeli
2010-04-09 15:43   ` Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3816571b9b04d1e36cf5.1270691484@v2.random \
    --to=aarcange@redhat.com \
    --cc=agl@us.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=arnd@arndb.de \
    --cc=avi@redhat.com \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=benh@kernel.crashing.org \
    --cc=bpicco@redhat.com \
    --cc=chris.mason@oracle.com \
    --cc=chrisw@sous-sol.org \
    --cc=cl@linux-foundation.org \
    --cc=dave@linux.vnet.ibm.com \
    --cc=hannes@cmpxchg.org \
    --cc=hugh.dickins@tiscali.co.uk \
    --cc=ieidus@redhat.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    --cc=mingo@elte.hu \
    --cc=mst@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=nishimura@mxp.nes.nec.co.jp \
    --cc=npiggin@suse.de \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=travis@sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.