All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
To: akpm@linux-foundation.org, mgorman@suse.de, hannes@cmpxchg.org,
	tony.luck@intel.com, matthew.garrett@nebula.com, dave@sr71.net,
	riel@redhat.com, arjan@linux.intel.com,
	srinivas.pandruvada@linux.intel.com, willy@linux.intel.com,
	kamezawa.hiroyu@jp.fujitsu.com, lenb@kernel.org, rjw@sisk.pl
Cc: gargankita@gmail.com, paulmck@linux.vnet.ibm.com,
	svaidy@linux.vnet.ibm.com, andi@firstfloor.org,
	isimatu.yasuaki@jp.fujitsu.com, santosh.shilimkar@ti.com,
	kosaki.motohiro@gmail.com, srivatsa.bhat@linux.vnet.ibm.com,
	linux-pm@vger.kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH v3 18/35] mm: Introduce a "Region Allocator" to manage entire memory regions
Date: Fri, 30 Aug 2013 18:49:26 +0530	[thread overview]
Message-ID: <20130830131919.4947.16115.stgit@srivatsabhat.in.ibm.com> (raw)
In-Reply-To: <20130830131221.4947.99764.stgit@srivatsabhat.in.ibm.com>

Today, the MM subsystem uses the buddy 'Page Allocator' to manage memory
at a 'page' granularity. But this allocator has no notion of the physical
topology of the underlying memory hardware, and hence it is hard to make
memory allocation decisions that take the platform's constraints into
account.

So we need to augment the page-allocator with a new entity to manage
memory (at a much larger granularity) keeping the underlying platform
characteristics and the memory hardware topology in mind.

To that end, introduce a "Memory Region Allocator" as a backend to the
existing "Page Allocator".


Splitting the memory allocator into a Page-Allocator front-end and a
Region-Allocator backend:


                 Page Allocator          |      Memory Region Allocator
                                         -
           __    __    __                |    ________    ________
          |__|--|__|--|__|-- ...         -   |        |  |        |
           ____    ____    ____          |   |        |  |        |
          |____|--|____|--|____|-- ...   -   |        |--|        |-- ...
                                         |   |        |  |        |
                                         -   |________|  |________|
                                         |
                                         -
             Manages pages using         |     Manages memory regions
              buddy freelists            -  (allocates and frees entire
                                         |   memory regions, i.e., at a
                                         -   memory-region granularity)


The flow of memory allocations/frees between entities requesting memory
(applications/kernel) and the MM subsystem:

                  pages               regions
  Applications <========>   Page    <========>  Memory Region
   and Kernel             Allocator               Allocator



Since the region allocator is supposed to function as a backend to the
page allocator, which is itself per-zone, we implement it on a per-zone
basis as well.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 include/linux/mmzone.h |   17 +++++++++++++++++
 mm/page_alloc.c        |   19 +++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ef602a8..c2956dd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -112,6 +112,21 @@ struct free_area {
 	unsigned long		nr_free;
 };
 
+/* A simplified free_area for managing entire memory regions */
+struct free_area_region {
+	struct list_head	list;
+	unsigned long		nr_free;
+};
+
+struct mem_region {
+	struct free_area_region	region_area[MAX_ORDER];
+};
+
+struct region_allocator {
+	struct mem_region	region[MAX_NR_ZONE_REGIONS];
+	int			next_region;
+};
+
 struct pglist_data;
 
 /*
@@ -405,6 +420,8 @@ struct zone {
 	struct zone_mem_region	zone_regions[MAX_NR_ZONE_REGIONS];
 	int 			nr_zone_regions;
 
+	struct region_allocator	region_allocator;
+
 #ifndef CONFIG_SPARSEMEM
 	/*
 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0cc2a3e..905360c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5209,6 +5209,23 @@ static void __meminit zone_init_free_lists_late(struct zone *zone)
 	}
 }
 
+static void __meminit init_zone_region_allocator(struct zone *zone)
+{
+	struct free_area_region *area;
+	int i, j;
+
+	for (i = 0; i < zone->nr_zone_regions; i++) {
+		area = zone->region_allocator.region[i].region_area;
+
+		for (j = 0; j < MAX_ORDER; j++) {
+			INIT_LIST_HEAD(&area[j].list);
+			area[j].nr_free = 0;
+		}
+	}
+
+	zone->region_allocator.next_region = -1;
+}
+
 static void __meminit init_zone_memory_regions(struct pglist_data *pgdat)
 {
 	unsigned long start_pfn, end_pfn, absent;
@@ -5258,6 +5275,8 @@ static void __meminit init_zone_memory_regions(struct pglist_data *pgdat)
 
 		zone_init_free_lists_late(z);
 
+		init_zone_region_allocator(z);
+
 		/*
 		 * Revisit the last visited node memory region, in case it
 		 * spans multiple zones.


WARNING: multiple messages have this Message-ID (diff)
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
To: akpm@linux-foundation.org, mgorman@suse.de, hannes@cmpxchg.org,
	tony.luck@intel.com, matthew.garrett@nebula.com, dave@sr71.net,
	riel@redhat.com, arjan@linux.intel.com,
	srinivas.pandruvada@linux.intel.com, willy@linux.intel.com,
	kamezawa.hiroyu@jp.fujitsu.com, lenb@kernel.org, rjw@sisk.pl
Cc: gargankita@gmail.com, paulmck@linux.vnet.ibm.com,
	svaidy@linux.vnet.ibm.com, andi@firstfloor.org,
	isimatu.yasuaki@jp.fujitsu.com, santosh.shilimkar@ti.com,
	kosaki.motohiro@gmail.com, srivatsa.bhat@linux.vnet.ibm.com,
	linux-pm@vger.kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH v3 18/35] mm: Introduce a "Region Allocator" to manage entire memory regions
Date: Fri, 30 Aug 2013 18:49:26 +0530	[thread overview]
Message-ID: <20130830131919.4947.16115.stgit@srivatsabhat.in.ibm.com> (raw)
In-Reply-To: <20130830131221.4947.99764.stgit@srivatsabhat.in.ibm.com>

Today, the MM subsystem uses the buddy 'Page Allocator' to manage memory
at a 'page' granularity. But this allocator has no notion of the physical
topology of the underlying memory hardware, and hence it is hard to make
memory allocation decisions that take the platform's constraints into
account.

So we need to augment the page-allocator with a new entity to manage
memory (at a much larger granularity) keeping the underlying platform
characteristics and the memory hardware topology in mind.

To that end, introduce a "Memory Region Allocator" as a backend to the
existing "Page Allocator".


Splitting the memory allocator into a Page-Allocator front-end and a
Region-Allocator backend:


                 Page Allocator          |      Memory Region Allocator
                                         -
           __    __    __                |    ________    ________
          |__|--|__|--|__|-- ...         -   |        |  |        |
           ____    ____    ____          |   |        |  |        |
          |____|--|____|--|____|-- ...   -   |        |--|        |-- ...
                                         |   |        |  |        |
                                         -   |________|  |________|
                                         |
                                         -
             Manages pages using         |     Manages memory regions
              buddy freelists            -  (allocates and frees entire
                                         |   memory regions, i.e., at a
                                         -   memory-region granularity)


The flow of memory allocations/frees between entities requesting memory
(applications/kernel) and the MM subsystem:

                  pages               regions
  Applications <========>   Page    <========>  Memory Region
   and Kernel             Allocator               Allocator



Since the region allocator is supposed to function as a backend to the
page allocator, which is itself per-zone, we implement it on a per-zone
basis as well.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---

 include/linux/mmzone.h |   17 +++++++++++++++++
 mm/page_alloc.c        |   19 +++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ef602a8..c2956dd 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -112,6 +112,21 @@ struct free_area {
 	unsigned long		nr_free;
 };
 
+/* A simplified free_area for managing entire memory regions */
+struct free_area_region {
+	struct list_head	list;
+	unsigned long		nr_free;
+};
+
+struct mem_region {
+	struct free_area_region	region_area[MAX_ORDER];
+};
+
+struct region_allocator {
+	struct mem_region	region[MAX_NR_ZONE_REGIONS];
+	int			next_region;
+};
+
 struct pglist_data;
 
 /*
@@ -405,6 +420,8 @@ struct zone {
 	struct zone_mem_region	zone_regions[MAX_NR_ZONE_REGIONS];
 	int 			nr_zone_regions;
 
+	struct region_allocator	region_allocator;
+
 #ifndef CONFIG_SPARSEMEM
 	/*
 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0cc2a3e..905360c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5209,6 +5209,23 @@ static void __meminit zone_init_free_lists_late(struct zone *zone)
 	}
 }
 
+static void __meminit init_zone_region_allocator(struct zone *zone)
+{
+	struct free_area_region *area;
+	int i, j;
+
+	for (i = 0; i < zone->nr_zone_regions; i++) {
+		area = zone->region_allocator.region[i].region_area;
+
+		for (j = 0; j < MAX_ORDER; j++) {
+			INIT_LIST_HEAD(&area[j].list);
+			area[j].nr_free = 0;
+		}
+	}
+
+	zone->region_allocator.next_region = -1;
+}
+
 static void __meminit init_zone_memory_regions(struct pglist_data *pgdat)
 {
 	unsigned long start_pfn, end_pfn, absent;
@@ -5258,6 +5275,8 @@ static void __meminit init_zone_memory_regions(struct pglist_data *pgdat)
 
 		zone_init_free_lists_late(z);
 
+		init_zone_region_allocator(z);
+
 		/*
 		 * Revisit the last visited node memory region, in case it
 		 * spans multiple zones.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2013-08-30 13:23 UTC|newest]

Thread overview: 100+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-08-30 13:13 [RESEND RFC PATCH v3 00/35] mm: Memory Power Management Srivatsa S. Bhat
2013-08-30 13:13 ` Srivatsa S. Bhat
2013-08-30 13:14 ` [RFC PATCH v3 01/35] mm: Restructure free-page stealing code and fix a bug Srivatsa S. Bhat
2013-08-30 13:14   ` Srivatsa S. Bhat
2013-08-30 13:14 ` [RFC PATCH v3 02/35] mm: Fix the value of fallback_migratetype in alloc_extfrag tracepoint Srivatsa S. Bhat
2013-08-30 13:14   ` Srivatsa S. Bhat
2013-08-30 13:14 ` [RFC PATCH v3 03/35] mm: Introduce memory regions data-structure to capture region boundaries within nodes Srivatsa S. Bhat
2013-08-30 13:14   ` Srivatsa S. Bhat
2013-08-30 13:15 ` [RFC PATCH v3 04/35] mm: Initialize node memory regions during boot Srivatsa S. Bhat
2013-08-30 13:15   ` Srivatsa S. Bhat
2013-09-02  6:20   ` Yasuaki Ishimatsu
2013-09-02  6:20     ` Yasuaki Ishimatsu
2013-09-02 17:43     ` Srivatsa S. Bhat
2013-09-02 17:43       ` Srivatsa S. Bhat
2013-09-03  4:53       ` Yasuaki Ishimatsu
2013-09-03  4:53         ` Yasuaki Ishimatsu
2013-08-30 13:15 ` [RFC PATCH v3 05/35] mm: Introduce and initialize zone memory regions Srivatsa S. Bhat
2013-08-30 13:15   ` Srivatsa S. Bhat
2013-08-30 13:15 ` [RFC PATCH v3 06/35] mm: Add helpers to retrieve node region and zone region for a given page Srivatsa S. Bhat
2013-08-30 13:15   ` Srivatsa S. Bhat
2013-09-03  5:56   ` Yasuaki Ishimatsu
2013-09-03  5:56     ` Yasuaki Ishimatsu
2013-09-03  5:56     ` Yasuaki Ishimatsu
2013-09-03  8:34     ` Srivatsa S. Bhat
2013-09-03  8:34       ` Srivatsa S. Bhat
2013-08-30 13:16 ` [RFC PATCH v3 07/35] mm: Add data-structures to describe memory regions within the zones' freelists Srivatsa S. Bhat
2013-08-30 13:16   ` Srivatsa S. Bhat
2013-08-30 13:16 ` [RFC PATCH v3 08/35] mm: Demarcate and maintain pageblocks in region-order in " Srivatsa S. Bhat
2013-08-30 13:16   ` Srivatsa S. Bhat
2013-09-04  7:49   ` Yasuaki Ishimatsu
2013-09-04  7:49     ` Yasuaki Ishimatsu
2013-09-04  7:49     ` Yasuaki Ishimatsu
2013-08-30 13:16 ` [RFC PATCH v3 09/35] mm: Track the freepage migratetype of pages accurately Srivatsa S. Bhat
2013-08-30 13:16   ` Srivatsa S. Bhat
2013-09-03  6:38   ` Yasuaki Ishimatsu
2013-09-03  6:38     ` Yasuaki Ishimatsu
2013-09-03  8:45     ` Srivatsa S. Bhat
2013-09-03  8:45       ` Srivatsa S. Bhat
2013-09-04  8:23       ` Yasuaki Ishimatsu
2013-09-04  8:23         ` Yasuaki Ishimatsu
2013-09-06  5:24         ` Srivatsa S. Bhat
2013-09-06  5:24           ` Srivatsa S. Bhat
2013-08-30 13:16 ` [RFC PATCH v3 10/35] mm: Use the correct migratetype during buddy merging Srivatsa S. Bhat
2013-08-30 13:16   ` Srivatsa S. Bhat
2013-08-30 13:17 ` [RFC PATCH v3 11/35] mm: Add an optimized version of del_from_freelist to keep page allocation fast Srivatsa S. Bhat
2013-08-30 13:17   ` Srivatsa S. Bhat
2013-08-30 13:17 ` [RFC PATCH v3 12/35] bitops: Document the difference in indexing between fls() and __fls() Srivatsa S. Bhat
2013-08-30 13:17   ` Srivatsa S. Bhat
2013-08-30 13:17 ` [RFC PATCH v3 13/35] mm: A new optimized O(log n) sorting algo to speed up buddy-sorting Srivatsa S. Bhat
2013-08-30 13:17   ` Srivatsa S. Bhat
2013-08-30 13:18 ` [RFC PATCH v3 14/35] mm: Add support to accurately track per-memory-region allocation Srivatsa S. Bhat
2013-08-30 13:18   ` Srivatsa S. Bhat
2013-08-30 13:18 ` [RFC PATCH v3 15/35] mm: Print memory region statistics to understand the buddy allocator behavior Srivatsa S. Bhat
2013-08-30 13:18   ` Srivatsa S. Bhat
2013-08-30 13:18 ` [RFC PATCH v3 16/35] mm: Enable per-memory-region fragmentation stats in pagetypeinfo Srivatsa S. Bhat
2013-08-30 13:18   ` Srivatsa S. Bhat
2013-08-30 13:19 ` [RFC PATCH v3 17/35] mm: Add aggressive bias to prefer lower regions during page allocation Srivatsa S. Bhat
2013-08-30 13:19   ` Srivatsa S. Bhat
2013-08-30 13:19 ` Srivatsa S. Bhat [this message]
2013-08-30 13:19   ` [RFC PATCH v3 18/35] mm: Introduce a "Region Allocator" to manage entire memory regions Srivatsa S. Bhat
2013-08-30 13:19 ` [RFC PATCH v3 19/35] mm: Add a mechanism to add pages to buddy freelists in bulk Srivatsa S. Bhat
2013-08-30 13:19   ` Srivatsa S. Bhat
2013-08-30 13:20 ` [RFC PATCH v3 20/35] mm: Provide a mechanism to delete pages from " Srivatsa S. Bhat
2013-08-30 13:20   ` Srivatsa S. Bhat
2013-08-30 13:20 ` [RFC PATCH v3 21/35] mm: Provide a mechanism to release free memory to the region allocator Srivatsa S. Bhat
2013-08-30 13:20   ` Srivatsa S. Bhat
2013-08-30 13:20 ` [RFC PATCH v3 22/35] mm: Provide a mechanism to request free memory from " Srivatsa S. Bhat
2013-08-30 13:20   ` Srivatsa S. Bhat
2013-08-30 13:21 ` [RFC PATCH v3 23/35] mm: Maintain the counter for freepages in " Srivatsa S. Bhat
2013-08-30 13:21   ` Srivatsa S. Bhat
2013-08-30 13:21 ` [RFC PATCH v3 24/35] mm: Propagate the sorted-buddy bias for picking free regions, to " Srivatsa S. Bhat
2013-08-30 13:21   ` Srivatsa S. Bhat
2013-08-30 13:21 ` [RFC PATCH v3 25/35] mm: Fix vmstat to also account for freepages in the " Srivatsa S. Bhat
2013-08-30 13:21   ` Srivatsa S. Bhat
2013-08-30 13:22 ` [RFC PATCH v3 26/35] mm: Drop some very expensive sorted-buddy related checks under DEBUG_PAGEALLOC Srivatsa S. Bhat
2013-08-30 13:22   ` Srivatsa S. Bhat
2013-08-30 13:22 ` [RFC PATCH v3 27/35] mm: Connect Page Allocator(PA) to Region Allocator(RA); add PA => RA flow Srivatsa S. Bhat
2013-08-30 13:22   ` Srivatsa S. Bhat
2013-08-30 13:22 ` [RFC PATCH v3 28/35] mm: Connect Page Allocator(PA) to Region Allocator(RA); add PA <= " Srivatsa S. Bhat
2013-08-30 13:22   ` Srivatsa S. Bhat
2013-08-30 13:23 ` [RFC PATCH v3 29/35] mm: Update the freepage migratetype of pages during region allocation Srivatsa S. Bhat
2013-08-30 13:23   ` Srivatsa S. Bhat
2013-08-30 13:23 ` [RFC PATCH v3 30/35] mm: Provide a mechanism to check if a given page is in the region allocator Srivatsa S. Bhat
2013-08-30 13:23   ` Srivatsa S. Bhat
2013-08-30 13:23 ` [RFC PATCH v3 31/35] mm: Add a way to request pages of a particular region from " Srivatsa S. Bhat
2013-08-30 13:23   ` Srivatsa S. Bhat
2013-08-30 13:24 ` [RFC PATCH v3 32/35] mm: Modify move_freepages() to handle pages in the region allocator properly Srivatsa S. Bhat
2013-08-30 13:24   ` Srivatsa S. Bhat
2013-08-30 13:24 ` [RFC PATCH v3 33/35] mm: Never change migratetypes of pageblocks during freepage stealing Srivatsa S. Bhat
2013-08-30 13:24   ` Srivatsa S. Bhat
2013-08-30 13:24 ` [RFC PATCH v3 34/35] mm: Set pageblock migratetype when allocating regions from region allocator Srivatsa S. Bhat
2013-08-30 13:24   ` Srivatsa S. Bhat
2013-08-30 13:24 ` [RFC PATCH v3 35/35] mm: Use a cache between page-allocator and region-allocator Srivatsa S. Bhat
2013-08-30 13:24   ` Srivatsa S. Bhat
2013-08-30 13:26 ` [RESEND RFC PATCH v3 00/35] mm: Memory Power Management Srivatsa S. Bhat
2013-08-30 13:26   ` Srivatsa S. Bhat
2013-08-30 15:27 ` Dave Hansen
2013-08-30 15:27   ` Dave Hansen
2013-08-30 17:50   ` Srivatsa S. Bhat
2013-08-30 17:50     ` Srivatsa S. Bhat

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130830131919.4947.16115.stgit@srivatsabhat.in.ibm.com \
    --to=srivatsa.bhat@linux.vnet.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=arjan@linux.intel.com \
    --cc=dave@sr71.net \
    --cc=gargankita@gmail.com \
    --cc=hannes@cmpxchg.org \
    --cc=isimatu.yasuaki@jp.fujitsu.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kosaki.motohiro@gmail.com \
    --cc=lenb@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=matthew.garrett@nebula.com \
    --cc=mgorman@suse.de \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=riel@redhat.com \
    --cc=rjw@sisk.pl \
    --cc=santosh.shilimkar@ti.com \
    --cc=srinivas.pandruvada@linux.intel.com \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=tony.luck@intel.com \
    --cc=willy@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.