All of lore.kernel.org
 help / color / mirror / Atom feed
From: Zi Yan <zi.yan@sent.com>
To: linux-mm@kvack.org
Cc: David Hildenbrand <david@redhat.com>,
	Matthew Wilcox <willy@infradead.org>,
	Vlastimil Babka <vbabka@suse.cz>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	John Hubbard <jhubbard@nvidia.com>,
	Yang Shi <shy828301@gmail.com>,
	David Rientjes <rientjes@google.com>,
	James Houghton <jthoughton@google.com>,
	Mike Rapoport <rppt@kernel.org>,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH v2 10/12] mm: convert MAX_ORDER sized static arrays to dynamic ones.
Date: Thu, 11 Aug 2022 19:16:41 -0400	[thread overview]
Message-ID: <20220811231643.1012912-11-zi.yan@sent.com> (raw)
In-Reply-To: <20220811231643.1012912-1-zi.yan@sent.com>

From: Zi Yan <ziy@nvidia.com>

This prepares for the upcoming changes that make MAX_ORDER a boot-time
parameter instead of a compile-time constant. All static arrays of
MAX_ORDER size are converted to pointers, and their memory is allocated
at runtime.

The free_area array in struct zone is allocated using memblock_alloc_node()
at boot time and using kcalloc_node() when memory is hot-added.

Signed-off-by: Zi Yan <ziy@nvidia.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: David Airlie <airlied@linux.ie>
Cc: kexec@lists.infradead.org
Cc: linux-doc@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
 .../admin-guide/kdump/vmcoreinfo.rst          |  2 +-
 drivers/gpu/drm/ttm/ttm_device.c              |  7 ++-
 drivers/gpu/drm/ttm/ttm_pool.c                | 58 +++++++++++++++++--
 include/drm/ttm/ttm_pool.h                    |  4 +-
 include/linux/mmzone.h                        |  2 +-
 mm/page_alloc.c                               | 32 ++++++++--
 6 files changed, 87 insertions(+), 18 deletions(-)

diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index c572b5230fe0..a775462aa7c7 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -172,7 +172,7 @@ variables.
 Offset of the free_list's member. This value is used to compute the number
 of free pages.
 
-Each zone has a free_area structure array called free_area[MAX_ORDER + 1].
+Each zone has a free_area structure array called free_area with a length of MAX_ORDER + 1.
 The free_list represents a linked list of free page blocks.
 
 (list_head, next|prev)
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index e7147e304637..442a77bb5b4f 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -92,7 +92,9 @@ static int ttm_global_init(void)
 		>> PAGE_SHIFT;
 	num_dma32 = min(num_dma32, 2UL << (30 - PAGE_SHIFT));
 
-	ttm_pool_mgr_init(num_pages);
+	ret = ttm_pool_mgr_init(num_pages);
+	if (ret)
+		goto out;
 	ttm_tt_mgr_init(num_pages, num_dma32);
 
 	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);
@@ -218,7 +220,8 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 	bdev->funcs = funcs;
 
 	ttm_sys_man_init(bdev);
-	ttm_pool_init(&bdev->pool, dev, use_dma_alloc, use_dma32);
+	if (ttm_pool_init(&bdev->pool, dev, use_dma_alloc, use_dma32))
+		return -ENOMEM;
 
 	bdev->vma_manager = vma_manager;
 	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 85d19f425af6..d76f7d476421 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -64,11 +64,11 @@ module_param(page_pool_size, ulong, 0644);
 
 static atomic_long_t allocated_pages;
 
-static struct ttm_pool_type global_write_combined[MAX_ORDER + 1];
-static struct ttm_pool_type global_uncached[MAX_ORDER + 1];
+static struct ttm_pool_type *global_write_combined;
+static struct ttm_pool_type *global_uncached;
 
-static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER + 1];
-static struct ttm_pool_type global_dma32_uncached[MAX_ORDER + 1];
+static struct ttm_pool_type *global_dma32_write_combined;
+static struct ttm_pool_type *global_dma32_uncached;
 
 static spinlock_t shrinker_lock;
 static struct list_head shrinker_list;
@@ -493,8 +493,10 @@ EXPORT_SYMBOL(ttm_pool_free);
  * @use_dma32: true if GFP_DMA32 should be used
  *
  * Initialize the pool and its pool types.
+ *
+ * Returns: 0 on success, negative error code otherwise
  */
-void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
+int ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 		   bool use_dma_alloc, bool use_dma32)
 {
 	unsigned int i, j;
@@ -506,11 +508,30 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 	pool->use_dma32 = use_dma32;
 
 	if (use_dma_alloc) {
-		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
+		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+			pool->caching[i].orders =
+				kvcalloc(MAX_ORDER + 1, sizeof(struct ttm_pool_type),
+					GFP_KERNEL);
+			if (!pool->caching[i].orders) {
+				i--;
+				goto failed;
+			}
 			for (j = 0; j <= MAX_ORDER; ++j)
 				ttm_pool_type_init(&pool->caching[i].orders[j],
 						   pool, i, j);
+
+		}
+		return 0;
+
+failed:
+		for (; i >= 0; i--) {
+			for (j = 0; j <= MAX_ORDER; ++j)
+				ttm_pool_type_fini(&pool->caching[i].orders[j]);
+			kfree(pool->caching[i].orders);
+		}
+		return -ENOMEM;
 	}
+	return 0;
 }
 
 /**
@@ -701,6 +722,31 @@ int ttm_pool_mgr_init(unsigned long num_pages)
 	spin_lock_init(&shrinker_lock);
 	INIT_LIST_HEAD(&shrinker_list);
 
+	if (!global_write_combined) {
+		global_write_combined = kvcalloc(MAX_ORDER + 1, sizeof(struct ttm_pool_type),
+						GFP_KERNEL);
+		if (!global_write_combined)
+			return -ENOMEM;
+	}
+	if (!global_uncached) {
+		global_uncached = kvcalloc(MAX_ORDER + 1, sizeof(struct ttm_pool_type),
+					  GFP_KERNEL);
+		if (!global_uncached)
+			return -ENOMEM;
+	}
+	if (!global_dma32_write_combined) {
+		global_dma32_write_combined = kvcalloc(MAX_ORDER + 1, sizeof(struct ttm_pool_type),
+						      GFP_KERNEL);
+		if (!global_dma32_write_combined)
+			return -ENOMEM;
+	}
+	if (!global_dma32_uncached) {
+		global_dma32_uncached = kvcalloc(MAX_ORDER + 1, sizeof(struct ttm_pool_type),
+						GFP_KERNEL);
+		if (!global_dma32_uncached)
+			return -ENOMEM;
+	}
+
 	for (i = 0; i <= MAX_ORDER; ++i) {
 		ttm_pool_type_init(&global_write_combined[i], NULL,
 				   ttm_write_combined, i);
diff --git a/include/drm/ttm/ttm_pool.h b/include/drm/ttm/ttm_pool.h
index 8ce14f9d202a..f5ce60f629ae 100644
--- a/include/drm/ttm/ttm_pool.h
+++ b/include/drm/ttm/ttm_pool.h
@@ -72,7 +72,7 @@ struct ttm_pool {
 	bool use_dma32;
 
 	struct {
-		struct ttm_pool_type orders[MAX_ORDER + 1];
+		struct ttm_pool_type *orders;
 	} caching[TTM_NUM_CACHING_TYPES];
 };
 
@@ -80,7 +80,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 		   struct ttm_operation_ctx *ctx);
 void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt);
 
-void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
+int ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 		   bool use_dma_alloc, bool use_dma32);
 void ttm_pool_fini(struct ttm_pool *pool);
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b83b481e250b..60d8cce2aed8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -635,7 +635,7 @@ struct zone {
 	ZONE_PADDING(_pad1_)
 
 	/* free areas of different sizes */
-	struct free_area	free_area[MAX_ORDER + 1];
+	struct free_area	*free_area;
 
 	/* zone flags, see below */
 	unsigned long		flags;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3f3af7cd5164..941a94bb8cf0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6195,11 +6195,21 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 
 	for_each_populated_zone(zone) {
 		unsigned int order;
-		unsigned long nr[MAX_ORDER + 1], flags, total = 0;
-		unsigned char types[MAX_ORDER + 1];
+		unsigned long *nr, flags, total = 0;
+		unsigned char *types;
 
 		if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
 			continue;
+
+		nr = kmalloc_array(MAX_ORDER + 1, sizeof(unsigned long), GFP_KERNEL);
+		if (!nr)
+			break;
+		types = kmalloc_array(MAX_ORDER + 1, sizeof(unsigned char), GFP_KERNEL);
+		if (!types) {
+			kfree(nr);
+			break;
+		}
+
 		show_node(zone);
 		printk(KERN_CONT "%s: ", zone->name);
 
@@ -7649,8 +7659,8 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 	lruvec_init(&pgdat->__lruvec);
 }
 
-static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
-							unsigned long remaining_pages)
+static void __init zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
+					unsigned long remaining_pages, bool hotplug)
 {
 	atomic_long_set(&zone->managed_pages, remaining_pages);
 	zone_set_nid(zone, nid);
@@ -7659,6 +7669,16 @@ static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx,
 	spin_lock_init(&zone->lock);
 	zone_seqlock_init(zone);
 	zone_pcp_init(zone);
+	if (hotplug)
+		zone->free_area =
+			kcalloc_node(MAX_ORDER + 1, sizeof(struct free_area),
+				     GFP_KERNEL, nid);
+	else
+		zone->free_area =
+			memblock_alloc_node(sizeof(struct free_area) * (MAX_ORDER + 1),
+					    sizeof(struct free_area), nid);
+	BUG_ON(!zone->free_area);
+
 }
 
 /*
@@ -7697,7 +7717,7 @@ void __ref free_area_init_core_hotplug(struct pglist_data *pgdat)
 	}
 
 	for (z = 0; z < MAX_NR_ZONES; z++)
-		zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
+		zone_init_internals(&pgdat->node_zones[z], z, nid, 0, true);
 }
 #endif
 
@@ -7760,7 +7780,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
 		 * when the bootmem allocator frees pages into the buddy system.
 		 * And all highmem pages will be managed by the buddy system.
 		 */
-		zone_init_internals(zone, j, nid, freesize);
+		zone_init_internals(zone, j, nid, freesize, false);
 
 		if (!size)
 			continue;
-- 
2.35.1


  parent reply	other threads:[~2022-08-11 23:18 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-11 23:16 [RFC PATCH v2 00/12] Make MAX_ORDER adjustable as a kernel boot time parameter Zi Yan
2022-08-11 23:16 ` [RFC PATCH v2 01/12] arch: mm: rename FORCE_MAX_ZONEORDER to ARCH_FORCE_MAX_ORDER Zi Yan
2022-08-13 15:36   ` Mike Rapoport
2022-08-15 12:53     ` Zi Yan
2022-08-11 23:16 ` [RFC PATCH v2 02/12] mm: rectify MAX_ORDER semantics to be the largest page order from buddy allocator Zi Yan
2022-08-11 23:16 ` [RFC PATCH v2 03/12] mm: replace MAX_ORDER when it is used to indicate max physical contiguity Zi Yan
2022-08-11 23:16 ` [RFC PATCH v2 04/12] mm: adapt deferred struct page init to new MAX_ORDER Zi Yan
2022-08-11 23:16 ` [RFC PATCH v2 05/12] mm: prevent pageblock size being larger than section size Zi Yan
2022-08-11 23:16 ` [RFC PATCH v2 06/12] fs: proc: use pageblock_nr_pages for reschedule period in read_kcore() Zi Yan
2022-08-23 10:36   ` David Hildenbrand
2022-08-11 23:16 ` [RFC PATCH v2 07/12] virtio: virtio_balloon: use pageblock_order instead of MAX_ORDER Zi Yan
2022-08-11 23:16 ` [RFC PATCH v2 08/12] mm/page_reporting: set page_reporting_order to -1 to prevent it running Zi Yan
2022-08-11 23:16 ` [RFC PATCH v2 09/12] mm: Make MAX_ORDER of buddy allocator configurable via Kconfig SET_MAX_ORDER Zi Yan
2022-08-13  1:11   ` Randy Dunlap
2022-08-13  2:37     ` Zi Yan
2022-08-13  2:40       ` Randy Dunlap
2022-08-11 23:16 ` Zi Yan [this message]
2022-08-11 23:16 ` [RFC PATCH v2 11/12] mm: introduce MIN_MAX_ORDER to replace MAX_ORDER as compile time constant Zi Yan
2022-08-11 23:16 ` [RFC PATCH v2 12/12] mm: make MAX_ORDER a kernel boot time parameter Zi Yan
2022-08-13  1:11   ` Randy Dunlap
2022-08-13  2:38     ` Zi Yan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220811231643.1012912-11-zi.yan@sent.com \
    --to=zi.yan@sent.com \
    --cc=david@redhat.com \
    --cc=jhubbard@nvidia.com \
    --cc=jthoughton@google.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mike.kravetz@oracle.com \
    --cc=rientjes@google.com \
    --cc=rppt@kernel.org \
    --cc=shy828301@gmail.com \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.