linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Anthony Yznaga <anthony.yznaga@oracle.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: willy@infradead.org, corbet@lwn.net, tglx@linutronix.de,
	mingo@redhat.com, bp@alien8.de, x86@kernel.org, hpa@zytor.com,
	dave.hansen@linux.intel.com, luto@kernel.org,
	peterz@infradead.org, rppt@kernel.org, akpm@linux-foundation.org,
	hughd@google.com, ebiederm@xmission.com, keescook@chromium.org,
	ardb@kernel.org, nivedita@alum.mit.edu, jroedel@suse.de,
	masahiroy@kernel.org, nathan@kernel.org, terrelln@fb.com,
	vincenzo.frascino@arm.com, martin.b.radev@gmail.com,
	andreyknvl@google.com, daniel.kiper@oracle.com,
	rafael.j.wysocki@intel.com, dan.j.williams@intel.com,
	Jonathan.Cameron@huawei.com, bhe@redhat.com, rminnich@gmail.com,
	ashish.kalra@amd.com, guro@fb.com, hannes@cmpxchg.org,
	mhocko@kernel.org, iamjoonsoo.kim@lge.com, vbabka@suse.cz,
	alex.shi@linux.alibaba.com, david@redhat.com,
	richard.weiyang@gmail.com, vdavydov.dev@gmail.com,
	graf@amazon.com, jason.zeng@intel.com, lei.l.li@intel.com,
	daniel.m.jordan@oracle.com, steven.sistare@oracle.com,
	linux-fsdevel@vger.kernel.org, linux-doc@vger.kernel.org,
	kexec@lists.infradead.org
Subject: [RFC v2 11/43] PKRAM: prepare for adding preserved ranges to memblock reserved
Date: Tue, 30 Mar 2021 14:35:46 -0700	[thread overview]
Message-ID: <1617140178-8773-12-git-send-email-anthony.yznaga@oracle.com> (raw)
In-Reply-To: <1617140178-8773-1-git-send-email-anthony.yznaga@oracle.com>

Calling memblock_reserve() repeatedly to add preserved ranges is
inefficient and risks clobbering preserved memory if the memblock
reserved regions array must be resized.  Instead, calculate the size
needed to accomodate the preserved ranges, find a suitable range for
a new reserved regions array that does not overlap any preserved range,
and populate it with a new, merged regions array.

Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
 mm/pkram.c | 241 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 241 insertions(+)

diff --git a/mm/pkram.c b/mm/pkram.c
index 4cfa236a4126..b4a14837946a 100644
--- a/mm/pkram.c
+++ b/mm/pkram.c
@@ -7,6 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -1121,3 +1122,243 @@ static unsigned long pkram_populate_regions_list(void)
 
 	return priv.nr_regions;
 }
+
+struct pkram_region *pkram_first_region(struct pkram_super_block *sb, struct pkram_region_list **rlp, int *idx)
+{
+	WARN_ON(!sb);
+	WARN_ON(!sb->region_list_pfn);
+
+	if (!sb || !sb->region_list_pfn)
+		return NULL;
+
+	*rlp = pfn_to_kaddr(sb->region_list_pfn);
+	*idx = 0;
+
+	return &(*rlp)->regions[0];
+}
+
+struct pkram_region *pkram_next_region(struct pkram_region_list **rlp, int *idx)
+{
+	struct pkram_region_list *rl = *rlp;
+	int i = *idx;
+
+	i++;
+	if (i >= PKRAM_REGIONS_LIST_MAX) {
+		if (!rl->next_pfn) {
+			pr_err("PKRAM: %s: no more pkram_region_list pages\n", __func__);
+			return NULL;
+		}
+		rl = pfn_to_kaddr(rl->next_pfn);
+		*rlp = rl;
+		i = 0;
+	}
+	*idx = i;
+
+	if (rl->regions[i].size == 0)
+		return NULL;
+
+	return &rl->regions[i];
+}
+
+struct pkram_region *pkram_first_region_topdown(struct pkram_super_block *sb, struct pkram_region_list **rlp, int *idx)
+{
+	struct pkram_region_list *rl;
+
+	WARN_ON(!sb);
+	WARN_ON(!sb->region_list_pfn);
+
+	if (!sb || !sb->region_list_pfn)
+		return NULL;
+
+	rl = pfn_to_kaddr(sb->region_list_pfn);
+	if (!rl->prev_pfn) {
+		WARN_ON(1);
+		return NULL;
+	}
+	rl = pfn_to_kaddr(rl->prev_pfn);
+
+	*rlp = rl;
+
+	*idx = (sb->nr_regions - 1) % PKRAM_REGIONS_LIST_MAX;
+
+	return &rl->regions[*idx];
+}
+
+struct pkram_region *pkram_next_region_topdown(struct pkram_region_list **rlp, int *idx)
+{
+	struct pkram_region_list *rl = *rlp;
+	int i = *idx;
+
+	if (i == 0) {
+		if (!rl->prev_pfn)
+			return NULL;
+		rl = pfn_to_kaddr(rl->prev_pfn);
+		*rlp = rl;
+		i = PKRAM_REGIONS_LIST_MAX - 1;
+	} else
+		i--;
+
+	*idx = i;
+
+	return &rl->regions[i];
+}
+
+/*
+ * Use the pkram regions list to find an available block of memory that does
+ * not overlap with preserved pages.
+ */
+phys_addr_t __init find_available_topdown(phys_addr_t size)
+{
+	phys_addr_t hole_start, hole_end, hole_size;
+	struct pkram_region_list *rl;
+	struct pkram_region *r;
+	phys_addr_t addr = 0;
+	int idx;
+
+	hole_end = memblock.current_limit;
+	r = pkram_first_region_topdown(pkram_sb, &rl, &idx);
+
+	while (r) {
+		hole_start = r->base + r->size;
+		hole_size = hole_end - hole_start;
+
+		if (hole_size >= size) {
+			addr = memblock_find_in_range(hole_start, hole_end,
+							size, PAGE_SIZE);
+			if (addr)
+				break;
+		}
+
+		hole_end = r->base;
+		r = pkram_next_region_topdown(&rl, &idx);
+	}
+
+	if (!addr)
+		addr = memblock_find_in_range(0, hole_end, size, PAGE_SIZE);
+
+	return addr;
+}
+
+int __init pkram_create_merged_reserved(struct memblock_type *new)
+{
+	unsigned long cnt_a;
+	unsigned long cnt_b;
+	long i, j, k;
+	struct memblock_region *r;
+	struct memblock_region *rgn;
+	struct pkram_region *pkr;
+	struct pkram_region_list *rl;
+	int idx;
+	unsigned long total_size = 0;
+	unsigned long nr_preserved = 0;
+
+	cnt_a = memblock.reserved.cnt;
+	cnt_b = pkram_sb->nr_regions;
+
+	i = 0;
+	j = 0;
+	k = 0;
+
+	pkr = pkram_first_region(pkram_sb, &rl, &idx);
+	if (!pkr)
+		return -EINVAL;
+	while (i < cnt_a && j < cnt_b && pkr) {
+		r = &memblock.reserved.regions[i];
+		rgn = &new->regions[k];
+
+		if (r->base + r->size <= pkr->base) {
+			*rgn = *r;
+			i++;
+		} else if (pkr->base + pkr->size <= r->base) {
+			rgn->base = pkr->base;
+			rgn->size = pkr->size;
+			memblock_set_region_node(rgn, MAX_NUMNODES);
+
+			nr_preserved +=  (rgn->size >> PAGE_SHIFT);
+			pkr = pkram_next_region(&rl, &idx);
+			j++;
+		} else {
+			pr_err("PKRAM: unexpected overlap:\n");
+			pr_err("PKRAM: reserved: base=%pa,size=%pa,flags=0x%x\n", &r->base, &r->size, (int)r->flags);
+			pr_err("PKRAM: pkram: base=%pa,size=%pa\n", &pkr->base, &pkr->size);
+			return -EBUSY;
+		}
+		total_size += rgn->size;
+		k++;
+	}
+
+	while (i < cnt_a) {
+		r = &memblock.reserved.regions[i];
+		rgn = &new->regions[k];
+
+		*rgn = *r;
+
+		total_size += rgn->size;
+		i++;
+		k++;
+	}
+	while (j < cnt_b && pkr) {
+		rgn = &new->regions[k];
+		rgn->base = pkr->base;
+		rgn->size = pkr->size;
+		memblock_set_region_node(rgn, MAX_NUMNODES);
+
+		nr_preserved += (rgn->size >> PAGE_SHIFT);
+		total_size += rgn->size;
+		pkr = pkram_next_region(&rl, &idx);
+		j++;
+		k++;
+	}
+
+	WARN_ON(cnt_a + cnt_b != k);
+	new->cnt = cnt_a + cnt_b;
+	new->total_size = total_size;
+
+	return 0;
+}
+
+/*
+ * Reserve pages that belong to preserved memory.  This is accomplished by
+ * merging the existing reserved ranges with the preserved ranges into
+ * a new, sufficiently sized memblock reserved array.
+ *
+ * This function should be called at boot time as early as possible to prevent
+ * preserved memory from being recycled.
+ */
+int __init pkram_merge_with_reserved(void)
+{
+	struct memblock_type new;
+	unsigned long new_max;
+	phys_addr_t new_size;
+	phys_addr_t addr;
+	int err;
+
+	/*
+	 * Need space to insert one more range into memblock.reserved
+	 * without memblock_double_array() being called.
+	 */
+	if (memblock.reserved.cnt == memblock.reserved.max) {
+		WARN_ONCE(1, "PKRAM: no space for new memblock list\n");
+		return -ENOMEM;
+	}
+
+	new_max = memblock.reserved.max + pkram_sb->nr_regions;
+	new_size = PAGE_ALIGN(sizeof (struct memblock_region) * new_max);
+
+	addr = find_available_topdown(new_size);
+	if (!addr || memblock_reserve(addr, new_size))
+		return -ENOMEM;
+
+	new.regions = __va(addr);
+	new.max = new_max;
+	err = pkram_create_merged_reserved(&new);
+	if (err)
+		return err;
+
+	memblock.reserved.cnt = new.cnt;
+	memblock.reserved.max = new.max;
+	memblock.reserved.total_size = new.total_size;
+	memblock.reserved.regions = new.regions;
+
+	return 0;
+}
-- 
1.8.3.1



  parent reply	other threads:[~2021-03-30 21:26 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-30 21:35 [RFC v2 00/43] PKRAM: Preserved-over-Kexec RAM Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 01/43] mm: add PKRAM API stubs and Kconfig Anthony Yznaga
2021-03-31 18:43   ` Randy Dunlap
2021-03-31 20:28     ` Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 02/43] mm: PKRAM: implement node load and save functions Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 03/43] mm: PKRAM: implement object " Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 04/43] mm: PKRAM: implement page stream operations Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 05/43] mm: PKRAM: support preserving transparent hugepages Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 06/43] mm: PKRAM: implement byte stream operations Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 07/43] mm: PKRAM: link nodes by pfn before reboot Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 08/43] mm: PKRAM: introduce super block Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 09/43] PKRAM: track preserved pages in a physical mapping pagetable Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 10/43] PKRAM: pass a list of preserved ranges to the next kernel Anthony Yznaga
2021-03-30 21:35 ` Anthony Yznaga [this message]
2021-03-30 21:35 ` [RFC v2 12/43] mm: PKRAM: reserve preserved memory at boot Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 13/43] PKRAM: free the preserved ranges list Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 14/43] PKRAM: prevent inadvertent use of a stale superblock Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 15/43] PKRAM: provide a way to ban pages from use by PKRAM Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 16/43] kexec: PKRAM: prevent kexec clobbering preserved pages in some cases Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 17/43] PKRAM: provide a way to check if a memory range has preserved pages Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 18/43] kexec: PKRAM: avoid clobbering already " Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 19/43] mm: PKRAM: allow preserved memory to be freed from userspace Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 20/43] PKRAM: disable feature when running the kdump kernel Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 21/43] x86/KASLR: PKRAM: support physical kaslr Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 22/43] x86/boot/compressed/64: use 1GB pages for mappings Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 23/43] mm: shmem: introduce shmem_insert_page Anthony Yznaga
2021-03-30 21:35 ` [RFC v2 24/43] mm: shmem: enable saving to PKRAM Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 25/43] mm: shmem: prevent swapping of PKRAM-enabled tmpfs pages Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 26/43] mm: shmem: specify the mm to use when inserting pages Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 27/43] mm: shmem: when inserting, handle pages already charged to a memcg Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 28/43] x86/mm/numa: add numa_isolate_memblocks() Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 29/43] PKRAM: ensure memblocks with preserved pages init'd for numa Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 30/43] memblock: PKRAM: mark memblocks that contain preserved pages Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 31/43] memblock, mm: defer initialization of " Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 32/43] shmem: preserve shmem files a chunk at a time Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 33/43] PKRAM: atomically add and remove link pages Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 34/43] shmem: PKRAM: multithread preserving and restoring shmem pages Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 35/43] shmem: introduce shmem_insert_pages() Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 36/43] PKRAM: add support for loading pages in bulk Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 37/43] shmem: PKRAM: enable bulk loading of preserved pages into shmem Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 38/43] mm: implement splicing a list of pages to the LRU Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 39/43] shmem: optimize adding pages to the LRU in shmem_insert_pages() Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 40/43] shmem: initial support for adding multiple pages to pagecache Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 41/43] XArray: add xas_export_node() and xas_import_node() Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 42/43] shmem: reduce time holding xa_lock when inserting pages Anthony Yznaga
2021-03-30 21:36 ` [RFC v2 43/43] PKRAM: improve index alignment of pkram_link entries Anthony Yznaga
2021-06-05 13:39 ` [RFC v2 00/43] PKRAM: Preserved-over-Kexec RAM Pavel Tatashin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1617140178-8773-12-git-send-email-anthony.yznaga@oracle.com \
    --to=anthony.yznaga@oracle.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex.shi@linux.alibaba.com \
    --cc=andreyknvl@google.com \
    --cc=ardb@kernel.org \
    --cc=ashish.kalra@amd.com \
    --cc=bhe@redhat.com \
    --cc=bp@alien8.de \
    --cc=corbet@lwn.net \
    --cc=dan.j.williams@intel.com \
    --cc=daniel.kiper@oracle.com \
    --cc=daniel.m.jordan@oracle.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=graf@amazon.com \
    --cc=guro@fb.com \
    --cc=hannes@cmpxchg.org \
    --cc=hpa@zytor.com \
    --cc=hughd@google.com \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=jason.zeng@intel.com \
    --cc=jroedel@suse.de \
    --cc=keescook@chromium.org \
    --cc=kexec@lists.infradead.org \
    --cc=lei.l.li@intel.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=martin.b.radev@gmail.com \
    --cc=masahiroy@kernel.org \
    --cc=mhocko@kernel.org \
    --cc=mingo@redhat.com \
    --cc=nathan@kernel.org \
    --cc=nivedita@alum.mit.edu \
    --cc=peterz@infradead.org \
    --cc=rafael.j.wysocki@intel.com \
    --cc=richard.weiyang@gmail.com \
    --cc=rminnich@gmail.com \
    --cc=rppt@kernel.org \
    --cc=steven.sistare@oracle.com \
    --cc=terrelln@fb.com \
    --cc=tglx@linutronix.de \
    --cc=vbabka@suse.cz \
    --cc=vdavydov.dev@gmail.com \
    --cc=vincenzo.frascino@arm.com \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).