kexec.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: Anthony Yznaga <anthony.yznaga@oracle.com>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	x86@kernel.org, hpa@zytor.com, dave.hansen@linux.intel.com,
	luto@kernel.org, peterz@infradead.org, rppt@kernel.org,
	akpm@linux-foundation.org, ebiederm@xmission.com,
	keescook@chromium.org, graf@amazon.com, jason.zeng@intel.com,
	lei.l.li@intel.com, steven.sistare@oracle.com,
	fam.zheng@bytedance.com, mgalaxy@akamai.com,
	kexec@lists.infradead.org
Subject: [RFC v3 10/21] PKRAM: prepare for adding preserved ranges to memblock reserved
Date: Wed, 26 Apr 2023 17:08:46 -0700	[thread overview]
Message-ID: <1682554137-13938-11-git-send-email-anthony.yznaga@oracle.com> (raw)
In-Reply-To: <1682554137-13938-1-git-send-email-anthony.yznaga@oracle.com>

Calling memblock_reserve() repeatedly to add preserved ranges is
inefficient and risks clobbering preserved memory if the memblock
reserved regions array must be resized.  Instead, calculate the size
needed to accommodate the preserved ranges, find a suitable range for
a new reserved regions array that does not overlap any preserved range,
and populate it with a new, merged regions array.

Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
 mm/pkram.c | 244 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 244 insertions(+)

diff --git a/mm/pkram.c b/mm/pkram.c
index 3790e5180feb..c649504fa1fa 100644
--- a/mm/pkram.c
+++ b/mm/pkram.c
@@ -7,6 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -1138,3 +1139,246 @@ static unsigned long pkram_populate_regions_list(void)
 
 	return priv.nr_regions;
 }
+
+/*
+ * Start iterating at slot 0 of the list page at sb->region_list_pfn.
+ * WARNs and returns NULL if sb is NULL (tested first) or has no list page.
+ */
+struct pkram_region *pkram_first_region(struct pkram_super_block *sb,
+					struct pkram_region_list **rlp, int *idx)
+{
+	if (WARN_ON(!sb) || WARN_ON(!sb->region_list_pfn))
+		return NULL;
+
+	*rlp = pfn_to_kaddr(sb->region_list_pfn);
+	*idx = 0;
+	return &(*rlp)->regions[0];
+}
+
+/*
+ * Advance the cursor (*rlp, *idx) by one slot, following next_pfn past a
+ * page's last slot.  NULL if there is no next page or the slot's size is 0.
+ */
+struct pkram_region *pkram_next_region(struct pkram_region_list **rlp, int *idx)
+{
+	struct pkram_region_list *page = *rlp;
+	int slot = *idx + 1;
+
+	if (slot >= PKRAM_REGIONS_LIST_MAX) {
+		if (!page->next_pfn) {
+			pr_err("PKRAM: %s: no more pkram_region_list pages\n", __func__);
+			return NULL;
+		}
+		page = pfn_to_kaddr(page->next_pfn);
+		*rlp = page;
+		slot = 0;
+	}
+	*idx = slot;
+
+	return page->regions[slot].size ? &page->regions[slot] : NULL;
+}
+
+/*
+ * Start a top-down walk: the starting page is the one named by the first
+ * list page's prev_pfn, the starting slot is derived from sb->nr_regions.
+ * WARNs and returns NULL if sb is NULL, names no list page, or that page
+ * has no prev_pfn; returns NULL without a WARN if nr_regions is 0.
+ */
+struct pkram_region *pkram_first_region_topdown(struct pkram_super_block *sb,
+						struct pkram_region_list **rlp, int *idx)
+{
+	struct pkram_region_list *rl;
+
+	/* sb is tested before it is read; an empty list has no last slot. */
+	if (WARN_ON(!sb) || WARN_ON(!sb->region_list_pfn) || !sb->nr_regions)
+		return NULL;
+
+	rl = pfn_to_kaddr(sb->region_list_pfn);
+	if (WARN_ON(!rl->prev_pfn))
+		return NULL;
+	rl = pfn_to_kaddr(rl->prev_pfn);
+
+	*rlp = rl;
+	*idx = (sb->nr_regions - 1) % PKRAM_REGIONS_LIST_MAX;
+	return &rl->regions[*idx];
+}
+
+/* Step the cursor (*rlp, *idx) back one slot; NULL at slot 0 with no prev_pfn. */
+struct pkram_region *pkram_next_region_topdown(struct pkram_region_list **rlp, int *idx)
+{
+	struct pkram_region_list *page = *rlp;
+
+	if (*idx != 0) {
+		*idx -= 1;
+		return &page->regions[*idx];
+	}
+	/* At slot 0: continue from the last slot of the previous list page. */
+	if (!page->prev_pfn)
+		return NULL;
+
+	page = pfn_to_kaddr(page->prev_pfn);
+	*rlp = page;
+	*idx = PKRAM_REGIONS_LIST_MAX - 1;
+	return &page->regions[*idx];
+}
+
+/*
+ * Use the pkram regions list to allocate a page-aligned block of memory
+ * that does not overlap with preserved pages.  Returns 0 on failure.
+ */
+phys_addr_t __init alloc_topdown(phys_addr_t size)
+{
+	phys_addr_t hole_start, hole_end;
+	struct pkram_region_list *rl;
+	struct pkram_region *r;
+	phys_addr_t addr = 0;
+	int idx;
+
+	hole_end = memblock.current_limit;
+	r = pkram_first_region_topdown(pkram_sb, &rl, &idx);
+
+	while (r) {
+		hole_start = r->base + r->size;
+		/* Avoid unsigned wrap when r ends above hole_end. */
+		if (hole_end >= hole_start && hole_end - hole_start >= size) {
+			addr = memblock_phys_alloc_range(size, PAGE_SIZE,
+							hole_start, hole_end);
+			if (addr)
+				break;
+		}
+
+		hole_end = r->base;
+		r = pkram_next_region_topdown(&rl, &idx);
+	}
+
+	/* Nothing allocated yet: try below the last hole_end of the walk. */
+	if (!addr)
+		addr = memblock_phys_alloc_range(size, PAGE_SIZE, 0, hole_end);
+
+	return addr;
+}
+
+/*
+ * Build in new->regions a merge of memblock.reserved and the preserved
+ * regions listed by pkram_sb, each time taking the entry that lies entirely
+ * below the other list's current entry.  The caller supplies new->regions;
+ * new->cnt and new->total_size are set on success.
+ * Returns 0, -EINVAL if the preserved list cannot be started or yields
+ * fewer than nr_regions entries, or -EBUSY if a preserved region overlaps.
+ */
+int __init pkram_create_merged_reserved(struct memblock_type *new)
+{
+	unsigned long cnt_a = memblock.reserved.cnt;
+	unsigned long cnt_b = pkram_sb->nr_regions;
+	long i = 0, j = 0, k = 0;
+	struct memblock_region *r;
+	struct memblock_region *rgn;
+	struct pkram_region *pkr;
+	struct pkram_region_list *rl;
+	int idx;
+	unsigned long total_size = 0;
+	unsigned long nr_preserved = 0;	/* pages; not read in this function */
+
+	pkr = pkram_first_region(pkram_sb, &rl, &idx);
+	if (!pkr)
+		return -EINVAL;
+	while (i < cnt_a && j < cnt_b && pkr) {
+		r = &memblock.reserved.regions[i];
+		rgn = &new->regions[k];
+
+		if (r->base + r->size <= pkr->base) {
+			*rgn = *r;
+			i++;
+		} else if (pkr->base + pkr->size <= r->base) {
+			rgn->base = pkr->base;
+			rgn->size = pkr->size;
+			memblock_set_region_node(rgn, MAX_NUMNODES);
+			nr_preserved += (rgn->size >> PAGE_SHIFT);
+			pkr = pkram_next_region(&rl, &idx);
+			j++;
+		} else {
+			pr_err("PKRAM: unexpected overlap:\n");
+			pr_err("PKRAM: reserved: base=%pa,size=%pa,flags=0x%x\n", &r->base,
+				&r->size, (int)r->flags);
+			pr_err("PKRAM: pkram: base=%pa,size=%pa\n", &pkr->base, &pkr->size);
+			return -EBUSY;
+		}
+		total_size += rgn->size;
+		k++;
+	}
+
+	while (i < cnt_a) {
+		r = &memblock.reserved.regions[i];
+		rgn = &new->regions[k];
+		*rgn = *r;
+		total_size += rgn->size;
+		i++;
+		k++;
+	}
+	while (j < cnt_b && pkr) {
+		rgn = &new->regions[k];
+		rgn->base = pkr->base;
+		rgn->size = pkr->size;
+		memblock_set_region_node(rgn, MAX_NUMNODES);
+		nr_preserved += (rgn->size >> PAGE_SHIFT);
+		total_size += rgn->size;
+		pkr = pkram_next_region(&rl, &idx);
+		j++;
+		k++;
+	}
+
+	/* A short preserved list leaves slots of new->regions unwritten. */
+	if (WARN_ON(cnt_a + cnt_b != k))
+		return -EINVAL;
+
+	new->cnt = k;
+	new->total_size = total_size;
+
+	return 0;
+}
+
+/*
+ * Replace the memblock.reserved regions array with a larger one that also
+ * holds the preserved ranges, so that preserved pages are reserved.  The
+ * array is placed by alloc_topdown() and filled in by
+ * pkram_create_merged_reserved().  Intended to run as early as possible
+ * at boot, before preserved memory can be recycled.
+ *
+ * NOTE(review): a failed merge returns with the new array still allocated.
+ */
+int __init pkram_merge_with_reserved(void)
+{
+	struct memblock_type merged;
+	unsigned long max_regions;
+	phys_addr_t array_bytes, array_pa;
+	int ret;
+
+	/*
+	 * The allocation below inserts one more range into memblock.reserved;
+	 * a free slot is needed so memblock_double_array() is not called.
+	 */
+	if (WARN_ONCE(memblock.reserved.cnt == memblock.reserved.max,
+		      "PKRAM: no space for new memblock list\n"))
+		return -ENOMEM;
+
+	max_regions = memblock.reserved.max + pkram_sb->nr_regions;
+	array_bytes = PAGE_ALIGN(sizeof(struct memblock_region) * max_regions);
+
+	array_pa = alloc_topdown(array_bytes);
+	if (!array_pa)
+		return -ENOMEM;
+
+	merged.max = max_regions;
+	merged.regions = __va(array_pa);
+	ret = pkram_create_merged_reserved(&merged);
+	if (ret)
+		return ret;
+
+	/* Switch memblock.reserved over to the merged array. */
+	memblock.reserved.regions = merged.regions;
+	memblock.reserved.max = merged.max;
+	memblock.reserved.cnt = merged.cnt;
+	memblock.reserved.total_size = merged.total_size;
+
+	return 0;
+}
-- 
1.9.4


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

  parent reply	other threads:[~2023-04-27  0:10 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-27  0:08 [RFC v3 00/21] Preserved-over-Kexec RAM Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 01/21] mm: add PKRAM API stubs and Kconfig Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 02/21] mm: PKRAM: implement node load and save functions Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 03/21] mm: PKRAM: implement object " Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 04/21] mm: PKRAM: implement folio stream operations Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 05/21] mm: PKRAM: implement byte " Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 06/21] mm: PKRAM: link nodes by pfn before reboot Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 07/21] mm: PKRAM: introduce super block Anthony Yznaga
2023-06-05  2:40   ` Coiby Xu
2023-06-06  2:01     ` Anthony Yznaga
2023-06-06  2:55       ` Coiby Xu
2023-06-06  3:12         ` Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 08/21] PKRAM: track preserved pages in a physical mapping pagetable Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 09/21] PKRAM: pass a list of preserved ranges to the next kernel Anthony Yznaga
2023-04-27  0:08 ` Anthony Yznaga [this message]
2023-04-27  0:08 ` [RFC v3 11/21] mm: PKRAM: reserve preserved memory at boot Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 12/21] PKRAM: free the preserved ranges list Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 13/21] PKRAM: prevent inadvertent use of a stale superblock Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 14/21] PKRAM: provide a way to ban pages from use by PKRAM Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 15/21] kexec: PKRAM: prevent kexec clobbering preserved pages in some cases Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 16/21] PKRAM: provide a way to check if a memory range has preserved pages Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 17/21] kexec: PKRAM: avoid clobbering already " Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 18/21] mm: PKRAM: allow preserved memory to be freed from userspace Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 19/21] PKRAM: disable feature when running the kdump kernel Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 20/21] x86/KASLR: PKRAM: support physical kaslr Anthony Yznaga
2023-04-27  0:08 ` [RFC v3 21/21] x86/boot/compressed/64: use 1GB pages for mappings Anthony Yznaga
2023-04-27 18:40   ` H. Peter Anvin
2023-04-27 22:38     ` Anthony Yznaga
2023-05-26 13:57 ` [RFC v3 00/21] Preserved-over-Kexec RAM Gowans, James
2023-05-31 23:14   ` Anthony Yznaga
2023-06-01  2:15 ` Baoquan He
2023-06-01 23:58   ` Anthony Yznaga

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1682554137-13938-11-git-send-email-anthony.yznaga@oracle.com \
    --to=anthony.yznaga@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=ebiederm@xmission.com \
    --cc=fam.zheng@bytedance.com \
    --cc=graf@amazon.com \
    --cc=hpa@zytor.com \
    --cc=jason.zeng@intel.com \
    --cc=keescook@chromium.org \
    --cc=kexec@lists.infradead.org \
    --cc=lei.l.li@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mgalaxy@akamai.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rppt@kernel.org \
    --cc=steven.sistare@oracle.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).