KVM Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH kvm-unit-tests 0/8] arm/arm64: Prepare for target-efi
@ 2021-04-07 18:59 Andrew Jones
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler Andrew Jones
                   ` (7 more replies)
  0 siblings, 8 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-07 18:59 UTC (permalink / raw)
  To: kvm; +Cc: alexandru.elisei, nikos.nikoleris, andre.przywara, eric.auger

This series is a collection of patches derived from [1] that pave the
way for new targets, e.g. target-efi[2]. These patches mostly address
the elimination of memory map assumptions and they shouldn't introduce
any functional changes. The last two patches are not related to the
memory map, but they also prepare for bare metal targets. I tossed
them in since I don't think they should be too controversial.
This patch series is also available here [3].

[1] https://github.com/rhdrjones/kvm-unit-tests/commits/target-efi
[2] https://www.youtube.com/watch?v=kvaufVrL0J0
[3] https://gitlab.com/rhdrjones/kvm-unit-tests/-/commits/efiprep

Thanks,
drew


Andrew Jones (8):
  arm/arm64: Reorganize cstart assembler
  arm/arm64: Move setup_vm into setup
  pci-testdev: ioremap regions
  arm/arm64: mmu: Stop mapping an assumed IO region
  arm/arm64: mmu: Remove memory layout assumptions
  arm/arm64: setup: Consolidate memory layout assumptions
  chr-testdev: Silently fail init
  arm/arm64: psci: don't assume method is hvc

 arm/cstart.S           |  72 +++++++--------
 arm/cstart64.S         |  29 +++---
 arm/flat.lds           |  23 +++++
 arm/selftest.c         |  34 ++------
 lib/arm/asm/io.h       |   6 ++
 lib/arm/asm/mmu-api.h  |   1 +
 lib/arm/asm/mmu.h      |   1 +
 lib/arm/asm/page.h     |   2 +
 lib/arm/asm/psci.h     |   9 +-
 lib/arm/asm/setup.h    |   7 +-
 lib/arm/mmu.c          |  94 +++++++++++++++++---
 lib/arm/psci.c         |  17 +++-
 lib/arm/setup.c        | 194 +++++++++++++++++++++++++----------------
 lib/arm64/asm/io.h     |   6 ++
 lib/arm64/asm/mmu.h    |   1 +
 lib/arm64/asm/page.h   |   2 +
 lib/chr-testdev.c      |   5 +-
 lib/pci-host-generic.c |   5 +-
 lib/pci-host-generic.h |   4 +-
 lib/pci-testdev.c      |   4 +
 20 files changed, 339 insertions(+), 177 deletions(-)

-- 
2.26.3


^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler
  2021-04-07 18:59 [PATCH kvm-unit-tests 0/8] arm/arm64: Prepare for target-efi Andrew Jones
@ 2021-04-07 18:59 ` Andrew Jones
  2021-04-09 17:18   ` Nikos Nikoleris
  2021-04-13 16:34   ` Alexandru Elisei
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 2/8] arm/arm64: Move setup_vm into setup Andrew Jones
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-07 18:59 UTC (permalink / raw)
  To: kvm; +Cc: alexandru.elisei, nikos.nikoleris, andre.przywara, eric.auger

Move secondary_entry helper functions out of .init and into .text,
since secondary_entry isn't run at "init" time.

Signed-off-by: Andrew Jones <drjones@redhat.com>
---
 arm/cstart.S   | 62 +++++++++++++++++++++++++++-----------------------
 arm/cstart64.S | 22 +++++++++++-------
 2 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/arm/cstart.S b/arm/cstart.S
index d88a98362940..653ab1e8a141 100644
--- a/arm/cstart.S
+++ b/arm/cstart.S
@@ -96,32 +96,7 @@ start:
 	bl	exit
 	b	halt
 
-
-.macro set_mode_stack mode, stack
-	add	\stack, #S_FRAME_SIZE
-	msr	cpsr_c, #(\mode | PSR_I_BIT | PSR_F_BIT)
-	isb
-	mov	sp, \stack
-.endm
-
-exceptions_init:
-	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
-	bic	r2, #CR_V		@ SCTLR.V := 0
-	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
-	ldr	r2, =vector_table
-	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
-
-	mrs	r2, cpsr
-
-	/* first frame reserved for svc mode */
-	set_mode_stack	UND_MODE, r0
-	set_mode_stack	ABT_MODE, r0
-	set_mode_stack	IRQ_MODE, r0
-	set_mode_stack	FIQ_MODE, r0
-
-	msr	cpsr_cxsf, r2		@ back to svc mode
-	isb
-	mov	pc, lr
+.text
 
 enable_vfp:
 	/* Enable full access to CP10 and CP11: */
@@ -133,8 +108,6 @@ enable_vfp:
 	vmsr	fpexc, r0
 	mov	pc, lr
 
-.text
-
 .global get_mmu_off
 get_mmu_off:
 	ldr	r0, =auxinfo
@@ -235,6 +208,39 @@ asm_mmu_disable:
 
 	mov     pc, lr
 
+/*
+ * Vectors
+ */
+
+.macro set_mode_stack mode, stack
+	add	\stack, #S_FRAME_SIZE
+	msr	cpsr_c, #(\mode | PSR_I_BIT | PSR_F_BIT)
+	isb
+	mov	sp, \stack
+.endm
+
+exceptions_init:
+	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
+	bic	r2, #CR_V		@ SCTLR.V := 0
+	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
+	ldr	r2, =vector_table
+	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
+
+	mrs	r2, cpsr
+
+	/*
+	 * Input r0 is the stack top, which is the exception stacks base
+	 * The first frame is reserved for svc mode
+	 */
+	set_mode_stack	UND_MODE, r0
+	set_mode_stack	ABT_MODE, r0
+	set_mode_stack	IRQ_MODE, r0
+	set_mode_stack	FIQ_MODE, r0
+
+	msr	cpsr_cxsf, r2		@ back to svc mode
+	isb
+	mov	pc, lr
+
 /*
  * Vector stubs
  * Simplified version of the Linux kernel implementation
diff --git a/arm/cstart64.S b/arm/cstart64.S
index 0a85338bcdae..d39cf4dfb99c 100644
--- a/arm/cstart64.S
+++ b/arm/cstart64.S
@@ -89,10 +89,12 @@ start:
 	msr	cpacr_el1, x4
 
 	/* set up exception handling */
+	mov	x4, x0				// x0 is the addr of the dtb
 	bl	exceptions_init
 
 	/* complete setup */
-	bl	setup				// x0 is the addr of the dtb
+	mov	x0, x4				// restore the addr of the dtb
+	bl	setup
 	bl	get_mmu_off
 	cbnz	x0, 1f
 	bl	setup_vm
@@ -109,13 +111,6 @@ start:
 	bl	exit
 	b	halt
 
-exceptions_init:
-	adrp	x4, vector_table
-	add	x4, x4, :lo12:vector_table
-	msr	vbar_el1, x4
-	isb
-	ret
-
 .text
 
 .globl get_mmu_off
@@ -251,6 +246,17 @@ asm_mmu_disable:
 
 /*
  * Vectors
+ */
+
+exceptions_init:
+	adrp	x0, vector_table
+	add	x0, x0, :lo12:vector_table
+	msr	vbar_el1, x0
+	isb
+	ret
+
+/*
+ * Vector stubs
  * Adapted from arch/arm64/kernel/entry.S
  */
 .macro vector_stub, name, vec
-- 
2.26.3


^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH kvm-unit-tests 2/8] arm/arm64: Move setup_vm into setup
  2021-04-07 18:59 [PATCH kvm-unit-tests 0/8] arm/arm64: Prepare for target-efi Andrew Jones
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler Andrew Jones
@ 2021-04-07 18:59 ` Andrew Jones
  2021-04-09 17:24   ` Nikos Nikoleris
  2021-04-14 15:19   ` Alexandru Elisei
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 3/8] pci-testdev: ioremap regions Andrew Jones
                   ` (5 subsequent siblings)
  7 siblings, 2 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-07 18:59 UTC (permalink / raw)
  To: kvm; +Cc: alexandru.elisei, nikos.nikoleris, andre.przywara, eric.auger

Consolidate our setup calls to reduce the amount we need to do from
init::start. Also remove a couple of pointless comments from setup().

Signed-off-by: Andrew Jones <drjones@redhat.com>
---
 arm/cstart.S    | 6 ------
 arm/cstart64.S  | 5 -----
 lib/arm/setup.c | 7 +++++--
 3 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/arm/cstart.S b/arm/cstart.S
index 653ab1e8a141..731f841695ce 100644
--- a/arm/cstart.S
+++ b/arm/cstart.S
@@ -81,12 +81,7 @@ start:
 	/* complete setup */
 	pop	{r0-r1}
 	bl	setup
-	bl	get_mmu_off
-	cmp	r0, #0
-	bne	1f
-	bl	setup_vm
 
-1:
 	/* run the test */
 	ldr	r0, =__argc
 	ldr	r0, [r0]
@@ -108,7 +103,6 @@ enable_vfp:
 	vmsr	fpexc, r0
 	mov	pc, lr
 
-.global get_mmu_off
 get_mmu_off:
 	ldr	r0, =auxinfo
 	ldr	r0, [r0, #4]
diff --git a/arm/cstart64.S b/arm/cstart64.S
index d39cf4dfb99c..add60a2b4e74 100644
--- a/arm/cstart64.S
+++ b/arm/cstart64.S
@@ -95,11 +95,7 @@ start:
 	/* complete setup */
 	mov	x0, x4				// restore the addr of the dtb
 	bl	setup
-	bl	get_mmu_off
-	cbnz	x0, 1f
-	bl	setup_vm
 
-1:
 	/* run the test */
 	adrp	x0, __argc
 	ldr	w0, [x0, :lo12:__argc]
@@ -113,7 +109,6 @@ start:
 
 .text
 
-.globl get_mmu_off
 get_mmu_off:
 	adrp	x0, auxinfo
 	ldr	x0, [x0, :lo12:auxinfo + 8]
diff --git a/lib/arm/setup.c b/lib/arm/setup.c
index 751ba980000a..9c16f6004e9f 100644
--- a/lib/arm/setup.c
+++ b/lib/arm/setup.c
@@ -16,6 +16,8 @@
 #include <alloc.h>
 #include <alloc_phys.h>
 #include <alloc_page.h>
+#include <vmalloc.h>
+#include <auxinfo.h>
 #include <argv.h>
 #include <asm/thread_info.h>
 #include <asm/setup.h>
@@ -233,7 +235,6 @@ void setup(const void *fdt)
 		freemem += initrd_size;
 	}
 
-	/* call init functions */
 	mem_init(PAGE_ALIGN((unsigned long)freemem));
 	cpu_init();
 
@@ -243,7 +244,6 @@ void setup(const void *fdt)
 	/* mem_init must be called before io_init */
 	io_init();
 
-	/* finish setup */
 	timer_save_state();
 
 	ret = dt_get_bootargs(&bootargs);
@@ -256,4 +256,7 @@ void setup(const void *fdt)
 		memcpy(env, initrd, initrd_size);
 		setup_env(env, initrd_size);
 	}
+
+	if (!(auxinfo.flags & AUXINFO_MMU_OFF))
+		setup_vm();
 }
-- 
2.26.3


^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH kvm-unit-tests 3/8] pci-testdev: ioremap regions
  2021-04-07 18:59 [PATCH kvm-unit-tests 0/8] arm/arm64: Prepare for target-efi Andrew Jones
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler Andrew Jones
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 2/8] arm/arm64: Move setup_vm into setup Andrew Jones
@ 2021-04-07 18:59 ` Andrew Jones
  2021-04-13 14:12   ` Nikos Nikoleris
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 4/8] arm/arm64: mmu: Stop mapping an assumed IO region Andrew Jones
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 38+ messages in thread
From: Andrew Jones @ 2021-04-07 18:59 UTC (permalink / raw)
  To: kvm; +Cc: alexandru.elisei, nikos.nikoleris, andre.przywara, eric.auger

Don't assume the physical addresses used with PCI have already been
identity mapped.

Signed-off-by: Andrew Jones <drjones@redhat.com>
---
 lib/pci-host-generic.c | 5 ++---
 lib/pci-host-generic.h | 4 ++--
 lib/pci-testdev.c      | 4 ++++
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/pci-host-generic.c b/lib/pci-host-generic.c
index 818150dc0a66..de93b8feac39 100644
--- a/lib/pci-host-generic.c
+++ b/lib/pci-host-generic.c
@@ -122,7 +122,7 @@ static struct pci_host_bridge *pci_dt_probe(void)
 		      sizeof(host->addr_space[0]) * nr_addr_spaces);
 	assert(host != NULL);
 
-	host->start		= base.addr;
+	host->start		= ioremap(base.addr, base.size);
 	host->size		= base.size;
 	host->bus		= bus;
 	host->bus_max		= bus_max;
@@ -279,8 +279,7 @@ phys_addr_t pci_host_bridge_get_paddr(u64 pci_addr)
 
 static void __iomem *pci_get_dev_conf(struct pci_host_bridge *host, int devfn)
 {
-	return (void __iomem *)(unsigned long)
-		host->start + (devfn << PCI_ECAM_DEVFN_SHIFT);
+	return (void __iomem *)host->start + (devfn << PCI_ECAM_DEVFN_SHIFT);
 }
 
 u8 pci_config_readb(pcidevaddr_t dev, u8 off)
diff --git a/lib/pci-host-generic.h b/lib/pci-host-generic.h
index fd30e7c74ed8..0ffe6380ec8f 100644
--- a/lib/pci-host-generic.h
+++ b/lib/pci-host-generic.h
@@ -18,8 +18,8 @@ struct pci_addr_space {
 };
 
 struct pci_host_bridge {
-	phys_addr_t		start;
-	phys_addr_t		size;
+	void __iomem		*start;
+	size_t			size;
 	int			bus;
 	int			bus_max;
 	int			nr_addr_spaces;
diff --git a/lib/pci-testdev.c b/lib/pci-testdev.c
index 039bb44781c1..4f2e5663b2d6 100644
--- a/lib/pci-testdev.c
+++ b/lib/pci-testdev.c
@@ -185,7 +185,11 @@ int pci_testdev(void)
 	mem = ioremap(addr, PAGE_SIZE);
 
 	addr = pci_bar_get_addr(&pci_dev, 1);
+#if defined(__i386__) || defined(__x86_64__)
 	io = (void *)(unsigned long)addr;
+#else
+	io = ioremap(addr, PAGE_SIZE);
+#endif
 
 	nr_tests += pci_testdev_all(mem, &pci_testdev_mem_ops);
 	nr_tests += pci_testdev_all(io, &pci_testdev_io_ops);
-- 
2.26.3


^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH kvm-unit-tests 4/8] arm/arm64: mmu: Stop mapping an assumed IO region
  2021-04-07 18:59 [PATCH kvm-unit-tests 0/8] arm/arm64: Prepare for target-efi Andrew Jones
                   ` (2 preceding siblings ...)
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 3/8] pci-testdev: ioremap regions Andrew Jones
@ 2021-04-07 18:59 ` Andrew Jones
  2021-04-13 14:06   ` Nikos Nikoleris
  2021-04-14 15:42   ` Alexandru Elisei
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 5/8] arm/arm64: mmu: Remove memory layout assumptions Andrew Jones
                   ` (3 subsequent siblings)
  7 siblings, 2 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-07 18:59 UTC (permalink / raw)
  To: kvm; +Cc: alexandru.elisei, nikos.nikoleris, andre.przywara, eric.auger

By providing a proper ioremap function, we can just rely on devices
calling it for each region they need (as they already do) instead of
mapping a big assumed I/O range. The persistent maps weirdness (each
ioremapped range is recorded so it can be replayed into page tables
that are created later) allows us to call setup_vm after io_init. Why
don't we just call setup_vm
before io_init, I hear you ask? Well, that's because tests like sieve
want to start with the MMU off and later call setup_vm, and all the
while have working I/O. Some unit tests are just really demanding...

Signed-off-by: Andrew Jones <drjones@redhat.com>
---
 lib/arm/asm/io.h      |  6 ++++
 lib/arm/asm/mmu-api.h |  1 +
 lib/arm/asm/mmu.h     |  1 +
 lib/arm/asm/page.h    |  2 ++
 lib/arm/mmu.c         | 82 ++++++++++++++++++++++++++++++++++++++-----
 lib/arm64/asm/io.h    |  6 ++++
 lib/arm64/asm/mmu.h   |  1 +
 lib/arm64/asm/page.h  |  2 ++
 8 files changed, 93 insertions(+), 8 deletions(-)

diff --git a/lib/arm/asm/io.h b/lib/arm/asm/io.h
index ba3b0b2412ad..e4caa6ff5d1e 100644
--- a/lib/arm/asm/io.h
+++ b/lib/arm/asm/io.h
@@ -77,6 +77,12 @@ static inline void __raw_writel(u32 val, volatile void __iomem *addr)
 		     : "r" (val));
 }
 
+#define ioremap ioremap
+static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
+{
+	return __ioremap(phys_addr, size);
+}
+
 #define virt_to_phys virt_to_phys
 static inline phys_addr_t virt_to_phys(const volatile void *x)
 {
diff --git a/lib/arm/asm/mmu-api.h b/lib/arm/asm/mmu-api.h
index 05fc12b5afb8..b9a5a8f6b3c1 100644
--- a/lib/arm/asm/mmu-api.h
+++ b/lib/arm/asm/mmu-api.h
@@ -17,6 +17,7 @@ extern void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
 extern void mmu_set_range_ptes(pgd_t *pgtable, uintptr_t virt_offset,
 			       phys_addr_t phys_start, phys_addr_t phys_end,
 			       pgprot_t prot);
+extern void mmu_set_persistent_maps(pgd_t *pgtable);
 extern pteval_t *mmu_get_pte(pgd_t *pgtable, uintptr_t vaddr);
 extern void mmu_clear_user(pgd_t *pgtable, unsigned long vaddr);
 #endif
diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h
index 122874b8aebe..d88a4f16df42 100644
--- a/lib/arm/asm/mmu.h
+++ b/lib/arm/asm/mmu.h
@@ -12,6 +12,7 @@
 #define PTE_SHARED		L_PTE_SHARED
 #define PTE_AF			PTE_EXT_AF
 #define PTE_WBWA		L_PTE_MT_WRITEALLOC
+#define PTE_UNCACHED		L_PTE_MT_UNCACHED
 
 /* See B3.18.7 TLB maintenance operations */
 
diff --git a/lib/arm/asm/page.h b/lib/arm/asm/page.h
index 1fb5cd26ac66..8eb4a883808e 100644
--- a/lib/arm/asm/page.h
+++ b/lib/arm/asm/page.h
@@ -47,5 +47,7 @@ typedef struct { pteval_t pgprot; } pgprot_t;
 extern phys_addr_t __virt_to_phys(unsigned long addr);
 extern unsigned long __phys_to_virt(phys_addr_t addr);
 
+extern void *__ioremap(phys_addr_t phys_addr, size_t size);
+
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASMARM_PAGE_H_ */
diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
index 15eef007f256..a7b7ae51afe3 100644
--- a/lib/arm/mmu.c
+++ b/lib/arm/mmu.c
@@ -11,6 +11,7 @@
 #include <asm/mmu.h>
 #include <asm/setup.h>
 #include <asm/page.h>
+#include <asm/io.h>
 
 #include "alloc_page.h"
 #include "vmalloc.h"
@@ -21,6 +22,57 @@
 
 extern unsigned long etext;
 
+#define MMU_MAX_PERSISTENT_MAPS 64
+
+struct mmu_persistent_map {
+	uintptr_t virt_offset;
+	phys_addr_t phys_start;
+	phys_addr_t phys_end;
+	pgprot_t prot;
+	bool sect;
+};
+
+static struct mmu_persistent_map mmu_persistent_maps[MMU_MAX_PERSISTENT_MAPS];
+
+static void
+mmu_set_persistent_range(uintptr_t virt_offset, phys_addr_t phys_start,
+			 phys_addr_t phys_end, pgprot_t prot, bool sect)
+{
+	int i;
+
+	assert(phys_end);
+
+	for (i = 0; i < MMU_MAX_PERSISTENT_MAPS; ++i) {
+		if (!mmu_persistent_maps[i].phys_end)
+			break;
+	}
+	assert(i < MMU_MAX_PERSISTENT_MAPS);
+
+	mmu_persistent_maps[i] = (struct mmu_persistent_map){
+		.virt_offset = virt_offset,
+		.phys_start = phys_start,
+		.phys_end = phys_end,
+		.prot = prot,
+		.sect = sect,
+	};
+}
+
+void mmu_set_persistent_maps(pgd_t *pgtable)
+{
+	struct mmu_persistent_map *map;
+
+	for (map = &mmu_persistent_maps[0]; map->phys_end; ++map) {
+		if (map->sect)
+			mmu_set_range_sect(pgtable, map->virt_offset,
+					   map->phys_start, map->phys_end,
+					   map->prot);
+		else
+			mmu_set_range_ptes(pgtable, map->virt_offset,
+					   map->phys_start, map->phys_end,
+					   map->prot);
+	}
+}
+
 pgd_t *mmu_idmap;
 
 /* CPU 0 starts with disabled MMU */
@@ -157,7 +209,6 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
 void *setup_mmu(phys_addr_t phys_end)
 {
 	uintptr_t code_end = (uintptr_t)&etext;
-	struct mem_region *r;
 
 	/* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */
 	if (phys_end > (3ul << 30))
@@ -172,13 +223,6 @@ void *setup_mmu(phys_addr_t phys_end)
 
 	mmu_idmap = alloc_page();
 
-	for (r = mem_regions; r->end; ++r) {
-		if (!(r->flags & MR_F_IO))
-			continue;
-		mmu_set_range_sect(mmu_idmap, r->start, r->start, r->end,
-				   __pgprot(PMD_SECT_UNCACHED | PMD_SECT_USER));
-	}
-
 	/* armv8 requires code shared between EL1 and EL0 to be read-only */
 	mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET,
 		PHYS_OFFSET, code_end,
@@ -188,10 +232,32 @@ void *setup_mmu(phys_addr_t phys_end)
 		code_end, phys_end,
 		__pgprot(PTE_WBWA | PTE_USER));
 
+	mmu_set_persistent_maps(mmu_idmap);
+
 	mmu_enable(mmu_idmap);
 	return mmu_idmap;
 }
 
+void __iomem *__ioremap(phys_addr_t phys_addr, size_t size)
+{
+	phys_addr_t paddr_aligned = phys_addr & PAGE_MASK;
+	phys_addr_t paddr_end = PAGE_ALIGN(phys_addr + size);
+	pgprot_t prot = __pgprot(PTE_UNCACHED | PTE_USER);
+
+	assert(sizeof(long) == 8 || !(phys_addr >> 32));
+
+	mmu_set_persistent_range(paddr_aligned, paddr_aligned, paddr_end,
+				 prot, false);
+
+	if (mmu_enabled()) {
+		pgd_t *pgtable = current_thread_info()->pgtable;
+		mmu_set_range_ptes(pgtable, paddr_aligned, paddr_aligned,
+				   paddr_end, prot);
+	}
+
+	return (void __iomem *)(unsigned long)phys_addr;
+}
+
 phys_addr_t __virt_to_phys(unsigned long addr)
 {
 	if (mmu_enabled()) {
diff --git a/lib/arm64/asm/io.h b/lib/arm64/asm/io.h
index e0a03b250d5b..be19f471c0fa 100644
--- a/lib/arm64/asm/io.h
+++ b/lib/arm64/asm/io.h
@@ -71,6 +71,12 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
 	return val;
 }
 
+#define ioremap ioremap
+static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
+{
+	return __ioremap(phys_addr, size);
+}
+
 #define virt_to_phys virt_to_phys
 static inline phys_addr_t virt_to_phys(const volatile void *x)
 {
diff --git a/lib/arm64/asm/mmu.h b/lib/arm64/asm/mmu.h
index 72d75eafc882..72371b2d9fe3 100644
--- a/lib/arm64/asm/mmu.h
+++ b/lib/arm64/asm/mmu.h
@@ -8,6 +8,7 @@
 #include <asm/barrier.h>
 
 #define PMD_SECT_UNCACHED	PMD_ATTRINDX(MT_DEVICE_nGnRE)
+#define PTE_UNCACHED		PTE_ATTRINDX(MT_DEVICE_nGnRE)
 #define PTE_WBWA		PTE_ATTRINDX(MT_NORMAL)
 
 static inline void flush_tlb_all(void)
diff --git a/lib/arm64/asm/page.h b/lib/arm64/asm/page.h
index ae4484b22114..d0fac6ea563d 100644
--- a/lib/arm64/asm/page.h
+++ b/lib/arm64/asm/page.h
@@ -72,5 +72,7 @@ typedef struct { pteval_t pgprot; } pgprot_t;
 extern phys_addr_t __virt_to_phys(unsigned long addr);
 extern unsigned long __phys_to_virt(phys_addr_t addr);
 
+extern void *__ioremap(phys_addr_t phys_addr, size_t size);
+
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASMARM64_PAGE_H_ */
-- 
2.26.3


^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH kvm-unit-tests 5/8] arm/arm64: mmu: Remove memory layout assumptions
  2021-04-07 18:59 [PATCH kvm-unit-tests 0/8] arm/arm64: Prepare for target-efi Andrew Jones
                   ` (3 preceding siblings ...)
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 4/8] arm/arm64: mmu: Stop mapping an assumed IO region Andrew Jones
@ 2021-04-07 18:59 ` Andrew Jones
  2021-04-13 14:27   ` Nikos Nikoleris
  2021-04-15 15:48   ` Alexandru Elisei
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate " Andrew Jones
                   ` (2 subsequent siblings)
  7 siblings, 2 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-07 18:59 UTC (permalink / raw)
  To: kvm; +Cc: alexandru.elisei, nikos.nikoleris, andre.przywara, eric.auger

Rather than making too many assumptions about the memory layout
in mmu code, just set up the page tables per the memory regions
(which means putting all the memory layout assumptions in setup).
To ensure we get the right default flags set we need to split the
primary region into two regions for code and data.

We still only expect the primary regions to be present, but the
next patch will remove that assumption too.

Signed-off-by: Andrew Jones <drjones@redhat.com>
---
 lib/arm/asm/setup.h |  1 +
 lib/arm/mmu.c       | 26 +++++++++++++++-----------
 lib/arm/setup.c     | 22 ++++++++++++++--------
 3 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/lib/arm/asm/setup.h b/lib/arm/asm/setup.h
index c8afb2493f8d..210c14f818fb 100644
--- a/lib/arm/asm/setup.h
+++ b/lib/arm/asm/setup.h
@@ -15,6 +15,7 @@ extern int nr_cpus;
 
 #define MR_F_PRIMARY		(1U << 0)
 #define MR_F_IO			(1U << 1)
+#define MR_F_CODE		(1U << 2)
 #define MR_F_UNKNOWN		(1U << 31)
 
 struct mem_region {
diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
index a7b7ae51afe3..edd2b9da809b 100644
--- a/lib/arm/mmu.c
+++ b/lib/arm/mmu.c
@@ -20,8 +20,6 @@
 
 #include <linux/compiler.h>
 
-extern unsigned long etext;
-
 #define MMU_MAX_PERSISTENT_MAPS 64
 
 struct mmu_persistent_map {
@@ -208,7 +206,7 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
 
 void *setup_mmu(phys_addr_t phys_end)
 {
-	uintptr_t code_end = (uintptr_t)&etext;
+	struct mem_region *r;
 
 	/* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */
 	if (phys_end > (3ul << 30))
@@ -223,14 +221,20 @@ void *setup_mmu(phys_addr_t phys_end)
 
 	mmu_idmap = alloc_page();
 
-	/* armv8 requires code shared between EL1 and EL0 to be read-only */
-	mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET,
-		PHYS_OFFSET, code_end,
-		__pgprot(PTE_WBWA | PTE_RDONLY | PTE_USER));
-
-	mmu_set_range_ptes(mmu_idmap, code_end,
-		code_end, phys_end,
-		__pgprot(PTE_WBWA | PTE_USER));
+	for (r = mem_regions; r->end; ++r) {
+		if (r->flags & MR_F_IO) {
+			continue;
+		} else if (r->flags & MR_F_CODE) {
+			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected code region");
+			/* armv8 requires code shared between EL1 and EL0 to be read-only */
+			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
+					   __pgprot(PTE_WBWA | PTE_USER | PTE_RDONLY));
+		} else {
+			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected data region");
+			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
+					   __pgprot(PTE_WBWA | PTE_USER));
+		}
+	}
 
 	mmu_set_persistent_maps(mmu_idmap);
 
diff --git a/lib/arm/setup.c b/lib/arm/setup.c
index 9c16f6004e9f..9da5d24b0be9 100644
--- a/lib/arm/setup.c
+++ b/lib/arm/setup.c
@@ -31,6 +31,7 @@
 #define NR_INITIAL_MEM_REGIONS 16
 
 extern unsigned long stacktop;
+extern unsigned long etext;
 
 struct timer_state __timer_state;
 
@@ -88,10 +89,12 @@ unsigned int mem_region_get_flags(phys_addr_t paddr)
 
 static void mem_init(phys_addr_t freemem_start)
 {
+	phys_addr_t code_end = (phys_addr_t)(unsigned long)&etext;
 	struct dt_pbus_reg regs[NR_INITIAL_MEM_REGIONS];
-	struct mem_region primary, mem = {
+	struct mem_region mem = {
 		.start = (phys_addr_t)-1,
 	};
+	struct mem_region *primary = NULL;
 	phys_addr_t base, top;
 	int nr_regs, nr_io = 0, i;
 
@@ -110,8 +113,6 @@ static void mem_init(phys_addr_t freemem_start)
 	nr_regs = dt_get_memory_params(regs, NR_INITIAL_MEM_REGIONS - nr_io);
 	assert(nr_regs > 0);
 
-	primary = (struct mem_region){ 0 };
-
 	for (i = 0; i < nr_regs; ++i) {
 		struct mem_region *r = &mem_regions[nr_io + i];
 
@@ -123,7 +124,7 @@ static void mem_init(phys_addr_t freemem_start)
 		 */
 		if (freemem_start >= r->start && freemem_start < r->end) {
 			r->flags |= MR_F_PRIMARY;
-			primary = *r;
+			primary = r;
 		}
 
 		/*
@@ -135,13 +136,18 @@ static void mem_init(phys_addr_t freemem_start)
 		if (r->end > mem.end)
 			mem.end = r->end;
 	}
-	assert(primary.end != 0);
+	assert(primary);
 	assert(!(mem.start & ~PHYS_MASK) && !((mem.end - 1) & ~PHYS_MASK));
 
-	__phys_offset = primary.start;	/* PHYS_OFFSET */
-	__phys_end = primary.end;	/* PHYS_END */
+	__phys_offset = primary->start;	/* PHYS_OFFSET */
+	__phys_end = primary->end;	/* PHYS_END */
+
+	/* Split the primary region into two regions; code and data */
+	mem.start = code_end, mem.end = primary->end, mem.flags = MR_F_PRIMARY;
+	mem_regions[nr_io + i] = mem;
+	primary->end = code_end, primary->flags |= MR_F_CODE;
 
-	phys_alloc_init(freemem_start, primary.end - freemem_start);
+	phys_alloc_init(freemem_start, __phys_end - freemem_start);
 	phys_alloc_set_minimum_alignment(SMP_CACHE_BYTES);
 
 	phys_alloc_get_unused(&base, &top);
-- 
2.26.3


^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate memory layout assumptions
  2021-04-07 18:59 [PATCH kvm-unit-tests 0/8] arm/arm64: Prepare for target-efi Andrew Jones
                   ` (4 preceding siblings ...)
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 5/8] arm/arm64: mmu: Remove memory layout assumptions Andrew Jones
@ 2021-04-07 18:59 ` Andrew Jones
  2021-04-13 16:41   ` Nikos Nikoleris
  2021-04-15 16:59   ` Alexandru Elisei
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 7/8] chr-testdev: Silently fail init Andrew Jones
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 8/8] arm/arm64: psci: don't assume method is hvc Andrew Jones
  7 siblings, 2 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-07 18:59 UTC (permalink / raw)
  To: kvm; +Cc: alexandru.elisei, nikos.nikoleris, andre.przywara, eric.auger

Keep as many memory layout assumptions as possible in init::start
and a single setup function. This prepares us for calling setup()
from different start functions which have been linked with different
linker scripts. To do this, stacktop is only referenced from
init::start, making freemem_start a parameter to setup(). We also
split mem_init() into three parts, one that populates the mem regions
per the DT, one that populates the mem regions per assumptions,
and one that does the mem init. The concept of a primary region
is dropped, but we add a sanity check for the absence of memory
holes, because we don't know how to deal with them yet.

Signed-off-by: Andrew Jones <drjones@redhat.com>
---
 arm/cstart.S        |   4 +-
 arm/cstart64.S      |   2 +
 arm/flat.lds        |  23 ++++++
 lib/arm/asm/setup.h |   8 +--
 lib/arm/mmu.c       |   2 -
 lib/arm/setup.c     | 165 ++++++++++++++++++++++++--------------------
 6 files changed, 123 insertions(+), 81 deletions(-)

diff --git a/arm/cstart.S b/arm/cstart.S
index 731f841695ce..14444124c43f 100644
--- a/arm/cstart.S
+++ b/arm/cstart.S
@@ -80,7 +80,9 @@ start:
 
 	/* complete setup */
 	pop	{r0-r1}
-	bl	setup
+	mov	r1, #0
+	ldr	r2, =stacktop		@ r1,r2 is the base of free memory
+	bl	setup			@ r0 is the addr of the dtb
 
 	/* run the test */
 	ldr	r0, =__argc
diff --git a/arm/cstart64.S b/arm/cstart64.S
index add60a2b4e74..434723d4b45d 100644
--- a/arm/cstart64.S
+++ b/arm/cstart64.S
@@ -94,6 +94,8 @@ start:
 
 	/* complete setup */
 	mov	x0, x4				// restore the addr of the dtb
+	adrp	x1, stacktop
+	add	x1, x1, :lo12:stacktop		// x1 is the base of free memory
 	bl	setup
 
 	/* run the test */
diff --git a/arm/flat.lds b/arm/flat.lds
index 6ed377c0eaa0..6fb459efb815 100644
--- a/arm/flat.lds
+++ b/arm/flat.lds
@@ -1,3 +1,26 @@
+/*
+ * init::start will pass stacktop to setup() as the base of free memory.
+ * setup() will then move the FDT and initrd to that base before calling
+ * mem_init(). With those movements and this linker script, we'll end up
+ * having the following memory layout:
+ *
+ *    +----------------------+   <-- top of physical memory
+ *    |                      |
+ *    ~                      ~
+ *    |                      |
+ *    +----------------------+   <-- top of initrd
+ *    |                      |
+ *    +----------------------+   <-- top of FDT
+ *    |                      |
+ *    +----------------------+   <-- top of cpu0's stack
+ *    |                      |
+ *    +----------------------+   <-- top of text/data/bss sections
+ *    |                      |
+ *    |                      |
+ *    +----------------------+   <-- load address
+ *    |                      |
+ *    +----------------------+   <-- physical address 0x0
+ */
 
 SECTIONS
 {
diff --git a/lib/arm/asm/setup.h b/lib/arm/asm/setup.h
index 210c14f818fb..f0e70b119fb0 100644
--- a/lib/arm/asm/setup.h
+++ b/lib/arm/asm/setup.h
@@ -13,9 +13,8 @@
 extern u64 cpus[NR_CPUS];	/* per-cpu IDs (MPIDRs) */
 extern int nr_cpus;
 
-#define MR_F_PRIMARY		(1U << 0)
-#define MR_F_IO			(1U << 1)
-#define MR_F_CODE		(1U << 2)
+#define MR_F_IO			(1U << 0)
+#define MR_F_CODE		(1U << 1)
 #define MR_F_UNKNOWN		(1U << 31)
 
 struct mem_region {
@@ -26,6 +25,7 @@ struct mem_region {
 extern struct mem_region *mem_regions;
 extern phys_addr_t __phys_offset, __phys_end;
 
+extern struct mem_region *mem_region_find(phys_addr_t paddr);
 extern unsigned int mem_region_get_flags(phys_addr_t paddr);
 
 #define PHYS_OFFSET		(__phys_offset)
@@ -35,6 +35,6 @@ extern unsigned int mem_region_get_flags(phys_addr_t paddr);
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 #define SMP_CACHE_BYTES		L1_CACHE_BYTES
 
-void setup(const void *fdt);
+void setup(const void *fdt, phys_addr_t freemem_start);
 
 #endif /* _ASMARM_SETUP_H_ */
diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
index edd2b9da809b..7cff22a12e86 100644
--- a/lib/arm/mmu.c
+++ b/lib/arm/mmu.c
@@ -225,12 +225,10 @@ void *setup_mmu(phys_addr_t phys_end)
 		if (r->flags & MR_F_IO) {
 			continue;
 		} else if (r->flags & MR_F_CODE) {
-			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected code region");
 			/* armv8 requires code shared between EL1 and EL0 to be read-only */
 			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
 					   __pgprot(PTE_WBWA | PTE_USER | PTE_RDONLY));
 		} else {
-			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected data region");
 			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
 					   __pgprot(PTE_WBWA | PTE_USER));
 		}
diff --git a/lib/arm/setup.c b/lib/arm/setup.c
index 9da5d24b0be9..5cda2d919d2b 100644
--- a/lib/arm/setup.c
+++ b/lib/arm/setup.c
@@ -28,9 +28,9 @@
 
 #include "io.h"
 
-#define NR_INITIAL_MEM_REGIONS 16
+#define MAX_DT_MEM_REGIONS	16
+#define NR_EXTRA_MEM_REGIONS	16
 
-extern unsigned long stacktop;
 extern unsigned long etext;
 
 struct timer_state __timer_state;
@@ -41,7 +41,7 @@ u32 initrd_size;
 u64 cpus[NR_CPUS] = { [0 ... NR_CPUS-1] = (u64)~0 };
 int nr_cpus;
 
-static struct mem_region __initial_mem_regions[NR_INITIAL_MEM_REGIONS + 1];
+static struct mem_region __initial_mem_regions[MAX_DT_MEM_REGIONS + NR_EXTRA_MEM_REGIONS];
 struct mem_region *mem_regions = __initial_mem_regions;
 phys_addr_t __phys_offset, __phys_end;
 
@@ -75,28 +75,62 @@ static void cpu_init(void)
 	set_cpu_online(0, true);
 }
 
-unsigned int mem_region_get_flags(phys_addr_t paddr)
+static int mem_regions_next_index(void)
 {
 	struct mem_region *r;
+	int n;
 
-	for (r = mem_regions; r->end; ++r) {
-		if (paddr >= r->start && paddr < r->end)
-			return r->flags;
+	for (r = mem_regions, n = 0; r->end; ++r, ++n)
+		;
+	return n;
+}
+
+static void mem_regions_get_dt_regions(void)
+{
+	struct dt_pbus_reg regs[MAX_DT_MEM_REGIONS];
+	int nr_regs, i, n;
+
+	nr_regs = dt_get_memory_params(regs, MAX_DT_MEM_REGIONS);
+	assert(nr_regs > 0);
+
+	n = mem_regions_next_index();
+
+	for (i = 0; i < nr_regs; ++i) {
+		struct mem_region *r = &mem_regions[n + i];
+		r->start = regs[i].addr;
+		r->end = regs[i].addr + regs[i].size;
 	}
+}
+
+struct mem_region *mem_region_find(phys_addr_t paddr)
+{
+	struct mem_region *r;
+
+	for (r = mem_regions; r->end; ++r)
+		if (paddr >= r->start && paddr < r->end)
+			return r;
+	return NULL;
+}
 
-	return MR_F_UNKNOWN;
+unsigned int mem_region_get_flags(phys_addr_t paddr)
+{
+	struct mem_region *r = mem_region_find(paddr);
+	return r ? r->flags : MR_F_UNKNOWN;
 }
 
-static void mem_init(phys_addr_t freemem_start)
+static void mem_regions_add_assumed(void)
 {
 	phys_addr_t code_end = (phys_addr_t)(unsigned long)&etext;
-	struct dt_pbus_reg regs[NR_INITIAL_MEM_REGIONS];
-	struct mem_region mem = {
-		.start = (phys_addr_t)-1,
-	};
-	struct mem_region *primary = NULL;
-	phys_addr_t base, top;
-	int nr_regs, nr_io = 0, i;
+	int n = mem_regions_next_index();
+	struct mem_region mem = {0}, *r;
+
+	r = mem_region_find(code_end - 1);
+	assert(r);
+
+	/* Split the region with the code into two regions; code and data */
+	mem.start = code_end, mem.end = r->end;
+	mem_regions[n++] = mem;
+	r->end = code_end, r->flags = MR_F_CODE;
 
 	/*
 	 * mach-virt I/O regions:
@@ -104,50 +138,47 @@ static void mem_init(phys_addr_t freemem_start)
 	 *   - 512M at 256G (arm64, arm uses highmem=off)
 	 *   - 512G at 512G (arm64, arm uses highmem=off)
 	 */
-	mem_regions[nr_io++] = (struct mem_region){ 0, (1ul << 30), MR_F_IO };
+	mem_regions[n++] = (struct mem_region){ 0, (1ul << 30), MR_F_IO };
 #ifdef __aarch64__
-	mem_regions[nr_io++] = (struct mem_region){ (1ul << 38), (1ul << 38) | (1ul << 29), MR_F_IO };
-	mem_regions[nr_io++] = (struct mem_region){ (1ul << 39), (1ul << 40), MR_F_IO };
+	mem_regions[n++] = (struct mem_region){ (1ul << 38), (1ul << 38) | (1ul << 29), MR_F_IO };
+	mem_regions[n++] = (struct mem_region){ (1ul << 39), (1ul << 40), MR_F_IO };
 #endif
+}
 
-	nr_regs = dt_get_memory_params(regs, NR_INITIAL_MEM_REGIONS - nr_io);
-	assert(nr_regs > 0);
-
-	for (i = 0; i < nr_regs; ++i) {
-		struct mem_region *r = &mem_regions[nr_io + i];
+static void mem_init(phys_addr_t freemem_start)
+{
+	phys_addr_t base, top;
+	struct mem_region *freemem, *r, mem = {
+		.start = (phys_addr_t)-1,
+	};
 
-		r->start = regs[i].addr;
-		r->end = regs[i].addr + regs[i].size;
+	freemem = mem_region_find(freemem_start);
+	assert(freemem && !(freemem->flags & (MR_F_IO | MR_F_CODE)));
 
-		/*
-		 * pick the region we're in for our primary region
-		 */
-		if (freemem_start >= r->start && freemem_start < r->end) {
-			r->flags |= MR_F_PRIMARY;
-			primary = r;
+	for (r = mem_regions; r->end; ++r) {
+		assert(!(r->start & ~PHYS_MASK) && !((r->end - 1) & ~PHYS_MASK));
+		if (!(r->flags & MR_F_IO)) {
+			if (r->start < mem.start)
+				mem.start = r->start;
+			if (r->end > mem.end)
+				mem.end = r->end;
 		}
-
-		/*
-		 * set the lowest and highest addresses found,
-		 * ignoring potential gaps
-		 */
-		if (r->start < mem.start)
-			mem.start = r->start;
-		if (r->end > mem.end)
-			mem.end = r->end;
 	}
-	assert(primary);
-	assert(!(mem.start & ~PHYS_MASK) && !((mem.end - 1) & ~PHYS_MASK));
+	assert(mem.end);
+
+	/* Check for holes */
+	r = mem_region_find(mem.start);
+	while (r && r->end != mem.end)
+		r = mem_region_find(r->end);
+	assert(r);
 
-	__phys_offset = primary->start;	/* PHYS_OFFSET */
-	__phys_end = primary->end;	/* PHYS_END */
+	/* Ensure our selected freemem region is somewhere in our full range */
+	assert(freemem_start >= mem.start && freemem->end <= mem.end);
 
-	/* Split the primary region into two regions; code and data */
-	mem.start = code_end, mem.end = primary->end, mem.flags = MR_F_PRIMARY;
-	mem_regions[nr_io + i] = mem;
-	primary->end = code_end, primary->flags |= MR_F_CODE;
+	__phys_offset = mem.start;	/* PHYS_OFFSET */
+	__phys_end = mem.end;		/* PHYS_END */
 
-	phys_alloc_init(freemem_start, __phys_end - freemem_start);
+	phys_alloc_init(freemem_start, freemem->end - freemem_start);
 	phys_alloc_set_minimum_alignment(SMP_CACHE_BYTES);
 
 	phys_alloc_get_unused(&base, &top);
@@ -197,35 +228,17 @@ static void timer_save_state(void)
 	__timer_state.vtimer.irq_flags = fdt32_to_cpu(data[8]);
 }
 
-void setup(const void *fdt)
+void setup(const void *fdt, phys_addr_t freemem_start)
 {
-	void *freemem = &stacktop;
+	void *freemem;
 	const char *bootargs, *tmp;
 	u32 fdt_size;
 	int ret;
 
-	/*
-	 * Before calling mem_init we need to move the fdt and initrd
-	 * to safe locations. We move them to construct the memory
-	 * map illustrated below:
-	 *
-	 *    +----------------------+   <-- top of physical memory
-	 *    |                      |
-	 *    ~                      ~
-	 *    |                      |
-	 *    +----------------------+   <-- top of initrd
-	 *    |                      |
-	 *    +----------------------+   <-- top of FDT
-	 *    |                      |
-	 *    +----------------------+   <-- top of cpu0's stack
-	 *    |                      |
-	 *    +----------------------+   <-- top of text/data/bss sections,
-	 *    |                      |       see arm/flat.lds
-	 *    |                      |
-	 *    +----------------------+   <-- load address
-	 *    |                      |
-	 *    +----------------------+
-	 */
+	assert(sizeof(long) == 8 || freemem_start < (3ul << 30));
+	freemem = (void *)(unsigned long)freemem_start;
+
+	/* Move the FDT to the base of free memory */
 	fdt_size = fdt_totalsize(fdt);
 	ret = fdt_move(fdt, freemem, fdt_size);
 	assert(ret == 0);
@@ -233,6 +246,7 @@ void setup(const void *fdt)
 	assert(ret == 0);
 	freemem += fdt_size;
 
+	/* Move the initrd to the top of the FDT */
 	ret = dt_get_initrd(&tmp, &initrd_size);
 	assert(ret == 0 || ret == -FDT_ERR_NOTFOUND);
 	if (ret == 0) {
@@ -241,7 +255,10 @@ void setup(const void *fdt)
 		freemem += initrd_size;
 	}
 
+	mem_regions_get_dt_regions();
+	mem_regions_add_assumed();
 	mem_init(PAGE_ALIGN((unsigned long)freemem));
+
 	cpu_init();
 
 	/* cpu_init must be called before thread_info_init */
-- 
2.26.3


^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH kvm-unit-tests 7/8] chr-testdev: Silently fail init
  2021-04-07 18:59 [PATCH kvm-unit-tests 0/8] arm/arm64: Prepare for target-efi Andrew Jones
                   ` (5 preceding siblings ...)
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate " Andrew Jones
@ 2021-04-07 18:59 ` Andrew Jones
  2021-04-13 16:42   ` Nikos Nikoleris
  2021-04-15 17:03   ` Alexandru Elisei
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 8/8] arm/arm64: psci: don't assume method is hvc Andrew Jones
  7 siblings, 2 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-07 18:59 UTC (permalink / raw)
  To: kvm; +Cc: alexandru.elisei, nikos.nikoleris, andre.przywara, eric.auger

If there's no virtio-console / chr-testdev configured, then the user
probably didn't want them. Just silently fail rather than stating
the obvious.

Signed-off-by: Andrew Jones <drjones@redhat.com>
---
 lib/chr-testdev.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/lib/chr-testdev.c b/lib/chr-testdev.c
index 6890f63c8b29..b3c641a833fe 100644
--- a/lib/chr-testdev.c
+++ b/lib/chr-testdev.c
@@ -54,11 +54,8 @@ void chr_testdev_init(void)
 	int ret;
 
 	vcon = virtio_bind(VIRTIO_ID_CONSOLE);
-	if (vcon == NULL) {
-		printf("%s: %s: can't find a virtio-console\n",
-				__func__, TESTDEV_NAME);
+	if (vcon == NULL)
 		return;
-	}
 
 	ret = vcon->config->find_vqs(vcon, 2, vqs, NULL, io_names);
 	if (ret < 0) {
-- 
2.26.3


^ permalink raw reply	[flat|nested] 38+ messages in thread

* [PATCH kvm-unit-tests 8/8] arm/arm64: psci: don't assume method is hvc
  2021-04-07 18:59 [PATCH kvm-unit-tests 0/8] arm/arm64: Prepare for target-efi Andrew Jones
                   ` (6 preceding siblings ...)
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 7/8] chr-testdev: Silently fail init Andrew Jones
@ 2021-04-07 18:59 ` Andrew Jones
  2021-04-09 17:46   ` Nikos Nikoleris
  2021-04-19 16:33   ` Alexandru Elisei
  7 siblings, 2 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-07 18:59 UTC (permalink / raw)
  To: kvm; +Cc: alexandru.elisei, nikos.nikoleris, andre.przywara, eric.auger

The method can also be smc and it will be when running on bare metal.

Signed-off-by: Andrew Jones <drjones@redhat.com>
---
 arm/selftest.c     | 34 +++++++---------------------------
 lib/arm/asm/psci.h |  9 +++++++--
 lib/arm/psci.c     | 17 +++++++++++++++--
 lib/arm/setup.c    | 22 ++++++++++++++++++++++
 4 files changed, 51 insertions(+), 31 deletions(-)

diff --git a/arm/selftest.c b/arm/selftest.c
index 4495b161cdd5..9f459ed3d571 100644
--- a/arm/selftest.c
+++ b/arm/selftest.c
@@ -400,33 +400,13 @@ static void check_vectors(void *arg __unused)
 	exit(report_summary());
 }
 
-static bool psci_check(void)
+static void psci_print(void)
 {
-	const struct fdt_property *method;
-	int node, len, ver;
-
-	node = fdt_node_offset_by_compatible(dt_fdt(), -1, "arm,psci-0.2");
-	if (node < 0) {
-		printf("PSCI v0.2 compatibility required\n");
-		return false;
-	}
-
-	method = fdt_get_property(dt_fdt(), node, "method", &len);
-	if (method == NULL) {
-		printf("bad psci device tree node\n");
-		return false;
-	}
-
-	if (len < 4 || strcmp(method->data, "hvc") != 0) {
-		printf("psci method must be hvc\n");
-		return false;
-	}
-
-	ver = psci_invoke(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
-	printf("PSCI version %d.%d\n", PSCI_VERSION_MAJOR(ver),
-				       PSCI_VERSION_MINOR(ver));
-
-	return true;
+	int ver = psci_invoke(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
+	report_info("PSCI version: %d.%d", PSCI_VERSION_MAJOR(ver),
+					  PSCI_VERSION_MINOR(ver));
+	report_info("PSCI method: %s", psci_invoke == psci_invoke_hvc ?
+				       "hvc" : "smc");
 }
 
 static void cpu_report(void *data __unused)
@@ -465,7 +445,7 @@ int main(int argc, char **argv)
 
 	} else if (strcmp(argv[1], "smp") == 0) {
 
-		report(psci_check(), "PSCI version");
+		psci_print();
 		on_cpus(cpu_report, NULL);
 		while (!cpumask_full(&ready))
 			cpu_relax();
diff --git a/lib/arm/asm/psci.h b/lib/arm/asm/psci.h
index 7b956bf5987d..e385ce27f5d1 100644
--- a/lib/arm/asm/psci.h
+++ b/lib/arm/asm/psci.h
@@ -3,8 +3,13 @@
 #include <libcflat.h>
 #include <linux/psci.h>
 
-extern int psci_invoke(unsigned long function_id, unsigned long arg0,
-		       unsigned long arg1, unsigned long arg2);
+typedef int (*psci_invoke_fn)(unsigned long function_id, unsigned long arg0,
+			      unsigned long arg1, unsigned long arg2);
+extern psci_invoke_fn psci_invoke;
+extern int psci_invoke_hvc(unsigned long function_id, unsigned long arg0,
+			   unsigned long arg1, unsigned long arg2);
+extern int psci_invoke_smc(unsigned long function_id, unsigned long arg0,
+			   unsigned long arg1, unsigned long arg2);
 extern int psci_cpu_on(unsigned long cpuid, unsigned long entry_point);
 extern void psci_system_reset(void);
 extern int cpu_psci_cpu_boot(unsigned int cpu);
diff --git a/lib/arm/psci.c b/lib/arm/psci.c
index 936c83948b6a..46300f30822c 100644
--- a/lib/arm/psci.c
+++ b/lib/arm/psci.c
@@ -11,9 +11,11 @@
 #include <asm/page.h>
 #include <asm/smp.h>
 
+psci_invoke_fn psci_invoke;
+
 __attribute__((noinline))
-int psci_invoke(unsigned long function_id, unsigned long arg0,
-		unsigned long arg1, unsigned long arg2)
+int psci_invoke_hvc(unsigned long function_id, unsigned long arg0,
+		    unsigned long arg1, unsigned long arg2)
 {
 	asm volatile(
 		"hvc #0"
@@ -22,6 +24,17 @@ int psci_invoke(unsigned long function_id, unsigned long arg0,
 	return function_id;
 }
 
+__attribute__((noinline))
+int psci_invoke_smc(unsigned long function_id, unsigned long arg0,
+		    unsigned long arg1, unsigned long arg2)
+{
+	asm volatile(
+		"smc #0"
+	: "+r" (function_id)
+	: "r" (arg0), "r" (arg1), "r" (arg2));
+	return function_id;
+}
+
 int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
 {
 #ifdef __arm__
diff --git a/lib/arm/setup.c b/lib/arm/setup.c
index 5cda2d919d2b..e595a9e5a167 100644
--- a/lib/arm/setup.c
+++ b/lib/arm/setup.c
@@ -25,6 +25,7 @@
 #include <asm/processor.h>
 #include <asm/smp.h>
 #include <asm/timer.h>
+#include <asm/psci.h>
 
 #include "io.h"
 
@@ -55,6 +56,26 @@ int mpidr_to_cpu(uint64_t mpidr)
 	return -1;
 }
 
+static void psci_set_conduit(void)
+{
+	const void *fdt = dt_fdt();
+	const struct fdt_property *method;
+	int node, len;
+
+	node = fdt_node_offset_by_compatible(fdt, -1, "arm,psci-0.2");
+	assert_msg(node >= 0, "PSCI v0.2 compatibility required");
+
+	method = fdt_get_property(fdt, node, "method", &len);
+	assert(method != NULL && len == 4);
+
+	if (strcmp(method->data, "hvc") == 0)
+		psci_invoke = psci_invoke_hvc;
+	else if (strcmp(method->data, "smc") == 0)
+		psci_invoke = psci_invoke_smc;
+	else
+		assert_msg(false, "Unknown PSCI conduit: %s", method->data);
+}
+
 static void cpu_set(int fdtnode __unused, u64 regval, void *info __unused)
 {
 	int cpu = nr_cpus++;
@@ -259,6 +280,7 @@ void setup(const void *fdt, phys_addr_t freemem_start)
 	mem_regions_add_assumed();
 	mem_init(PAGE_ALIGN((unsigned long)freemem));
 
+	psci_set_conduit();
 	cpu_init();
 
 	/* cpu_init must be called before thread_info_init */
-- 
2.26.3


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler Andrew Jones
@ 2021-04-09 17:18   ` Nikos Nikoleris
  2021-04-09 17:28     ` Andrew Jones
  2021-04-13 16:34   ` Alexandru Elisei
  1 sibling, 1 reply; 38+ messages in thread
From: Nikos Nikoleris @ 2021-04-09 17:18 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: alexandru.elisei, andre.przywara, eric.auger

On 07/04/2021 19:59, Andrew Jones wrote:
> Move secondary_entry helper functions out of .init and into .text,
> since secondary_entry isn't run at "init" time.
> 
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> ---
>   arm/cstart.S   | 62 +++++++++++++++++++++++++++-----------------------
>   arm/cstart64.S | 22 +++++++++++-------
>   2 files changed, 48 insertions(+), 36 deletions(-)
> 
> diff --git a/arm/cstart.S b/arm/cstart.S
> index d88a98362940..653ab1e8a141 100644
> --- a/arm/cstart.S
> +++ b/arm/cstart.S
> @@ -96,32 +96,7 @@ start:
>   	bl	exit
>   	b	halt
>   
> -
> -.macro set_mode_stack mode, stack
> -	add	\stack, #S_FRAME_SIZE
> -	msr	cpsr_c, #(\mode | PSR_I_BIT | PSR_F_BIT)
> -	isb
> -	mov	sp, \stack
> -.endm
> -
> -exceptions_init:
> -	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
> -	bic	r2, #CR_V		@ SCTLR.V := 0
> -	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
> -	ldr	r2, =vector_table
> -	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
> -
> -	mrs	r2, cpsr
> -
> -	/* first frame reserved for svc mode */
> -	set_mode_stack	UND_MODE, r0
> -	set_mode_stack	ABT_MODE, r0
> -	set_mode_stack	IRQ_MODE, r0
> -	set_mode_stack	FIQ_MODE, r0
> -
> -	msr	cpsr_cxsf, r2		@ back to svc mode
> -	isb
> -	mov	pc, lr
> +.text
>   
>   enable_vfp:
>   	/* Enable full access to CP10 and CP11: */
> @@ -133,8 +108,6 @@ enable_vfp:
>   	vmsr	fpexc, r0
>   	mov	pc, lr
>   
> -.text
> -
>   .global get_mmu_off
>   get_mmu_off:
>   	ldr	r0, =auxinfo
> @@ -235,6 +208,39 @@ asm_mmu_disable:
>   
>   	mov     pc, lr
>   
> +/*
> + * Vectors
> + */
> +
> +.macro set_mode_stack mode, stack
> +	add	\stack, #S_FRAME_SIZE
> +	msr	cpsr_c, #(\mode | PSR_I_BIT | PSR_F_BIT)
> +	isb
> +	mov	sp, \stack
> +.endm
> +
> +exceptions_init:
> +	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
> +	bic	r2, #CR_V		@ SCTLR.V := 0
> +	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
> +	ldr	r2, =vector_table
> +	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
> +
> +	mrs	r2, cpsr
> +
> +	/*
> +	 * Input r0 is the stack top, which is the exception stacks base

Minor, feel free to ignore - wouldn't it be better to put this comment 
at the start of this routine to inform the caller?

I am not sure about the practical implications of having an .init 
section but in any case, moving secondary_entry helper functions to 
.text seems sensible.

Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>

Thanks,

Nikos

> +	 * The first frame is reserved for svc mode
> +	 */
> +	set_mode_stack	UND_MODE, r0
> +	set_mode_stack	ABT_MODE, r0
> +	set_mode_stack	IRQ_MODE, r0
> +	set_mode_stack	FIQ_MODE, r0
> +
> +	msr	cpsr_cxsf, r2		@ back to svc mode
> +	isb
> +	mov	pc, lr
> +
>   /*
>    * Vector stubs
>    * Simplified version of the Linux kernel implementation
> diff --git a/arm/cstart64.S b/arm/cstart64.S
> index 0a85338bcdae..d39cf4dfb99c 100644
> --- a/arm/cstart64.S
> +++ b/arm/cstart64.S
> @@ -89,10 +89,12 @@ start:
>   	msr	cpacr_el1, x4
>   
>   	/* set up exception handling */
> +	mov	x4, x0				// x0 is the addr of the dtb
>   	bl	exceptions_init
>   
>   	/* complete setup */
> -	bl	setup				// x0 is the addr of the dtb
> +	mov	x0, x4				// restore the addr of the dtb
> +	bl	setup
>   	bl	get_mmu_off
>   	cbnz	x0, 1f
>   	bl	setup_vm
> @@ -109,13 +111,6 @@ start:
>   	bl	exit
>   	b	halt
>   
> -exceptions_init:
> -	adrp	x4, vector_table
> -	add	x4, x4, :lo12:vector_table
> -	msr	vbar_el1, x4
> -	isb
> -	ret
> -
>   .text
>   
>   .globl get_mmu_off
> @@ -251,6 +246,17 @@ asm_mmu_disable:
>   
>   /*
>    * Vectors
> + */
> +
> +exceptions_init:
> +	adrp	x0, vector_table
> +	add	x0, x0, :lo12:vector_table
> +	msr	vbar_el1, x0
> +	isb
> +	ret
> +
> +/*
> + * Vector stubs
>    * Adapted from arch/arm64/kernel/entry.S
>    */
>   .macro vector_stub, name, vec
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 2/8] arm/arm64: Move setup_vm into setup
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 2/8] arm/arm64: Move setup_vm into setup Andrew Jones
@ 2021-04-09 17:24   ` Nikos Nikoleris
  2021-04-14 15:19   ` Alexandru Elisei
  1 sibling, 0 replies; 38+ messages in thread
From: Nikos Nikoleris @ 2021-04-09 17:24 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: alexandru.elisei, andre.przywara, eric.auger

On 07/04/2021 19:59, Andrew Jones wrote:
> Consolidate our setup calls to reduce the amount we need to do from
> init::start. Also remove a couple of pointless comments from setup().
> 
> Signed-off-by: Andrew Jones <drjones@redhat.com>

No need for this to be in assembly.

Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>

Thanks,

Nikos

> ---
>   arm/cstart.S    | 6 ------
>   arm/cstart64.S  | 5 -----
>   lib/arm/setup.c | 7 +++++--
>   3 files changed, 5 insertions(+), 13 deletions(-)
> 
> diff --git a/arm/cstart.S b/arm/cstart.S
> index 653ab1e8a141..731f841695ce 100644
> --- a/arm/cstart.S
> +++ b/arm/cstart.S
> @@ -81,12 +81,7 @@ start:
>   	/* complete setup */
>   	pop	{r0-r1}
>   	bl	setup
> -	bl	get_mmu_off
> -	cmp	r0, #0
> -	bne	1f
> -	bl	setup_vm
>   
> -1:
>   	/* run the test */
>   	ldr	r0, =__argc
>   	ldr	r0, [r0]
> @@ -108,7 +103,6 @@ enable_vfp:
>   	vmsr	fpexc, r0
>   	mov	pc, lr
>   
> -.global get_mmu_off
>   get_mmu_off:
>   	ldr	r0, =auxinfo
>   	ldr	r0, [r0, #4]
> diff --git a/arm/cstart64.S b/arm/cstart64.S
> index d39cf4dfb99c..add60a2b4e74 100644
> --- a/arm/cstart64.S
> +++ b/arm/cstart64.S
> @@ -95,11 +95,7 @@ start:
>   	/* complete setup */
>   	mov	x0, x4				// restore the addr of the dtb
>   	bl	setup
> -	bl	get_mmu_off
> -	cbnz	x0, 1f
> -	bl	setup_vm
>   
> -1:
>   	/* run the test */
>   	adrp	x0, __argc
>   	ldr	w0, [x0, :lo12:__argc]
> @@ -113,7 +109,6 @@ start:
>   
>   .text
>   
> -.globl get_mmu_off
>   get_mmu_off:
>   	adrp	x0, auxinfo
>   	ldr	x0, [x0, :lo12:auxinfo + 8]
> diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> index 751ba980000a..9c16f6004e9f 100644
> --- a/lib/arm/setup.c
> +++ b/lib/arm/setup.c
> @@ -16,6 +16,8 @@
>   #include <alloc.h>
>   #include <alloc_phys.h>
>   #include <alloc_page.h>
> +#include <vmalloc.h>
> +#include <auxinfo.h>
>   #include <argv.h>
>   #include <asm/thread_info.h>
>   #include <asm/setup.h>
> @@ -233,7 +235,6 @@ void setup(const void *fdt)
>   		freemem += initrd_size;
>   	}
>   
> -	/* call init functions */
>   	mem_init(PAGE_ALIGN((unsigned long)freemem));
>   	cpu_init();
>   
> @@ -243,7 +244,6 @@ void setup(const void *fdt)
>   	/* mem_init must be called before io_init */
>   	io_init();
>   
> -	/* finish setup */
>   	timer_save_state();
>   
>   	ret = dt_get_bootargs(&bootargs);
> @@ -256,4 +256,7 @@ void setup(const void *fdt)
>   		memcpy(env, initrd, initrd_size);
>   		setup_env(env, initrd_size);
>   	}
> +
> +	if (!(auxinfo.flags & AUXINFO_MMU_OFF))
> +		setup_vm();
>   }
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler
  2021-04-09 17:18   ` Nikos Nikoleris
@ 2021-04-09 17:28     ` Andrew Jones
  0 siblings, 0 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-09 17:28 UTC (permalink / raw)
  To: Nikos Nikoleris; +Cc: kvm, alexandru.elisei, andre.przywara, eric.auger

On Fri, Apr 09, 2021 at 06:18:05PM +0100, Nikos Nikoleris wrote:
> On 07/04/2021 19:59, Andrew Jones wrote:
> > +exceptions_init:
> > +	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
> > +	bic	r2, #CR_V		@ SCTLR.V := 0
> > +	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
> > +	ldr	r2, =vector_table
> > +	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
> > +
> > +	mrs	r2, cpsr
> > +
> > +	/*
> > +	 * Input r0 is the stack top, which is the exception stacks base
> 
> Minor, feel free to ignore - wouldn't it be better to put this comment at
> the start of this routine to inform the caller?

Yes, that's a good suggestion. Will do for v2.

> 
> I am not sure about the practical implications of having an .init section
> but in any case, moving secondary_entry helper functions to .text seems
> sensible.
> 
> Reviewed-by Nikos Nikoleris <nikos.nikoleris@arm.com>

Thanks,
drew


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 8/8] arm/arm64: psci: don't assume method is hvc
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 8/8] arm/arm64: psci: don't assume method is hvc Andrew Jones
@ 2021-04-09 17:46   ` Nikos Nikoleris
  2021-04-14  9:06     ` Andrew Jones
  2021-04-19 16:33   ` Alexandru Elisei
  1 sibling, 1 reply; 38+ messages in thread
From: Nikos Nikoleris @ 2021-04-09 17:46 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: alexandru.elisei, andre.przywara, eric.auger

On 07/04/2021 19:59, Andrew Jones wrote:
> The method can also be smc and it will be when running on bare metal.
> 
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> ---
>   arm/selftest.c     | 34 +++++++---------------------------
>   lib/arm/asm/psci.h |  9 +++++++--
>   lib/arm/psci.c     | 17 +++++++++++++++--
>   lib/arm/setup.c    | 22 ++++++++++++++++++++++
>   4 files changed, 51 insertions(+), 31 deletions(-)
> 
> diff --git a/arm/selftest.c b/arm/selftest.c
> index 4495b161cdd5..9f459ed3d571 100644
> --- a/arm/selftest.c
> +++ b/arm/selftest.c
> @@ -400,33 +400,13 @@ static void check_vectors(void *arg __unused)
>   	exit(report_summary());
>   }
>   
> -static bool psci_check(void)
> +static void psci_print(void)
>   {
> -	const struct fdt_property *method;
> -	int node, len, ver;
> -
> -	node = fdt_node_offset_by_compatible(dt_fdt(), -1, "arm,psci-0.2");
> -	if (node < 0) {
> -		printf("PSCI v0.2 compatibility required\n");
> -		return false;
> -	}
> -
> -	method = fdt_get_property(dt_fdt(), node, "method", &len);
> -	if (method == NULL) {
> -		printf("bad psci device tree node\n");
> -		return false;
> -	}
> -
> -	if (len < 4 || strcmp(method->data, "hvc") != 0) {
> -		printf("psci method must be hvc\n");
> -		return false;
> -	}
> -
> -	ver = psci_invoke(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
> -	printf("PSCI version %d.%d\n", PSCI_VERSION_MAJOR(ver),
> -				       PSCI_VERSION_MINOR(ver));
> -
> -	return true;
> +	int ver = psci_invoke(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
> +	report_info("PSCI version: %d.%d", PSCI_VERSION_MAJOR(ver),
> +					  PSCI_VERSION_MINOR(ver));
> +	report_info("PSCI method: %s", psci_invoke == psci_invoke_hvc ?
> +				       "hvc" : "smc");
>   }
>   
>   static void cpu_report(void *data __unused)
> @@ -465,7 +445,7 @@ int main(int argc, char **argv)
>   
>   	} else if (strcmp(argv[1], "smp") == 0) {
>   
> -		report(psci_check(), "PSCI version");
> +		psci_print();
>   		on_cpus(cpu_report, NULL);
>   		while (!cpumask_full(&ready))
>   			cpu_relax();
> diff --git a/lib/arm/asm/psci.h b/lib/arm/asm/psci.h
> index 7b956bf5987d..e385ce27f5d1 100644
> --- a/lib/arm/asm/psci.h
> +++ b/lib/arm/asm/psci.h
> @@ -3,8 +3,13 @@
>   #include <libcflat.h>
>   #include <linux/psci.h>
>   
> -extern int psci_invoke(unsigned long function_id, unsigned long arg0,
> -		       unsigned long arg1, unsigned long arg2);
> +typedef int (*psci_invoke_fn)(unsigned long function_id, unsigned long arg0,
> +			      unsigned long arg1, unsigned long arg2);
> +extern psci_invoke_fn psci_invoke;
> +extern int psci_invoke_hvc(unsigned long function_id, unsigned long arg0,
> +			   unsigned long arg1, unsigned long arg2);
> +extern int psci_invoke_smc(unsigned long function_id, unsigned long arg0,
> +			   unsigned long arg1, unsigned long arg2);
>   extern int psci_cpu_on(unsigned long cpuid, unsigned long entry_point);
>   extern void psci_system_reset(void);
>   extern int cpu_psci_cpu_boot(unsigned int cpu);
> diff --git a/lib/arm/psci.c b/lib/arm/psci.c
> index 936c83948b6a..46300f30822c 100644
> --- a/lib/arm/psci.c
> +++ b/lib/arm/psci.c
> @@ -11,9 +11,11 @@
>   #include <asm/page.h>
>   #include <asm/smp.h>
>   
> +psci_invoke_fn psci_invoke;
> +
>   __attribute__((noinline))
> -int psci_invoke(unsigned long function_id, unsigned long arg0,
> -		unsigned long arg1, unsigned long arg2)
> +int psci_invoke_hvc(unsigned long function_id, unsigned long arg0,
> +		    unsigned long arg1, unsigned long arg2)
>   {
>   	asm volatile(
>   		"hvc #0"
> @@ -22,6 +24,17 @@ int psci_invoke(unsigned long function_id, unsigned long arg0,
>   	return function_id;
>   }
>   
> +__attribute__((noinline))

Is noinline necessary? We shouldn't be calling psci_invoke_smc and 
psci_invoke_hvc directly, it's unlikely that the compiler will have a 
chance to inline them. But I might be missing something here because I 
don't see why it was there in psci_invoke either.

Otherwise Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>

Thanks,

Nikos

> +int psci_invoke_smc(unsigned long function_id, unsigned long arg0,
> +		    unsigned long arg1, unsigned long arg2)
> +{
> +	asm volatile(
> +		"smc #0"
> +	: "+r" (function_id)
> +	: "r" (arg0), "r" (arg1), "r" (arg2));
> +	return function_id;
> +}
> +
>   int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
>   {
>   #ifdef __arm__
> diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> index 5cda2d919d2b..e595a9e5a167 100644
> --- a/lib/arm/setup.c
> +++ b/lib/arm/setup.c
> @@ -25,6 +25,7 @@
>   #include <asm/processor.h>
>   #include <asm/smp.h>
>   #include <asm/timer.h>
> +#include <asm/psci.h>
>   
>   #include "io.h"
>   
> @@ -55,6 +56,26 @@ int mpidr_to_cpu(uint64_t mpidr)
>   	return -1;
>   }
>   
> +static void psci_set_conduit(void)
> +{
> +	const void *fdt = dt_fdt();
> +	const struct fdt_property *method;
> +	int node, len;
> +
> +	node = fdt_node_offset_by_compatible(fdt, -1, "arm,psci-0.2");
> +	assert_msg(node >= 0, "PSCI v0.2 compatibility required");
> +
> +	method = fdt_get_property(fdt, node, "method", &len);
> +	assert(method != NULL && len == 4);
> +
> +	if (strcmp(method->data, "hvc") == 0)
> +		psci_invoke = psci_invoke_hvc;
> +	else if (strcmp(method->data, "smc") == 0)
> +		psci_invoke = psci_invoke_smc;
> +	else
> +		assert_msg(false, "Unknown PSCI conduit: %s", method->data);
> +}
> +
>   static void cpu_set(int fdtnode __unused, u64 regval, void *info __unused)
>   {
>   	int cpu = nr_cpus++;
> @@ -259,6 +280,7 @@ void setup(const void *fdt, phys_addr_t freemem_start)
>   	mem_regions_add_assumed();
>   	mem_init(PAGE_ALIGN((unsigned long)freemem));
>   
> +	psci_set_conduit();
>   	cpu_init();
>   
>   	/* cpu_init must be called before thread_info_init */
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 4/8] arm/arm64: mmu: Stop mapping an assumed IO region
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 4/8] arm/arm64: mmu: Stop mapping an assumed IO region Andrew Jones
@ 2021-04-13 14:06   ` Nikos Nikoleris
  2021-04-14 15:42   ` Alexandru Elisei
  1 sibling, 0 replies; 38+ messages in thread
From: Nikos Nikoleris @ 2021-04-13 14:06 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: alexandru.elisei, andre.przywara, eric.auger

On 07/04/2021 19:59, Andrew Jones wrote:
> By providing a proper ioremap function, we can just rely on devices
> calling it for each region they need (as they already do) instead of
> mapping a big assumed I/O range. The persistent maps weirdness allows
> us to call setup_vm after io_init. Why don't we just call setup_vm
> before io_init, I hear you ask? Well, that's because tests like sieve
> want to start with the MMU off and later call setup_vm, and all the
> while have working I/O. Some unit tests are just really demanding...
> 
> Signed-off-by: Andrew Jones <drjones@redhat.com>

That's a very nice improvement! I wonder if any of the current calls to 
ioremap are for ranges big enough to allow for a sect map. However, this 
would be a performance improvement and something we can look at at some 
point in the future.

Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>

> ---
>   lib/arm/asm/io.h      |  6 ++++
>   lib/arm/asm/mmu-api.h |  1 +
>   lib/arm/asm/mmu.h     |  1 +
>   lib/arm/asm/page.h    |  2 ++
>   lib/arm/mmu.c         | 82 ++++++++++++++++++++++++++++++++++++++-----
>   lib/arm64/asm/io.h    |  6 ++++
>   lib/arm64/asm/mmu.h   |  1 +
>   lib/arm64/asm/page.h  |  2 ++
>   8 files changed, 93 insertions(+), 8 deletions(-)
> 
> diff --git a/lib/arm/asm/io.h b/lib/arm/asm/io.h
> index ba3b0b2412ad..e4caa6ff5d1e 100644
> --- a/lib/arm/asm/io.h
> +++ b/lib/arm/asm/io.h
> @@ -77,6 +77,12 @@ static inline void __raw_writel(u32 val, volatile void __iomem *addr)
>   		     : "r" (val));
>   }
>   
> +#define ioremap ioremap
> +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
> +{
> +	return __ioremap(phys_addr, size);
> +}
> +
>   #define virt_to_phys virt_to_phys
>   static inline phys_addr_t virt_to_phys(const volatile void *x)
>   {
> diff --git a/lib/arm/asm/mmu-api.h b/lib/arm/asm/mmu-api.h
> index 05fc12b5afb8..b9a5a8f6b3c1 100644
> --- a/lib/arm/asm/mmu-api.h
> +++ b/lib/arm/asm/mmu-api.h
> @@ -17,6 +17,7 @@ extern void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
>   extern void mmu_set_range_ptes(pgd_t *pgtable, uintptr_t virt_offset,
>   			       phys_addr_t phys_start, phys_addr_t phys_end,
>   			       pgprot_t prot);
> +extern void mmu_set_persistent_maps(pgd_t *pgtable);
>   extern pteval_t *mmu_get_pte(pgd_t *pgtable, uintptr_t vaddr);
>   extern void mmu_clear_user(pgd_t *pgtable, unsigned long vaddr);
>   #endif
> diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h
> index 122874b8aebe..d88a4f16df42 100644
> --- a/lib/arm/asm/mmu.h
> +++ b/lib/arm/asm/mmu.h
> @@ -12,6 +12,7 @@
>   #define PTE_SHARED		L_PTE_SHARED
>   #define PTE_AF			PTE_EXT_AF
>   #define PTE_WBWA		L_PTE_MT_WRITEALLOC
> +#define PTE_UNCACHED		L_PTE_MT_UNCACHED
>   
>   /* See B3.18.7 TLB maintenance operations */
>   
> diff --git a/lib/arm/asm/page.h b/lib/arm/asm/page.h
> index 1fb5cd26ac66..8eb4a883808e 100644
> --- a/lib/arm/asm/page.h
> +++ b/lib/arm/asm/page.h
> @@ -47,5 +47,7 @@ typedef struct { pteval_t pgprot; } pgprot_t;
>   extern phys_addr_t __virt_to_phys(unsigned long addr);
>   extern unsigned long __phys_to_virt(phys_addr_t addr);
>   
> +extern void *__ioremap(phys_addr_t phys_addr, size_t size);
> +
>   #endif /* !__ASSEMBLY__ */
>   #endif /* _ASMARM_PAGE_H_ */
> diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
> index 15eef007f256..a7b7ae51afe3 100644
> --- a/lib/arm/mmu.c
> +++ b/lib/arm/mmu.c
> @@ -11,6 +11,7 @@
>   #include <asm/mmu.h>
>   #include <asm/setup.h>
>   #include <asm/page.h>
> +#include <asm/io.h>
>   
>   #include "alloc_page.h"
>   #include "vmalloc.h"
> @@ -21,6 +22,57 @@
>   
>   extern unsigned long etext;
>   
> +#define MMU_MAX_PERSISTENT_MAPS 64
> +
> +struct mmu_persistent_map {
> +	uintptr_t virt_offset;
> +	phys_addr_t phys_start;
> +	phys_addr_t phys_end;
> +	pgprot_t prot;
> +	bool sect;
> +};
> +
> +static struct mmu_persistent_map mmu_persistent_maps[MMU_MAX_PERSISTENT_MAPS];
> +
> +static void
> +mmu_set_persistent_range(uintptr_t virt_offset, phys_addr_t phys_start,
> +			 phys_addr_t phys_end, pgprot_t prot, bool sect)
> +{
> +	int i;
> +
> +	assert(phys_end);
> +
> +	for (i = 0; i < MMU_MAX_PERSISTENT_MAPS; ++i) {
> +		if (!mmu_persistent_maps[i].phys_end)
> +			break;
> +	}
> +	assert(i < MMU_MAX_PERSISTENT_MAPS);
> +
> +	mmu_persistent_maps[i] = (struct mmu_persistent_map){
> +		.virt_offset = virt_offset,
> +		.phys_start = phys_start,
> +		.phys_end = phys_end,
> +		.prot = prot,
> +		.sect = sect,
> +	};
> +}
> +
> +void mmu_set_persistent_maps(pgd_t *pgtable)
> +{
> +	struct mmu_persistent_map *map;
> +
> +	for (map = &mmu_persistent_maps[0]; map->phys_end; ++map) {
> +		if (map->sect)
> +			mmu_set_range_sect(pgtable, map->virt_offset,
> +					   map->phys_start, map->phys_end,
> +					   map->prot);
> +		else
> +			mmu_set_range_ptes(pgtable, map->virt_offset,
> +					   map->phys_start, map->phys_end,
> +					   map->prot);
> +	}
> +}
> +
>   pgd_t *mmu_idmap;
>   
>   /* CPU 0 starts with disabled MMU */
> @@ -157,7 +209,6 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
>   void *setup_mmu(phys_addr_t phys_end)
>   {
>   	uintptr_t code_end = (uintptr_t)&etext;
> -	struct mem_region *r;
>   
>   	/* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */
>   	if (phys_end > (3ul << 30))
> @@ -172,13 +223,6 @@ void *setup_mmu(phys_addr_t phys_end)
>   
>   	mmu_idmap = alloc_page();
>   
> -	for (r = mem_regions; r->end; ++r) {
> -		if (!(r->flags & MR_F_IO))
> -			continue;
> -		mmu_set_range_sect(mmu_idmap, r->start, r->start, r->end,
> -				   __pgprot(PMD_SECT_UNCACHED | PMD_SECT_USER));
> -	}
> -
>   	/* armv8 requires code shared between EL1 and EL0 to be read-only */
>   	mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET,
>   		PHYS_OFFSET, code_end,
> @@ -188,10 +232,32 @@ void *setup_mmu(phys_addr_t phys_end)
>   		code_end, phys_end,
>   		__pgprot(PTE_WBWA | PTE_USER));
>   
> +	mmu_set_persistent_maps(mmu_idmap);
> +
>   	mmu_enable(mmu_idmap);
>   	return mmu_idmap;
>   }
>   
> +void __iomem *__ioremap(phys_addr_t phys_addr, size_t size)
> +{
> +	phys_addr_t paddr_aligned = phys_addr & PAGE_MASK;
> +	phys_addr_t paddr_end = PAGE_ALIGN(phys_addr + size);
> +	pgprot_t prot = __pgprot(PTE_UNCACHED | PTE_USER);
> +
> +	assert(sizeof(long) == 8 || !(phys_addr >> 32));
> +
> +	mmu_set_persistent_range(paddr_aligned, paddr_aligned, paddr_end,
> +				 prot, false);
> +
> +	if (mmu_enabled()) {
> +		pgd_t *pgtable = current_thread_info()->pgtable;
> +		mmu_set_range_ptes(pgtable, paddr_aligned, paddr_aligned,
> +				   paddr_end, prot);
> +	}
> +
> +	return (void __iomem *)(unsigned long)phys_addr;
> +}
> +
>   phys_addr_t __virt_to_phys(unsigned long addr)
>   {
>   	if (mmu_enabled()) {
> diff --git a/lib/arm64/asm/io.h b/lib/arm64/asm/io.h
> index e0a03b250d5b..be19f471c0fa 100644
> --- a/lib/arm64/asm/io.h
> +++ b/lib/arm64/asm/io.h
> @@ -71,6 +71,12 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
>   	return val;
>   }
>   
> +#define ioremap ioremap
> +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
> +{
> +	return __ioremap(phys_addr, size);
> +}
> +
>   #define virt_to_phys virt_to_phys
>   static inline phys_addr_t virt_to_phys(const volatile void *x)
>   {
> diff --git a/lib/arm64/asm/mmu.h b/lib/arm64/asm/mmu.h
> index 72d75eafc882..72371b2d9fe3 100644
> --- a/lib/arm64/asm/mmu.h
> +++ b/lib/arm64/asm/mmu.h
> @@ -8,6 +8,7 @@
>   #include <asm/barrier.h>
>   
>   #define PMD_SECT_UNCACHED	PMD_ATTRINDX(MT_DEVICE_nGnRE)
> +#define PTE_UNCACHED		PTE_ATTRINDX(MT_DEVICE_nGnRE)
>   #define PTE_WBWA		PTE_ATTRINDX(MT_NORMAL)
>   
>   static inline void flush_tlb_all(void)
> diff --git a/lib/arm64/asm/page.h b/lib/arm64/asm/page.h
> index ae4484b22114..d0fac6ea563d 100644
> --- a/lib/arm64/asm/page.h
> +++ b/lib/arm64/asm/page.h
> @@ -72,5 +72,7 @@ typedef struct { pteval_t pgprot; } pgprot_t;
>   extern phys_addr_t __virt_to_phys(unsigned long addr);
>   extern unsigned long __phys_to_virt(phys_addr_t addr);
>   
> +extern void *__ioremap(phys_addr_t phys_addr, size_t size);
> +
>   #endif /* !__ASSEMBLY__ */
>   #endif /* _ASMARM64_PAGE_H_ */
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 3/8] pci-testdev: ioremap regions
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 3/8] pci-testdev: ioremap regions Andrew Jones
@ 2021-04-13 14:12   ` Nikos Nikoleris
  0 siblings, 0 replies; 38+ messages in thread
From: Nikos Nikoleris @ 2021-04-13 14:12 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: alexandru.elisei, andre.przywara, eric.auger

On 07/04/2021 19:59, Andrew Jones wrote:
> Don't assume the physical addresses used with PCI have already been
> identity mapped.
> 
> Signed-off-by: Andrew Jones <drjones@redhat.com>

This makes more sense now that I had a look at the next patch (4/8).

Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>

> ---
>   lib/pci-host-generic.c | 5 ++---
>   lib/pci-host-generic.h | 4 ++--
>   lib/pci-testdev.c      | 4 ++++
>   3 files changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/pci-host-generic.c b/lib/pci-host-generic.c
> index 818150dc0a66..de93b8feac39 100644
> --- a/lib/pci-host-generic.c
> +++ b/lib/pci-host-generic.c
> @@ -122,7 +122,7 @@ static struct pci_host_bridge *pci_dt_probe(void)
>   		      sizeof(host->addr_space[0]) * nr_addr_spaces);
>   	assert(host != NULL);
>   
> -	host->start		= base.addr;
> +	host->start		= ioremap(base.addr, base.size);
>   	host->size		= base.size;
>   	host->bus		= bus;
>   	host->bus_max		= bus_max;
> @@ -279,8 +279,7 @@ phys_addr_t pci_host_bridge_get_paddr(u64 pci_addr)
>   
>   static void __iomem *pci_get_dev_conf(struct pci_host_bridge *host, int devfn)
>   {
> -	return (void __iomem *)(unsigned long)
> -		host->start + (devfn << PCI_ECAM_DEVFN_SHIFT);
> +	return (void __iomem *)host->start + (devfn << PCI_ECAM_DEVFN_SHIFT);
>   }
>   
>   u8 pci_config_readb(pcidevaddr_t dev, u8 off)
> diff --git a/lib/pci-host-generic.h b/lib/pci-host-generic.h
> index fd30e7c74ed8..0ffe6380ec8f 100644
> --- a/lib/pci-host-generic.h
> +++ b/lib/pci-host-generic.h
> @@ -18,8 +18,8 @@ struct pci_addr_space {
>   };
>   
>   struct pci_host_bridge {
> -	phys_addr_t		start;
> -	phys_addr_t		size;
> +	void __iomem		*start;
> +	size_t			size;
>   	int			bus;
>   	int			bus_max;
>   	int			nr_addr_spaces;
> diff --git a/lib/pci-testdev.c b/lib/pci-testdev.c
> index 039bb44781c1..4f2e5663b2d6 100644
> --- a/lib/pci-testdev.c
> +++ b/lib/pci-testdev.c
> @@ -185,7 +185,11 @@ int pci_testdev(void)
>   	mem = ioremap(addr, PAGE_SIZE);
>   
>   	addr = pci_bar_get_addr(&pci_dev, 1);
> +#if defined(__i386__) || defined(__x86_64__)
>   	io = (void *)(unsigned long)addr;
> +#else
> +	io = ioremap(addr, PAGE_SIZE);
> +#endif
>   
>   	nr_tests += pci_testdev_all(mem, &pci_testdev_mem_ops);
>   	nr_tests += pci_testdev_all(io, &pci_testdev_io_ops);
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 5/8] arm/arm64: mmu: Remove memory layout assumptions
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 5/8] arm/arm64: mmu: Remove memory layout assumptions Andrew Jones
@ 2021-04-13 14:27   ` Nikos Nikoleris
  2021-04-15 15:48   ` Alexandru Elisei
  1 sibling, 0 replies; 38+ messages in thread
From: Nikos Nikoleris @ 2021-04-13 14:27 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: alexandru.elisei, andre.przywara, eric.auger

On 07/04/2021 19:59, Andrew Jones wrote:
> Rather than making too many assumptions about the memory layout
> in mmu code, just set up the page tables per the memory regions
> (which means putting all the memory layout assumptions in setup).
> To ensure we get the right default flags set we need to split the
> primary region into two regions for code and data.
> 
> We still only expect the primary regions to be present, but the
> next patch will remove that assumption too.
> 
> Signed-off-by: Andrew Jones <drjones@redhat.com>

Looks good to me.

Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>

> ---
>   lib/arm/asm/setup.h |  1 +
>   lib/arm/mmu.c       | 26 +++++++++++++++-----------
>   lib/arm/setup.c     | 22 ++++++++++++++--------
>   3 files changed, 30 insertions(+), 19 deletions(-)
> 
> diff --git a/lib/arm/asm/setup.h b/lib/arm/asm/setup.h
> index c8afb2493f8d..210c14f818fb 100644
> --- a/lib/arm/asm/setup.h
> +++ b/lib/arm/asm/setup.h
> @@ -15,6 +15,7 @@ extern int nr_cpus;
>   
>   #define MR_F_PRIMARY		(1U << 0)
>   #define MR_F_IO			(1U << 1)
> +#define MR_F_CODE		(1U << 2)
>   #define MR_F_UNKNOWN		(1U << 31)
>   
>   struct mem_region {
> diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
> index a7b7ae51afe3..edd2b9da809b 100644
> --- a/lib/arm/mmu.c
> +++ b/lib/arm/mmu.c
> @@ -20,8 +20,6 @@
>   
>   #include <linux/compiler.h>
>   
> -extern unsigned long etext;
> -
>   #define MMU_MAX_PERSISTENT_MAPS 64
>   
>   struct mmu_persistent_map {
> @@ -208,7 +206,7 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
>   
>   void *setup_mmu(phys_addr_t phys_end)
>   {
> -	uintptr_t code_end = (uintptr_t)&etext;
> +	struct mem_region *r;
>   
>   	/* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */
>   	if (phys_end > (3ul << 30))
> @@ -223,14 +221,20 @@ void *setup_mmu(phys_addr_t phys_end)
>   
>   	mmu_idmap = alloc_page();
>   
> -	/* armv8 requires code shared between EL1 and EL0 to be read-only */
> -	mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET,
> -		PHYS_OFFSET, code_end,
> -		__pgprot(PTE_WBWA | PTE_RDONLY | PTE_USER));
> -
> -	mmu_set_range_ptes(mmu_idmap, code_end,
> -		code_end, phys_end,
> -		__pgprot(PTE_WBWA | PTE_USER));
> +	for (r = mem_regions; r->end; ++r) {
> +		if (r->flags & MR_F_IO) {
> +			continue;
> +		} else if (r->flags & MR_F_CODE) {
> +			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected code region");
> +			/* armv8 requires code shared between EL1 and EL0 to be read-only */
> +			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
> +					   __pgprot(PTE_WBWA | PTE_USER | PTE_RDONLY));
> +		} else {
> +			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected data region");
> +			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
> +					   __pgprot(PTE_WBWA | PTE_USER));
> +		}
> +	}
>   
>   	mmu_set_persistent_maps(mmu_idmap);
>   
> diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> index 9c16f6004e9f..9da5d24b0be9 100644
> --- a/lib/arm/setup.c
> +++ b/lib/arm/setup.c
> @@ -31,6 +31,7 @@
>   #define NR_INITIAL_MEM_REGIONS 16
>   
>   extern unsigned long stacktop;
> +extern unsigned long etext;
>   
>   struct timer_state __timer_state;
>   
> @@ -88,10 +89,12 @@ unsigned int mem_region_get_flags(phys_addr_t paddr)
>   
>   static void mem_init(phys_addr_t freemem_start)
>   {
> +	phys_addr_t code_end = (phys_addr_t)(unsigned long)&etext;
>   	struct dt_pbus_reg regs[NR_INITIAL_MEM_REGIONS];
> -	struct mem_region primary, mem = {
> +	struct mem_region mem = {
>   		.start = (phys_addr_t)-1,
>   	};
> +	struct mem_region *primary = NULL;
>   	phys_addr_t base, top;
>   	int nr_regs, nr_io = 0, i;
>   
> @@ -110,8 +113,6 @@ static void mem_init(phys_addr_t freemem_start)
>   	nr_regs = dt_get_memory_params(regs, NR_INITIAL_MEM_REGIONS - nr_io);
>   	assert(nr_regs > 0);
>   
> -	primary = (struct mem_region){ 0 };
> -
>   	for (i = 0; i < nr_regs; ++i) {
>   		struct mem_region *r = &mem_regions[nr_io + i];
>   
> @@ -123,7 +124,7 @@ static void mem_init(phys_addr_t freemem_start)
>   		 */
>   		if (freemem_start >= r->start && freemem_start < r->end) {
>   			r->flags |= MR_F_PRIMARY;
> -			primary = *r;
> +			primary = r;
>   		}
>   
>   		/*
> @@ -135,13 +136,18 @@ static void mem_init(phys_addr_t freemem_start)
>   		if (r->end > mem.end)
>   			mem.end = r->end;
>   	}
> -	assert(primary.end != 0);
> +	assert(primary);
>   	assert(!(mem.start & ~PHYS_MASK) && !((mem.end - 1) & ~PHYS_MASK));
>   
> -	__phys_offset = primary.start;	/* PHYS_OFFSET */
> -	__phys_end = primary.end;	/* PHYS_END */
> +	__phys_offset = primary->start;	/* PHYS_OFFSET */
> +	__phys_end = primary->end;	/* PHYS_END */
> +
> +	/* Split the primary region into two regions; code and data */
> +	mem.start = code_end, mem.end = primary->end, mem.flags = MR_F_PRIMARY;
> +	mem_regions[nr_io + i] = mem;
> +	primary->end = code_end, primary->flags |= MR_F_CODE;
>   
> -	phys_alloc_init(freemem_start, primary.end - freemem_start);
> +	phys_alloc_init(freemem_start, __phys_end - freemem_start);
>   	phys_alloc_set_minimum_alignment(SMP_CACHE_BYTES);
>   
>   	phys_alloc_get_unused(&base, &top);
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler Andrew Jones
  2021-04-09 17:18   ` Nikos Nikoleris
@ 2021-04-13 16:34   ` Alexandru Elisei
  2021-04-14  8:59     ` Andrew Jones
  1 sibling, 1 reply; 38+ messages in thread
From: Alexandru Elisei @ 2021-04-13 16:34 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: nikos.nikoleris, andre.przywara, eric.auger

Hi Drew,

On 4/7/21 7:59 PM, Andrew Jones wrote:
> Move secondary_entry helper functions out of .init and into .text,
> since secondary_entry isn't run at "init" time.

The tests aren't loaded using the loader, so as far as I can tell the reason for
having an .init section is to make sure the code from the start label is put at
offset 0 in the test binary. As long as the start label is kept at the beginning
of the .init section, and the linker script places the section first, I don't see
any issues with this change.

The only hypothetical problem that I can think of is that the code from .init
calls code from .text, and if the text section grows very large we might end up
with a PC offset larger than what can be encoded in the BL instruction. That's
unlikely to happen (the offset is 16MB for arm and 64MB for arm64), and the .init
code already calls other functions (like setup) which are in .text, so we would
have this problem regardless of this change. And the compiler will emit an error
if that happens.

>
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> ---
>  arm/cstart.S   | 62 +++++++++++++++++++++++++++-----------------------
>  arm/cstart64.S | 22 +++++++++++-------
>  2 files changed, 48 insertions(+), 36 deletions(-)
>
> diff --git a/arm/cstart.S b/arm/cstart.S
> index d88a98362940..653ab1e8a141 100644
> --- a/arm/cstart.S
> +++ b/arm/cstart.S
> @@ -96,32 +96,7 @@ start:
>  	bl	exit
>  	b	halt
>  
> -
> -.macro set_mode_stack mode, stack
> -	add	\stack, #S_FRAME_SIZE
> -	msr	cpsr_c, #(\mode | PSR_I_BIT | PSR_F_BIT)
> -	isb
> -	mov	sp, \stack
> -.endm
> -
> -exceptions_init:
> -	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
> -	bic	r2, #CR_V		@ SCTLR.V := 0
> -	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
> -	ldr	r2, =vector_table
> -	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
> -
> -	mrs	r2, cpsr
> -
> -	/* first frame reserved for svc mode */
> -	set_mode_stack	UND_MODE, r0
> -	set_mode_stack	ABT_MODE, r0
> -	set_mode_stack	IRQ_MODE, r0
> -	set_mode_stack	FIQ_MODE, r0
> -
> -	msr	cpsr_cxsf, r2		@ back to svc mode
> -	isb
> -	mov	pc, lr
> +.text

Hm... now we've moved enable_vfp from .init to .text, and enable_vfp *is* called
from .init code, which doesn't fully match up with the commit message. Is the
actual reason for this change that the linker script for EFI will discard the
.init section? Maybe it's worth mentioning that in the commit message, because it
will explain this change better. Or is it to align arm with arm64, where only
start is in the .init section?

>  
>  enable_vfp:
>  	/* Enable full access to CP10 and CP11: */
> @@ -133,8 +108,6 @@ enable_vfp:
>  	vmsr	fpexc, r0
>  	mov	pc, lr
>  
> -.text
> -
>  .global get_mmu_off
>  get_mmu_off:
>  	ldr	r0, =auxinfo
> @@ -235,6 +208,39 @@ asm_mmu_disable:
>  
>  	mov     pc, lr
>  
> +/*
> + * Vectors
> + */
> +
> +.macro set_mode_stack mode, stack
> +	add	\stack, #S_FRAME_SIZE
> +	msr	cpsr_c, #(\mode | PSR_I_BIT | PSR_F_BIT)
> +	isb
> +	mov	sp, \stack
> +.endm
> +
> +exceptions_init:
> +	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
> +	bic	r2, #CR_V		@ SCTLR.V := 0
> +	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
> +	ldr	r2, =vector_table
> +	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
> +
> +	mrs	r2, cpsr
> +
> +	/*
> +	 * Input r0 is the stack top, which is the exception stacks base
> +	 * The first frame is reserved for svc mode
> +	 */
> +	set_mode_stack	UND_MODE, r0
> +	set_mode_stack	ABT_MODE, r0
> +	set_mode_stack	IRQ_MODE, r0
> +	set_mode_stack	FIQ_MODE, r0
> +
> +	msr	cpsr_cxsf, r2		@ back to svc mode
> +	isb
> +	mov	pc, lr
> +
>  /*
>   * Vector stubs
>   * Simplified version of the Linux kernel implementation
> diff --git a/arm/cstart64.S b/arm/cstart64.S
> index 0a85338bcdae..d39cf4dfb99c 100644
> --- a/arm/cstart64.S
> +++ b/arm/cstart64.S
> @@ -89,10 +89,12 @@ start:
>  	msr	cpacr_el1, x4
>  
>  	/* set up exception handling */
> +	mov	x4, x0				// x0 is the addr of the dtb

I suppose changing exceptions_init to use x0 as a scratch register instead of x4
makes some sense if you look at it from the perspective of it being called from
secondary_entry, where all the functions use x0 as a scratch register. But it's
still called from start, where using x4 as a scratch register is preferred because
of the kernel boot protocol (x0-x3 are reserved).

Is there an actual bug that this is supposed to fix (I looked for it and couldn't
figure it out) or is it just a cosmetic change?

Thanks,

Alex

>  	bl	exceptions_init
>  
>  	/* complete setup */
> -	bl	setup				// x0 is the addr of the dtb
> +	mov	x0, x4				// restore the addr of the dtb
> +	bl	setup
>  	bl	get_mmu_off
>  	cbnz	x0, 1f
>  	bl	setup_vm
> @@ -109,13 +111,6 @@ start:
>  	bl	exit
>  	b	halt
>  
> -exceptions_init:
> -	adrp	x4, vector_table
> -	add	x4, x4, :lo12:vector_table
> -	msr	vbar_el1, x4
> -	isb
> -	ret
> -
>  .text
>  
>  .globl get_mmu_off
> @@ -251,6 +246,17 @@ asm_mmu_disable:
>  
>  /*
>   * Vectors
> + */
> +
> +exceptions_init:
> +	adrp	x0, vector_table
> +	add	x0, x0, :lo12:vector_table
> +	msr	vbar_el1, x0
> +	isb
> +	ret
> +
> +/*
> + * Vector stubs
>   * Adapted from arch/arm64/kernel/entry.S
>   */
>  .macro vector_stub, name, vec

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate memory layout assumptions
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate " Andrew Jones
@ 2021-04-13 16:41   ` Nikos Nikoleris
  2021-04-14  9:03     ` Andrew Jones
  2021-04-15 16:59   ` Alexandru Elisei
  1 sibling, 1 reply; 38+ messages in thread
From: Nikos Nikoleris @ 2021-04-13 16:41 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: alexandru.elisei, andre.przywara, eric.auger

On 07/04/2021 19:59, Andrew Jones wrote:
> Keep as many memory layout assumptions as possible in init::start
> and a single setup function. This prepares us for calling setup()
> from different start functions which have been linked with different
> linker scripts. To do this, stacktop is only referenced from
> init::start, making freemem_start a parameter to setup(). We also
> split mem_init() into three parts, one that populates the mem regions
> per the DT, one that populates the mem regions per assumptions,
> and one that does the mem init. The concept of a primary region
> is dropped, but we add a sanity check for the absence of memory
> holes, because we don't know how to deal with them yet.
> 
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> ---
>   arm/cstart.S        |   4 +-
>   arm/cstart64.S      |   2 +
>   arm/flat.lds        |  23 ++++++
>   lib/arm/asm/setup.h |   8 +--
>   lib/arm/mmu.c       |   2 -
>   lib/arm/setup.c     | 165 ++++++++++++++++++++++++--------------------
>   6 files changed, 123 insertions(+), 81 deletions(-)
> 
> diff --git a/arm/cstart.S b/arm/cstart.S
> index 731f841695ce..14444124c43f 100644
> --- a/arm/cstart.S
> +++ b/arm/cstart.S
> @@ -80,7 +80,9 @@ start:
>   
>   	/* complete setup */
>   	pop	{r0-r1}
> -	bl	setup
> +	mov	r1, #0
> +	ldr	r2, =stacktop		@ r1,r2 is the base of free memory
> +	bl	setup			@ r0 is the addr of the dtb
>   
>   	/* run the test */
>   	ldr	r0, =__argc
> diff --git a/arm/cstart64.S b/arm/cstart64.S
> index add60a2b4e74..434723d4b45d 100644
> --- a/arm/cstart64.S
> +++ b/arm/cstart64.S
> @@ -94,6 +94,8 @@ start:
>   
>   	/* complete setup */
>   	mov	x0, x4				// restore the addr of the dtb
> +	adrp	x1, stacktop
> +	add	x1, x1, :lo12:stacktop		// x1 is the base of free memory
>   	bl	setup
>   
>   	/* run the test */
> diff --git a/arm/flat.lds b/arm/flat.lds
> index 6ed377c0eaa0..6fb459efb815 100644
> --- a/arm/flat.lds
> +++ b/arm/flat.lds
> @@ -1,3 +1,26 @@
> +/*
> + * init::start will pass stacktop to setup() as the base of free memory.
> + * setup() will then move the FDT and initrd to that base before calling
> + * mem_init(). With those movements and this linker script, we'll end up
> + * having the following memory layout:
> + *
> + *    +----------------------+   <-- top of physical memory
> + *    |                      |
> + *    ~                      ~
> + *    |                      |
> + *    +----------------------+   <-- top of initrd
> + *    |                      |
> + *    +----------------------+   <-- top of FDT
> + *    |                      |
> + *    +----------------------+   <-- top of cpu0's stack
> + *    |                      |
> + *    +----------------------+   <-- top of text/data/bss sections
> + *    |                      |
> + *    |                      |
> + *    +----------------------+   <-- load address
> + *    |                      |
> + *    +----------------------+   <-- physical address 0x0
> + */
>   
>   SECTIONS
>   {
> diff --git a/lib/arm/asm/setup.h b/lib/arm/asm/setup.h
> index 210c14f818fb..f0e70b119fb0 100644
> --- a/lib/arm/asm/setup.h
> +++ b/lib/arm/asm/setup.h
> @@ -13,9 +13,8 @@
>   extern u64 cpus[NR_CPUS];	/* per-cpu IDs (MPIDRs) */
>   extern int nr_cpus;
>   
> -#define MR_F_PRIMARY		(1U << 0)
> -#define MR_F_IO			(1U << 1)
> -#define MR_F_CODE		(1U << 2)
> +#define MR_F_IO			(1U << 0)
> +#define MR_F_CODE		(1U << 1)
>   #define MR_F_UNKNOWN		(1U << 31)
>   
>   struct mem_region {
> @@ -26,6 +25,7 @@ struct mem_region {
>   extern struct mem_region *mem_regions;
>   extern phys_addr_t __phys_offset, __phys_end;
>   
> +extern struct mem_region *mem_region_find(phys_addr_t paddr);
>   extern unsigned int mem_region_get_flags(phys_addr_t paddr);
>   
>   #define PHYS_OFFSET		(__phys_offset)
> @@ -35,6 +35,6 @@ extern unsigned int mem_region_get_flags(phys_addr_t paddr);
>   #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
>   #define SMP_CACHE_BYTES		L1_CACHE_BYTES
>   
> -void setup(const void *fdt);
> +void setup(const void *fdt, phys_addr_t freemem_start);
>   
>   #endif /* _ASMARM_SETUP_H_ */
> diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
> index edd2b9da809b..7cff22a12e86 100644
> --- a/lib/arm/mmu.c
> +++ b/lib/arm/mmu.c
> @@ -225,12 +225,10 @@ void *setup_mmu(phys_addr_t phys_end)
>   		if (r->flags & MR_F_IO) {
>   			continue;
>   		} else if (r->flags & MR_F_CODE) {
> -			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected code region");
>   			/* armv8 requires code shared between EL1 and EL0 to be read-only */
>   			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
>   					   __pgprot(PTE_WBWA | PTE_USER | PTE_RDONLY));
>   		} else {
> -			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected data region");
>   			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
>   					   __pgprot(PTE_WBWA | PTE_USER));
>   		}
> diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> index 9da5d24b0be9..5cda2d919d2b 100644
> --- a/lib/arm/setup.c
> +++ b/lib/arm/setup.c
> @@ -28,9 +28,9 @@
>   
>   #include "io.h"
>   
> -#define NR_INITIAL_MEM_REGIONS 16
> +#define MAX_DT_MEM_REGIONS	16
> +#define NR_EXTRA_MEM_REGIONS	16
>   
> -extern unsigned long stacktop;
>   extern unsigned long etext;
>   
>   struct timer_state __timer_state;
> @@ -41,7 +41,7 @@ u32 initrd_size;
>   u64 cpus[NR_CPUS] = { [0 ... NR_CPUS-1] = (u64)~0 };
>   int nr_cpus;
>   
> -static struct mem_region __initial_mem_regions[NR_INITIAL_MEM_REGIONS + 1];
> +static struct mem_region __initial_mem_regions[MAX_DT_MEM_REGIONS + NR_EXTRA_MEM_REGIONS];
>   struct mem_region *mem_regions = __initial_mem_regions;
>   phys_addr_t __phys_offset, __phys_end;
>   
> @@ -75,28 +75,62 @@ static void cpu_init(void)
>   	set_cpu_online(0, true);
>   }
>   
> -unsigned int mem_region_get_flags(phys_addr_t paddr)
> +static int mem_regions_next_index(void)

Wouldn't it be better if we added a mem_regions_add() function instead? 
I don't really mind this solution, but such a helper would hide the underlying 
implementation of mem_regions.

The rest of the change looks good to me. With the above comment:

Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>

>   {
>   	struct mem_region *r;
> +	int n;
>   
> -	for (r = mem_regions; r->end; ++r) {
> -		if (paddr >= r->start && paddr < r->end)
> -			return r->flags;
> +	for (r = mem_regions, n = 0; r->end; ++r, ++n)
> +		;
> +	return n;
> +}
> +
> +static void mem_regions_get_dt_regions(void)
> +{
> +	struct dt_pbus_reg regs[MAX_DT_MEM_REGIONS];
> +	int nr_regs, i, n;
> +
> +	nr_regs = dt_get_memory_params(regs, MAX_DT_MEM_REGIONS);
> +	assert(nr_regs > 0);
> +
> +	n = mem_regions_next_index();
> +
> +	for (i = 0; i < nr_regs; ++i) {
> +		struct mem_region *r = &mem_regions[n + i];
> +		r->start = regs[i].addr;
> +		r->end = regs[i].addr + regs[i].size;
>   	}
> +}
> +
> +struct mem_region *mem_region_find(phys_addr_t paddr)
> +{
> +	struct mem_region *r;
> +
> +	for (r = mem_regions; r->end; ++r)
> +		if (paddr >= r->start && paddr < r->end)
> +			return r;
> +	return NULL;
> +}
>   
> -	return MR_F_UNKNOWN;
> +unsigned int mem_region_get_flags(phys_addr_t paddr)
> +{
> +	struct mem_region *r = mem_region_find(paddr);
> +	return r ? r->flags : MR_F_UNKNOWN;
>   }
>   
> -static void mem_init(phys_addr_t freemem_start)
> +static void mem_regions_add_assumed(void)
>   {
>   	phys_addr_t code_end = (phys_addr_t)(unsigned long)&etext;
> -	struct dt_pbus_reg regs[NR_INITIAL_MEM_REGIONS];
> -	struct mem_region mem = {
> -		.start = (phys_addr_t)-1,
> -	};
> -	struct mem_region *primary = NULL;
> -	phys_addr_t base, top;
> -	int nr_regs, nr_io = 0, i;
> +	int n = mem_regions_next_index();
> +	struct mem_region mem = {0}, *r;
> +
> +	r = mem_region_find(code_end - 1);
> +	assert(r);
> +
> +	/* Split the region with the code into two regions; code and data */
> +	mem.start = code_end, mem.end = r->end;
> +	mem_regions[n++] = mem;
> +	r->end = code_end, r->flags = MR_F_CODE;
>   
>   	/*
>   	 * mach-virt I/O regions:
> @@ -104,50 +138,47 @@ static void mem_init(phys_addr_t freemem_start)
>   	 *   - 512M at 256G (arm64, arm uses highmem=off)
>   	 *   - 512G at 512G (arm64, arm uses highmem=off)
>   	 */
> -	mem_regions[nr_io++] = (struct mem_region){ 0, (1ul << 30), MR_F_IO };
> +	mem_regions[n++] = (struct mem_region){ 0, (1ul << 30), MR_F_IO };
>   #ifdef __aarch64__
> -	mem_regions[nr_io++] = (struct mem_region){ (1ul << 38), (1ul << 38) | (1ul << 29), MR_F_IO };
> -	mem_regions[nr_io++] = (struct mem_region){ (1ul << 39), (1ul << 40), MR_F_IO };
> +	mem_regions[n++] = (struct mem_region){ (1ul << 38), (1ul << 38) | (1ul << 29), MR_F_IO };
> +	mem_regions[n++] = (struct mem_region){ (1ul << 39), (1ul << 40), MR_F_IO };
>   #endif
> +}
>   
> -	nr_regs = dt_get_memory_params(regs, NR_INITIAL_MEM_REGIONS - nr_io);
> -	assert(nr_regs > 0);
> -
> -	for (i = 0; i < nr_regs; ++i) {
> -		struct mem_region *r = &mem_regions[nr_io + i];
> +static void mem_init(phys_addr_t freemem_start)
> +{
> +	phys_addr_t base, top;
> +	struct mem_region *freemem, *r, mem = {
> +		.start = (phys_addr_t)-1,
> +	};
>   
> -		r->start = regs[i].addr;
> -		r->end = regs[i].addr + regs[i].size;
> +	freemem = mem_region_find(freemem_start);
> +	assert(freemem && !(freemem->flags & (MR_F_IO | MR_F_CODE)));
>   
> -		/*
> -		 * pick the region we're in for our primary region
> -		 */
> -		if (freemem_start >= r->start && freemem_start < r->end) {
> -			r->flags |= MR_F_PRIMARY;
> -			primary = r;
> +	for (r = mem_regions; r->end; ++r) {
> +		assert(!(r->start & ~PHYS_MASK) && !((r->end - 1) & ~PHYS_MASK));
> +		if (!(r->flags & MR_F_IO)) {
> +			if (r->start < mem.start)
> +				mem.start = r->start;
> +			if (r->end > mem.end)
> +				mem.end = r->end;
>   		}
> -
> -		/*
> -		 * set the lowest and highest addresses found,
> -		 * ignoring potential gaps
> -		 */
> -		if (r->start < mem.start)
> -			mem.start = r->start;
> -		if (r->end > mem.end)
> -			mem.end = r->end;
>   	}
> -	assert(primary);
> -	assert(!(mem.start & ~PHYS_MASK) && !((mem.end - 1) & ~PHYS_MASK));
> +	assert(mem.end);
> +
> +	/* Check for holes */
> +	r = mem_region_find(mem.start);
> +	while (r && r->end != mem.end)
> +		r = mem_region_find(r->end);
> +	assert(r);
>   
> -	__phys_offset = primary->start;	/* PHYS_OFFSET */
> -	__phys_end = primary->end;	/* PHYS_END */
> +	/* Ensure our selected freemem region is somewhere in our full range */
> +	assert(freemem_start >= mem.start && freemem->end <= mem.end);
>   
> -	/* Split the primary region into two regions; code and data */
> -	mem.start = code_end, mem.end = primary->end, mem.flags = MR_F_PRIMARY;
> -	mem_regions[nr_io + i] = mem;
> -	primary->end = code_end, primary->flags |= MR_F_CODE;
> +	__phys_offset = mem.start;	/* PHYS_OFFSET */
> +	__phys_end = mem.end;		/* PHYS_END */
>   
> -	phys_alloc_init(freemem_start, __phys_end - freemem_start);
> +	phys_alloc_init(freemem_start, freemem->end - freemem_start);
>   	phys_alloc_set_minimum_alignment(SMP_CACHE_BYTES);
>   
>   	phys_alloc_get_unused(&base, &top);
> @@ -197,35 +228,17 @@ static void timer_save_state(void)
>   	__timer_state.vtimer.irq_flags = fdt32_to_cpu(data[8]);
>   }
>   
> -void setup(const void *fdt)
> +void setup(const void *fdt, phys_addr_t freemem_start)
>   {
> -	void *freemem = &stacktop;
> +	void *freemem;
>   	const char *bootargs, *tmp;
>   	u32 fdt_size;
>   	int ret;
>   
> -	/*
> -	 * Before calling mem_init we need to move the fdt and initrd
> -	 * to safe locations. We move them to construct the memory
> -	 * map illustrated below:
> -	 *
> -	 *    +----------------------+   <-- top of physical memory
> -	 *    |                      |
> -	 *    ~                      ~
> -	 *    |                      |
> -	 *    +----------------------+   <-- top of initrd
> -	 *    |                      |
> -	 *    +----------------------+   <-- top of FDT
> -	 *    |                      |
> -	 *    +----------------------+   <-- top of cpu0's stack
> -	 *    |                      |
> -	 *    +----------------------+   <-- top of text/data/bss sections,
> -	 *    |                      |       see arm/flat.lds
> -	 *    |                      |
> -	 *    +----------------------+   <-- load address
> -	 *    |                      |
> -	 *    +----------------------+
> -	 */
> +	assert(sizeof(long) == 8 || freemem_start < (3ul << 30));
> +	freemem = (void *)(unsigned long)freemem_start;
> +
> +	/* Move the FDT to the base of free memory */
>   	fdt_size = fdt_totalsize(fdt);
>   	ret = fdt_move(fdt, freemem, fdt_size);
>   	assert(ret == 0);
> @@ -233,6 +246,7 @@ void setup(const void *fdt)
>   	assert(ret == 0);
>   	freemem += fdt_size;
>   
> +	/* Move the initrd to the top of the FDT */
>   	ret = dt_get_initrd(&tmp, &initrd_size);
>   	assert(ret == 0 || ret == -FDT_ERR_NOTFOUND);
>   	if (ret == 0) {
> @@ -241,7 +255,10 @@ void setup(const void *fdt)
>   		freemem += initrd_size;
>   	}
>   
> +	mem_regions_get_dt_regions();
> +	mem_regions_add_assumed();
>   	mem_init(PAGE_ALIGN((unsigned long)freemem));
> +
>   	cpu_init();
>   
>   	/* cpu_init must be called before thread_info_init */
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 7/8] chr-testdev: Silently fail init
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 7/8] chr-testdev: Silently fail init Andrew Jones
@ 2021-04-13 16:42   ` Nikos Nikoleris
  2021-04-15 17:03   ` Alexandru Elisei
  1 sibling, 0 replies; 38+ messages in thread
From: Nikos Nikoleris @ 2021-04-13 16:42 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: alexandru.elisei, andre.przywara, eric.auger

On 07/04/2021 19:59, Andrew Jones wrote:
> If there's no virtio-console / chr-testdev configured, then the user
> probably didn't want them. Just silently fail rather than stating
> the obvious.
> 
> Signed-off-by: Andrew Jones <drjones@redhat.com>

Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>

> ---
>   lib/chr-testdev.c | 5 +----
>   1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/lib/chr-testdev.c b/lib/chr-testdev.c
> index 6890f63c8b29..b3c641a833fe 100644
> --- a/lib/chr-testdev.c
> +++ b/lib/chr-testdev.c
> @@ -54,11 +54,8 @@ void chr_testdev_init(void)
>   	int ret;
>   
>   	vcon = virtio_bind(VIRTIO_ID_CONSOLE);
> -	if (vcon == NULL) {
> -		printf("%s: %s: can't find a virtio-console\n",
> -				__func__, TESTDEV_NAME);
> +	if (vcon == NULL)
>   		return;
> -	}
>   
>   	ret = vcon->config->find_vqs(vcon, 2, vqs, NULL, io_names);
>   	if (ret < 0) {
> 

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler
  2021-04-13 16:34   ` Alexandru Elisei
@ 2021-04-14  8:59     ` Andrew Jones
  2021-04-14 15:15       ` Alexandru Elisei
  0 siblings, 1 reply; 38+ messages in thread
From: Andrew Jones @ 2021-04-14  8:59 UTC (permalink / raw)
  To: Alexandru Elisei; +Cc: kvm, nikos.nikoleris, andre.przywara, eric.auger

On Tue, Apr 13, 2021 at 05:34:24PM +0100, Alexandru Elisei wrote:
> Hi Drew,
> 
> On 4/7/21 7:59 PM, Andrew Jones wrote:
> > Move secondary_entry helper functions out of .init and into .text,
> > since secondary_entry isn't run at "init" time.
> 
> The tests aren't loaded using the loader, so as far as I can tell the reason for
> having an .init section is to make sure the code from the start label is put at
> offset 0 in the test binary. As long as the start label is kept at the beginning
> of the .init section, and the linker script places the section first, I don't see
> any issues with this change.
> 
> The only hypothetical problem that I can think of is that the code from .init
> calls code from .text, and if the text section grows very large we might end up
> with a PC offset larger than what can be encoded in the BL instruction. That's
> unlikely to happen (the offset is 16MB for arm and 64MB for arm64), and the .init
> code already calls other functions (like setup) which are in .text, so we would
> have this problem regardless of this change. And the compiler will emit an error
> if that happens.
> 
> >
> > Signed-off-by: Andrew Jones <drjones@redhat.com>
> > ---
> >  arm/cstart.S   | 62 +++++++++++++++++++++++++++-----------------------
> >  arm/cstart64.S | 22 +++++++++++-------
> >  2 files changed, 48 insertions(+), 36 deletions(-)
> >
> > diff --git a/arm/cstart.S b/arm/cstart.S
> > index d88a98362940..653ab1e8a141 100644
> > --- a/arm/cstart.S
> > +++ b/arm/cstart.S
> > @@ -96,32 +96,7 @@ start:
> >  	bl	exit
> >  	b	halt
> >  
> > -
> > -.macro set_mode_stack mode, stack
> > -	add	\stack, #S_FRAME_SIZE
> > -	msr	cpsr_c, #(\mode | PSR_I_BIT | PSR_F_BIT)
> > -	isb
> > -	mov	sp, \stack
> > -.endm
> > -
> > -exceptions_init:
> > -	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
> > -	bic	r2, #CR_V		@ SCTLR.V := 0
> > -	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
> > -	ldr	r2, =vector_table
> > -	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
> > -
> > -	mrs	r2, cpsr
> > -
> > -	/* first frame reserved for svc mode */
> > -	set_mode_stack	UND_MODE, r0
> > -	set_mode_stack	ABT_MODE, r0
> > -	set_mode_stack	IRQ_MODE, r0
> > -	set_mode_stack	FIQ_MODE, r0
> > -
> > -	msr	cpsr_cxsf, r2		@ back to svc mode
> > -	isb
> > -	mov	pc, lr
> > +.text
> 
> Hm... now we've moved enable_vfp from .init to .text, and enable_vfp *is* called
> from .init code, which doesn't fully match up with the commit message. Is the
> actual reason for this change that the linker script for EFI will discard the
> .init section? Maybe it's worth mentioning that in the commit message, because it
> will explain this change better.

Right, the .init section may not exist when linking with other linker
scripts. I'll make the commit message more clear.

> Or is it to align arm with arm64, where only
> start is in the .init section?
> 
> >  
> >  enable_vfp:
> >  	/* Enable full access to CP10 and CP11: */
> > @@ -133,8 +108,6 @@ enable_vfp:
> >  	vmsr	fpexc, r0
> >  	mov	pc, lr
> >  
> > -.text
> > -
> >  .global get_mmu_off
> >  get_mmu_off:
> >  	ldr	r0, =auxinfo
> > @@ -235,6 +208,39 @@ asm_mmu_disable:
> >  
> >  	mov     pc, lr
> >  
> > +/*
> > + * Vectors
> > + */
> > +
> > +.macro set_mode_stack mode, stack
> > +	add	\stack, #S_FRAME_SIZE
> > +	msr	cpsr_c, #(\mode | PSR_I_BIT | PSR_F_BIT)
> > +	isb
> > +	mov	sp, \stack
> > +.endm
> > +
> > +exceptions_init:
> > +	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
> > +	bic	r2, #CR_V		@ SCTLR.V := 0
> > +	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
> > +	ldr	r2, =vector_table
> > +	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
> > +
> > +	mrs	r2, cpsr
> > +
> > +	/*
> > +	 * Input r0 is the stack top, which is the exception stacks base
> > +	 * The first frame is reserved for svc mode
> > +	 */
> > +	set_mode_stack	UND_MODE, r0
> > +	set_mode_stack	ABT_MODE, r0
> > +	set_mode_stack	IRQ_MODE, r0
> > +	set_mode_stack	FIQ_MODE, r0
> > +
> > +	msr	cpsr_cxsf, r2		@ back to svc mode
> > +	isb
> > +	mov	pc, lr
> > +
> >  /*
> >   * Vector stubs
> >   * Simplified version of the Linux kernel implementation
> > diff --git a/arm/cstart64.S b/arm/cstart64.S
> > index 0a85338bcdae..d39cf4dfb99c 100644
> > --- a/arm/cstart64.S
> > +++ b/arm/cstart64.S
> > @@ -89,10 +89,12 @@ start:
> >  	msr	cpacr_el1, x4
> >  
> >  	/* set up exception handling */
> > +	mov	x4, x0				// x0 is the addr of the dtb
> 
> I suppose changing exceptions_init to use x0 as a scratch register instead of x4
> makes some sense if you look at it from the perspective of it being called from
> secondary_entry, where all the functions use x0 as a scratch register. But it's
> still called from start, where using x4 as a scratch register is preferred because
> of the kernel boot protocol (x0-x3 are reserved).
> 
> Is there an actual bug that this is supposed to fix (I looked for it and couldn't
> figure it out) or is it just a cosmetic change?

Now that exceptions_init isn't a private function of start (actually it
hasn't been in a long time, considering secondary_entry calls it) I would
like it to better conform to calling conventions. I guess I should have
used x19 here instead of x4 to be 100% correct. Or, would you rather I
just continue using x4 in exceptions_init in order to avoid the
save/restore?

Thanks,
drew


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate memory layout assumptions
  2021-04-13 16:41   ` Nikos Nikoleris
@ 2021-04-14  9:03     ` Andrew Jones
  0 siblings, 0 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-14  9:03 UTC (permalink / raw)
  To: Nikos Nikoleris; +Cc: kvm, alexandru.elisei, andre.przywara, eric.auger

On Tue, Apr 13, 2021 at 05:41:01PM +0100, Nikos Nikoleris wrote:
> On 07/04/2021 19:59, Andrew Jones wrote:
> > Keep as many memory layout assumptions as possible in init::start
> > and a single setup function. This prepares us for calling setup()
> > from different start functions which have been linked with different
> > linker scripts. To do this, stacktop is only referenced from
> > init::start, making freemem_start a parameter to setup(). We also
> > split mem_init() into three parts, one that populates the mem regions
> > per the DT, one that populates the mem regions per assumptions,
> > and one that does the mem init. The concept of a primary region
> > is dropped, but we add a sanity check for the absence of memory
> > holes, because we don't know how to deal with them yet.
> > 
> > Signed-off-by: Andrew Jones <drjones@redhat.com>
> > ---
> >   arm/cstart.S        |   4 +-
> >   arm/cstart64.S      |   2 +
> >   arm/flat.lds        |  23 ++++++
> >   lib/arm/asm/setup.h |   8 +--
> >   lib/arm/mmu.c       |   2 -
> >   lib/arm/setup.c     | 165 ++++++++++++++++++++++++--------------------
> >   6 files changed, 123 insertions(+), 81 deletions(-)
> > 
> > diff --git a/arm/cstart.S b/arm/cstart.S
> > index 731f841695ce..14444124c43f 100644
> > --- a/arm/cstart.S
> > +++ b/arm/cstart.S
> > @@ -80,7 +80,9 @@ start:
> >   	/* complete setup */
> >   	pop	{r0-r1}
> > -	bl	setup
> > +	mov	r1, #0
> > +	ldr	r2, =stacktop		@ r1,r2 is the base of free memory
> > +	bl	setup			@ r0 is the addr of the dtb
> >   	/* run the test */
> >   	ldr	r0, =__argc
> > diff --git a/arm/cstart64.S b/arm/cstart64.S
> > index add60a2b4e74..434723d4b45d 100644
> > --- a/arm/cstart64.S
> > +++ b/arm/cstart64.S
> > @@ -94,6 +94,8 @@ start:
> >   	/* complete setup */
> >   	mov	x0, x4				// restore the addr of the dtb
> > +	adrp	x1, stacktop
> > +	add	x1, x1, :lo12:stacktop		// x1 is the base of free memory
> >   	bl	setup
> >   	/* run the test */
> > diff --git a/arm/flat.lds b/arm/flat.lds
> > index 6ed377c0eaa0..6fb459efb815 100644
> > --- a/arm/flat.lds
> > +++ b/arm/flat.lds
> > @@ -1,3 +1,26 @@
> > +/*
> > + * init::start will pass stacktop to setup() as the base of free memory.
> > + * setup() will then move the FDT and initrd to that base before calling
> > + * mem_init(). With those movements and this linker script, we'll end up
> > + * having the following memory layout:
> > + *
> > + *    +----------------------+   <-- top of physical memory
> > + *    |                      |
> > + *    ~                      ~
> > + *    |                      |
> > + *    +----------------------+   <-- top of initrd
> > + *    |                      |
> > + *    +----------------------+   <-- top of FDT
> > + *    |                      |
> > + *    +----------------------+   <-- top of cpu0's stack
> > + *    |                      |
> > + *    +----------------------+   <-- top of text/data/bss sections
> > + *    |                      |
> > + *    |                      |
> > + *    +----------------------+   <-- load address
> > + *    |                      |
> > + *    +----------------------+   <-- physical address 0x0
> > + */
> >   SECTIONS
> >   {
> > diff --git a/lib/arm/asm/setup.h b/lib/arm/asm/setup.h
> > index 210c14f818fb..f0e70b119fb0 100644
> > --- a/lib/arm/asm/setup.h
> > +++ b/lib/arm/asm/setup.h
> > @@ -13,9 +13,8 @@
> >   extern u64 cpus[NR_CPUS];	/* per-cpu IDs (MPIDRs) */
> >   extern int nr_cpus;
> > -#define MR_F_PRIMARY		(1U << 0)
> > -#define MR_F_IO			(1U << 1)
> > -#define MR_F_CODE		(1U << 2)
> > +#define MR_F_IO			(1U << 0)
> > +#define MR_F_CODE		(1U << 1)
> >   #define MR_F_UNKNOWN		(1U << 31)
> >   struct mem_region {
> > @@ -26,6 +25,7 @@ struct mem_region {
> >   extern struct mem_region *mem_regions;
> >   extern phys_addr_t __phys_offset, __phys_end;
> > +extern struct mem_region *mem_region_find(phys_addr_t paddr);
> >   extern unsigned int mem_region_get_flags(phys_addr_t paddr);
> >   #define PHYS_OFFSET		(__phys_offset)
> > @@ -35,6 +35,6 @@ extern unsigned int mem_region_get_flags(phys_addr_t paddr);
> >   #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
> >   #define SMP_CACHE_BYTES		L1_CACHE_BYTES
> > -void setup(const void *fdt);
> > +void setup(const void *fdt, phys_addr_t freemem_start);
> >   #endif /* _ASMARM_SETUP_H_ */
> > diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
> > index edd2b9da809b..7cff22a12e86 100644
> > --- a/lib/arm/mmu.c
> > +++ b/lib/arm/mmu.c
> > @@ -225,12 +225,10 @@ void *setup_mmu(phys_addr_t phys_end)
> >   		if (r->flags & MR_F_IO) {
> >   			continue;
> >   		} else if (r->flags & MR_F_CODE) {
> > -			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected code region");
> >   			/* armv8 requires code shared between EL1 and EL0 to be read-only */
> >   			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
> >   					   __pgprot(PTE_WBWA | PTE_USER | PTE_RDONLY));
> >   		} else {
> > -			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected data region");
> >   			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
> >   					   __pgprot(PTE_WBWA | PTE_USER));
> >   		}
> > diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> > index 9da5d24b0be9..5cda2d919d2b 100644
> > --- a/lib/arm/setup.c
> > +++ b/lib/arm/setup.c
> > @@ -28,9 +28,9 @@
> >   #include "io.h"
> > -#define NR_INITIAL_MEM_REGIONS 16
> > +#define MAX_DT_MEM_REGIONS	16
> > +#define NR_EXTRA_MEM_REGIONS	16
> > -extern unsigned long stacktop;
> >   extern unsigned long etext;
> >   struct timer_state __timer_state;
> > @@ -41,7 +41,7 @@ u32 initrd_size;
> >   u64 cpus[NR_CPUS] = { [0 ... NR_CPUS-1] = (u64)~0 };
> >   int nr_cpus;
> > -static struct mem_region __initial_mem_regions[NR_INITIAL_MEM_REGIONS + 1];
> > +static struct mem_region __initial_mem_regions[MAX_DT_MEM_REGIONS + NR_EXTRA_MEM_REGIONS];
> >   struct mem_region *mem_regions = __initial_mem_regions;
> >   phys_addr_t __phys_offset, __phys_end;
> > @@ -75,28 +75,62 @@ static void cpu_init(void)
> >   	set_cpu_online(0, true);
> >   }
> > -unsigned int mem_region_get_flags(phys_addr_t paddr)
> > +static int mem_regions_next_index(void)
> 
> Wouldn't it be better if we added a mem_regions_add() function instead? I
> don't really mind this solution but it would hide the underlying
> implementation of mem_regions.

Hi Nikos,

I'll take a look at implementing a mem_regions_add() for v2 to see if I
can come up with something a bit nicer.

> 
> The rest of the change looks good to me. With the above comment:
> 
> Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>

Thanks!
drew

> 
> >   {
> >   	struct mem_region *r;
> > +	int n;
> > -	for (r = mem_regions; r->end; ++r) {
> > -		if (paddr >= r->start && paddr < r->end)
> > -			return r->flags;
> > +	for (r = mem_regions, n = 0; r->end; ++r, ++n)
> > +		;
> > +	return n;
> > +}
> > +
> > +static void mem_regions_get_dt_regions(void)
> > +{
> > +	struct dt_pbus_reg regs[MAX_DT_MEM_REGIONS];
> > +	int nr_regs, i, n;
> > +
> > +	nr_regs = dt_get_memory_params(regs, MAX_DT_MEM_REGIONS);
> > +	assert(nr_regs > 0);
> > +
> > +	n = mem_regions_next_index();
> > +
> > +	for (i = 0; i < nr_regs; ++i) {
> > +		struct mem_region *r = &mem_regions[n + i];
> > +		r->start = regs[i].addr;
> > +		r->end = regs[i].addr + regs[i].size;
> >   	}
> > +}
> > +
> > +struct mem_region *mem_region_find(phys_addr_t paddr)
> > +{
> > +	struct mem_region *r;
> > +
> > +	for (r = mem_regions; r->end; ++r)
> > +		if (paddr >= r->start && paddr < r->end)
> > +			return r;
> > +	return NULL;
> > +}
> > -	return MR_F_UNKNOWN;
> > +unsigned int mem_region_get_flags(phys_addr_t paddr)
> > +{
> > +	struct mem_region *r = mem_region_find(paddr);
> > +	return r ? r->flags : MR_F_UNKNOWN;
> >   }
> > -static void mem_init(phys_addr_t freemem_start)
> > +static void mem_regions_add_assumed(void)
> >   {
> >   	phys_addr_t code_end = (phys_addr_t)(unsigned long)&etext;
> > -	struct dt_pbus_reg regs[NR_INITIAL_MEM_REGIONS];
> > -	struct mem_region mem = {
> > -		.start = (phys_addr_t)-1,
> > -	};
> > -	struct mem_region *primary = NULL;
> > -	phys_addr_t base, top;
> > -	int nr_regs, nr_io = 0, i;
> > +	int n = mem_regions_next_index();
> > +	struct mem_region mem = {0}, *r;
> > +
> > +	r = mem_region_find(code_end - 1);
> > +	assert(r);
> > +
> > +	/* Split the region with the code into two regions; code and data */
> > +	mem.start = code_end, mem.end = r->end;
> > +	mem_regions[n++] = mem;
> > +	r->end = code_end, r->flags = MR_F_CODE;
> >   	/*
> >   	 * mach-virt I/O regions:
> > @@ -104,50 +138,47 @@ static void mem_init(phys_addr_t freemem_start)
> >   	 *   - 512M at 256G (arm64, arm uses highmem=off)
> >   	 *   - 512G at 512G (arm64, arm uses highmem=off)
> >   	 */
> > -	mem_regions[nr_io++] = (struct mem_region){ 0, (1ul << 30), MR_F_IO };
> > +	mem_regions[n++] = (struct mem_region){ 0, (1ul << 30), MR_F_IO };
> >   #ifdef __aarch64__
> > -	mem_regions[nr_io++] = (struct mem_region){ (1ul << 38), (1ul << 38) | (1ul << 29), MR_F_IO };
> > -	mem_regions[nr_io++] = (struct mem_region){ (1ul << 39), (1ul << 40), MR_F_IO };
> > +	mem_regions[n++] = (struct mem_region){ (1ul << 38), (1ul << 38) | (1ul << 29), MR_F_IO };
> > +	mem_regions[n++] = (struct mem_region){ (1ul << 39), (1ul << 40), MR_F_IO };
> >   #endif
> > +}
> > -	nr_regs = dt_get_memory_params(regs, NR_INITIAL_MEM_REGIONS - nr_io);
> > -	assert(nr_regs > 0);
> > -
> > -	for (i = 0; i < nr_regs; ++i) {
> > -		struct mem_region *r = &mem_regions[nr_io + i];
> > +static void mem_init(phys_addr_t freemem_start)
> > +{
> > +	phys_addr_t base, top;
> > +	struct mem_region *freemem, *r, mem = {
> > +		.start = (phys_addr_t)-1,
> > +	};
> > -		r->start = regs[i].addr;
> > -		r->end = regs[i].addr + regs[i].size;
> > +	freemem = mem_region_find(freemem_start);
> > +	assert(freemem && !(freemem->flags & (MR_F_IO | MR_F_CODE)));
> > -		/*
> > -		 * pick the region we're in for our primary region
> > -		 */
> > -		if (freemem_start >= r->start && freemem_start < r->end) {
> > -			r->flags |= MR_F_PRIMARY;
> > -			primary = r;
> > +	for (r = mem_regions; r->end; ++r) {
> > +		assert(!(r->start & ~PHYS_MASK) && !((r->end - 1) & ~PHYS_MASK));
> > +		if (!(r->flags & MR_F_IO)) {
> > +			if (r->start < mem.start)
> > +				mem.start = r->start;
> > +			if (r->end > mem.end)
> > +				mem.end = r->end;
> >   		}
> > -
> > -		/*
> > -		 * set the lowest and highest addresses found,
> > -		 * ignoring potential gaps
> > -		 */
> > -		if (r->start < mem.start)
> > -			mem.start = r->start;
> > -		if (r->end > mem.end)
> > -			mem.end = r->end;
> >   	}
> > -	assert(primary);
> > -	assert(!(mem.start & ~PHYS_MASK) && !((mem.end - 1) & ~PHYS_MASK));
> > +	assert(mem.end);
> > +
> > +	/* Check for holes */
> > +	r = mem_region_find(mem.start);
> > +	while (r && r->end != mem.end)
> > +		r = mem_region_find(r->end);
> > +	assert(r);
> > -	__phys_offset = primary->start;	/* PHYS_OFFSET */
> > -	__phys_end = primary->end;	/* PHYS_END */
> > +	/* Ensure our selected freemem region is somewhere in our full range */
> > +	assert(freemem_start >= mem.start && freemem->end <= mem.end);
> > -	/* Split the primary region into two regions; code and data */
> > -	mem.start = code_end, mem.end = primary->end, mem.flags = MR_F_PRIMARY;
> > -	mem_regions[nr_io + i] = mem;
> > -	primary->end = code_end, primary->flags |= MR_F_CODE;
> > +	__phys_offset = mem.start;	/* PHYS_OFFSET */
> > +	__phys_end = mem.end;		/* PHYS_END */
> > -	phys_alloc_init(freemem_start, __phys_end - freemem_start);
> > +	phys_alloc_init(freemem_start, freemem->end - freemem_start);
> >   	phys_alloc_set_minimum_alignment(SMP_CACHE_BYTES);
> >   	phys_alloc_get_unused(&base, &top);
> > @@ -197,35 +228,17 @@ static void timer_save_state(void)
> >   	__timer_state.vtimer.irq_flags = fdt32_to_cpu(data[8]);
> >   }
> > -void setup(const void *fdt)
> > +void setup(const void *fdt, phys_addr_t freemem_start)
> >   {
> > -	void *freemem = &stacktop;
> > +	void *freemem;
> >   	const char *bootargs, *tmp;
> >   	u32 fdt_size;
> >   	int ret;
> > -	/*
> > -	 * Before calling mem_init we need to move the fdt and initrd
> > -	 * to safe locations. We move them to construct the memory
> > -	 * map illustrated below:
> > -	 *
> > -	 *    +----------------------+   <-- top of physical memory
> > -	 *    |                      |
> > -	 *    ~                      ~
> > -	 *    |                      |
> > -	 *    +----------------------+   <-- top of initrd
> > -	 *    |                      |
> > -	 *    +----------------------+   <-- top of FDT
> > -	 *    |                      |
> > -	 *    +----------------------+   <-- top of cpu0's stack
> > -	 *    |                      |
> > -	 *    +----------------------+   <-- top of text/data/bss sections,
> > -	 *    |                      |       see arm/flat.lds
> > -	 *    |                      |
> > -	 *    +----------------------+   <-- load address
> > -	 *    |                      |
> > -	 *    +----------------------+
> > -	 */
> > +	assert(sizeof(long) == 8 || freemem_start < (3ul << 30));
> > +	freemem = (void *)(unsigned long)freemem_start;
> > +
> > +	/* Move the FDT to the base of free memory */
> >   	fdt_size = fdt_totalsize(fdt);
> >   	ret = fdt_move(fdt, freemem, fdt_size);
> >   	assert(ret == 0);
> > @@ -233,6 +246,7 @@ void setup(const void *fdt)
> >   	assert(ret == 0);
> >   	freemem += fdt_size;
> > +	/* Move the initrd to the top of the FDT */
> >   	ret = dt_get_initrd(&tmp, &initrd_size);
> >   	assert(ret == 0 || ret == -FDT_ERR_NOTFOUND);
> >   	if (ret == 0) {
> > @@ -241,7 +255,10 @@ void setup(const void *fdt)
> >   		freemem += initrd_size;
> >   	}
> > +	mem_regions_get_dt_regions();
> > +	mem_regions_add_assumed();
> >   	mem_init(PAGE_ALIGN((unsigned long)freemem));
> > +
> >   	cpu_init();
> >   	/* cpu_init must be called before thread_info_init */
> > 
> 


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 8/8] arm/arm64: psci: don't assume method is hvc
  2021-04-09 17:46   ` Nikos Nikoleris
@ 2021-04-14  9:06     ` Andrew Jones
  0 siblings, 0 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-14  9:06 UTC (permalink / raw)
  To: Nikos Nikoleris; +Cc: kvm, alexandru.elisei, andre.przywara, eric.auger

On Fri, Apr 09, 2021 at 10:46:33AM -0700, Nikos Nikoleris wrote:
> On 07/04/2021 19:59, Andrew Jones wrote:
> > The method can also be smc and it will be when running on bare metal.
> > 
> > Signed-off-by: Andrew Jones <drjones@redhat.com>
> > ---
> >   arm/selftest.c     | 34 +++++++---------------------------
> >   lib/arm/asm/psci.h |  9 +++++++--
> >   lib/arm/psci.c     | 17 +++++++++++++++--
> >   lib/arm/setup.c    | 22 ++++++++++++++++++++++
> >   4 files changed, 51 insertions(+), 31 deletions(-)
> > 
> > diff --git a/arm/selftest.c b/arm/selftest.c
> > index 4495b161cdd5..9f459ed3d571 100644
> > --- a/arm/selftest.c
> > +++ b/arm/selftest.c
> > @@ -400,33 +400,13 @@ static void check_vectors(void *arg __unused)
> >   	exit(report_summary());
> >   }
> > -static bool psci_check(void)
> > +static void psci_print(void)
> >   {
> > -	const struct fdt_property *method;
> > -	int node, len, ver;
> > -
> > -	node = fdt_node_offset_by_compatible(dt_fdt(), -1, "arm,psci-0.2");
> > -	if (node < 0) {
> > -		printf("PSCI v0.2 compatibility required\n");
> > -		return false;
> > -	}
> > -
> > -	method = fdt_get_property(dt_fdt(), node, "method", &len);
> > -	if (method == NULL) {
> > -		printf("bad psci device tree node\n");
> > -		return false;
> > -	}
> > -
> > -	if (len < 4 || strcmp(method->data, "hvc") != 0) {
> > -		printf("psci method must be hvc\n");
> > -		return false;
> > -	}
> > -
> > -	ver = psci_invoke(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
> > -	printf("PSCI version %d.%d\n", PSCI_VERSION_MAJOR(ver),
> > -				       PSCI_VERSION_MINOR(ver));
> > -
> > -	return true;
> > +	int ver = psci_invoke(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
> > +	report_info("PSCI version: %d.%d", PSCI_VERSION_MAJOR(ver),
> > +					  PSCI_VERSION_MINOR(ver));
> > +	report_info("PSCI method: %s", psci_invoke == psci_invoke_hvc ?
> > +				       "hvc" : "smc");
> >   }
> >   static void cpu_report(void *data __unused)
> > @@ -465,7 +445,7 @@ int main(int argc, char **argv)
> >   	} else if (strcmp(argv[1], "smp") == 0) {
> > -		report(psci_check(), "PSCI version");
> > +		psci_print();
> >   		on_cpus(cpu_report, NULL);
> >   		while (!cpumask_full(&ready))
> >   			cpu_relax();
> > diff --git a/lib/arm/asm/psci.h b/lib/arm/asm/psci.h
> > index 7b956bf5987d..e385ce27f5d1 100644
> > --- a/lib/arm/asm/psci.h
> > +++ b/lib/arm/asm/psci.h
> > @@ -3,8 +3,13 @@
> >   #include <libcflat.h>
> >   #include <linux/psci.h>
> > -extern int psci_invoke(unsigned long function_id, unsigned long arg0,
> > -		       unsigned long arg1, unsigned long arg2);
> > +typedef int (*psci_invoke_fn)(unsigned long function_id, unsigned long arg0,
> > +			      unsigned long arg1, unsigned long arg2);
> > +extern psci_invoke_fn psci_invoke;
> > +extern int psci_invoke_hvc(unsigned long function_id, unsigned long arg0,
> > +			   unsigned long arg1, unsigned long arg2);
> > +extern int psci_invoke_smc(unsigned long function_id, unsigned long arg0,
> > +			   unsigned long arg1, unsigned long arg2);
> >   extern int psci_cpu_on(unsigned long cpuid, unsigned long entry_point);
> >   extern void psci_system_reset(void);
> >   extern int cpu_psci_cpu_boot(unsigned int cpu);
> > diff --git a/lib/arm/psci.c b/lib/arm/psci.c
> > index 936c83948b6a..46300f30822c 100644
> > --- a/lib/arm/psci.c
> > +++ b/lib/arm/psci.c
> > @@ -11,9 +11,11 @@
> >   #include <asm/page.h>
> >   #include <asm/smp.h>
> > +psci_invoke_fn psci_invoke;
> > +
> >   __attribute__((noinline))
> > -int psci_invoke(unsigned long function_id, unsigned long arg0,
> > -		unsigned long arg1, unsigned long arg2)
> > +int psci_invoke_hvc(unsigned long function_id, unsigned long arg0,
> > +		    unsigned long arg1, unsigned long arg2)
> >   {
> >   	asm volatile(
> >   		"hvc #0"
> > @@ -22,6 +24,17 @@ int psci_invoke(unsigned long function_id, unsigned long arg0,
> >   	return function_id;
> >   }
> > +__attribute__((noinline))
> 
> Is noinline necessary? We shouldn't be calling psci_invoke_smc and
> > psci_invoke_hvc directly, it's unlikely that the compiler will have a chance
> to inline them. But I might be missing something here because I don't see
> why it was there in psci_invoke either.

The noinline ensures that function_id, arg0, arg1, arg2 are in r0-r3 without
us having to do something like

 "mov r0, %0"
 "mov r1, %1"
 "mov r2, %2"
 "mov r3, %3"

in the asm().

> 
> Otherwise Reviewed-by: Nikos Nikoleris <nikos.nikoleris@arm.com>

Thanks!
drew


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler
  2021-04-14  8:59     ` Andrew Jones
@ 2021-04-14 15:15       ` Alexandru Elisei
  2021-04-15 13:03         ` Andrew Jones
  0 siblings, 1 reply; 38+ messages in thread
From: Alexandru Elisei @ 2021-04-14 15:15 UTC (permalink / raw)
  To: Andrew Jones; +Cc: kvm, nikos.nikoleris, andre.przywara, eric.auger

Hi Drew,

On 4/14/21 9:59 AM, Andrew Jones wrote:
> On Tue, Apr 13, 2021 at 05:34:24PM +0100, Alexandru Elisei wrote:
>> Hi Drew,
>>
>> On 4/7/21 7:59 PM, Andrew Jones wrote:
>>> Move secondary_entry helper functions out of .init and into .text,
>>> since secondary_entry isn't run at "init" time.
>> The tests aren't loaded using the loader, so as far as I can tell the reason for
>> having an .init section is to make sure the code from the start label is put at
>> offset 0 in the test binary. As long as the start label is kept at the beginning
>> of the .init section, and the linker script places the section first, I don't see
>> any issues with this change.
>>
>> The only hypothetical problem that I can think of is that the code from .init
>> calls code from .text, and if the text section grows very large we might end up
>> with a PC offset larger than what can be encoded in the BL instruction. That's
>> unlikely to happen (the offset is 16MB for arm and 64MB for arm64), and the .init
>> code already calls other functions (like setup) which are in .text, so we would
>> have this problem regardless of this change. And the compiler will emit an error
>> if that happens.
>>
>>> Signed-off-by: Andrew Jones <drjones@redhat.com>
>>> ---
>>>  arm/cstart.S   | 62 +++++++++++++++++++++++++++-----------------------
>>>  arm/cstart64.S | 22 +++++++++++-------
>>>  2 files changed, 48 insertions(+), 36 deletions(-)
>>>
>>> diff --git a/arm/cstart.S b/arm/cstart.S
>>> index d88a98362940..653ab1e8a141 100644
>>> --- a/arm/cstart.S
>>> +++ b/arm/cstart.S
>>> @@ -96,32 +96,7 @@ start:
>>>  	bl	exit
>>>  	b	halt
>>>  
>>> -
>>> -.macro set_mode_stack mode, stack
>>> -	add	\stack, #S_FRAME_SIZE
>>> -	msr	cpsr_c, #(\mode | PSR_I_BIT | PSR_F_BIT)
>>> -	isb
>>> -	mov	sp, \stack
>>> -.endm
>>> -
>>> -exceptions_init:
>>> -	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
>>> -	bic	r2, #CR_V		@ SCTLR.V := 0
>>> -	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
>>> -	ldr	r2, =vector_table
>>> -	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
>>> -
>>> -	mrs	r2, cpsr
>>> -
>>> -	/* first frame reserved for svc mode */
>>> -	set_mode_stack	UND_MODE, r0
>>> -	set_mode_stack	ABT_MODE, r0
>>> -	set_mode_stack	IRQ_MODE, r0
>>> -	set_mode_stack	FIQ_MODE, r0
>>> -
>>> -	msr	cpsr_cxsf, r2		@ back to svc mode
>>> -	isb
>>> -	mov	pc, lr
>>> +.text
>> Hm... now we've moved enable_vfp from .init to .text, and enable_vfp *is* called
>> from .init code, which doesn't fully match up with the commit message. Is the
>> actual reason for this change that the linker script for EFI will discard the
>> .init section? Maybe it's worth mentioning that in the commit message, because it
>> will explain this change better.
> Right, the .init section may not exist when linking with other linker
> scripts. I'll make the commit message more clear.
>
>> Or is it to align arm with arm64, where only
>> start is in the .init section?
>>
>>>  
>>>  enable_vfp:
>>>  	/* Enable full access to CP10 and CP11: */
>>> @@ -133,8 +108,6 @@ enable_vfp:
>>>  	vmsr	fpexc, r0
>>>  	mov	pc, lr
>>>  
>>> -.text
>>> -
>>>  .global get_mmu_off
>>>  get_mmu_off:
>>>  	ldr	r0, =auxinfo
>>> @@ -235,6 +208,39 @@ asm_mmu_disable:
>>>  
>>>  	mov     pc, lr
>>>  
>>> +/*
>>> + * Vectors
>>> + */
>>> +
>>> +.macro set_mode_stack mode, stack
>>> +	add	\stack, #S_FRAME_SIZE
>>> +	msr	cpsr_c, #(\mode | PSR_I_BIT | PSR_F_BIT)
>>> +	isb
>>> +	mov	sp, \stack
>>> +.endm
>>> +
>>> +exceptions_init:
>>> +	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
>>> +	bic	r2, #CR_V		@ SCTLR.V := 0
>>> +	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
>>> +	ldr	r2, =vector_table
>>> +	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
>>> +
>>> +	mrs	r2, cpsr
>>> +
>>> +	/*
>>> +	 * Input r0 is the stack top, which is the exception stacks base
>>> +	 * The first frame is reserved for svc mode
>>> +	 */
>>> +	set_mode_stack	UND_MODE, r0
>>> +	set_mode_stack	ABT_MODE, r0
>>> +	set_mode_stack	IRQ_MODE, r0
>>> +	set_mode_stack	FIQ_MODE, r0
>>> +
>>> +	msr	cpsr_cxsf, r2		@ back to svc mode
>>> +	isb
>>> +	mov	pc, lr
>>> +
>>>  /*
>>>   * Vector stubs
>>>   * Simplified version of the Linux kernel implementation
>>> diff --git a/arm/cstart64.S b/arm/cstart64.S
>>> index 0a85338bcdae..d39cf4dfb99c 100644
>>> --- a/arm/cstart64.S
>>> +++ b/arm/cstart64.S
>>> @@ -89,10 +89,12 @@ start:
>>>  	msr	cpacr_el1, x4
>>>  
>>>  	/* set up exception handling */
>>> +	mov	x4, x0				// x0 is the addr of the dtb
>> I suppose changing exceptions_init to use x0 as a scratch register instead of x4
>> makes some sense if you look at it from the perspective of it being called from
>> secondary_entry, where all the functions use x0 as a scratch register. But it's
>> still called from start, where using x4 as a scratch register is preferred because
>> of the kernel boot protocol (x0-x3 are reserved).
>>
>> Is there an actual bug that this is supposed to fix (I looked for it and couldn't
>> figure it out) or is it just a cosmetic change?
> Now that exceptions_init isn't a private function of start (actually it
> hasn't been in a long time, considering secondary_entry calls it) I would
> like it to better conform to calling conventions. I guess I should have
> used x19 here instead of x4 to be 100% correct. Or, would you rather I
> just continue using x4 in exceptions_init in order to avoid the
> save/restore?

To be honest, for this patch, I think it would be best to leave exceptions_init
unchanged:

- We switch to using x0 like the rest of the code from secondary_entry, but
because of that we need to save and restore the DTB address from x0 in start, so I
don't think we've gained anything.
- It makes the diff larger.
- It runs the risk of introducing regressions (like all changes).

Maybe this can be left for a separate patch that changes code called from C to
follow aapcs64.

Thanks,
Alex

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 2/8] arm/arm64: Move setup_vm into setup
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 2/8] arm/arm64: Move setup_vm into setup Andrew Jones
  2021-04-09 17:24   ` Nikos Nikoleris
@ 2021-04-14 15:19   ` Alexandru Elisei
  1 sibling, 0 replies; 38+ messages in thread
From: Alexandru Elisei @ 2021-04-14 15:19 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: nikos.nikoleris, andre.przywara, eric.auger

Hi Drew,

On 4/7/21 7:59 PM, Andrew Jones wrote:
> Consolidate our setup calls to reduce the amount we need to do from
> init::start. Also remove a couple of pointless comments from setup().

This is a good idea. Looks good to me, and I compiled the code for arm and arm64,
so it must be correct:

Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>

Thanks,

Alex

>
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> ---
>  arm/cstart.S    | 6 ------
>  arm/cstart64.S  | 5 -----
>  lib/arm/setup.c | 7 +++++--
>  3 files changed, 5 insertions(+), 13 deletions(-)
>
> diff --git a/arm/cstart.S b/arm/cstart.S
> index 653ab1e8a141..731f841695ce 100644
> --- a/arm/cstart.S
> +++ b/arm/cstart.S
> @@ -81,12 +81,7 @@ start:
>  	/* complete setup */
>  	pop	{r0-r1}
>  	bl	setup
> -	bl	get_mmu_off
> -	cmp	r0, #0
> -	bne	1f
> -	bl	setup_vm
>  
> -1:
>  	/* run the test */
>  	ldr	r0, =__argc
>  	ldr	r0, [r0]
> @@ -108,7 +103,6 @@ enable_vfp:
>  	vmsr	fpexc, r0
>  	mov	pc, lr
>  
> -.global get_mmu_off
>  get_mmu_off:
>  	ldr	r0, =auxinfo
>  	ldr	r0, [r0, #4]
> diff --git a/arm/cstart64.S b/arm/cstart64.S
> index d39cf4dfb99c..add60a2b4e74 100644
> --- a/arm/cstart64.S
> +++ b/arm/cstart64.S
> @@ -95,11 +95,7 @@ start:
>  	/* complete setup */
>  	mov	x0, x4				// restore the addr of the dtb
>  	bl	setup
> -	bl	get_mmu_off
> -	cbnz	x0, 1f
> -	bl	setup_vm
>  
> -1:
>  	/* run the test */
>  	adrp	x0, __argc
>  	ldr	w0, [x0, :lo12:__argc]
> @@ -113,7 +109,6 @@ start:
>  
>  .text
>  
> -.globl get_mmu_off
>  get_mmu_off:
>  	adrp	x0, auxinfo
>  	ldr	x0, [x0, :lo12:auxinfo + 8]
> diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> index 751ba980000a..9c16f6004e9f 100644
> --- a/lib/arm/setup.c
> +++ b/lib/arm/setup.c
> @@ -16,6 +16,8 @@
>  #include <alloc.h>
>  #include <alloc_phys.h>
>  #include <alloc_page.h>
> +#include <vmalloc.h>
> +#include <auxinfo.h>
>  #include <argv.h>
>  #include <asm/thread_info.h>
>  #include <asm/setup.h>
> @@ -233,7 +235,6 @@ void setup(const void *fdt)
>  		freemem += initrd_size;
>  	}
>  
> -	/* call init functions */
>  	mem_init(PAGE_ALIGN((unsigned long)freemem));
>  	cpu_init();
>  
> @@ -243,7 +244,6 @@ void setup(const void *fdt)
>  	/* mem_init must be called before io_init */
>  	io_init();
>  
> -	/* finish setup */
>  	timer_save_state();
>  
>  	ret = dt_get_bootargs(&bootargs);
> @@ -256,4 +256,7 @@ void setup(const void *fdt)
>  		memcpy(env, initrd, initrd_size);
>  		setup_env(env, initrd_size);
>  	}
> +
> +	if (!(auxinfo.flags & AUXINFO_MMU_OFF))
> +		setup_vm();
>  }

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 4/8] arm/arm64: mmu: Stop mapping an assumed IO region
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 4/8] arm/arm64: mmu: Stop mapping an assumed IO region Andrew Jones
  2021-04-13 14:06   ` Nikos Nikoleris
@ 2021-04-14 15:42   ` Alexandru Elisei
  2021-04-15 13:09     ` Andrew Jones
  1 sibling, 1 reply; 38+ messages in thread
From: Alexandru Elisei @ 2021-04-14 15:42 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: nikos.nikoleris, andre.przywara, eric.auger

Hi Drew,

On 4/7/21 7:59 PM, Andrew Jones wrote:
> By providing a proper ioremap function, we can just rely on devices
> calling it for each region they need (as they already do) instead of
> mapping a big assumed I/O range. The persistent maps weirdness allows
> us to call setup_vm after io_init. Why don't we just call setup_vm
> before io_init, I hear you ask? Well, that's because tests like sieve
> want to start with the MMU off and later call setup_vm, and all the
> while have working I/O. Some unit tests are just really demanding...
>
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> ---
>  lib/arm/asm/io.h      |  6 ++++
>  lib/arm/asm/mmu-api.h |  1 +
>  lib/arm/asm/mmu.h     |  1 +
>  lib/arm/asm/page.h    |  2 ++
>  lib/arm/mmu.c         | 82 ++++++++++++++++++++++++++++++++++++++-----
>  lib/arm64/asm/io.h    |  6 ++++
>  lib/arm64/asm/mmu.h   |  1 +
>  lib/arm64/asm/page.h  |  2 ++
>  8 files changed, 93 insertions(+), 8 deletions(-)
>
> diff --git a/lib/arm/asm/io.h b/lib/arm/asm/io.h
> index ba3b0b2412ad..e4caa6ff5d1e 100644
> --- a/lib/arm/asm/io.h
> +++ b/lib/arm/asm/io.h
> @@ -77,6 +77,12 @@ static inline void __raw_writel(u32 val, volatile void __iomem *addr)
>  		     : "r" (val));
>  }
>  
> +#define ioremap ioremap
> +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
> +{
> +	return __ioremap(phys_addr, size);
> +}
> +
>  #define virt_to_phys virt_to_phys
>  static inline phys_addr_t virt_to_phys(const volatile void *x)
>  {
> diff --git a/lib/arm/asm/mmu-api.h b/lib/arm/asm/mmu-api.h
> index 05fc12b5afb8..b9a5a8f6b3c1 100644
> --- a/lib/arm/asm/mmu-api.h
> +++ b/lib/arm/asm/mmu-api.h
> @@ -17,6 +17,7 @@ extern void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
>  extern void mmu_set_range_ptes(pgd_t *pgtable, uintptr_t virt_offset,
>  			       phys_addr_t phys_start, phys_addr_t phys_end,
>  			       pgprot_t prot);
> +extern void mmu_set_persistent_maps(pgd_t *pgtable);
>  extern pteval_t *mmu_get_pte(pgd_t *pgtable, uintptr_t vaddr);
>  extern void mmu_clear_user(pgd_t *pgtable, unsigned long vaddr);
>  #endif
> diff --git a/lib/arm/asm/mmu.h b/lib/arm/asm/mmu.h
> index 122874b8aebe..d88a4f16df42 100644
> --- a/lib/arm/asm/mmu.h
> +++ b/lib/arm/asm/mmu.h
> @@ -12,6 +12,7 @@
>  #define PTE_SHARED		L_PTE_SHARED
>  #define PTE_AF			PTE_EXT_AF
>  #define PTE_WBWA		L_PTE_MT_WRITEALLOC
> +#define PTE_UNCACHED		L_PTE_MT_UNCACHED
>  
>  /* See B3.18.7 TLB maintenance operations */
>  
> diff --git a/lib/arm/asm/page.h b/lib/arm/asm/page.h
> index 1fb5cd26ac66..8eb4a883808e 100644
> --- a/lib/arm/asm/page.h
> +++ b/lib/arm/asm/page.h
> @@ -47,5 +47,7 @@ typedef struct { pteval_t pgprot; } pgprot_t;
>  extern phys_addr_t __virt_to_phys(unsigned long addr);
>  extern unsigned long __phys_to_virt(phys_addr_t addr);
>  
> +extern void *__ioremap(phys_addr_t phys_addr, size_t size);
> +
>  #endif /* !__ASSEMBLY__ */
>  #endif /* _ASMARM_PAGE_H_ */
> diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
> index 15eef007f256..a7b7ae51afe3 100644
> --- a/lib/arm/mmu.c
> +++ b/lib/arm/mmu.c
> @@ -11,6 +11,7 @@
>  #include <asm/mmu.h>
>  #include <asm/setup.h>
>  #include <asm/page.h>
> +#include <asm/io.h>
>  
>  #include "alloc_page.h"
>  #include "vmalloc.h"
> @@ -21,6 +22,57 @@
>  
>  extern unsigned long etext;
>  
> +#define MMU_MAX_PERSISTENT_MAPS 64
> +
> +struct mmu_persistent_map {
> +	uintptr_t virt_offset;
> +	phys_addr_t phys_start;
> +	phys_addr_t phys_end;
> +	pgprot_t prot;
> +	bool sect;
> +};
> +
> +static struct mmu_persistent_map mmu_persistent_maps[MMU_MAX_PERSISTENT_MAPS];
> +
> +static void
> +mmu_set_persistent_range(uintptr_t virt_offset, phys_addr_t phys_start,
> +			 phys_addr_t phys_end, pgprot_t prot, bool sect)
> +{
> +	int i;
> +
> +	assert(phys_end);
> +
> +	for (i = 0; i < MMU_MAX_PERSISTENT_MAPS; ++i) {
> +		if (!mmu_persistent_maps[i].phys_end)
> +			break;
> +	}
> +	assert(i < MMU_MAX_PERSISTENT_MAPS);
> +
> +	mmu_persistent_maps[i] = (struct mmu_persistent_map){
> +		.virt_offset = virt_offset,
> +		.phys_start = phys_start,
> +		.phys_end = phys_end,
> +		.prot = prot,
> +		.sect = sect,
> +	};
> +}
> +
> +void mmu_set_persistent_maps(pgd_t *pgtable)
> +{
> +	struct mmu_persistent_map *map;
> +
> +	for (map = &mmu_persistent_maps[0]; map->phys_end; ++map) {
> +		if (map->sect)
> +			mmu_set_range_sect(pgtable, map->virt_offset,
> +					   map->phys_start, map->phys_end,
> +					   map->prot);
> +		else
> +			mmu_set_range_ptes(pgtable, map->virt_offset,
> +					   map->phys_start, map->phys_end,
> +					   map->prot);
> +	}
> +}

I assume the purpose of all of this machinery is to add mappings to idmap that
were created before setup_mmu(). Or are you planning to use it for something else?

Why not allocate the idmap in __ioremap (if it's NULL) and add entries to it in
that function? Then setup_mmu() can allocate the idmap only if it's NULL, and the
mappings added by __ioremap would still be there.

Thanks,

Alex

> +
>  pgd_t *mmu_idmap;
>  
>  /* CPU 0 starts with disabled MMU */
> @@ -157,7 +209,6 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
>  void *setup_mmu(phys_addr_t phys_end)
>  {
>  	uintptr_t code_end = (uintptr_t)&etext;
> -	struct mem_region *r;
>  
>  	/* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */
>  	if (phys_end > (3ul << 30))
> @@ -172,13 +223,6 @@ void *setup_mmu(phys_addr_t phys_end)
>  
>  	mmu_idmap = alloc_page();
>  
> -	for (r = mem_regions; r->end; ++r) {
> -		if (!(r->flags & MR_F_IO))
> -			continue;
> -		mmu_set_range_sect(mmu_idmap, r->start, r->start, r->end,
> -				   __pgprot(PMD_SECT_UNCACHED | PMD_SECT_USER));
> -	}
> -
>  	/* armv8 requires code shared between EL1 and EL0 to be read-only */
>  	mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET,
>  		PHYS_OFFSET, code_end,
> @@ -188,10 +232,32 @@ void *setup_mmu(phys_addr_t phys_end)
>  		code_end, phys_end,
>  		__pgprot(PTE_WBWA | PTE_USER));
>  
> +	mmu_set_persistent_maps(mmu_idmap);
> +
>  	mmu_enable(mmu_idmap);
>  	return mmu_idmap;
>  }
>  
> +void __iomem *__ioremap(phys_addr_t phys_addr, size_t size)
> +{
> +	phys_addr_t paddr_aligned = phys_addr & PAGE_MASK;
> +	phys_addr_t paddr_end = PAGE_ALIGN(phys_addr + size);
> +	pgprot_t prot = __pgprot(PTE_UNCACHED | PTE_USER);
> +
> +	assert(sizeof(long) == 8 || !(phys_addr >> 32));
> +
> +	mmu_set_persistent_range(paddr_aligned, paddr_aligned, paddr_end,
> +				 prot, false);
> +
> +	if (mmu_enabled()) {
> +		pgd_t *pgtable = current_thread_info()->pgtable;
> +		mmu_set_range_ptes(pgtable, paddr_aligned, paddr_aligned,
> +				   paddr_end, prot);
> +	}
> +
> +	return (void __iomem *)(unsigned long)phys_addr;
> +}
> +
>  phys_addr_t __virt_to_phys(unsigned long addr)
>  {
>  	if (mmu_enabled()) {
> diff --git a/lib/arm64/asm/io.h b/lib/arm64/asm/io.h
> index e0a03b250d5b..be19f471c0fa 100644
> --- a/lib/arm64/asm/io.h
> +++ b/lib/arm64/asm/io.h
> @@ -71,6 +71,12 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
>  	return val;
>  }
>  
> +#define ioremap ioremap
> +static inline void __iomem *ioremap(phys_addr_t phys_addr, size_t size)
> +{
> +	return __ioremap(phys_addr, size);
> +}
> +
>  #define virt_to_phys virt_to_phys
>  static inline phys_addr_t virt_to_phys(const volatile void *x)
>  {
> diff --git a/lib/arm64/asm/mmu.h b/lib/arm64/asm/mmu.h
> index 72d75eafc882..72371b2d9fe3 100644
> --- a/lib/arm64/asm/mmu.h
> +++ b/lib/arm64/asm/mmu.h
> @@ -8,6 +8,7 @@
>  #include <asm/barrier.h>
>  
>  #define PMD_SECT_UNCACHED	PMD_ATTRINDX(MT_DEVICE_nGnRE)
> +#define PTE_UNCACHED		PTE_ATTRINDX(MT_DEVICE_nGnRE)
>  #define PTE_WBWA		PTE_ATTRINDX(MT_NORMAL)
>  
>  static inline void flush_tlb_all(void)
> diff --git a/lib/arm64/asm/page.h b/lib/arm64/asm/page.h
> index ae4484b22114..d0fac6ea563d 100644
> --- a/lib/arm64/asm/page.h
> +++ b/lib/arm64/asm/page.h
> @@ -72,5 +72,7 @@ typedef struct { pteval_t pgprot; } pgprot_t;
>  extern phys_addr_t __virt_to_phys(unsigned long addr);
>  extern unsigned long __phys_to_virt(phys_addr_t addr);
>  
> +extern void *__ioremap(phys_addr_t phys_addr, size_t size);
> +
>  #endif /* !__ASSEMBLY__ */
>  #endif /* _ASMARM64_PAGE_H_ */

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler
  2021-04-14 15:15       ` Alexandru Elisei
@ 2021-04-15 13:03         ` Andrew Jones
  0 siblings, 0 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-15 13:03 UTC (permalink / raw)
  To: Alexandru Elisei; +Cc: kvm, nikos.nikoleris, andre.przywara, eric.auger

On Wed, Apr 14, 2021 at 04:15:10PM +0100, Alexandru Elisei wrote:
> Hi Drew,
> 
> On 4/14/21 9:59 AM, Andrew Jones wrote:
> > On Tue, Apr 13, 2021 at 05:34:24PM +0100, Alexandru Elisei wrote:
> >> Hi Drew,
> >>
> >> On 4/7/21 7:59 PM, Andrew Jones wrote:
> >>> Move secondary_entry helper functions out of .init and into .text,
> >>> since secondary_entry isn't run at "init" time.
> >> The tests aren't loaded using the loader, so as far as I can tell the reason for
> >> having an .init section is to make sure the code from the start label is put at
> >> offset 0 in the test binary. As long as the start label is kept at the beginning
> >> of the .init section, and the linker script places the section first, I don't see
> >> any issues with this change.
> >>
> >> The only hypothetical problem that I can think of is that the code from .init
> >> calls code from .text, and if the text section grows very large we might end up
> >> with a PC offset larger than what can be encoded in the BL instruction. That's
> >> unlikely to happen (the offset is 16MB for arm and 64MB for arm64), and the .init
> >> code already calls other functions (like setup) which are in .text, so we would
> >> have this problem regardless of this change. And the compiler will emit an error
> >> if that happens.
> >>
> >>> Signed-off-by: Andrew Jones <drjones@redhat.com>
> >>> ---
> >>>  arm/cstart.S   | 62 +++++++++++++++++++++++++++-----------------------
> >>>  arm/cstart64.S | 22 +++++++++++-------
> >>>  2 files changed, 48 insertions(+), 36 deletions(-)
> >>>
> >>> diff --git a/arm/cstart.S b/arm/cstart.S
> >>> index d88a98362940..653ab1e8a141 100644
> >>> --- a/arm/cstart.S
> >>> +++ b/arm/cstart.S
> >>> @@ -96,32 +96,7 @@ start:
> >>>  	bl	exit
> >>>  	b	halt
> >>>  
> >>> -
> >>> -.macro set_mode_stack mode, stack
> >>> -	add	\stack, #S_FRAME_SIZE
> >>> -	msr	cpsr_c, #(\mode | PSR_I_BIT | PSR_F_BIT)
> >>> -	isb
> >>> -	mov	sp, \stack
> >>> -.endm
> >>> -
> >>> -exceptions_init:
> >>> -	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
> >>> -	bic	r2, #CR_V		@ SCTLR.V := 0
> >>> -	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
> >>> -	ldr	r2, =vector_table
> >>> -	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
> >>> -
> >>> -	mrs	r2, cpsr
> >>> -
> >>> -	/* first frame reserved for svc mode */
> >>> -	set_mode_stack	UND_MODE, r0
> >>> -	set_mode_stack	ABT_MODE, r0
> >>> -	set_mode_stack	IRQ_MODE, r0
> >>> -	set_mode_stack	FIQ_MODE, r0
> >>> -
> >>> -	msr	cpsr_cxsf, r2		@ back to svc mode
> >>> -	isb
> >>> -	mov	pc, lr
> >>> +.text
> >> Hm... now we've moved enable_vfp from .init to .text, and enable_vfp *is* called
> >> from .init code, which doesn't fully match up with the commit message. Is the
> >> actual reason for this change that the linker script for EFI will discard the
> >> .init section? Maybe it's worth mentioning that in the commit message, because it
> >> will explain this change better.
> > Right, the .init section may not exist when linking with other linker
> > scripts. I'll make the commit message more clear.
> >
> >> Or is it to align arm with arm64, where only
> >> start is in the .init section?
> >>
> >>>  
> >>>  enable_vfp:
> >>>  	/* Enable full access to CP10 and CP11: */
> >>> @@ -133,8 +108,6 @@ enable_vfp:
> >>>  	vmsr	fpexc, r0
> >>>  	mov	pc, lr
> >>>  
> >>> -.text
> >>> -
> >>>  .global get_mmu_off
> >>>  get_mmu_off:
> >>>  	ldr	r0, =auxinfo
> >>> @@ -235,6 +208,39 @@ asm_mmu_disable:
> >>>  
> >>>  	mov     pc, lr
> >>>  
> >>> +/*
> >>> + * Vectors
> >>> + */
> >>> +
> >>> +.macro set_mode_stack mode, stack
> >>> +	add	\stack, #S_FRAME_SIZE
> >>> +	msr	cpsr_c, #(\mode | PSR_I_BIT | PSR_F_BIT)
> >>> +	isb
> >>> +	mov	sp, \stack
> >>> +.endm
> >>> +
> >>> +exceptions_init:
> >>> +	mrc	p15, 0, r2, c1, c0, 0	@ read SCTLR
> >>> +	bic	r2, #CR_V		@ SCTLR.V := 0
> >>> +	mcr	p15, 0, r2, c1, c0, 0	@ write SCTLR
> >>> +	ldr	r2, =vector_table
> >>> +	mcr	p15, 0, r2, c12, c0, 0	@ write VBAR
> >>> +
> >>> +	mrs	r2, cpsr
> >>> +
> >>> +	/*
> >>> +	 * Input r0 is the stack top, which is the exception stacks base
> >>> +	 * The first frame is reserved for svc mode
> >>> +	 */
> >>> +	set_mode_stack	UND_MODE, r0
> >>> +	set_mode_stack	ABT_MODE, r0
> >>> +	set_mode_stack	IRQ_MODE, r0
> >>> +	set_mode_stack	FIQ_MODE, r0
> >>> +
> >>> +	msr	cpsr_cxsf, r2		@ back to svc mode
> >>> +	isb
> >>> +	mov	pc, lr
> >>> +
> >>>  /*
> >>>   * Vector stubs
> >>>   * Simplified version of the Linux kernel implementation
> >>> diff --git a/arm/cstart64.S b/arm/cstart64.S
> >>> index 0a85338bcdae..d39cf4dfb99c 100644
> >>> --- a/arm/cstart64.S
> >>> +++ b/arm/cstart64.S
> >>> @@ -89,10 +89,12 @@ start:
> >>>  	msr	cpacr_el1, x4
> >>>  
> >>>  	/* set up exception handling */
> >>> +	mov	x4, x0				// x0 is the addr of the dtb
> >> I suppose changing exceptions_init to use x0 as a scratch register instead of x4
> >> makes some sense if you look at it from the perspective of it being called from
> >> secondary_entry, where all the functions use x0 as a scratch register. But it's
> >> still called from start, where using x4 as a scratch register is preferred because
> >> of the kernel boot protocol (x0-x3 are reserved).
> >>
> >> Is there an actual bug that this is supposed to fix (I looked for it and couldn't
> >> figure it out) or is it just a cosmetic change?
> > Now that exceptions_init isn't a private function of start (actually it
> > hasn't been in a long time, considering secondary_entry calls it) I would
> > like it to better conform to calling conventions. I guess I should have
> > used x19 here instead of x4 to be 100% correct. Or, would you rather I
> > just continue using x4 in exceptions_init in order to avoid the
> > save/restore?
> 
> To be honest, for this patch, I think it would be best to leave exceptions_init
> unchanged:
> 
> - We switch to using x0 like the rest of the code from secondary_entry, but
> because of that we need to save and restore the DTB address from x0 in start, so I
> don't think we've gained anything.
> - It makes the diff larger.
> - It runs the risk of introducing regressions (like all changes).
> 
> Maybe this can be left for a separate patch that changes code called from C to
> follow aapcs64.
>

OK, I'll switch it back to x4 and add a comment.

Thanks,
drew


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 4/8] arm/arm64: mmu: Stop mapping an assumed IO region
  2021-04-14 15:42   ` Alexandru Elisei
@ 2021-04-15 13:09     ` Andrew Jones
  0 siblings, 0 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-15 13:09 UTC (permalink / raw)
  To: Alexandru Elisei; +Cc: kvm, nikos.nikoleris, andre.przywara, eric.auger

On Wed, Apr 14, 2021 at 04:42:17PM +0100, Alexandru Elisei wrote:
> On 4/7/21 7:59 PM, Andrew Jones wrote:
> > +void mmu_set_persistent_maps(pgd_t *pgtable)
> > +{
> > +	struct mmu_persistent_map *map;
> > +
> > +	for (map = &mmu_persistent_maps[0]; map->phys_end; ++map) {
> > +		if (map->sect)
> > +			mmu_set_range_sect(pgtable, map->virt_offset,
> > +					   map->phys_start, map->phys_end,
> > +					   map->prot);
> > +		else
> > +			mmu_set_range_ptes(pgtable, map->virt_offset,
> > +					   map->phys_start, map->phys_end,
> > +					   map->prot);
> > +	}
> > +}
> 
> I assume the purpose of all of this machinery is to add mappings to idmap that
> were created before setup_mmu(). Or are you planning to use it for something else?
> 
> Why not allocate the idmap in __ioremap (if it's NULL) and add entries to it in
> that function? Then setup_mmu() can allocate the idmap only if it's NULL, and the
> mappings added by __ioremap would still be there.
>

Hi Alex,

I like your suggestion and will implement it that way for v2. If we ever
do need these mappings for anything else, then we can revisit this,
possibly stashing the mappings at the same time we add them to the idmap.

Thanks,
drew


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 5/8] arm/arm64: mmu: Remove memory layout assumptions
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 5/8] arm/arm64: mmu: Remove memory layout assumptions Andrew Jones
  2021-04-13 14:27   ` Nikos Nikoleris
@ 2021-04-15 15:48   ` Alexandru Elisei
  2021-04-15 17:11     ` Andrew Jones
  1 sibling, 1 reply; 38+ messages in thread
From: Alexandru Elisei @ 2021-04-15 15:48 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: nikos.nikoleris, andre.przywara, eric.auger

Hi Drew,

On 4/7/21 7:59 PM, Andrew Jones wrote:
> Rather than making too many assumptions about the memory layout
> in mmu code, just set up the page tables per the memory regions
> (which means putting all the memory layout assumptions in setup).
> To ensure we get the right default flags set we need to split the
> primary region into two regions for code and data.
>
> We still only expect the primary regions to be present, but the
> next patch will remove that assumption too.

Nitpick, but we still make assumptions about the memory layout:

- In setup_mmu(), we limit the maximum linear address to 3GiB, but on arm64 we can
have memory starting well above that.

- In mem_init(), we still have the predefined I/O regions.

I don't know if this is a rebasing error or intentional. If it's intentional, I
think it should be mentioned in the commit message, if only to say they will be
removed in a later patch (like you do with the primary region).

>
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> ---
>  lib/arm/asm/setup.h |  1 +
>  lib/arm/mmu.c       | 26 +++++++++++++++-----------
>  lib/arm/setup.c     | 22 ++++++++++++++--------
>  3 files changed, 30 insertions(+), 19 deletions(-)
>
> diff --git a/lib/arm/asm/setup.h b/lib/arm/asm/setup.h
> index c8afb2493f8d..210c14f818fb 100644
> --- a/lib/arm/asm/setup.h
> +++ b/lib/arm/asm/setup.h
> @@ -15,6 +15,7 @@ extern int nr_cpus;
>  
>  #define MR_F_PRIMARY		(1U << 0)
>  #define MR_F_IO			(1U << 1)
> +#define MR_F_CODE		(1U << 2)
>  #define MR_F_UNKNOWN		(1U << 31)
>  
>  struct mem_region {
> diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
> index a7b7ae51afe3..edd2b9da809b 100644
> --- a/lib/arm/mmu.c
> +++ b/lib/arm/mmu.c
> @@ -20,8 +20,6 @@
>  
>  #include <linux/compiler.h>
>  
> -extern unsigned long etext;
> -
>  #define MMU_MAX_PERSISTENT_MAPS 64
>  
>  struct mmu_persistent_map {
> @@ -208,7 +206,7 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
>  
>  void *setup_mmu(phys_addr_t phys_end)
>  {
> -	uintptr_t code_end = (uintptr_t)&etext;
> +	struct mem_region *r;
>  
>  	/* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */
>  	if (phys_end > (3ul << 30))
> @@ -223,14 +221,20 @@ void *setup_mmu(phys_addr_t phys_end)
>  
>  	mmu_idmap = alloc_page();
>  
> -	/* armv8 requires code shared between EL1 and EL0 to be read-only */
> -	mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET,
> -		PHYS_OFFSET, code_end,
> -		__pgprot(PTE_WBWA | PTE_RDONLY | PTE_USER));
> -
> -	mmu_set_range_ptes(mmu_idmap, code_end,
> -		code_end, phys_end,
> -		__pgprot(PTE_WBWA | PTE_USER));
> +	for (r = mem_regions; r->end; ++r) {
> +		if (r->flags & MR_F_IO) {
> +			continue;
> +		} else if (r->flags & MR_F_CODE) {
> +			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected code region");
> +			/* armv8 requires code shared between EL1 and EL0 to be read-only */
> +			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
> +					   __pgprot(PTE_WBWA | PTE_USER | PTE_RDONLY));
> +		} else {
> +			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected data region");
> +			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
> +					   __pgprot(PTE_WBWA | PTE_USER));
> +		}
> +	}

This looks good.

>  
>  	mmu_set_persistent_maps(mmu_idmap);
>  
> diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> index 9c16f6004e9f..9da5d24b0be9 100644
> --- a/lib/arm/setup.c
> +++ b/lib/arm/setup.c
> @@ -31,6 +31,7 @@
>  #define NR_INITIAL_MEM_REGIONS 16
>  
>  extern unsigned long stacktop;
> +extern unsigned long etext;
>  
>  struct timer_state __timer_state;
>  
> @@ -88,10 +89,12 @@ unsigned int mem_region_get_flags(phys_addr_t paddr)
>  
>  static void mem_init(phys_addr_t freemem_start)
>  {
> +	phys_addr_t code_end = (phys_addr_t)(unsigned long)&etext;
>  	struct dt_pbus_reg regs[NR_INITIAL_MEM_REGIONS];
> -	struct mem_region primary, mem = {
> +	struct mem_region mem = {
>  		.start = (phys_addr_t)-1,
>  	};
> +	struct mem_region *primary = NULL;
>  	phys_addr_t base, top;
>  	int nr_regs, nr_io = 0, i;
>  
> @@ -110,8 +113,6 @@ static void mem_init(phys_addr_t freemem_start)
>  	nr_regs = dt_get_memory_params(regs, NR_INITIAL_MEM_REGIONS - nr_io);
>  	assert(nr_regs > 0);
>  
> -	primary = (struct mem_region){ 0 };
> -
>  	for (i = 0; i < nr_regs; ++i) {
>  		struct mem_region *r = &mem_regions[nr_io + i];
>  
> @@ -123,7 +124,7 @@ static void mem_init(phys_addr_t freemem_start)
>  		 */
>  		if (freemem_start >= r->start && freemem_start < r->end) {
>  			r->flags |= MR_F_PRIMARY;

Here we mark mem_regions[nr_io + i] as primary...

> -			primary = *r;
> +			primary = r;
>  		}
>  
>  		/*
> @@ -135,13 +136,18 @@ static void mem_init(phys_addr_t freemem_start)
>  		if (r->end > mem.end)
>  			mem.end = r->end;
>  	}
> -	assert(primary.end != 0);
> +	assert(primary);
>  	assert(!(mem.start & ~PHYS_MASK) && !((mem.end - 1) & ~PHYS_MASK));
>  
> -	__phys_offset = primary.start;	/* PHYS_OFFSET */
> -	__phys_end = primary.end;	/* PHYS_END */
> +	__phys_offset = primary->start;	/* PHYS_OFFSET */
> +	__phys_end = primary->end;	/* PHYS_END */
> +
> +	/* Split the primary region into two regions; code and data */
> +	mem.start = code_end, mem.end = primary->end, mem.flags = MR_F_PRIMARY;

Here we mark mem as primary...

> +	mem_regions[nr_io + i] = mem;

And then we set mem_regions[nr_io + nr_regs] to mem, which I think means we can
end up with two primary memory regions. Am I missing something?

> +	primary->end = code_end, primary->flags |= MR_F_CODE;

Please consider putting each assignment on its own line; chaining them with
commas makes the code hard to read (and, I assume, makes a mistake really easy
to miss if we ever change something).

Thanks,

Alex

>  
> -	phys_alloc_init(freemem_start, primary.end - freemem_start);
> +	phys_alloc_init(freemem_start, __phys_end - freemem_start);
>  	phys_alloc_set_minimum_alignment(SMP_CACHE_BYTES);
>  
>  	phys_alloc_get_unused(&base, &top);

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate memory layout assumptions
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate " Andrew Jones
  2021-04-13 16:41   ` Nikos Nikoleris
@ 2021-04-15 16:59   ` Alexandru Elisei
  2021-04-15 17:25     ` Andrew Jones
  1 sibling, 1 reply; 38+ messages in thread
From: Alexandru Elisei @ 2021-04-15 16:59 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: nikos.nikoleris, andre.przywara, eric.auger

Hi Drew,

On 4/7/21 7:59 PM, Andrew Jones wrote:
> Keep as much memory layout assumptions as possible in init::start
> and a single setup function. This prepares us for calling setup()
> from different start functions which have been linked with different
> linker scripts. To do this, stacktop is only referenced from
> init::start, making freemem_start a parameter to setup(). We also
> split mem_init() into three parts, one that populates the mem regions
> per the DT, one that populates the mem regions per assumptions,
> and one that does the mem init. The concept of a primary region
> is dropped, but we add a sanity check for the absence of memory
> holes, because we don't know how to deal with them yet.
>
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> ---
>  arm/cstart.S        |   4 +-
>  arm/cstart64.S      |   2 +
>  arm/flat.lds        |  23 ++++++
>  lib/arm/asm/setup.h |   8 +--
>  lib/arm/mmu.c       |   2 -
>  lib/arm/setup.c     | 165 ++++++++++++++++++++++++--------------------
>  6 files changed, 123 insertions(+), 81 deletions(-)
>
> diff --git a/arm/cstart.S b/arm/cstart.S
> index 731f841695ce..14444124c43f 100644
> --- a/arm/cstart.S
> +++ b/arm/cstart.S
> @@ -80,7 +80,9 @@ start:
>  
>  	/* complete setup */
>  	pop	{r0-r1}
> -	bl	setup
> +	mov	r1, #0

Doesn't that mean that for arm, the second argument to setup() will be 0 instead
of stacktop?

> +	ldr	r2, =stacktop		@ r1,r2 is the base of free memory
> +	bl	setup			@ r0 is the addr of the dtb
>  
>  	/* run the test */
>  	ldr	r0, =__argc
> diff --git a/arm/cstart64.S b/arm/cstart64.S
> index add60a2b4e74..434723d4b45d 100644
> --- a/arm/cstart64.S
> +++ b/arm/cstart64.S
> @@ -94,6 +94,8 @@ start:
>  
>  	/* complete setup */
>  	mov	x0, x4				// restore the addr of the dtb
> +	adrp	x1, stacktop
> +	add	x1, x1, :lo12:stacktop		// x1 is the base of free memory

I think we already have stacktop in x5.

>  	bl	setup
>  
>  	/* run the test */
> diff --git a/arm/flat.lds b/arm/flat.lds
> index 6ed377c0eaa0..6fb459efb815 100644
> --- a/arm/flat.lds
> +++ b/arm/flat.lds
> @@ -1,3 +1,26 @@
> +/*
> + * init::start will pass stacktop to setup() as the base of free memory.
> + * setup() will then move the FDT and initrd to that base before calling
> + * mem_init(). With those movements and this linker script, we'll end up
> + * having the following memory layout:
> + *
> + *    +----------------------+   <-- top of physical memory
> + *    |                      |
> + *    ~                      ~
> + *    |                      |
> + *    +----------------------+   <-- top of initrd
> + *    |                      |
> + *    +----------------------+   <-- top of FDT
> + *    |                      |
> + *    +----------------------+   <-- top of cpu0's stack
> + *    |                      |
> + *    +----------------------+   <-- top of text/data/bss sections
> + *    |                      |
> + *    |                      |
> + *    +----------------------+   <-- load address
> + *    |                      |
> + *    +----------------------+   <-- physical address 0x0
> + */
>  
>  SECTIONS
>  {
> diff --git a/lib/arm/asm/setup.h b/lib/arm/asm/setup.h
> index 210c14f818fb..f0e70b119fb0 100644
> --- a/lib/arm/asm/setup.h
> +++ b/lib/arm/asm/setup.h
> @@ -13,9 +13,8 @@
>  extern u64 cpus[NR_CPUS];	/* per-cpu IDs (MPIDRs) */
>  extern int nr_cpus;
>  
> -#define MR_F_PRIMARY		(1U << 0)
> -#define MR_F_IO			(1U << 1)
> -#define MR_F_CODE		(1U << 2)
> +#define MR_F_IO			(1U << 0)
> +#define MR_F_CODE		(1U << 1)
>  #define MR_F_UNKNOWN		(1U << 31)
>  
>  struct mem_region {
> @@ -26,6 +25,7 @@ struct mem_region {
>  extern struct mem_region *mem_regions;
>  extern phys_addr_t __phys_offset, __phys_end;
>  
> +extern struct mem_region *mem_region_find(phys_addr_t paddr);
>  extern unsigned int mem_region_get_flags(phys_addr_t paddr);
>  
>  #define PHYS_OFFSET		(__phys_offset)
> @@ -35,6 +35,6 @@ extern unsigned int mem_region_get_flags(phys_addr_t paddr);
>  #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
>  #define SMP_CACHE_BYTES		L1_CACHE_BYTES
>  
> -void setup(const void *fdt);
> +void setup(const void *fdt, phys_addr_t freemem_start);
>  
>  #endif /* _ASMARM_SETUP_H_ */
> diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
> index edd2b9da809b..7cff22a12e86 100644
> --- a/lib/arm/mmu.c
> +++ b/lib/arm/mmu.c
> @@ -225,12 +225,10 @@ void *setup_mmu(phys_addr_t phys_end)

What happens now with init_alloc_vpage? We don't make sure that 3-4GiB is not in
the linear map, and from what I can tell when allocating using vmalloc_ops we can
end up changing the VA->PA of an existing linear mapping. I think that can break
code that is already using the VA.

>  		if (r->flags & MR_F_IO) {
>  			continue;
>  		} else if (r->flags & MR_F_CODE) {
> -			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected code region");
>  			/* armv8 requires code shared between EL1 and EL0 to be read-only */
>  			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
>  					   __pgprot(PTE_WBWA | PTE_USER | PTE_RDONLY));
>  		} else {
> -			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected data region");
>  			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
>  					   __pgprot(PTE_WBWA | PTE_USER));
>  		}
> diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> index 9da5d24b0be9..5cda2d919d2b 100644
> --- a/lib/arm/setup.c
> +++ b/lib/arm/setup.c
> @@ -28,9 +28,9 @@
>  
>  #include "io.h"
>  
> -#define NR_INITIAL_MEM_REGIONS 16
> +#define MAX_DT_MEM_REGIONS	16
> +#define NR_EXTRA_MEM_REGIONS	16
>  
> -extern unsigned long stacktop;
>  extern unsigned long etext;
>  
>  struct timer_state __timer_state;
> @@ -41,7 +41,7 @@ u32 initrd_size;
>  u64 cpus[NR_CPUS] = { [0 ... NR_CPUS-1] = (u64)~0 };
>  int nr_cpus;
>  
> -static struct mem_region __initial_mem_regions[NR_INITIAL_MEM_REGIONS + 1];
> +static struct mem_region __initial_mem_regions[MAX_DT_MEM_REGIONS + NR_EXTRA_MEM_REGIONS];
>  struct mem_region *mem_regions = __initial_mem_regions;
>  phys_addr_t __phys_offset, __phys_end;
>  
> @@ -75,28 +75,62 @@ static void cpu_init(void)
>  	set_cpu_online(0, true);
>  }
>  
> -unsigned int mem_region_get_flags(phys_addr_t paddr)
> +static int mem_regions_next_index(void)
>  {
>  	struct mem_region *r;
> +	int n;
>  
> -	for (r = mem_regions; r->end; ++r) {
> -		if (paddr >= r->start && paddr < r->end)
> -			return r->flags;
> +	for (r = mem_regions, n = 0; r->end; ++r, ++n)
> +		;
> +	return n;
> +}
> +
> +static void mem_regions_get_dt_regions(void)
> +{
> +	struct dt_pbus_reg regs[MAX_DT_MEM_REGIONS];
> +	int nr_regs, i, n;
> +
> +	nr_regs = dt_get_memory_params(regs, MAX_DT_MEM_REGIONS);
> +	assert(nr_regs > 0);
> +
> +	n = mem_regions_next_index();
> +
> +	for (i = 0; i < nr_regs; ++i) {
> +		struct mem_region *r = &mem_regions[n + i];
> +		r->start = regs[i].addr;
> +		r->end = regs[i].addr + regs[i].size;
>  	}
> +}
> +
> +struct mem_region *mem_region_find(phys_addr_t paddr)
> +{
> +	struct mem_region *r;
> +
> +	for (r = mem_regions; r->end; ++r)

I guess this relies on the fact that from the DT we cannot have more than
MAX_DT_MEM_REGIONS, and from the assumed regions we have at most 5 (code + data +
3 I/O for arm64), but it looks a bit scary not checking for the bounds of a
statically allocated array. Same assumption throughout all the functions that
iterate through the array.

> +		if (paddr >= r->start && paddr < r->end)
> +			return r;
> +	return NULL;
> +}
>  
> -	return MR_F_UNKNOWN;
> +unsigned int mem_region_get_flags(phys_addr_t paddr)
> +{
> +	struct mem_region *r = mem_region_find(paddr);
> +	return r ? r->flags : MR_F_UNKNOWN;
>  }
>  
> -static void mem_init(phys_addr_t freemem_start)
> +static void mem_regions_add_assumed(void)
>  {
>  	phys_addr_t code_end = (phys_addr_t)(unsigned long)&etext;
> -	struct dt_pbus_reg regs[NR_INITIAL_MEM_REGIONS];
> -	struct mem_region mem = {
> -		.start = (phys_addr_t)-1,
> -	};
> -	struct mem_region *primary = NULL;
> -	phys_addr_t base, top;
> -	int nr_regs, nr_io = 0, i;
> +	int n = mem_regions_next_index();
> +	struct mem_region mem = {0}, *r;
> +
> +	r = mem_region_find(code_end - 1);
> +	assert(r);
> +
> +	/* Split the region with the code into two regions; code and data */
> +	mem.start = code_end, mem.end = r->end;
> +	mem_regions[n++] = mem;
> +	r->end = code_end, r->flags = MR_F_CODE;
>  
>  	/*
>  	 * mach-virt I/O regions:
> @@ -104,50 +138,47 @@ static void mem_init(phys_addr_t freemem_start)
>  	 *   - 512M at 256G (arm64, arm uses highmem=off)
>  	 *   - 512G at 512G (arm64, arm uses highmem=off)
>  	 */
> -	mem_regions[nr_io++] = (struct mem_region){ 0, (1ul << 30), MR_F_IO };
> +	mem_regions[n++] = (struct mem_region){ 0, (1ul << 30), MR_F_IO };
>  #ifdef __aarch64__
> -	mem_regions[nr_io++] = (struct mem_region){ (1ul << 38), (1ul << 38) | (1ul << 29), MR_F_IO };
> -	mem_regions[nr_io++] = (struct mem_region){ (1ul << 39), (1ul << 40), MR_F_IO };
> +	mem_regions[n++] = (struct mem_region){ (1ul << 38), (1ul << 38) | (1ul << 29), MR_F_IO };
> +	mem_regions[n++] = (struct mem_region){ (1ul << 39), (1ul << 40), MR_F_IO };
>  #endif
> +}
>  
> -	nr_regs = dt_get_memory_params(regs, NR_INITIAL_MEM_REGIONS - nr_io);
> -	assert(nr_regs > 0);
> -
> -	for (i = 0; i < nr_regs; ++i) {
> -		struct mem_region *r = &mem_regions[nr_io + i];
> +static void mem_init(phys_addr_t freemem_start)
> +{
> +	phys_addr_t base, top;
> +	struct mem_region *freemem, *r, mem = {
> +		.start = (phys_addr_t)-1,
> +	};
>  
> -		r->start = regs[i].addr;
> -		r->end = regs[i].addr + regs[i].size;
> +	freemem = mem_region_find(freemem_start);
> +	assert(freemem && !(freemem->flags & (MR_F_IO | MR_F_CODE)));
>  
> -		/*
> -		 * pick the region we're in for our primary region
> -		 */
> -		if (freemem_start >= r->start && freemem_start < r->end) {
> -			r->flags |= MR_F_PRIMARY;
> -			primary = r;
> +	for (r = mem_regions; r->end; ++r) {
> +		assert(!(r->start & ~PHYS_MASK) && !((r->end - 1) & ~PHYS_MASK));

I don't think kvm-unit-tests needs *all* available memory to be mapped in order to
function correctly. As far as I can tell, setup_mmu() will map freemem->end as
phys_end, so I think the assert is only needed for the freemem region, but I admit
I'm a bit foggy when it comes to the memory allocators.

Thanks,

Alex

> +		if (!(r->flags & MR_F_IO)) {
> +			if (r->start < mem.start)
> +				mem.start = r->start;
> +			if (r->end > mem.end)
> +				mem.end = r->end;
>  		}
> -
> -		/*
> -		 * set the lowest and highest addresses found,
> -		 * ignoring potential gaps
> -		 */
> -		if (r->start < mem.start)
> -			mem.start = r->start;
> -		if (r->end > mem.end)
> -			mem.end = r->end;
>  	}
> -	assert(primary);
> -	assert(!(mem.start & ~PHYS_MASK) && !((mem.end - 1) & ~PHYS_MASK));
> +	assert(mem.end);
> +
> +	/* Check for holes */
> +	r = mem_region_find(mem.start);
> +	while (r && r->end != mem.end)
> +		r = mem_region_find(r->end);
> +	assert(r);
>  
> -	__phys_offset = primary->start;	/* PHYS_OFFSET */
> -	__phys_end = primary->end;	/* PHYS_END */
> +	/* Ensure our selected freemem region is somewhere in our full range */
> +	assert(freemem_start >= mem.start && freemem->end <= mem.end);
>  
> -	/* Split the primary region into two regions; code and data */
> -	mem.start = code_end, mem.end = primary->end, mem.flags = MR_F_PRIMARY;
> -	mem_regions[nr_io + i] = mem;
> -	primary->end = code_end, primary->flags |= MR_F_CODE;
> +	__phys_offset = mem.start;	/* PHYS_OFFSET */
> +	__phys_end = mem.end;		/* PHYS_END */
>  
> -	phys_alloc_init(freemem_start, __phys_end - freemem_start);
> +	phys_alloc_init(freemem_start, freemem->end - freemem_start);
>  	phys_alloc_set_minimum_alignment(SMP_CACHE_BYTES);
>  
>  	phys_alloc_get_unused(&base, &top);
> @@ -197,35 +228,17 @@ static void timer_save_state(void)
>  	__timer_state.vtimer.irq_flags = fdt32_to_cpu(data[8]);
>  }
>  
> -void setup(const void *fdt)
> +void setup(const void *fdt, phys_addr_t freemem_start)
>  {
> -	void *freemem = &stacktop;
> +	void *freemem;
>  	const char *bootargs, *tmp;
>  	u32 fdt_size;
>  	int ret;
>  
> -	/*
> -	 * Before calling mem_init we need to move the fdt and initrd
> -	 * to safe locations. We move them to construct the memory
> -	 * map illustrated below:
> -	 *
> -	 *    +----------------------+   <-- top of physical memory
> -	 *    |                      |
> -	 *    ~                      ~
> -	 *    |                      |
> -	 *    +----------------------+   <-- top of initrd
> -	 *    |                      |
> -	 *    +----------------------+   <-- top of FDT
> -	 *    |                      |
> -	 *    +----------------------+   <-- top of cpu0's stack
> -	 *    |                      |
> -	 *    +----------------------+   <-- top of text/data/bss sections,
> -	 *    |                      |       see arm/flat.lds
> -	 *    |                      |
> -	 *    +----------------------+   <-- load address
> -	 *    |                      |
> -	 *    +----------------------+
> -	 */
> +	assert(sizeof(long) == 8 || freemem_start < (3ul << 30));
> +	freemem = (void *)(unsigned long)freemem_start;
> +
> +	/* Move the FDT to the base of free memory */
>  	fdt_size = fdt_totalsize(fdt);
>  	ret = fdt_move(fdt, freemem, fdt_size);
>  	assert(ret == 0);
> @@ -233,6 +246,7 @@ void setup(const void *fdt)
>  	assert(ret == 0);
>  	freemem += fdt_size;
>  
> +	/* Move the initrd to the top of the FDT */
>  	ret = dt_get_initrd(&tmp, &initrd_size);
>  	assert(ret == 0 || ret == -FDT_ERR_NOTFOUND);
>  	if (ret == 0) {
> @@ -241,7 +255,10 @@ void setup(const void *fdt)
>  		freemem += initrd_size;
>  	}
>  
> +	mem_regions_get_dt_regions();
> +	mem_regions_add_assumed();
>  	mem_init(PAGE_ALIGN((unsigned long)freemem));
> +
>  	cpu_init();
>  
>  	/* cpu_init must be called before thread_info_init */

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 7/8] chr-testdev: Silently fail init
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 7/8] chr-testdev: Silently fail init Andrew Jones
  2021-04-13 16:42   ` Nikos Nikoleris
@ 2021-04-15 17:03   ` Alexandru Elisei
  1 sibling, 0 replies; 38+ messages in thread
From: Alexandru Elisei @ 2021-04-15 17:03 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: nikos.nikoleris, andre.przywara, eric.auger

Hi Drew,

On 4/7/21 7:59 PM, Andrew Jones wrote:
> If there's no virtio-console / chr-testdev configured, then the user
> probably didn't want them. Just silently fail rather than stating
> the obvious.
>
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> ---
>  lib/chr-testdev.c | 5 +----
>  1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/lib/chr-testdev.c b/lib/chr-testdev.c
> index 6890f63c8b29..b3c641a833fe 100644
> --- a/lib/chr-testdev.c
> +++ b/lib/chr-testdev.c
> @@ -54,11 +54,8 @@ void chr_testdev_init(void)
>  	int ret;
>  
>  	vcon = virtio_bind(VIRTIO_ID_CONSOLE);
> -	if (vcon == NULL) {
> -		printf("%s: %s: can't find a virtio-console\n",
> -				__func__, TESTDEV_NAME);
> +	if (vcon == NULL)

Yes, please. This finally removes the warning when using kvmtool:

Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>

Thanks,

Alex

>  		return;
> -	}
>  
>  	ret = vcon->config->find_vqs(vcon, 2, vqs, NULL, io_names);
>  	if (ret < 0) {

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 5/8] arm/arm64: mmu: Remove memory layout assumptions
  2021-04-15 15:48   ` Alexandru Elisei
@ 2021-04-15 17:11     ` Andrew Jones
  2021-04-19 15:09       ` Alexandru Elisei
  0 siblings, 1 reply; 38+ messages in thread
From: Andrew Jones @ 2021-04-15 17:11 UTC (permalink / raw)
  To: Alexandru Elisei; +Cc: kvm, nikos.nikoleris, andre.przywara, eric.auger

On Thu, Apr 15, 2021 at 04:48:41PM +0100, Alexandru Elisei wrote:
> Hi Drew,
> 
> On 4/7/21 7:59 PM, Andrew Jones wrote:
> > Rather than making too many assumptions about the memory layout
> > in mmu code, just set up the page tables per the memory regions
> > (which means putting all the memory layout assumptions in setup).
> > To ensure we get the right default flags set we need to split the
> > primary region into two regions for code and data.
> >
> > We still only expect the primary regions to be present, but the
> > next patch will remove that assumption too.
> 
> Nitpick, but we still make assumptions about the memory layout:
> 
> - In setup_mmu(), we limit the maximum linear address to 3GiB, but on arm64 we can
> have memory starting well above that.

True. I need to try and improve that (at least the comment in setup_mmu).
For now, I may just call out that we still assume 3G-4G is available for
our vmalloc region.

> 
> - In mem_init(), we still have the predefined I/O regions.

The commit message points this out. Also, the commit summary specifies
'mmu' for the component from which we're removing the assumptions.

> 
> I don't know if this is a rebasing error or intentional. If it's intentional, I
> think it should be mentioned in the commit message, if only to say they will be
> removed in a later patch (like you do with the primary region).

We never remove all assumptions from mem setup in setup.c. We just make them
easier to bypass.

> 
> >
> > Signed-off-by: Andrew Jones <drjones@redhat.com>
> > ---
> >  lib/arm/asm/setup.h |  1 +
> >  lib/arm/mmu.c       | 26 +++++++++++++++-----------
> >  lib/arm/setup.c     | 22 ++++++++++++++--------
> >  3 files changed, 30 insertions(+), 19 deletions(-)
> >
> > diff --git a/lib/arm/asm/setup.h b/lib/arm/asm/setup.h
> > index c8afb2493f8d..210c14f818fb 100644
> > --- a/lib/arm/asm/setup.h
> > +++ b/lib/arm/asm/setup.h
> > @@ -15,6 +15,7 @@ extern int nr_cpus;
> >  
> >  #define MR_F_PRIMARY		(1U << 0)
> >  #define MR_F_IO			(1U << 1)
> > +#define MR_F_CODE		(1U << 2)
> >  #define MR_F_UNKNOWN		(1U << 31)
> >  
> >  struct mem_region {
> > diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
> > index a7b7ae51afe3..edd2b9da809b 100644
> > --- a/lib/arm/mmu.c
> > +++ b/lib/arm/mmu.c
> > @@ -20,8 +20,6 @@
> >  
> >  #include <linux/compiler.h>
> >  
> > -extern unsigned long etext;
> > -
> >  #define MMU_MAX_PERSISTENT_MAPS 64
> >  
> >  struct mmu_persistent_map {
> > @@ -208,7 +206,7 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
> >  
> >  void *setup_mmu(phys_addr_t phys_end)
> >  {
> > -	uintptr_t code_end = (uintptr_t)&etext;
> > +	struct mem_region *r;
> >  
> >  	/* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */
> >  	if (phys_end > (3ul << 30))
> > @@ -223,14 +221,20 @@ void *setup_mmu(phys_addr_t phys_end)
> >  
> >  	mmu_idmap = alloc_page();
> >  
> > -	/* armv8 requires code shared between EL1 and EL0 to be read-only */
> > -	mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET,
> > -		PHYS_OFFSET, code_end,
> > -		__pgprot(PTE_WBWA | PTE_RDONLY | PTE_USER));
> > -
> > -	mmu_set_range_ptes(mmu_idmap, code_end,
> > -		code_end, phys_end,
> > -		__pgprot(PTE_WBWA | PTE_USER));
> > +	for (r = mem_regions; r->end; ++r) {
> > +		if (r->flags & MR_F_IO) {
> > +			continue;
> > +		} else if (r->flags & MR_F_CODE) {
> > +			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected code region");
> > +			/* armv8 requires code shared between EL1 and EL0 to be read-only */
> > +			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
> > +					   __pgprot(PTE_WBWA | PTE_USER | PTE_RDONLY));
> > +		} else {
> > +			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected data region");
> > +			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
> > +					   __pgprot(PTE_WBWA | PTE_USER));
> > +		}
> > +	}
> 
> This looks good.
> 
> >  
> >  	mmu_set_persistent_maps(mmu_idmap);
> >  
> > diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> > index 9c16f6004e9f..9da5d24b0be9 100644
> > --- a/lib/arm/setup.c
> > +++ b/lib/arm/setup.c
> > @@ -31,6 +31,7 @@
> >  #define NR_INITIAL_MEM_REGIONS 16
> >  
> >  extern unsigned long stacktop;
> > +extern unsigned long etext;
> >  
> >  struct timer_state __timer_state;
> >  
> > @@ -88,10 +89,12 @@ unsigned int mem_region_get_flags(phys_addr_t paddr)
> >  
> >  static void mem_init(phys_addr_t freemem_start)
> >  {
> > +	phys_addr_t code_end = (phys_addr_t)(unsigned long)&etext;
> >  	struct dt_pbus_reg regs[NR_INITIAL_MEM_REGIONS];
> > -	struct mem_region primary, mem = {
> > +	struct mem_region mem = {
> >  		.start = (phys_addr_t)-1,
> >  	};
> > +	struct mem_region *primary = NULL;
> >  	phys_addr_t base, top;
> >  	int nr_regs, nr_io = 0, i;
> >  
> > @@ -110,8 +113,6 @@ static void mem_init(phys_addr_t freemem_start)
> >  	nr_regs = dt_get_memory_params(regs, NR_INITIAL_MEM_REGIONS - nr_io);
> >  	assert(nr_regs > 0);
> >  
> > -	primary = (struct mem_region){ 0 };
> > -
> >  	for (i = 0; i < nr_regs; ++i) {
> >  		struct mem_region *r = &mem_regions[nr_io + i];
> >  
> > @@ -123,7 +124,7 @@ static void mem_init(phys_addr_t freemem_start)
> >  		 */
> >  		if (freemem_start >= r->start && freemem_start < r->end) {
> >  			r->flags |= MR_F_PRIMARY;
> 
> Here we mark mem_regions[nr_io + i] as primary...
> 
> > -			primary = *r;
> > +			primary = r;
> >  		}
> >  
> >  		/*
> > @@ -135,13 +136,18 @@ static void mem_init(phys_addr_t freemem_start)
> >  		if (r->end > mem.end)
> >  			mem.end = r->end;
> >  	}
> > -	assert(primary.end != 0);
> > +	assert(primary);
> >  	assert(!(mem.start & ~PHYS_MASK) && !((mem.end - 1) & ~PHYS_MASK));
> >  
> > -	__phys_offset = primary.start;	/* PHYS_OFFSET */
> > -	__phys_end = primary.end;	/* PHYS_END */
> > +	__phys_offset = primary->start;	/* PHYS_OFFSET */
> > +	__phys_end = primary->end;	/* PHYS_END */
> > +
> > +	/* Split the primary region into two regions; code and data */
> > +	mem.start = code_end, mem.end = primary->end, mem.flags = MR_F_PRIMARY;
> 
> Here we mark mem as primary...

Right, mem is now 

 {
  .start = code_end,
  .end = primary->end,
  .flags = MR_F_PRIMARY
 }

> 
> > +	mem_regions[nr_io + i] = mem;
> 
> And then we set mem_regions[nr_io + nr_regs] to mem, which I think means we can
> end up with two primary memory regions. Am I missing something?
> 
> > +	primary->end = code_end, primary->flags |= MR_F_CODE;

And now primary is

 {
  .start = <the original primary start>,
  .end = code_end,
  .flags = MR_F_PRIMARY|MR_F_CODE,
 }

So there are two primary regions, one for data and one for code. Note that
we know code_end is within the boundaries of the old full primary region.
All we did was split the region into two.

> 
> Please consider putting each assignment on its own line; chaining them with
> commas makes the code hard to read (and, I assume, makes it easy to miss one
> if we ever change something).

Sure

Thanks,
drew


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate memory layout assumptions
  2021-04-15 16:59   ` Alexandru Elisei
@ 2021-04-15 17:25     ` Andrew Jones
  2021-04-19 15:56       ` Alexandru Elisei
  0 siblings, 1 reply; 38+ messages in thread
From: Andrew Jones @ 2021-04-15 17:25 UTC (permalink / raw)
  To: Alexandru Elisei; +Cc: kvm, nikos.nikoleris, andre.przywara, eric.auger

On Thu, Apr 15, 2021 at 05:59:19PM +0100, Alexandru Elisei wrote:
> Hi Drew,
> 
> On 4/7/21 7:59 PM, Andrew Jones wrote:
> > Keep as much memory layout assumptions as possible in init::start
> > and a single setup function. This prepares us for calling setup()
> > from different start functions which have been linked with different
> > linker scripts. To do this, stacktop is only referenced from
> > init::start, making freemem_start a parameter to setup(). We also
> > split mem_init() into three parts, one that populates the mem regions
> > per the DT, one that populates the mem regions per assumptions,
> > and one that does the mem init. The concept of a primary region
> > is dropped, but we add a sanity check for the absence of memory
> > holes, because we don't know how to deal with them yet.
> >
> > Signed-off-by: Andrew Jones <drjones@redhat.com>
> > ---
> >  arm/cstart.S        |   4 +-
> >  arm/cstart64.S      |   2 +
> >  arm/flat.lds        |  23 ++++++
> >  lib/arm/asm/setup.h |   8 +--
> >  lib/arm/mmu.c       |   2 -
> >  lib/arm/setup.c     | 165 ++++++++++++++++++++++++--------------------
> >  6 files changed, 123 insertions(+), 81 deletions(-)
> >
> > diff --git a/arm/cstart.S b/arm/cstart.S
> > index 731f841695ce..14444124c43f 100644
> > --- a/arm/cstart.S
> > +++ b/arm/cstart.S
> > @@ -80,7 +80,9 @@ start:
> >  
> >  	/* complete setup */
> >  	pop	{r0-r1}
> > -	bl	setup
> > +	mov	r1, #0
> 
> Doesn't that mean that for arm, the second argument to setup() will be 0 instead
> of stacktop?

The second argument is 64-bit, but we assume the upper 32 are zero.

> 
> > +	ldr	r2, =stacktop		@ r1,r2 is the base of free memory
> > +	bl	setup			@ r0 is the addr of the dtb
> >  
> >  	/* run the test */
> >  	ldr	r0, =__argc
> > diff --git a/arm/cstart64.S b/arm/cstart64.S
> > index add60a2b4e74..434723d4b45d 100644
> > --- a/arm/cstart64.S
> > +++ b/arm/cstart64.S
> > @@ -94,6 +94,8 @@ start:
> >  
> >  	/* complete setup */
> >  	mov	x0, x4				// restore the addr of the dtb
> > +	adrp	x1, stacktop
> > +	add	x1, x1, :lo12:stacktop		// x1 is the base of free memory
> 
> I think we already have stacktop in x5.

Oh yeah, ever since we added zero_range. I'll use it.

> 
> >  	bl	setup
> >  
> >  	/* run the test */
> > diff --git a/arm/flat.lds b/arm/flat.lds
> > index 6ed377c0eaa0..6fb459efb815 100644
> > --- a/arm/flat.lds
> > +++ b/arm/flat.lds
> > @@ -1,3 +1,26 @@
> > +/*
> > + * init::start will pass stacktop to setup() as the base of free memory.
> > + * setup() will then move the FDT and initrd to that base before calling
> > + * mem_init(). With those movements and this linker script, we'll end up
> > + * having the following memory layout:
> > + *
> > + *    +----------------------+   <-- top of physical memory
> > + *    |                      |
> > + *    ~                      ~
> > + *    |                      |
> > + *    +----------------------+   <-- top of initrd
> > + *    |                      |
> > + *    +----------------------+   <-- top of FDT
> > + *    |                      |
> > + *    +----------------------+   <-- top of cpu0's stack
> > + *    |                      |
> > + *    +----------------------+   <-- top of text/data/bss sections
> > + *    |                      |
> > + *    |                      |
> > + *    +----------------------+   <-- load address
> > + *    |                      |
> > + *    +----------------------+   <-- physical address 0x0
> > + */
> >  
> >  SECTIONS
> >  {
> > diff --git a/lib/arm/asm/setup.h b/lib/arm/asm/setup.h
> > index 210c14f818fb..f0e70b119fb0 100644
> > --- a/lib/arm/asm/setup.h
> > +++ b/lib/arm/asm/setup.h
> > @@ -13,9 +13,8 @@
> >  extern u64 cpus[NR_CPUS];	/* per-cpu IDs (MPIDRs) */
> >  extern int nr_cpus;
> >  
> > -#define MR_F_PRIMARY		(1U << 0)
> > -#define MR_F_IO			(1U << 1)
> > -#define MR_F_CODE		(1U << 2)
> > +#define MR_F_IO			(1U << 0)
> > +#define MR_F_CODE		(1U << 1)
> >  #define MR_F_UNKNOWN		(1U << 31)
> >  
> >  struct mem_region {
> > @@ -26,6 +25,7 @@ struct mem_region {
> >  extern struct mem_region *mem_regions;
> >  extern phys_addr_t __phys_offset, __phys_end;
> >  
> > +extern struct mem_region *mem_region_find(phys_addr_t paddr);
> >  extern unsigned int mem_region_get_flags(phys_addr_t paddr);
> >  
> >  #define PHYS_OFFSET		(__phys_offset)
> > @@ -35,6 +35,6 @@ extern unsigned int mem_region_get_flags(phys_addr_t paddr);
> >  #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
> >  #define SMP_CACHE_BYTES		L1_CACHE_BYTES
> >  
> > -void setup(const void *fdt);
> > +void setup(const void *fdt, phys_addr_t freemem_start);
> >  
> >  #endif /* _ASMARM_SETUP_H_ */
> > diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
> > index edd2b9da809b..7cff22a12e86 100644
> > --- a/lib/arm/mmu.c
> > +++ b/lib/arm/mmu.c
> > @@ -225,12 +225,10 @@ void *setup_mmu(phys_addr_t phys_end)
> 
> What happens now with init_alloc_vpage? We don't make sure that 3-4GiB is not in
> the linear map, and from what I can tell when allocating using vmalloc_ops we can
> end up changing the VA->PA of an existing linear mapping. I think that can break
> code that is already using the VA.

Yup, that's what I was referring to in my reply to the last patch. We need
to deal with this 3-4G range issue, but for this series I may just make
the assumption more clear.

> 
> >  		if (r->flags & MR_F_IO) {
> >  			continue;
> >  		} else if (r->flags & MR_F_CODE) {
> > -			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected code region");
> >  			/* armv8 requires code shared between EL1 and EL0 to be read-only */
> >  			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
> >  					   __pgprot(PTE_WBWA | PTE_USER | PTE_RDONLY));
> >  		} else {
> > -			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected data region");
> >  			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
> >  					   __pgprot(PTE_WBWA | PTE_USER));
> >  		}
> > diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> > index 9da5d24b0be9..5cda2d919d2b 100644
> > --- a/lib/arm/setup.c
> > +++ b/lib/arm/setup.c
> > @@ -28,9 +28,9 @@
> >  
> >  #include "io.h"
> >  
> > -#define NR_INITIAL_MEM_REGIONS 16
> > +#define MAX_DT_MEM_REGIONS	16
> > +#define NR_EXTRA_MEM_REGIONS	16
> >  
> > -extern unsigned long stacktop;
> >  extern unsigned long etext;
> >  
> >  struct timer_state __timer_state;
> > @@ -41,7 +41,7 @@ u32 initrd_size;
> >  u64 cpus[NR_CPUS] = { [0 ... NR_CPUS-1] = (u64)~0 };
> >  int nr_cpus;
> >  
> > -static struct mem_region __initial_mem_regions[NR_INITIAL_MEM_REGIONS + 1];
> > +static struct mem_region __initial_mem_regions[MAX_DT_MEM_REGIONS + NR_EXTRA_MEM_REGIONS];
> >  struct mem_region *mem_regions = __initial_mem_regions;
> >  phys_addr_t __phys_offset, __phys_end;
> >  
> > @@ -75,28 +75,62 @@ static void cpu_init(void)
> >  	set_cpu_online(0, true);
> >  }
> >  
> > -unsigned int mem_region_get_flags(phys_addr_t paddr)
> > +static int mem_regions_next_index(void)
> >  {
> >  	struct mem_region *r;
> > +	int n;
> >  
> > -	for (r = mem_regions; r->end; ++r) {
> > -		if (paddr >= r->start && paddr < r->end)
> > -			return r->flags;
> > +	for (r = mem_regions, n = 0; r->end; ++r, ++n)
> > +		;
> > +	return n;
> > +}
> > +
> > +static void mem_regions_get_dt_regions(void)
> > +{
> > +	struct dt_pbus_reg regs[MAX_DT_MEM_REGIONS];
> > +	int nr_regs, i, n;
> > +
> > +	nr_regs = dt_get_memory_params(regs, MAX_DT_MEM_REGIONS);
> > +	assert(nr_regs > 0);
> > +
> > +	n = mem_regions_next_index();
> > +
> > +	for (i = 0; i < nr_regs; ++i) {
> > +		struct mem_region *r = &mem_regions[n + i];
> > +		r->start = regs[i].addr;
> > +		r->end = regs[i].addr + regs[i].size;
> >  	}
> > +}
> > +
> > +struct mem_region *mem_region_find(phys_addr_t paddr)
> > +{
> > +	struct mem_region *r;
> > +
> > +	for (r = mem_regions; r->end; ++r)
> 
> I guess this relies on the fact that from the DT we cannot have more than
> MAX_DT_MEM_REGIONS, and from the assumed regions we have at most 5 (code + data +
> 3 I/O for arm64), but it looks a bit scary not checking for the bounds of a
> statically allocated array. Same assumption throughout all the functions that
> iterate through the array.

Oops, I accidentally dropped the '+ 1' in the array size that I used to
have. The '+ 1' ensures we always have a zero element at the end, allowing
r->end to always be a safe stop condition.

> 
> > +		if (paddr >= r->start && paddr < r->end)
> > +			return r;
> > +	return NULL;
> > +}
> >  
> > -	return MR_F_UNKNOWN;
> > +unsigned int mem_region_get_flags(phys_addr_t paddr)
> > +{
> > +	struct mem_region *r = mem_region_find(paddr);
> > +	return r ? r->flags : MR_F_UNKNOWN;
> >  }
> >  
> > -static void mem_init(phys_addr_t freemem_start)
> > +static void mem_regions_add_assumed(void)
> >  {
> >  	phys_addr_t code_end = (phys_addr_t)(unsigned long)&etext;
> > -	struct dt_pbus_reg regs[NR_INITIAL_MEM_REGIONS];
> > -	struct mem_region mem = {
> > -		.start = (phys_addr_t)-1,
> > -	};
> > -	struct mem_region *primary = NULL;
> > -	phys_addr_t base, top;
> > -	int nr_regs, nr_io = 0, i;
> > +	int n = mem_regions_next_index();
> > +	struct mem_region mem = {0}, *r;
> > +
> > +	r = mem_region_find(code_end - 1);
> > +	assert(r);
> > +
> > +	/* Split the region with the code into two regions; code and data */
> > +	mem.start = code_end, mem.end = r->end;
> > +	mem_regions[n++] = mem;
> > +	r->end = code_end, r->flags = MR_F_CODE;
> >  
> >  	/*
> >  	 * mach-virt I/O regions:
> > @@ -104,50 +138,47 @@ static void mem_init(phys_addr_t freemem_start)
> >  	 *   - 512M at 256G (arm64, arm uses highmem=off)
> >  	 *   - 512G at 512G (arm64, arm uses highmem=off)
> >  	 */
> > -	mem_regions[nr_io++] = (struct mem_region){ 0, (1ul << 30), MR_F_IO };
> > +	mem_regions[n++] = (struct mem_region){ 0, (1ul << 30), MR_F_IO };
> >  #ifdef __aarch64__
> > -	mem_regions[nr_io++] = (struct mem_region){ (1ul << 38), (1ul << 38) | (1ul << 29), MR_F_IO };
> > -	mem_regions[nr_io++] = (struct mem_region){ (1ul << 39), (1ul << 40), MR_F_IO };
> > +	mem_regions[n++] = (struct mem_region){ (1ul << 38), (1ul << 38) | (1ul << 29), MR_F_IO };
> > +	mem_regions[n++] = (struct mem_region){ (1ul << 39), (1ul << 40), MR_F_IO };
> >  #endif
> > +}
> >  
> > -	nr_regs = dt_get_memory_params(regs, NR_INITIAL_MEM_REGIONS - nr_io);
> > -	assert(nr_regs > 0);
> > -
> > -	for (i = 0; i < nr_regs; ++i) {
> > -		struct mem_region *r = &mem_regions[nr_io + i];
> > +static void mem_init(phys_addr_t freemem_start)
> > +{
> > +	phys_addr_t base, top;
> > +	struct mem_region *freemem, *r, mem = {
> > +		.start = (phys_addr_t)-1,
> > +	};
> >  
> > -		r->start = regs[i].addr;
> > -		r->end = regs[i].addr + regs[i].size;
> > +	freemem = mem_region_find(freemem_start);
> > +	assert(freemem && !(freemem->flags & (MR_F_IO | MR_F_CODE)));
> >  
> > -		/*
> > -		 * pick the region we're in for our primary region
> > -		 */
> > -		if (freemem_start >= r->start && freemem_start < r->end) {
> > -			r->flags |= MR_F_PRIMARY;
> > -			primary = r;
> > +	for (r = mem_regions; r->end; ++r) {
> > +		assert(!(r->start & ~PHYS_MASK) && !((r->end - 1) & ~PHYS_MASK));
> 
> I don't think kvm-unit-tests needs *all* available memory to be mapped in order to
> function correctly. As far as I can tell, setup_mmu() will map freemem->end as
> phys_end, so I think the assert is only needed for the freemem region, but I admit
> I'm a bit foggy when it comes to the memory allocators.
>

Nope, we don't need all the available memory, and we don't need to care
whether memory we don't map fits within our assumptions. I'll change the
assert to check only the region we plan to map.

Thanks,
drew


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 5/8] arm/arm64: mmu: Remove memory layout assumptions
  2021-04-15 17:11     ` Andrew Jones
@ 2021-04-19 15:09       ` Alexandru Elisei
  0 siblings, 0 replies; 38+ messages in thread
From: Alexandru Elisei @ 2021-04-19 15:09 UTC (permalink / raw)
  To: Andrew Jones; +Cc: kvm, nikos.nikoleris, andre.przywara, eric.auger

Hi Drew,

On 4/15/21 6:11 PM, Andrew Jones wrote:
> On Thu, Apr 15, 2021 at 04:48:41PM +0100, Alexandru Elisei wrote:
>> Hi Drew,
>>
>> On 4/7/21 7:59 PM, Andrew Jones wrote:
>>> Rather than making too many assumptions about the memory layout
>>> in mmu code, just set up the page tables per the memory regions
>>> (which means putting all the memory layout assumptions in setup).
>>> To ensure we get the right default flags set we need to split the
>>> primary region into two regions for code and data.
>>>
>>> We still only expect the primary regions to be present, but the
>>> next patch will remove that assumption too.
>> Nitpick, but we still make assumptions about the memory layout:
>>
>> - In setup_mmu(), we limit the maximum linear address to 3GiB, but on arm64 we can
>> have memory starting well above that.
> True. I need to try and improve that (at least the comment in setup_mmu).
> For now, I may just call out that we still assume 3G-4G is available for
> our vmalloc region.
>
>> - In mem_init(), we still have the predefined I/O regions.
> The commit message points this out. Also, the commit summary specifies
> 'mmu' for the component from which we're removing the assumptions.

You're right, I have managed to miss that.

>
>> I don't know if this is a rebasing error or intentional. If it's intentional, I
>> think it should be mentioned in the commit message, if only to say they will be
>> removed in a later patch (like you do with the primary region).
> We never remove all assumptions from mem setup in setup.c. We just make it
> easier to bypass.

Yes, same as above, I missed the fact that this commit targets only setup_mmu(),
sorry for the noise.

>
>>> Signed-off-by: Andrew Jones <drjones@redhat.com>
>>> ---
>>>  lib/arm/asm/setup.h |  1 +
>>>  lib/arm/mmu.c       | 26 +++++++++++++++-----------
>>>  lib/arm/setup.c     | 22 ++++++++++++++--------
>>>  3 files changed, 30 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/lib/arm/asm/setup.h b/lib/arm/asm/setup.h
>>> index c8afb2493f8d..210c14f818fb 100644
>>> --- a/lib/arm/asm/setup.h
>>> +++ b/lib/arm/asm/setup.h
>>> @@ -15,6 +15,7 @@ extern int nr_cpus;
>>>  
>>>  #define MR_F_PRIMARY		(1U << 0)
>>>  #define MR_F_IO			(1U << 1)
>>> +#define MR_F_CODE		(1U << 2)
>>>  #define MR_F_UNKNOWN		(1U << 31)
>>>  
>>>  struct mem_region {
>>> diff --git a/lib/arm/mmu.c b/lib/arm/mmu.c
>>> index a7b7ae51afe3..edd2b9da809b 100644
>>> --- a/lib/arm/mmu.c
>>> +++ b/lib/arm/mmu.c
>>> @@ -20,8 +20,6 @@
>>>  
>>>  #include <linux/compiler.h>
>>>  
>>> -extern unsigned long etext;
>>> -
>>>  #define MMU_MAX_PERSISTENT_MAPS 64
>>>  
>>>  struct mmu_persistent_map {
>>> @@ -208,7 +206,7 @@ void mmu_set_range_sect(pgd_t *pgtable, uintptr_t virt_offset,
>>>  
>>>  void *setup_mmu(phys_addr_t phys_end)
>>>  {
>>> -	uintptr_t code_end = (uintptr_t)&etext;
>>> +	struct mem_region *r;
>>>  
>>>  	/* 0G-1G = I/O, 1G-3G = identity, 3G-4G = vmalloc */
>>>  	if (phys_end > (3ul << 30))
>>> @@ -223,14 +221,20 @@ void *setup_mmu(phys_addr_t phys_end)
>>>  
>>>  	mmu_idmap = alloc_page();
>>>  
>>> -	/* armv8 requires code shared between EL1 and EL0 to be read-only */
>>> -	mmu_set_range_ptes(mmu_idmap, PHYS_OFFSET,
>>> -		PHYS_OFFSET, code_end,
>>> -		__pgprot(PTE_WBWA | PTE_RDONLY | PTE_USER));
>>> -
>>> -	mmu_set_range_ptes(mmu_idmap, code_end,
>>> -		code_end, phys_end,
>>> -		__pgprot(PTE_WBWA | PTE_USER));
>>> +	for (r = mem_regions; r->end; ++r) {
>>> +		if (r->flags & MR_F_IO) {
>>> +			continue;
>>> +		} else if (r->flags & MR_F_CODE) {
>>> +			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected code region");
>>> +			/* armv8 requires code shared between EL1 and EL0 to be read-only */
>>> +			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
>>> +					   __pgprot(PTE_WBWA | PTE_USER | PTE_RDONLY));
>>> +		} else {
>>> +			assert_msg(r->flags & MR_F_PRIMARY, "Unexpected data region");
>>> +			mmu_set_range_ptes(mmu_idmap, r->start, r->start, r->end,
>>> +					   __pgprot(PTE_WBWA | PTE_USER));
>>> +		}
>>> +	}
>> This looks good.
>>
>>>  
>>>  	mmu_set_persistent_maps(mmu_idmap);
>>>  
>>> diff --git a/lib/arm/setup.c b/lib/arm/setup.c
>>> index 9c16f6004e9f..9da5d24b0be9 100644
>>> --- a/lib/arm/setup.c
>>> +++ b/lib/arm/setup.c
>>> @@ -31,6 +31,7 @@
>>>  #define NR_INITIAL_MEM_REGIONS 16
>>>  
>>>  extern unsigned long stacktop;
>>> +extern unsigned long etext;
>>>  
>>>  struct timer_state __timer_state;
>>>  
>>> @@ -88,10 +89,12 @@ unsigned int mem_region_get_flags(phys_addr_t paddr)
>>>  
>>>  static void mem_init(phys_addr_t freemem_start)
>>>  {
>>> +	phys_addr_t code_end = (phys_addr_t)(unsigned long)&etext;
>>>  	struct dt_pbus_reg regs[NR_INITIAL_MEM_REGIONS];
>>> -	struct mem_region primary, mem = {
>>> +	struct mem_region mem = {
>>>  		.start = (phys_addr_t)-1,
>>>  	};
>>> +	struct mem_region *primary = NULL;
>>>  	phys_addr_t base, top;
>>>  	int nr_regs, nr_io = 0, i;
>>>  
>>> @@ -110,8 +113,6 @@ static void mem_init(phys_addr_t freemem_start)
>>>  	nr_regs = dt_get_memory_params(regs, NR_INITIAL_MEM_REGIONS - nr_io);
>>>  	assert(nr_regs > 0);
>>>  
>>> -	primary = (struct mem_region){ 0 };
>>> -
>>>  	for (i = 0; i < nr_regs; ++i) {
>>>  		struct mem_region *r = &mem_regions[nr_io + i];
>>>  
>>> @@ -123,7 +124,7 @@ static void mem_init(phys_addr_t freemem_start)
>>>  		 */
>>>  		if (freemem_start >= r->start && freemem_start < r->end) {
>>>  			r->flags |= MR_F_PRIMARY;
>> Here we mark mem_regions[nr_io + i] as primary...
>>
>>> -			primary = *r;
>>> +			primary = r;
>>>  		}
>>>  
>>>  		/*
>>> @@ -135,13 +136,18 @@ static void mem_init(phys_addr_t freemem_start)
>>>  		if (r->end > mem.end)
>>>  			mem.end = r->end;
>>>  	}
>>> -	assert(primary.end != 0);
>>> +	assert(primary);
>>>  	assert(!(mem.start & ~PHYS_MASK) && !((mem.end - 1) & ~PHYS_MASK));
>>>  
>>> -	__phys_offset = primary.start;	/* PHYS_OFFSET */
>>> -	__phys_end = primary.end;	/* PHYS_END */
>>> +	__phys_offset = primary->start;	/* PHYS_OFFSET */
>>> +	__phys_end = primary->end;	/* PHYS_END */
>>> +
>>> +	/* Split the primary region into two regions; code and data */
>>> +	mem.start = code_end, mem.end = primary->end, mem.flags = MR_F_PRIMARY;
>> Here we mark mem as primary...
> Right, mem is now 
>
>  {
>   .start = code_end,
>   .end = primary->end,
>   .flags = MR_F_PRIMARY
>  }
>
>>> +	mem_regions[nr_io + i] = mem;
>> And then we set mem_regions[nr_io + nr_regs] to mem, which I think means we can
>> end up with two primary memory regions. Am I missing something?
>>
>>> +	primary->end = code_end, primary->flags |= MR_F_CODE;
> And now primary is
>
>  {
>   .start = <the original primary start>,
>   .end = code_end,
>   .flags = MR_F_PRIMARY|MR_F_CODE,
>  }
>
> So there are two primary regions, one for data, one for code. Note that
> we know code_end is within the boundaries of the old full primary region.
> All we did was split the region into two.

Yes, you are right; my reading comprehension seems to have taken a hit lately and
I misunderstood what the code is doing. The code looks fine, now that I have
(hopefully) read it correctly. I will give it another thorough review after the
assignment changes.

Thanks,

Alex

>
>> Please consider splitting the assignments each on its own line, because it makes
>> the code so hard to read (and I assume really easy to miss if we ever change
>> something).
> Sure
>
> Thanks,
> drew
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate memory layout assumptions
  2021-04-15 17:25     ` Andrew Jones
@ 2021-04-19 15:56       ` Alexandru Elisei
  2021-04-19 15:59         ` Alexandru Elisei
  2021-04-19 17:53         ` Andrew Jones
  0 siblings, 2 replies; 38+ messages in thread
From: Alexandru Elisei @ 2021-04-19 15:56 UTC (permalink / raw)
  To: Andrew Jones; +Cc: kvm, nikos.nikoleris, andre.przywara, eric.auger

Hi Drew,

On 4/15/21 6:25 PM, Andrew Jones wrote:
> On Thu, Apr 15, 2021 at 05:59:19PM +0100, Alexandru Elisei wrote:
>> Hi Drew,
>>
>> On 4/7/21 7:59 PM, Andrew Jones wrote:
>>> Keep as much memory layout assumptions as possible in init::start
>>> and a single setup function. This prepares us for calling setup()
>>> from different start functions which have been linked with different
>>> linker scripts. To do this, stacktop is only referenced from
>>> init::start, making freemem_start a parameter to setup(). We also
>>> split mem_init() into three parts, one that populates the mem regions
>>> per the DT, one that populates the mem regions per assumptions,
>>> and one that does the mem init. The concept of a primary region
>>> is dropped, but we add a sanity check for the absence of memory
>>> holes, because we don't know how to deal with them yet.
>>>
>>> Signed-off-by: Andrew Jones <drjones@redhat.com>
>>> ---
>>>  arm/cstart.S        |   4 +-
>>>  arm/cstart64.S      |   2 +
>>>  arm/flat.lds        |  23 ++++++
>>>  lib/arm/asm/setup.h |   8 +--
>>>  lib/arm/mmu.c       |   2 -
>>>  lib/arm/setup.c     | 165 ++++++++++++++++++++++++--------------------
>>>  6 files changed, 123 insertions(+), 81 deletions(-)
>>>
>>> diff --git a/arm/cstart.S b/arm/cstart.S
>>> index 731f841695ce..14444124c43f 100644
>>> --- a/arm/cstart.S
>>> +++ b/arm/cstart.S
>>> @@ -80,7 +80,9 @@ start:
>>>  
>>>  	/* complete setup */
>>>  	pop	{r0-r1}
>>> -	bl	setup
>>> +	mov	r1, #0
>> Doesn't that mean that for arm, the second argument to setup() will be 0 instead
>> of stacktop?
> The second argument is 64-bit, but we assume the upper 32 are zero.

I didn't realize that phys_addr_t is 64bit.

According to ARM IHI 0042F, page 15:

"A double-word sized type is passed in two consecutive registers (e.g., r0 and r1,
or r2 and r3). The content of the registers is as if the value had been loaded
from memory representation with a single LDM instruction."

I think r3 should be zeroed, not r1. r2 and r3 represent the 64bit value. arm is
little endian, so the least significant 32bits will be in r2 and the most
significant bits will be in r3. I can't figure out why r3 is zero, but moving the
value 1 into r3 just before calling setup will make the assert that freemem_start
< 3GiB fail.

Thanks,

Alex


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate memory layout assumptions
  2021-04-19 15:56       ` Alexandru Elisei
@ 2021-04-19 15:59         ` Alexandru Elisei
  2021-04-19 17:53         ` Andrew Jones
  1 sibling, 0 replies; 38+ messages in thread
From: Alexandru Elisei @ 2021-04-19 15:59 UTC (permalink / raw)
  To: Andrew Jones; +Cc: kvm, nikos.nikoleris, andre.przywara, eric.auger

Hi Drew,

On 4/19/21 4:56 PM, Alexandru Elisei wrote:
> Hi Drew,
>
> On 4/15/21 6:25 PM, Andrew Jones wrote:
>> On Thu, Apr 15, 2021 at 05:59:19PM +0100, Alexandru Elisei wrote:
>>> Hi Drew,
>>>
>>> On 4/7/21 7:59 PM, Andrew Jones wrote:
>>>> Keep as much memory layout assumptions as possible in init::start
>>>> and a single setup function. This prepares us for calling setup()
>>>> from different start functions which have been linked with different
>>>> linker scripts. To do this, stacktop is only referenced from
>>>> init::start, making freemem_start a parameter to setup(). We also
>>>> split mem_init() into three parts, one that populates the mem regions
>>>> per the DT, one that populates the mem regions per assumptions,
>>>> and one that does the mem init. The concept of a primary region
>>>> is dropped, but we add a sanity check for the absence of memory
>>>> holes, because we don't know how to deal with them yet.
>>>>
>>>> Signed-off-by: Andrew Jones <drjones@redhat.com>
>>>> ---
>>>>  arm/cstart.S        |   4 +-
>>>>  arm/cstart64.S      |   2 +
>>>>  arm/flat.lds        |  23 ++++++
>>>>  lib/arm/asm/setup.h |   8 +--
>>>>  lib/arm/mmu.c       |   2 -
>>>>  lib/arm/setup.c     | 165 ++++++++++++++++++++++++--------------------
>>>>  6 files changed, 123 insertions(+), 81 deletions(-)
>>>>
>>>> diff --git a/arm/cstart.S b/arm/cstart.S
>>>> index 731f841695ce..14444124c43f 100644
>>>> --- a/arm/cstart.S
>>>> +++ b/arm/cstart.S
>>>> @@ -80,7 +80,9 @@ start:
>>>>  
>>>>  	/* complete setup */
>>>>  	pop	{r0-r1}
>>>> -	bl	setup
>>>> +	mov	r1, #0
>>> Doesn't that mean that for arm, the second argument to setup() will be 0 instead
>>> of stacktop?
>> The second argument is 64-bit, but we assume the upper 32 are zero.
> I didn't realize that phys_addr_t is 64bit.
>
> According to ARM IHI 0042F, page 15:
>
> "A double-word sized type is passed in two consecutive registers (e.g., r0 and r1,
> or r2 and r3). The content of the registers is as if the value had been loaded
> from memory representation with a single LDM instruction."
>
> I think r3 should be zeroed, not r1. r2 and r3 represent the 64bit value. arm is
> little endian, so the least significant 32bits will be in r2 and the most
> significant bits will be in r3. I can't figure out why r3 is zero, but moving the

Correction: r3 is zero because KVM zeroes the general purpose registers on vcpu reset.

Thanks,

Alex


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 8/8] arm/arm64: psci: don't assume method is hvc
  2021-04-07 18:59 ` [PATCH kvm-unit-tests 8/8] arm/arm64: psci: don't assume method is hvc Andrew Jones
  2021-04-09 17:46   ` Nikos Nikoleris
@ 2021-04-19 16:33   ` Alexandru Elisei
  2021-04-19 18:13     ` Andrew Jones
  1 sibling, 1 reply; 38+ messages in thread
From: Alexandru Elisei @ 2021-04-19 16:33 UTC (permalink / raw)
  To: Andrew Jones, kvm; +Cc: nikos.nikoleris, andre.przywara, eric.auger

Hi Drew,

On 4/7/21 7:59 PM, Andrew Jones wrote:
> The method can also be smc and it will be when running on bare metal.
>
> Signed-off-by: Andrew Jones <drjones@redhat.com>
> ---
>  arm/selftest.c     | 34 +++++++---------------------------
>  lib/arm/asm/psci.h |  9 +++++++--
>  lib/arm/psci.c     | 17 +++++++++++++++--
>  lib/arm/setup.c    | 22 ++++++++++++++++++++++
>  4 files changed, 51 insertions(+), 31 deletions(-)
>
> diff --git a/arm/selftest.c b/arm/selftest.c
> index 4495b161cdd5..9f459ed3d571 100644
> --- a/arm/selftest.c
> +++ b/arm/selftest.c
> @@ -400,33 +400,13 @@ static void check_vectors(void *arg __unused)
>  	exit(report_summary());
>  }
>  
> -static bool psci_check(void)
> +static void psci_print(void)
>  {
> -	const struct fdt_property *method;
> -	int node, len, ver;
> -
> -	node = fdt_node_offset_by_compatible(dt_fdt(), -1, "arm,psci-0.2");
> -	if (node < 0) {
> -		printf("PSCI v0.2 compatibility required\n");
> -		return false;
> -	}
> -
> -	method = fdt_get_property(dt_fdt(), node, "method", &len);
> -	if (method == NULL) {
> -		printf("bad psci device tree node\n");
> -		return false;
> -	}
> -
> -	if (len < 4 || strcmp(method->data, "hvc") != 0) {
> -		printf("psci method must be hvc\n");
> -		return false;
> -	}
> -
> -	ver = psci_invoke(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
> -	printf("PSCI version %d.%d\n", PSCI_VERSION_MAJOR(ver),
> -				       PSCI_VERSION_MINOR(ver));
> -
> -	return true;
> +	int ver = psci_invoke(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0);
> +	report_info("PSCI version: %d.%d", PSCI_VERSION_MAJOR(ver),
> +					  PSCI_VERSION_MINOR(ver));
> +	report_info("PSCI method: %s", psci_invoke == psci_invoke_hvc ?
> +				       "hvc" : "smc");
>  }
>  
>  static void cpu_report(void *data __unused)
> @@ -465,7 +445,7 @@ int main(int argc, char **argv)
>  
>  	} else if (strcmp(argv[1], "smp") == 0) {
>  
> -		report(psci_check(), "PSCI version");
> +		psci_print();
>  		on_cpus(cpu_report, NULL);
>  		while (!cpumask_full(&ready))
>  			cpu_relax();
> diff --git a/lib/arm/asm/psci.h b/lib/arm/asm/psci.h
> index 7b956bf5987d..e385ce27f5d1 100644
> --- a/lib/arm/asm/psci.h
> +++ b/lib/arm/asm/psci.h
> @@ -3,8 +3,13 @@
>  #include <libcflat.h>
>  #include <linux/psci.h>
>  
> -extern int psci_invoke(unsigned long function_id, unsigned long arg0,
> -		       unsigned long arg1, unsigned long arg2);
> +typedef int (*psci_invoke_fn)(unsigned long function_id, unsigned long arg0,

function_id is 32 bits wide, but unsigned long is 64 bits wide on arm64.

> +			      unsigned long arg1, unsigned long arg2);
> +extern psci_invoke_fn psci_invoke;
> +extern int psci_invoke_hvc(unsigned long function_id, unsigned long arg0,
> +			   unsigned long arg1, unsigned long arg2);
> +extern int psci_invoke_smc(unsigned long function_id, unsigned long arg0,
> +			   unsigned long arg1, unsigned long arg2);
>  extern int psci_cpu_on(unsigned long cpuid, unsigned long entry_point);
>  extern void psci_system_reset(void);
>  extern int cpu_psci_cpu_boot(unsigned int cpu);
> diff --git a/lib/arm/psci.c b/lib/arm/psci.c
> index 936c83948b6a..46300f30822c 100644
> --- a/lib/arm/psci.c
> +++ b/lib/arm/psci.c
> @@ -11,9 +11,11 @@
>  #include <asm/page.h>
>  #include <asm/smp.h>
>  
> +psci_invoke_fn psci_invoke;

In setup(), we set the conduit after we call assert() several times. If one of
those assert() calls fails, then psci_system_off() will end up calling a NULL
function pointer. Maybe there should be some sort of check for that?

> +
>  __attribute__((noinline))
> -int psci_invoke(unsigned long function_id, unsigned long arg0,
> -		unsigned long arg1, unsigned long arg2)
> +int psci_invoke_hvc(unsigned long function_id, unsigned long arg0,
> +		    unsigned long arg1, unsigned long arg2)
>  {
>  	asm volatile(
>  		"hvc #0"
> @@ -22,6 +24,17 @@ int psci_invoke(unsigned long function_id, unsigned long arg0,
>  	return function_id;
>  }
>  
> +__attribute__((noinline))
> +int psci_invoke_smc(unsigned long function_id, unsigned long arg0,
> +		    unsigned long arg1, unsigned long arg2)
> +{
> +	asm volatile(
> +		"smc #0"
> +	: "+r" (function_id)
> +	: "r" (arg0), "r" (arg1), "r" (arg2));
> +	return function_id;

I haven't been able to figure out what prevents the compiler from shuffling the
arguments around before executing the inline assembly, such that x0-x3 don't
contain the arguments in the order we are expecting.

Some excerpts from the extended asm help page [1] that make me believe that the
compiler doesn't provide any guarantees:

"If you must use a specific register, but your Machine Constraints do not provide
sufficient control to select the specific register you want, local register
variables may provide a solution"

"Using the generic ‘r’ constraint instead of a constraint for a specific register
allows the compiler to pick the register to use, which can result in more
efficient code."

Same with psci_invoke_hvc(). Doing both in assembly (like Linux) should be
sufficient and fairly straightforward.

[1] https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Extended-Asm

> +}
> +
>  int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
>  {
>  #ifdef __arm__
> diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> index 5cda2d919d2b..e595a9e5a167 100644
> --- a/lib/arm/setup.c
> +++ b/lib/arm/setup.c
> @@ -25,6 +25,7 @@
>  #include <asm/processor.h>
>  #include <asm/smp.h>
>  #include <asm/timer.h>
> +#include <asm/psci.h>
>  
>  #include "io.h"
>  
> @@ -55,6 +56,26 @@ int mpidr_to_cpu(uint64_t mpidr)
>  	return -1;
>  }
>  
> +static void psci_set_conduit(void)
> +{
> +	const void *fdt = dt_fdt();
> +	const struct fdt_property *method;
> +	int node, len;
> +
> +	node = fdt_node_offset_by_compatible(fdt, -1, "arm,psci-0.2");
> +	assert_msg(node >= 0, "PSCI v0.2 compatibility required");
> +
> +	method = fdt_get_property(fdt, node, "method", &len);
> +	assert(method != NULL && len == 4);
> +
> +	if (strcmp(method->data, "hvc") == 0)
> +		psci_invoke = psci_invoke_hvc;
> +	else if (strcmp(method->data, "smc") == 0)
> +		psci_invoke = psci_invoke_smc;
> +	else
> +		assert_msg(false, "Unknown PSCI conduit: %s", method->data);
> +}

Any particular reason for doing this here instead of in psci.c? This looks like
something that belongs to that file, but that might just be my personal preference.

Thanks,

Alex

> +
>  static void cpu_set(int fdtnode __unused, u64 regval, void *info __unused)
>  {
>  	int cpu = nr_cpus++;
> @@ -259,6 +280,7 @@ void setup(const void *fdt, phys_addr_t freemem_start)
>  	mem_regions_add_assumed();
>  	mem_init(PAGE_ALIGN((unsigned long)freemem));
>  
> +	psci_set_conduit();
>  	cpu_init();
>  
>  	/* cpu_init must be called before thread_info_init */

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate memory layout assumptions
  2021-04-19 15:56       ` Alexandru Elisei
  2021-04-19 15:59         ` Alexandru Elisei
@ 2021-04-19 17:53         ` Andrew Jones
  1 sibling, 0 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-19 17:53 UTC (permalink / raw)
  To: Alexandru Elisei; +Cc: kvm, nikos.nikoleris, andre.przywara, eric.auger

On Mon, Apr 19, 2021 at 04:56:33PM +0100, Alexandru Elisei wrote:
> Hi Drew,
> 
> On 4/15/21 6:25 PM, Andrew Jones wrote:
> > On Thu, Apr 15, 2021 at 05:59:19PM +0100, Alexandru Elisei wrote:
> >> Hi Drew,
> >>
> >> On 4/7/21 7:59 PM, Andrew Jones wrote:
> >>> Keep as much memory layout assumptions as possible in init::start
> >>> and a single setup function. This prepares us for calling setup()
> >>> from different start functions which have been linked with different
> >>> linker scripts. To do this, stacktop is only referenced from
> >>> init::start, making freemem_start a parameter to setup(). We also
> >>> split mem_init() into three parts, one that populates the mem regions
> >>> per the DT, one that populates the mem regions per assumptions,
> >>> and one that does the mem init. The concept of a primary region
> >>> is dropped, but we add a sanity check for the absence of memory
> >>> holes, because we don't know how to deal with them yet.
> >>>
> >>> Signed-off-by: Andrew Jones <drjones@redhat.com>
> >>> ---
> >>>  arm/cstart.S        |   4 +-
> >>>  arm/cstart64.S      |   2 +
> >>>  arm/flat.lds        |  23 ++++++
> >>>  lib/arm/asm/setup.h |   8 +--
> >>>  lib/arm/mmu.c       |   2 -
> >>>  lib/arm/setup.c     | 165 ++++++++++++++++++++++++--------------------
> >>>  6 files changed, 123 insertions(+), 81 deletions(-)
> >>>
> >>> diff --git a/arm/cstart.S b/arm/cstart.S
> >>> index 731f841695ce..14444124c43f 100644
> >>> --- a/arm/cstart.S
> >>> +++ b/arm/cstart.S
> >>> @@ -80,7 +80,9 @@ start:
> >>>  
> >>>  	/* complete setup */
> >>>  	pop	{r0-r1}
> >>> -	bl	setup
> >>> +	mov	r1, #0
> >> Doesn't that mean that for arm, the second argument to setup() will be 0 instead
> >> of stacktop?
> > The second argument is 64-bit, but we assume the upper 32 are zero.
> 
> I didn't realize that phys_addr_t is 64bit.
> 
> According to ARM IHI 0042F, page 15:
> 
> "A double-word sized type is passed in two consecutive registers (e.g., r0 and r1,
> or r2 and r3). The content of the registers is as if the value had been loaded
> from memory representation with a single LDM instruction."
> 
> I think r3 should be zeroed, not r1. r2 and r3 represent the 64bit value. arm is
> little endian, so the least significant 32bits will be in r2 and the most
> significant bits will be in r3.

Thanks for this. It looks like I managed to mess it up in two ways: the
registers must be r2 and r3, and r3 holds the high bits. I'll fix it for v2.

Thanks,
drew


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH kvm-unit-tests 8/8] arm/arm64: psci: don't assume method is hvc
  2021-04-19 16:33   ` Alexandru Elisei
@ 2021-04-19 18:13     ` Andrew Jones
  0 siblings, 0 replies; 38+ messages in thread
From: Andrew Jones @ 2021-04-19 18:13 UTC (permalink / raw)
  To: Alexandru Elisei; +Cc: kvm, nikos.nikoleris, andre.przywara, eric.auger

On Mon, Apr 19, 2021 at 05:33:37PM +0100, Alexandru Elisei wrote:
> On 4/7/21 7:59 PM, Andrew Jones wrote:
> > +psci_invoke_fn psci_invoke;
> 
> In setup(), we set the conduit after we call assert() several times. If one of
> those assert() calls fails, then psci_system_off() will end up calling a NULL
> function pointer. Maybe there should be some sort of check for that?

I can initialize psci_invoke to something that will fail in a more obvious
manner.

> 
> > +
> >  __attribute__((noinline))
> > -int psci_invoke(unsigned long function_id, unsigned long arg0,
> > -		unsigned long arg1, unsigned long arg2)
> > +int psci_invoke_hvc(unsigned long function_id, unsigned long arg0,
> > +		    unsigned long arg1, unsigned long arg2)
> >  {
> >  	asm volatile(
> >  		"hvc #0"
> > @@ -22,6 +24,17 @@ int psci_invoke(unsigned long function_id, unsigned long arg0,
> >  	return function_id;
> >  }
> >  
> > +__attribute__((noinline))
> > +int psci_invoke_smc(unsigned long function_id, unsigned long arg0,
> > +		    unsigned long arg1, unsigned long arg2)
> > +{
> > +	asm volatile(
> > +		"smc #0"
> > +	: "+r" (function_id)
> > +	: "r" (arg0), "r" (arg1), "r" (arg2));
> > +	return function_id;
> 
> I haven't been able to figure out what prevents the compiler from shuffling the
> arguments around before executing the inline assembly, such that x0-x3 don't
> contain the arguments in the order we are expecting.

We know the arguments will be in r0-r3 because of the noinline and that
shuffling them wouldn't make much sense, but I agree that this is in the
realm of [too] fragile assumptions.

> 
> Some excerpts from the extended asm help page [1] that make me believe that the
> compiler doesn't provide any guarantees:
> 
> "If you must use a specific register, but your Machine Constraints do not provide
> sufficient control to select the specific register you want, local register
> variables may provide a solution"
> 
> "Using the generic ‘r’ constraint instead of a constraint for a specific register
> allows the compiler to pick the register to use, which can result in more
> efficient code."
> 
> Same with psci_invoke_hvc(). Doing both in assembly (like Linux) should be
> sufficient and fairly straightforward.

OK, I'll just use assembly to avoid the assumptions.

> 
> [1] https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Extended-Asm
> 
> > +}
> > +
> >  int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
> >  {
> >  #ifdef __arm__
> > diff --git a/lib/arm/setup.c b/lib/arm/setup.c
> > index 5cda2d919d2b..e595a9e5a167 100644
> > --- a/lib/arm/setup.c
> > +++ b/lib/arm/setup.c
> > @@ -25,6 +25,7 @@
> >  #include <asm/processor.h>
> >  #include <asm/smp.h>
> >  #include <asm/timer.h>
> > +#include <asm/psci.h>
> >  
> >  #include "io.h"
> >  
> > @@ -55,6 +56,26 @@ int mpidr_to_cpu(uint64_t mpidr)
> >  	return -1;
> >  }
> >  
> > +static void psci_set_conduit(void)
> > +{
> > +	const void *fdt = dt_fdt();
> > +	const struct fdt_property *method;
> > +	int node, len;
> > +
> > +	node = fdt_node_offset_by_compatible(fdt, -1, "arm,psci-0.2");
> > +	assert_msg(node >= 0, "PSCI v0.2 compatibility required");
> > +
> > +	method = fdt_get_property(fdt, node, "method", &len);
> > +	assert(method != NULL && len == 4);
> > +
> > +	if (strcmp(method->data, "hvc") == 0)
> > +		psci_invoke = psci_invoke_hvc;
> > +	else if (strcmp(method->data, "smc") == 0)
> > +		psci_invoke = psci_invoke_smc;
> > +	else
> > +		assert_msg(false, "Unknown PSCI conduit: %s", method->data);
> > +}
> 
> Any particular reason for doing this here instead of in psci.c? This looks like
> something that belongs to that file, but that might just be my personal preference.

I don't have a strong preference on this, so I'll move it.

Thanks,
drew


^ permalink raw reply	[flat|nested] 38+ messages in thread

end of thread, back to index

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-07 18:59 [PATCH kvm-unit-tests 0/8] arm/arm64: Prepare for target-efi Andrew Jones
2021-04-07 18:59 ` [PATCH kvm-unit-tests 1/8] arm/arm64: Reorganize cstart assembler Andrew Jones
2021-04-09 17:18   ` Nikos Nikoleris
2021-04-09 17:28     ` Andrew Jones
2021-04-13 16:34   ` Alexandru Elisei
2021-04-14  8:59     ` Andrew Jones
2021-04-14 15:15       ` Alexandru Elisei
2021-04-15 13:03         ` Andrew Jones
2021-04-07 18:59 ` [PATCH kvm-unit-tests 2/8] arm/arm64: Move setup_vm into setup Andrew Jones
2021-04-09 17:24   ` Nikos Nikoleris
2021-04-14 15:19   ` Alexandru Elisei
2021-04-07 18:59 ` [PATCH kvm-unit-tests 3/8] pci-testdev: ioremap regions Andrew Jones
2021-04-13 14:12   ` Nikos Nikoleris
2021-04-07 18:59 ` [PATCH kvm-unit-tests 4/8] arm/arm64: mmu: Stop mapping an assumed IO region Andrew Jones
2021-04-13 14:06   ` Nikos Nikoleris
2021-04-14 15:42   ` Alexandru Elisei
2021-04-15 13:09     ` Andrew Jones
2021-04-07 18:59 ` [PATCH kvm-unit-tests 5/8] arm/arm64: mmu: Remove memory layout assumptions Andrew Jones
2021-04-13 14:27   ` Nikos Nikoleris
2021-04-15 15:48   ` Alexandru Elisei
2021-04-15 17:11     ` Andrew Jones
2021-04-19 15:09       ` Alexandru Elisei
2021-04-07 18:59 ` [PATCH kvm-unit-tests 6/8] arm/arm64: setup: Consolidate " Andrew Jones
2021-04-13 16:41   ` Nikos Nikoleris
2021-04-14  9:03     ` Andrew Jones
2021-04-15 16:59   ` Alexandru Elisei
2021-04-15 17:25     ` Andrew Jones
2021-04-19 15:56       ` Alexandru Elisei
2021-04-19 15:59         ` Alexandru Elisei
2021-04-19 17:53         ` Andrew Jones
2021-04-07 18:59 ` [PATCH kvm-unit-tests 7/8] chr-testdev: Silently fail init Andrew Jones
2021-04-13 16:42   ` Nikos Nikoleris
2021-04-15 17:03   ` Alexandru Elisei
2021-04-07 18:59 ` [PATCH kvm-unit-tests 8/8] arm/arm64: psci: don't assume method is hvc Andrew Jones
2021-04-09 17:46   ` Nikos Nikoleris
2021-04-14  9:06     ` Andrew Jones
2021-04-19 16:33   ` Alexandru Elisei
2021-04-19 18:13     ` Andrew Jones

KVM Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/kvm/0 kvm/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 kvm kvm/ https://lore.kernel.org/kvm \
		kvm@vger.kernel.org
	public-inbox-index kvm

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.kvm


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git