* [kvm-unit-tests PATCH v5 0/7] s390: Add support for large pages
From: Claudio Imbrenda @ 2021-06-11 14:06 UTC
  To: kvm; +Cc: linux-s390, david, thuth, frankja, cohuck

Introduce support for large (1M) and huge (2G) pages.

Add a simple testcase for EDAT1 and EDAT2.

v4->v5
* fixed some typos and comment style issues
* introduced enum pgt_level, switched all functions to use it

v3->v4
* replace macros in patch 5 with a union representing TEID fields
* clear the teid in expect_pgm_int and clear_pgm_int
* update testcase to use expect_pgm_int, remove expect_dat_fault
* update testcase to use teid union

v2->v3
* Add proper macros for control register bits
* Improved patch titles and descriptions
* Moved definition of TEID bits to library
* Rebased on the latest upstream branch

v1->v2

* split patch 2 -> new patch 2 and new patch 3
* patch 2: fixes pgtable.h, also fixes wrong usage of REGION_TABLE_LENGTH
  instead of SEGMENT_TABLE_LENGTH
* patch 3: introduces new macros and functions for large pages
* patch 4: remove erroneous double call to pte_alloc in get_pte
* patch 4: added comment in mmu.c to bridge the s390x architectural names
  with the Linux ones used in the kvm-unit-tests
* patch 5: added and fixed lots of comments to explain what's going on
* patch 5: set FC for region 3 after writing the canary, like for segments
* patch 5: use uintptr_t instead of intptr_t for set_prefix
* patch 5: introduce new macro PGD_PAGE_SHIFT instead of using magic value 41
* patch 5: use VIRT(0) instead of mem to make it clearer what we are
  doing, even though VIRT(0) expands to mem


Claudio Imbrenda (7):
  s390x: lib: add and use macros for control register bits
  libcflat: add SZ_1M and SZ_2G
  s390x: lib: fix pgtable.h
  s390x: lib: Add idte and other huge pages functions/macros
  s390x: lib: add teid union and clear teid from lowcore
  s390x: mmu: add support for large pages
  s390x: edat test

 s390x/Makefile            |   1 +
 lib/s390x/asm/arch_def.h  |  12 ++
 lib/s390x/asm/float.h     |   4 +-
 lib/s390x/asm/interrupt.h |  28 +++-
 lib/s390x/asm/pgtable.h   |  44 +++++-
 lib/libcflat.h            |   2 +
 lib/s390x/mmu.h           |  84 +++++++++++-
 lib/s390x/interrupt.c     |   2 +
 lib/s390x/mmu.c           | 262 ++++++++++++++++++++++++++++++++----
 lib/s390x/sclp.c          |   4 +-
 s390x/diag288.c           |   2 +-
 s390x/edat.c              | 274 ++++++++++++++++++++++++++++++++++++++
 s390x/gs.c                |   2 +-
 s390x/iep.c               |   4 +-
 s390x/skrf.c              |   2 +-
 s390x/smp.c               |   8 +-
 s390x/vector.c            |   2 +-
 s390x/unittests.cfg       |   3 +
 18 files changed, 691 insertions(+), 49 deletions(-)
 create mode 100644 s390x/edat.c

-- 
2.31.1



* [kvm-unit-tests PATCH v5 1/7] s390x: lib: add and use macros for control register bits
From: Claudio Imbrenda @ 2021-06-11 14:06 UTC
  To: kvm; +Cc: linux-s390, david, thuth, frankja, cohuck

Add CTL0_* and CTL2_* macros for specific control register bits.

Replace all hardcoded values in the library and in the existing testcases so
that they use the new macros.
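
For illustration (not part of the patch): the macros encode the architectural
bit number, which counts from the most significant bit, as the LSB-relative
shift that ctl_set_bit() and ctl_clear_bit() expect. A minimal sketch of the
equivalence, assuming ctl_set_bit(cr, bit) ORs (1UL << bit) into control
register cr:

	/*
	 * Sketch only: CTL0_AFP is (63 - 45) because the architecture
	 * numbers control register bits from the left (bit 0 = MSB),
	 * while the shift below counts from the right (bit 0 = LSB).
	 */
	uint64_t cr0 = stctg(0);	/* read control register 0 */
	cr0 |= 1UL << CTL0_AFP;		/* what ctl_set_bit(0, CTL0_AFP) sets */
	/* ctl_set_bit then presumably loads the register back via lctlg */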

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
---
 lib/s390x/asm/arch_def.h  | 12 ++++++++++++
 lib/s390x/asm/float.h     |  4 ++--
 lib/s390x/asm/interrupt.h |  4 ++--
 lib/s390x/sclp.c          |  4 ++--
 s390x/diag288.c           |  2 +-
 s390x/gs.c                |  2 +-
 s390x/iep.c               |  4 ++--
 s390x/skrf.c              |  2 +-
 s390x/smp.c               |  8 ++++----
 s390x/vector.c            |  2 +-
 10 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/lib/s390x/asm/arch_def.h b/lib/s390x/asm/arch_def.h
index 76f9e386..3aa5da9c 100644
--- a/lib/s390x/asm/arch_def.h
+++ b/lib/s390x/asm/arch_def.h
@@ -229,6 +229,18 @@ static inline uint64_t stctg(int cr)
 	return value;
 }
 
+#define CTL0_LOW_ADDR_PROT	(63 - 35)
+#define CTL0_EDAT		(63 - 40)
+#define CTL0_IEP		(63 - 43)
+#define CTL0_AFP		(63 - 45)
+#define CTL0_VECTOR		(63 - 46)
+#define CTL0_EMERGENCY_SIGNAL	(63 - 49)
+#define CTL0_EXTERNAL_CALL	(63 - 50)
+#define CTL0_CLOCK_COMPARATOR	(63 - 52)
+#define CTL0_SERVICE_SIGNAL	(63 - 54)
+
+#define CTL2_GUARDED_STORAGE	(63 - 59)
+
 static inline void ctl_set_bit(int cr, unsigned int bit)
 {
         uint64_t reg;
diff --git a/lib/s390x/asm/float.h b/lib/s390x/asm/float.h
index eb752050..73c642d4 100644
--- a/lib/s390x/asm/float.h
+++ b/lib/s390x/asm/float.h
@@ -38,12 +38,12 @@ static inline void set_fpc_dxc(uint8_t dxc)
 
 static inline void afp_enable(void)
 {
-	ctl_set_bit(0, 63 - 45);
+	ctl_set_bit(0, CTL0_AFP);
 }
 
 static inline void afp_disable(void)
 {
-	ctl_clear_bit(0, 63 - 45);
+	ctl_clear_bit(0, CTL0_AFP);
 }
 
 #endif
diff --git a/lib/s390x/asm/interrupt.h b/lib/s390x/asm/interrupt.h
index 31e4766d..bf0eb40d 100644
--- a/lib/s390x/asm/interrupt.h
+++ b/lib/s390x/asm/interrupt.h
@@ -27,13 +27,13 @@ void check_pgm_int_code(uint16_t code);
 /* Activate low-address protection */
 static inline void low_prot_enable(void)
 {
-	ctl_set_bit(0, 63 - 35);
+	ctl_set_bit(0, CTL0_LOW_ADDR_PROT);
 }
 
 /* Disable low-address protection */
 static inline void low_prot_disable(void)
 {
-	ctl_clear_bit(0, 63 - 35);
+	ctl_clear_bit(0, CTL0_LOW_ADDR_PROT);
 }
 
 #endif
diff --git a/lib/s390x/sclp.c b/lib/s390x/sclp.c
index 291924b0..9502d161 100644
--- a/lib/s390x/sclp.c
+++ b/lib/s390x/sclp.c
@@ -50,7 +50,7 @@ void sclp_setup_int(void)
 {
 	uint64_t mask;
 
-	ctl_set_bit(0, 9);
+	ctl_set_bit(0, CTL0_SERVICE_SIGNAL);
 
 	mask = extract_psw_mask();
 	mask |= PSW_MASK_EXT;
@@ -59,7 +59,7 @@ void sclp_setup_int(void)
 
 void sclp_handle_ext(void)
 {
-	ctl_clear_bit(0, 9);
+	ctl_clear_bit(0, CTL0_SERVICE_SIGNAL);
 	spin_lock(&sclp_lock);
 	sclp_busy = false;
 	spin_unlock(&sclp_lock);
diff --git a/s390x/diag288.c b/s390x/diag288.c
index e132ff04..82b6ec17 100644
--- a/s390x/diag288.c
+++ b/s390x/diag288.c
@@ -86,7 +86,7 @@ static void test_bite(void)
 	asm volatile("stck %0" : "=Q" (time) : : "cc");
 	time += (uint64_t)(16000 * 1000) << 12;
 	asm volatile("sckc %0" : : "Q" (time));
-	ctl_set_bit(0, 11);
+	ctl_set_bit(0, CTL0_CLOCK_COMPARATOR);
 	mask = extract_psw_mask();
 	mask |= PSW_MASK_EXT;
 	load_psw_mask(mask);
diff --git a/s390x/gs.c b/s390x/gs.c
index 1376d0e6..a017a97d 100644
--- a/s390x/gs.c
+++ b/s390x/gs.c
@@ -145,7 +145,7 @@ static void test_special(void)
 static void init(void)
 {
 	/* Enable control bit for gs */
-	ctl_set_bit(2, 4);
+	ctl_set_bit(2, CTL2_GUARDED_STORAGE);
 
 	/* Setup gs registers to guard the gs_area */
 	gs_cb.gsd = gs_area | 25;
diff --git a/s390x/iep.c b/s390x/iep.c
index fe167ef0..906c77b3 100644
--- a/s390x/iep.c
+++ b/s390x/iep.c
@@ -22,7 +22,7 @@ static void test_iep(void)
 	void (*fn)(void);
 
 	/* Enable IEP */
-	ctl_set_bit(0, 20);
+	ctl_set_bit(0, CTL0_IEP);
 
 	/* Get and protect a page with the IEP bit */
 	iepbuf = alloc_page();
@@ -40,7 +40,7 @@ static void test_iep(void)
 	check_pgm_int_code(PGM_INT_CODE_PROTECTION);
 	report_prefix_pop();
 	unprotect_page(iepbuf, PAGE_ENTRY_IEP);
-	ctl_clear_bit(0, 20);
+	ctl_clear_bit(0, CTL0_IEP);
 	free_page(iepbuf);
 }
 
diff --git a/s390x/skrf.c b/s390x/skrf.c
index 57524ba8..94e906a6 100644
--- a/s390x/skrf.c
+++ b/s390x/skrf.c
@@ -150,7 +150,7 @@ static void ecall_setup(void)
 	/* Put a skey into the ext new psw */
 	lc->ext_new_psw.mask = 0x00F0000180000000UL;
 	/* Open up ext masks */
-	ctl_set_bit(0, 13);
+	ctl_set_bit(0, CTL0_EXTERNAL_CALL);
 	mask = extract_psw_mask();
 	mask |= PSW_MASK_EXT;
 	load_psw_mask(mask);
diff --git a/s390x/smp.c b/s390x/smp.c
index b0ece491..f25ec769 100644
--- a/s390x/smp.c
+++ b/s390x/smp.c
@@ -154,7 +154,7 @@ static void ecall(void)
 	struct lowcore *lc = (void *)0x0;
 
 	expect_ext_int();
-	ctl_set_bit(0, 13);
+	ctl_set_bit(0, CTL0_EXTERNAL_CALL);
 	mask = extract_psw_mask();
 	mask |= PSW_MASK_EXT;
 	load_psw_mask(mask);
@@ -188,7 +188,7 @@ static void emcall(void)
 	struct lowcore *lc = (void *)0x0;
 
 	expect_ext_int();
-	ctl_set_bit(0, 14);
+	ctl_set_bit(0, CTL0_EMERGENCY_SIGNAL);
 	mask = extract_psw_mask();
 	mask |= PSW_MASK_EXT;
 	load_psw_mask(mask);
@@ -283,8 +283,8 @@ static void test_local_ints(void)
 	unsigned long mask;
 
 	/* Open masks for ecall and emcall */
-	ctl_set_bit(0, 13);
-	ctl_set_bit(0, 14);
+	ctl_set_bit(0, CTL0_EXTERNAL_CALL);
+	ctl_set_bit(0, CTL0_EMERGENCY_SIGNAL);
 	mask = extract_psw_mask();
 	mask |= PSW_MASK_EXT;
 	load_psw_mask(mask);
diff --git a/s390x/vector.c b/s390x/vector.c
index b052de55..fdb0eee2 100644
--- a/s390x/vector.c
+++ b/s390x/vector.c
@@ -106,7 +106,7 @@ static void test_bcd_add(void)
 static void init(void)
 {
 	/* Enable vector instructions */
-	ctl_set_bit(0, 17);
+	ctl_set_bit(0, CTL0_VECTOR);
 
 	/* Preset vector registers to 0xff */
 	memset(pagebuf, 0xff, PAGE_SIZE);
-- 
2.31.1



* [kvm-unit-tests PATCH v5 2/7] libcflat: add SZ_1M and SZ_2G
From: Claudio Imbrenda @ 2021-06-11 14:07 UTC
  To: kvm; +Cc: linux-s390, david, thuth, frankja, cohuck

Add SZ_1M and SZ_2G to libcflat.h.

s390x needs these constants for large (1M) and huge (2G) page support.
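
A side note, for illustration only: unlike the other SZ_* constants, SZ_2G
needs the unsigned long literal, because a plain (1 << 31) would overflow a
32-bit signed int:

	unsigned long two_g = SZ_2G;	/* 0x80000000ul, well defined */
	/* int bad = 1 << 31;		   signed overflow, undefined behavior */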

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
---
 lib/libcflat.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/libcflat.h b/lib/libcflat.h
index f40b431d..97db9e38 100644
--- a/lib/libcflat.h
+++ b/lib/libcflat.h
@@ -157,7 +157,9 @@ extern void setup_vm(void);
 #define SZ_8K			(1 << 13)
 #define SZ_16K			(1 << 14)
 #define SZ_64K			(1 << 16)
+#define SZ_1M			(1 << 20)
 #define SZ_2M			(1 << 21)
 #define SZ_1G			(1 << 30)
+#define SZ_2G			(1ul << 31)
 
 #endif
-- 
2.31.1



* [kvm-unit-tests PATCH v5 3/7] s390x: lib: fix pgtable.h
From: Claudio Imbrenda @ 2021-06-11 14:07 UTC
  To: kvm; +Cc: linux-s390, david, thuth, frankja, cohuck

Fix pgtable.h:

* SEGMENT_ENTRY_SFAA had one extra bit set
* pmd entries don't have a length field
* ipte does not need to clear the lower bits
 - clearing the lower 12 bits is technically incorrect, as page tables are
   architecturally only required to be 2kB (11-bit) aligned (even though the
   unit tests always allocate a full page)
* region table entries should use REGION_ENTRY_TL instead of *_TABLE_LENGTH
 - *_TABLE_LENGTH need to stay, because they should be used for ASCEs
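
A sketch of the SFAA fix (illustration, not part of the patch): the
segment-frame absolute address designates 1MB frames, so the mask must clear
exactly the low 20 bits; the old value kept bit 2^19 and thus allowed
512kB-misaligned frame addresses:

	/* old: 0xfffffffffff80000UL, clears only the low 19 bits (512kB) */
	/* new: 0xfffffffffff00000UL, clears the low 20 bits (1MB)        */
	assert(~SEGMENT_ENTRY_SFAA + 1 == SZ_1M);	/* holds with the fix */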

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
---
 lib/s390x/asm/pgtable.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/lib/s390x/asm/pgtable.h b/lib/s390x/asm/pgtable.h
index 277f3480..1a21f175 100644
--- a/lib/s390x/asm/pgtable.h
+++ b/lib/s390x/asm/pgtable.h
@@ -60,7 +60,7 @@
 #define SEGMENT_SHIFT			20
 
 #define SEGMENT_ENTRY_ORIGIN		0xfffffffffffff800UL
-#define SEGMENT_ENTRY_SFAA		0xfffffffffff80000UL
+#define SEGMENT_ENTRY_SFAA		0xfffffffffff00000UL
 #define SEGMENT_ENTRY_AV		0x0000000000010000UL
 #define SEGMENT_ENTRY_ACC		0x000000000000f000UL
 #define SEGMENT_ENTRY_F			0x0000000000000800UL
@@ -143,7 +143,7 @@ static inline p4d_t *p4d_alloc(pgd_t *pgd, unsigned long addr)
 	if (pgd_none(*pgd)) {
 		p4d_t *p4d = p4d_alloc_one();
 		pgd_val(*pgd) = __pa(p4d) | REGION_ENTRY_TT_REGION1 |
-				REGION_TABLE_LENGTH;
+				REGION_ENTRY_TL;
 	}
 	return p4d_offset(pgd, addr);
 }
@@ -163,7 +163,7 @@ static inline pud_t *pud_alloc(p4d_t *p4d, unsigned long addr)
 	if (p4d_none(*p4d)) {
 		pud_t *pud = pud_alloc_one();
 		p4d_val(*p4d) = __pa(pud) | REGION_ENTRY_TT_REGION2 |
-				REGION_TABLE_LENGTH;
+				REGION_ENTRY_TL;
 	}
 	return pud_offset(p4d, addr);
 }
@@ -183,7 +183,7 @@ static inline pmd_t *pmd_alloc(pud_t *pud, unsigned long addr)
 	if (pud_none(*pud)) {
 		pmd_t *pmd = pmd_alloc_one();
 		pud_val(*pud) = __pa(pmd) | REGION_ENTRY_TT_REGION3 |
-				REGION_TABLE_LENGTH;
+				REGION_ENTRY_TL;
 	}
 	return pmd_offset(pud, addr);
 }
@@ -202,15 +202,14 @@ static inline pte_t *pte_alloc(pmd_t *pmd, unsigned long addr)
 {
 	if (pmd_none(*pmd)) {
 		pte_t *pte = pte_alloc_one();
-		pmd_val(*pmd) = __pa(pte) | SEGMENT_ENTRY_TT_SEGMENT |
-				SEGMENT_TABLE_LENGTH;
+		pmd_val(*pmd) = __pa(pte) | SEGMENT_ENTRY_TT_SEGMENT;
 	}
 	return pte_offset(pmd, addr);
 }
 
 static inline void ipte(unsigned long vaddr, pteval_t *p_pte)
 {
-	unsigned long table_origin = (unsigned long)p_pte & PAGE_MASK;
+	unsigned long table_origin = (unsigned long)p_pte;
 
 	asm volatile(
 		"	ipte %0,%1\n"
-- 
2.31.1



* [kvm-unit-tests PATCH v5 4/7] s390x: lib: Add idte and other huge pages functions/macros
From: Claudio Imbrenda @ 2021-06-11 14:07 UTC
  To: kvm; +Cc: linux-s390, david, thuth, frankja, cohuck

Improve pgtable.h:

* add macros to check whether a pmd or a pud are large / huge
* add idte functions for pmd, pud, p4d and pgd
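
A usage sketch (not part of the patch), mirroring how the mmu code in the
following patches uses these helpers: a large pmd must be invalidated with
idte before its entry is replaced:

	/* replace a large pmd mapping; pa must be 1MB aligned */
	static void replace_large_pmd(pmd_t *pmd, uintptr_t va, phys_addr_t pa)
	{
		assert(pmd_large(*pmd));
		idte_pmdp(va, &pmd_val(*pmd));	/* invalidate and flush old entry */
		pmd_val(*pmd) = pa | SEGMENT_ENTRY_FC | SEGMENT_ENTRY_TT_SEGMENT;
	}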

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Acked-by: Janosch Frank <frankja@linux.ibm.com>
---
 lib/s390x/asm/pgtable.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/lib/s390x/asm/pgtable.h b/lib/s390x/asm/pgtable.h
index 1a21f175..f166dcc6 100644
--- a/lib/s390x/asm/pgtable.h
+++ b/lib/s390x/asm/pgtable.h
@@ -100,6 +100,9 @@
 #define pmd_none(entry) (pmd_val(entry) & SEGMENT_ENTRY_I)
 #define pte_none(entry) (pte_val(entry) & PAGE_ENTRY_I)
 
+#define pud_huge(entry)  (pud_val(entry) & REGION3_ENTRY_FC)
+#define pmd_large(entry) (pmd_val(entry) & SEGMENT_ENTRY_FC)
+
 #define pgd_addr(entry) __va(pgd_val(entry) & REGION_ENTRY_ORIGIN)
 #define p4d_addr(entry) __va(p4d_val(entry) & REGION_ENTRY_ORIGIN)
 #define pud_addr(entry) __va(pud_val(entry) & REGION_ENTRY_ORIGIN)
@@ -216,6 +219,34 @@ static inline void ipte(unsigned long vaddr, pteval_t *p_pte)
 		: : "a" (table_origin), "a" (vaddr) : "memory");
 }
 
+static inline void idte(unsigned long table_origin, unsigned long vaddr)
+{
+	vaddr &= SEGMENT_ENTRY_SFAA;
+	asm volatile(
+		"	idte %0,0,%1\n"
+		: : "a" (table_origin), "a" (vaddr) : "memory");
+}
+
+static inline void idte_pmdp(unsigned long vaddr, pmdval_t *pmdp)
+{
+	idte((unsigned long)(pmdp - pmd_index(vaddr)) | ASCE_DT_SEGMENT, vaddr);
+}
+
+static inline void idte_pudp(unsigned long vaddr, pudval_t *pudp)
+{
+	idte((unsigned long)(pudp - pud_index(vaddr)) | ASCE_DT_REGION3, vaddr);
+}
+
+static inline void idte_p4dp(unsigned long vaddr, p4dval_t *p4dp)
+{
+	idte((unsigned long)(p4dp - p4d_index(vaddr)) | ASCE_DT_REGION2, vaddr);
+}
+
+static inline void idte_pgdp(unsigned long vaddr, pgdval_t *pgdp)
+{
+	idte((unsigned long)(pgdp - pgd_index(vaddr)) | ASCE_DT_REGION1, vaddr);
+}
+
 void configure_dat(int enable);
 
 #endif /* _ASMS390X_PGTABLE_H_ */
-- 
2.31.1



* [kvm-unit-tests PATCH v5 5/7] s390x: lib: add teid union and clear teid from lowcore
From: Claudio Imbrenda @ 2021-06-11 14:07 UTC
  To: kvm; +Cc: linux-s390, david, thuth, frankja, cohuck

Add a union to represent the Translation-Exception Identification (TEID).

Clear the TEID in expect_pgm_int and clear_pgm_int.
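
A decoding sketch (illustration only; check_pgm_prot in the edat test does
something similar). Note that, depending on the installed facilities, most
fields are only meaningful when the m bit is set:

	union teid teid = { .val = lc->trans_exc_id };

	if (teid.m)
		report_info("fault at %lx, %s, asce %d",
			    (unsigned long)teid.addr << PAGE_SHIFT,
			    teid.store ? "store" : "fetch", (int)teid.asce_id);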

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
---
 lib/s390x/asm/interrupt.h | 24 ++++++++++++++++++++++++
 lib/s390x/interrupt.c     |  2 ++
 2 files changed, 26 insertions(+)

diff --git a/lib/s390x/asm/interrupt.h b/lib/s390x/asm/interrupt.h
index bf0eb40d..d9ab0bd7 100644
--- a/lib/s390x/asm/interrupt.h
+++ b/lib/s390x/asm/interrupt.h
@@ -13,6 +13,30 @@
 #define EXT_IRQ_EXTERNAL_CALL	0x1202
 #define EXT_IRQ_SERVICE_SIG	0x2401
 
+#define TEID_ASCE_PRIMARY	0
+#define TEID_ASCE_AR		1
+#define TEID_ASCE_SECONDARY	2
+#define TEID_ASCE_HOME		3
+
+union teid {
+	unsigned long val;
+	struct {
+		unsigned long addr:52;
+		unsigned long fetch:1;
+		unsigned long store:1;
+		unsigned long reserved:6;
+		unsigned long acc_list_prot:1;
+		/*
+		 * depending on the exception and the installed facilities,
+		 * the m field can indicate several different things,
+		 * including whether the exception was triggered by a MVPG
+		 * instruction, or whether the addr field is meaningful
+		 */
+		unsigned long m:1;
+		unsigned long asce_id:2;
+	};
+};
+
 void register_pgm_cleanup_func(void (*f)(void));
 void handle_pgm_int(struct stack_frame_int *stack);
 void handle_ext_int(struct stack_frame_int *stack);
diff --git a/lib/s390x/interrupt.c b/lib/s390x/interrupt.c
index ce0003de..b627942f 100644
--- a/lib/s390x/interrupt.c
+++ b/lib/s390x/interrupt.c
@@ -22,6 +22,7 @@ void expect_pgm_int(void)
 {
 	pgm_int_expected = true;
 	lc->pgm_int_code = 0;
+	lc->trans_exc_id = 0;
 	mb();
 }
 
@@ -39,6 +40,7 @@ uint16_t clear_pgm_int(void)
 	mb();
 	code = lc->pgm_int_code;
 	lc->pgm_int_code = 0;
+	lc->trans_exc_id = 0;
 	pgm_int_expected = false;
 	return code;
 }
-- 
2.31.1



* [kvm-unit-tests PATCH v5 6/7] s390x: mmu: add support for large pages
From: Claudio Imbrenda @ 2021-06-11 14:07 UTC
  To: kvm; +Cc: linux-s390, david, thuth, frankja, cohuck

Add support for 1M and 2G pages.
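
A usage sketch of the new interface (not part of the patch), assuming root is
the active page table root set up by setup_vm, and pa and vaddr are 1MB
aligned:

	/* map vaddr to pa with a single 1MB large page */
	install_large_page(root, pa, vaddr);

	/* write-protect it at the segment (pmd) level... */
	protect_dat_entry(vaddr, SEGMENT_ENTRY_P, pgtable_level_pmd);

	/* ...or split it back into 4kB ptes and protect a single page */
	split_page(root, vaddr, pgtable_level_pte);
	protect_page(vaddr, PAGE_ENTRY_P);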

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 lib/s390x/mmu.h |  84 +++++++++++++++-
 lib/s390x/mmu.c | 262 +++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 320 insertions(+), 26 deletions(-)

diff --git a/lib/s390x/mmu.h b/lib/s390x/mmu.h
index b995f85b..ab35d782 100644
--- a/lib/s390x/mmu.h
+++ b/lib/s390x/mmu.h
@@ -10,9 +10,89 @@
 #ifndef _S390X_MMU_H_
 #define _S390X_MMU_H_
 
-void protect_page(void *vaddr, unsigned long prot);
+enum pgt_level {
+	pgtable_level_pgd = 1,
+	pgtable_level_p4d,
+	pgtable_level_pud,
+	pgtable_level_pmd,
+	pgtable_level_pte,
+};
+
+/*
+ * Splits the pagetables down to the given DAT tables level.
+ * Returns a pointer to the DAT table entry of the given level.
+ * @pgtable root of the page table tree
+ * @vaddr address whose page tables are to split
+ * @level 3 (for 2GB pud), 4 (for 1MB pmd) or 5 (for 4KB pages)
+ */
+void *split_page(pgd_t *pgtable, void *vaddr, enum pgt_level level);
+
+/*
+ * Applies the given protection bits to the given DAT tables level,
+ * splitting if necessary.
+ * @pgtable root of the page table tree
+ * @vaddr address whose protection bits are to be changed
+ * @prot the protection bits to set
+ * @level 3 (for 2GB pud), 4 (for 1MB pmd) or 5 (for 4KB pages)
+ */
+void protect_dat_entry(void *vaddr, unsigned long prot, enum pgt_level level);
+
+/*
+ * Clears the given protection bits from the given DAT tables level,
+ * splitting if necessary.
+ * @pgtable root of the page table tree
+ * @vaddr address whose protection bits are to be changed
+ * @prot the protection bits to clear
+ * @level 3 (for 2GB pud), 4 (for 1MB pmd) or 5 (for 4kB pages)
+ */
+void unprotect_dat_entry(void *vaddr, unsigned long prot, enum pgt_level level);
+
+/*
+ * Applies the given protection bits to the given 4kB pages range,
+ * splitting if necessary.
+ * @start starting address whose protection bits are to be changed
+ * @len size in bytes
+ * @prot the protection bits to set
+ */
 void protect_range(void *start, unsigned long len, unsigned long prot);
-void unprotect_page(void *vaddr, unsigned long prot);
+
+/*
+ * Clears the given protection bits from the given 4kB pages range,
+ * splitting if necessary.
+ * @start starting address whose protection bits are to be changed
+ * @len size in bytes
+ * @prot the protection bits to set
+ */
 void unprotect_range(void *start, unsigned long len, unsigned long prot);
 
+/* Similar to install_page, maps the virtual address to the physical address
+ * for the given page tables, using 1MB large pages.
+ * Returns a pointer to the DAT table entry.
+ * @pgtable root of the page table tree
+ * @phys physical address to map, must be 1MB aligned!
+ * @vaddr virtual address to map, must be 1MB aligned!
+ */
+pmdval_t *install_large_page(pgd_t *pgtable, phys_addr_t phys, void *vaddr);
+
+/* Similar to install_page, maps the virtual address to the physical address
+ * for the given page tables, using 2GB huge pages.
+ * Returns a pointer to the DAT table entry.
+ * @pgtable root of the page table tree
+ * @phys physical address to map, must be 2GB aligned!
+ * @vaddr virtual address to map, must be 2GB aligned!
+ */
+pudval_t *install_huge_page(pgd_t *pgtable, phys_addr_t phys, void *vaddr);
+
+static inline void protect_page(void *vaddr, unsigned long prot)
+{
+	protect_dat_entry(vaddr, prot, pgtable_level_pte);
+}
+
+static inline void unprotect_page(void *vaddr, unsigned long prot)
+{
+	unprotect_dat_entry(vaddr, prot, pgtable_level_pte);
+}
+
+void *get_dat_entry(pgd_t *pgtable, void *vaddr, enum pgt_level level);
+
 #endif /* _ASMS390X_MMU_H_ */
diff --git a/lib/s390x/mmu.c b/lib/s390x/mmu.c
index 5c517366..c973443b 100644
--- a/lib/s390x/mmu.c
+++ b/lib/s390x/mmu.c
@@ -15,6 +15,18 @@
 #include <vmalloc.h>
 #include "mmu.h"
 
+/*
+ * The naming convention used here is the same as used in the Linux kernel;
+ * this is the correspondence between the s390x architectural names and the
+ * Linux ones:
+ *
+ * pgd - region 1 table entry
+ * p4d - region 2 table entry
+ * pud - region 3 table entry
+ * pmd - segment table entry
+ * pte - page table entry
+ */
+
 static pgd_t *table_root;
 
 void configure_dat(int enable)
@@ -46,54 +58,256 @@ static void mmu_enable(pgd_t *pgtable)
 	lc->pgm_new_psw.mask |= PSW_MASK_DAT;
 }
 
-static pteval_t *get_pte(pgd_t *pgtable, uintptr_t vaddr)
+/*
+ * Get the pud (region 3) DAT table entry for the given address and root,
+ * allocating it if necessary
+ */
+static inline pud_t *get_pud(pgd_t *pgtable, uintptr_t vaddr)
 {
 	pgd_t *pgd = pgd_offset(pgtable, vaddr);
 	p4d_t *p4d = p4d_alloc(pgd, vaddr);
 	pud_t *pud = pud_alloc(p4d, vaddr);
-	pmd_t *pmd = pmd_alloc(pud, vaddr);
-	pte_t *pte = pte_alloc(pmd, vaddr);
 
-	return &pte_val(*pte);
+	return pud;
+}
+
+/*
+ * Get the pmd (segment) DAT table entry for the given address and pud,
+ * allocating it if necessary.
+ * The pud must not be huge.
+ */
+static inline pmd_t *get_pmd(pud_t *pud, uintptr_t vaddr)
+{
+	pmd_t *pmd;
+
+	assert(!pud_huge(*pud));
+	pmd = pmd_alloc(pud, vaddr);
+	return pmd;
+}
+
+/*
+ * Get the pte (page) DAT table entry for the given address and pmd,
+ * allocating it if necessary.
+ * The pmd must not be large.
+ */
+static inline pte_t *get_pte(pmd_t *pmd, uintptr_t vaddr)
+{
+	pte_t *pte;
+
+	assert(!pmd_large(*pmd));
+	pte = pte_alloc(pmd, vaddr);
+	return pte;
+}
+
+/*
+ * Splits a large pmd (segment) DAT table entry into equivalent 4kB small
+ * pages.
+ * @pmd The pmd to split, it must be large.
+ * @va the virtual address corresponding to this pmd.
+ */
+static void split_pmd(pmd_t *pmd, uintptr_t va)
+{
+	phys_addr_t pa = pmd_val(*pmd) & SEGMENT_ENTRY_SFAA;
+	unsigned long i, prot;
+	pte_t *pte;
+
+	assert(pmd_large(*pmd));
+	pte = alloc_pages(PAGE_TABLE_ORDER);
+	prot = pmd_val(*pmd) & (SEGMENT_ENTRY_IEP | SEGMENT_ENTRY_P);
+	for (i = 0; i < PAGE_TABLE_ENTRIES; i++)
+		pte_val(pte[i]) =  pa | PAGE_SIZE * i | prot;
+	idte_pmdp(va, &pmd_val(*pmd));
+	pmd_val(*pmd) = __pa(pte) | SEGMENT_ENTRY_TT_SEGMENT;
+
+}
+
+/*
+ * Splits a huge pud (region 3) DAT table entry into equivalent 1MB large
+ * pages.
+ * @pud The pud to split, it must be huge.
+ * @va the virtual address corresponding to this pud.
+ */
+static void split_pud(pud_t *pud, uintptr_t va)
+{
+	phys_addr_t pa = pud_val(*pud) & REGION3_ENTRY_RFAA;
+	unsigned long i, prot;
+	pmd_t *pmd;
+
+	assert(pud_huge(*pud));
+	pmd = alloc_pages(SEGMENT_TABLE_ORDER);
+	prot = pud_val(*pud) & (REGION3_ENTRY_IEP | REGION_ENTRY_P);
+	for (i = 0; i < SEGMENT_TABLE_ENTRIES; i++)
+		pmd_val(pmd[i]) =  pa | SZ_1M * i | prot | SEGMENT_ENTRY_FC | SEGMENT_ENTRY_TT_SEGMENT;
+	idte_pudp(va, &pud_val(*pud));
+	pud_val(*pud) = __pa(pmd) | REGION_ENTRY_TT_REGION3 | REGION_TABLE_LENGTH;
+}
+
+void *get_dat_entry(pgd_t *pgtable, void *vaddr, enum pgt_level level)
+{
+	uintptr_t va = (uintptr_t)vaddr;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	assert(level && (level <= 5));
+	pgd = pgd_offset(pgtable, va);
+	if (level == pgtable_level_pgd)
+		return pgd;
+	p4d = p4d_alloc(pgd, va);
+	if (level == pgtable_level_p4d)
+		return p4d;
+	pud = pud_alloc(p4d, va);
+
+	if (level == pgtable_level_pud)
+		return pud;
+	if (!pud_none(*pud) && pud_huge(*pud))
+		split_pud(pud, va);
+	pmd = get_pmd(pud, va);
+	if (level == pgtable_level_pmd)
+		return pmd;
+	if (!pmd_none(*pmd) && pmd_large(*pmd))
+		split_pmd(pmd, va);
+	return get_pte(pmd, va);
+}
+
+void *split_page(pgd_t *pgtable, void *vaddr, enum pgt_level level)
+{
+	assert((level >= 3) && (level <= 5));
+	return get_dat_entry(pgtable ? pgtable : table_root, vaddr, level);
 }
 
 phys_addr_t virt_to_pte_phys(pgd_t *pgtable, void *vaddr)
 {
-	return (*get_pte(pgtable, (uintptr_t)vaddr) & PAGE_MASK) +
-	       ((unsigned long)vaddr & ~PAGE_MASK);
+	uintptr_t va = (uintptr_t)vaddr;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pud = get_pud(pgtable, va);
+	if (pud_huge(*pud))
+		return (pud_val(*pud) & REGION3_ENTRY_RFAA) | (va & ~REGION3_ENTRY_RFAA);
+	pmd = get_pmd(pud, va);
+	if (pmd_large(*pmd))
+		return (pmd_val(*pmd) & SEGMENT_ENTRY_SFAA) | (va & ~SEGMENT_ENTRY_SFAA);
+	pte = get_pte(pmd, va);
+	return (pte_val(*pte) & PAGE_MASK) | (va & ~PAGE_MASK);
+}
+
+/*
+ * Get the DAT table entry of the given level for the given address,
+ * splitting if necessary. If the entry was not invalid, invalidate it, and
+ * return the pointer to the entry and, if requested, its old value.
+ * @pgtable root of the page tables
+ * @vaddr virtual address
+ * @level 3 (for 2GB pud), 4 (for 1MB pmd) or 5 (for 4kB pages)
+ * @old if not NULL, will be written with the old value of the DAT table
+ * entry before invalidation
+ */
+static void *dat_get_and_invalidate(pgd_t *pgtable, void *vaddr, enum pgt_level level, unsigned long *old)
+{
+	unsigned long va = (unsigned long)vaddr;
+	void *ptr;
+
+	ptr = get_dat_entry(pgtable, vaddr, level);
+	if (old)
+		*old = *(unsigned long *)ptr;
+	if ((level == pgtable_level_pgd) && !pgd_none(*(pgd_t *)ptr))
+		idte_pgdp(va, ptr);
+	else if ((level == pgtable_level_p4d) && !p4d_none(*(p4d_t *)ptr))
+		idte_p4dp(va, ptr);
+	else if ((level == pgtable_level_pud) && !pud_none(*(pud_t *)ptr))
+		idte_pudp(va, ptr);
+	else if ((level == pgtable_level_pmd) && !pmd_none(*(pmd_t *)ptr))
+		idte_pmdp(va, ptr);
+	else if (!pte_none(*(pte_t *)ptr))
+		ipte(va, ptr);
+	return ptr;
 }
 
-static pteval_t *set_pte(pgd_t *pgtable, pteval_t val, void *vaddr)
+static void cleanup_pmd(pmd_t *pmd)
 {
-	pteval_t *p_pte = get_pte(pgtable, (uintptr_t)vaddr);
+	/* was invalid or large, nothing to do */
+	if (pmd_none(*pmd) || pmd_large(*pmd))
+		return;
+	/* was not large, free the corresponding page table */
+	free_pages((void *)(pmd_val(*pmd) & PAGE_MASK));
+}
 
-	/* first flush the old entry (if we're replacing anything) */
-	if (!(*p_pte & PAGE_ENTRY_I))
-		ipte((uintptr_t)vaddr, p_pte);
+static void cleanup_pud(pud_t *pud)
+{
+	unsigned long i;
+	pmd_t *pmd;
 
-	*p_pte = val;
-	return p_pte;
+	/* was invalid or large, nothing to do */
+	if (pud_none(*pud) || pud_huge(*pud))
+		return;
+	/* recursively clean up all pmds if needed */
+	pmd = (pmd_t *)(pud_val(*pud) & PAGE_MASK);
+	for (i = 0; i < SEGMENT_TABLE_ENTRIES; i++)
+		cleanup_pmd(pmd + i);
+	/* free the corresponding segment table */
+	free_pages(pmd);
+}
+
+/*
+ * Set the DAT entry for the given level of the given virtual address. If a
+ * mapping already existed, it is overwritten. If an existing mapping with
+ * smaller pages existed, all the lower tables are freed.
+ * Returns the pointer to the DAT table entry.
+ * @pgtable root of the page tables
+ * @val the new value for the DAT table entry
+ * @vaddr the virtual address
+ * @level 3 for pud (region 3), 4 for pmd (segment) and 5 for pte (pages)
+ */
+static void *set_dat_entry(pgd_t *pgtable, unsigned long val, void *vaddr, enum pgt_level level)
+{
+	unsigned long old, *res;
+
+	res = dat_get_and_invalidate(pgtable, vaddr, level, &old);
+	if (level == pgtable_level_pmd)
+		cleanup_pmd((pmd_t *)&old);
+	if (level == pgtable_level_pud)
+		cleanup_pud((pud_t *)&old);
+	*res = val;
+	return res;
 }
 
 pteval_t *install_page(pgd_t *pgtable, phys_addr_t phys, void *vaddr)
 {
-	return set_pte(pgtable, __pa(phys), vaddr);
+	assert(IS_ALIGNED(phys, PAGE_SIZE));
+	assert(IS_ALIGNED((uintptr_t)vaddr, PAGE_SIZE));
+	return set_dat_entry(pgtable, phys, vaddr, pgtable_level_pte);
+}
+
+pmdval_t *install_large_page(pgd_t *pgtable, phys_addr_t phys, void *vaddr)
+{
+	assert(IS_ALIGNED(phys, SZ_1M));
+	assert(IS_ALIGNED((uintptr_t)vaddr, SZ_1M));
+	return set_dat_entry(pgtable, phys | SEGMENT_ENTRY_FC, vaddr, pgtable_level_pmd);
+}
+
+pudval_t *install_huge_page(pgd_t *pgtable, phys_addr_t phys, void *vaddr)
+{
+	assert(IS_ALIGNED(phys, SZ_2G));
+	assert(IS_ALIGNED((uintptr_t)vaddr, SZ_2G));
+	return set_dat_entry(pgtable, phys | REGION3_ENTRY_FC | REGION_ENTRY_TT_REGION3, vaddr, pgtable_level_pud);
 }
 
-void protect_page(void *vaddr, unsigned long prot)
+void protect_dat_entry(void *vaddr, unsigned long prot, enum pgt_level level)
 {
-	pteval_t *p_pte = get_pte(table_root, (uintptr_t)vaddr);
-	pteval_t n_pte = *p_pte | prot;
+	unsigned long old, *ptr;
 
-	set_pte(table_root, n_pte, vaddr);
+	ptr = dat_get_and_invalidate(table_root, vaddr, level, &old);
+	*ptr = old | prot;
 }
 
-void unprotect_page(void *vaddr, unsigned long prot)
+void unprotect_dat_entry(void *vaddr, unsigned long prot, enum pgt_level level)
 {
-	pteval_t *p_pte = get_pte(table_root, (uintptr_t)vaddr);
-	pteval_t n_pte = *p_pte & ~prot;
+	unsigned long old, *ptr;
 
-	set_pte(table_root, n_pte, vaddr);
+	ptr = dat_get_and_invalidate(table_root, vaddr, level, &old);
+	*ptr = old & ~prot;
 }
 
 void protect_range(void *start, unsigned long len, unsigned long prot)
@@ -102,7 +316,7 @@ void protect_range(void *start, unsigned long len, unsigned long prot)
 
 	len &= PAGE_MASK;
 	for (; len; len -= PAGE_SIZE, curr += PAGE_SIZE)
-		protect_page((void *)curr, prot);
+		protect_dat_entry((void *)curr, prot, 5);
 }
 
 void unprotect_range(void *start, unsigned long len, unsigned long prot)
@@ -111,7 +325,7 @@ void unprotect_range(void *start, unsigned long len, unsigned long prot)
 
 	len &= PAGE_MASK;
 	for (; len; len -= PAGE_SIZE, curr += PAGE_SIZE)
-		unprotect_page((void *)curr, prot);
+		unprotect_dat_entry((void *)curr, prot, 5);
 }
 
 static void setup_identity(pgd_t *pgtable, phys_addr_t start_addr,
-- 
2.31.1



* [kvm-unit-tests PATCH v5 7/7] s390x: edat test
From: Claudio Imbrenda @ 2021-06-11 14:07 UTC
  To: kvm; +Cc: linux-s390, david, thuth, frankja, cohuck

Add a simple testcase for EDAT1 and EDAT2.
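
With the unittests.cfg entry below, the test should be picked up by the
standard harness (e.g. via ./run_tests.sh, assuming the usual kvm-unit-tests
setup), or be runnable standalone as s390x/edat.elf.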

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Acked-by: Janosch Frank <frankja@linux.ibm.com>
---
 s390x/Makefile      |   1 +
 s390x/edat.c        | 274 ++++++++++++++++++++++++++++++++++++++++++++
 s390x/unittests.cfg |   3 +
 3 files changed, 278 insertions(+)
 create mode 100644 s390x/edat.c

diff --git a/s390x/Makefile b/s390x/Makefile
index 8de926ab..8820e998 100644
--- a/s390x/Makefile
+++ b/s390x/Makefile
@@ -22,6 +22,7 @@ tests += $(TEST_DIR)/uv-guest.elf
 tests += $(TEST_DIR)/sie.elf
 tests += $(TEST_DIR)/mvpg.elf
 tests += $(TEST_DIR)/uv-host.elf
+tests += $(TEST_DIR)/edat.elf
 
 tests_binary = $(patsubst %.elf,%.bin,$(tests))
 ifneq ($(HOST_KEY_DOCUMENT),)
diff --git a/s390x/edat.c b/s390x/edat.c
new file mode 100644
index 00000000..c3bee0c8
--- /dev/null
+++ b/s390x/edat.c
@@ -0,0 +1,274 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * EDAT test.
+ *
+ * Copyright (c) 2021 IBM Corp
+ *
+ * Authors:
+ *	Claudio Imbrenda <imbrenda@linux.ibm.com>
+ */
+#include <libcflat.h>
+#include <vmalloc.h>
+#include <asm/facility.h>
+#include <asm/interrupt.h>
+#include <mmu.h>
+#include <asm/pgtable.h>
+#include <asm-generic/barrier.h>
+
+#define PGD_PAGE_SHIFT (REGION1_SHIFT - PAGE_SHIFT)
+
+#define LC_SIZE	(2 * PAGE_SIZE)
+#define VIRT(x)	((void *)((unsigned long)(x) + (unsigned long)mem))
+
+static uint8_t prefix_buf[LC_SIZE] __attribute__((aligned(LC_SIZE)));
+static unsigned int tmp[1024] __attribute__((aligned(PAGE_SIZE)));
+static void *root, *mem, *m;
+static struct lowcore *lc;
+volatile unsigned int *p;
+
+/*
+ * Check if a non-access-list protection exception happened for the given
+ * address, in the primary address space.
+ */
+static bool check_pgm_prot(void *ptr)
+{
+	union teid teid;
+
+	if (lc->pgm_int_code != PGM_INT_CODE_PROTECTION)
+		return false;
+
+	teid.val = lc->trans_exc_id;
+
+	/*
+	 * depending on the presence of the ESOP feature, the rest of the
+	 * field might or might not be meaningful when the m field is 0.
+	 */
+	if (!teid.m)
+		return true;
+	return (!teid.acc_list_prot && !teid.asce_id &&
+		(teid.addr == ((unsigned long)ptr >> PAGE_SHIFT)));
+}
+
+static void test_dat(void)
+{
+	report_prefix_push("edat off");
+	/* disable EDAT */
+	ctl_clear_bit(0, CTL0_EDAT);
+
+	/* Check some basics */
+	p[0] = 42;
+	report(p[0] == 42, "pte, r/w");
+	p[0] = 0;
+
+	/* Write protect the page and try to write, expect a fault */
+	protect_page(m, PAGE_ENTRY_P);
+	expect_pgm_int();
+	p[0] = 42;
+	unprotect_page(m, PAGE_ENTRY_P);
+	report(!p[0] && check_pgm_prot(m), "pte, ro");
+
+	/*
+	 * The FC bit (for large pages) should be ignored because EDAT is
+	 * off. We set a value and then we try to read it back again after
+	 * setting the FC bit. This way we can check if large pages were
+	 * erroneously enabled despite EDAT being off.
+	 */
+	p[0] = 42;
+	protect_dat_entry(m, SEGMENT_ENTRY_FC, pgtable_level_pmd);
+	report(p[0] == 42, "pmd, fc=1, r/w");
+	unprotect_dat_entry(m, SEGMENT_ENTRY_FC, pgtable_level_pmd);
+	p[0] = 0;
+
+	/*
+	 * Segment protection should work even with EDAT off, try to write
+	 * anyway and expect a fault
+	 */
+	protect_dat_entry(m, SEGMENT_ENTRY_P, pgtable_level_pmd);
+	expect_pgm_int();
+	p[0] = 42;
+	report(!p[0] && check_pgm_prot(m), "pmd, ro");
+	unprotect_dat_entry(m, SEGMENT_ENTRY_P, pgtable_level_pmd);
+
+	/* The FC bit should be ignored because EDAT is off, like above */
+	p[0] = 42;
+	protect_dat_entry(m, REGION3_ENTRY_FC, pgtable_level_pud);
+	report(p[0] == 42, "pud, fc=1, r/w");
+	unprotect_dat_entry(m, REGION3_ENTRY_FC, pgtable_level_pud);
+	p[0] = 0;
+
+	/*
+	 * Region1/2/3 protection should not work, because EDAT is off.
+	 * Protect the various region1/2/3 entries and write, expect the
+	 * write to be successful.
+	 */
+	protect_dat_entry(m, REGION_ENTRY_P, pgtable_level_pud);
+	p[0] = 42;
+	report(p[0] == 42, "pud, ro");
+	unprotect_dat_entry(m, REGION_ENTRY_P, pgtable_level_pud);
+	p[0] = 0;
+
+	protect_dat_entry(m, REGION_ENTRY_P, pgtable_level_p4d);
+	p[0] = 42;
+	report(p[0] == 42, "p4d, ro");
+	unprotect_dat_entry(m, REGION_ENTRY_P, pgtable_level_p4d);
+	p[0] = 0;
+
+	protect_dat_entry(m, REGION_ENTRY_P, pgtable_level_pgd);
+	p[0] = 42;
+	report(p[0] == 42, "pgd, ro");
+	unprotect_dat_entry(m, REGION_ENTRY_P, pgtable_level_pgd);
+	p[0] = 0;
+
+	report_prefix_pop();
+}
+
+static void test_edat1(void)
+{
+	report_prefix_push("edat1");
+	/* Enable EDAT */
+	ctl_set_bit(0, CTL0_EDAT);
+	p[0] = 0;
+
+	/*
+	 * Segment protection should work normally, try to write and expect
+	 * a fault.
+	 */
+	expect_pgm_int();
+	protect_dat_entry(m, SEGMENT_ENTRY_P, pgtable_level_pmd);
+	p[0] = 42;
+	report(!p[0] && check_pgm_prot(m), "pmd, ro");
+	unprotect_dat_entry(m, SEGMENT_ENTRY_P, pgtable_level_pmd);
+
+	/*
+	 * Region1/2/3 protection should work now, because EDAT is on. Try
+	 * to write anyway and expect a fault.
+	 */
+	expect_pgm_int();
+	protect_dat_entry(m, REGION_ENTRY_P, pgtable_level_pud);
+	p[0] = 42;
+	report(!p[0] && check_pgm_prot(m), "pud, ro");
+	unprotect_dat_entry(m, REGION_ENTRY_P, pgtable_level_pud);
+
+	expect_pgm_int();
+	protect_dat_entry(m, REGION_ENTRY_P, pgtable_level_p4d);
+	p[0] = 42;
+	report(!p[0] && check_pgm_prot(m), "p4d, ro");
+	unprotect_dat_entry(m, REGION_ENTRY_P, pgtable_level_p4d);
+
+	expect_pgm_int();
+	protect_dat_entry(m, REGION_ENTRY_P, pgtable_level_pgd);
+	p[0] = 42;
+	report(!p[0] && check_pgm_prot(m), "pgd, ro");
+	unprotect_dat_entry(m, REGION_ENTRY_P, pgtable_level_pgd);
+
+	/* Large pages should work */
+	p[0] = 42;
+	install_large_page(root, 0, mem);
+	report(p[0] == 42, "pmd, large");
+
+	/*
+	 * Prefixing should not work with large pages. Since the lower
+	 * addresses are mapped with small pages, which are subject to
+	 * prefixing, and the pages mapped with large pages are not subject
+	 * to prefixing, this is the resulting scenario:
+	 *
+	 * virtual 0 = real 0 -> absolute prefix_buf
+	 * virtual prefix_buf = real prefix_buf -> absolute 0
+	 * VIRT(0) -> absolute 0
+	 * VIRT(prefix_buf) -> absolute prefix_buf
+	 *
+	 * The testcase checks if the memory at virtual 0 has the same
+	 * content as the memory at VIRT(prefix_buf) and the memory at
+	 * VIRT(0) has the same content as the memory at virtual prefix_buf.
+	 * If prefixing is erroneously applied for large pages, the testcase
+	 * will therefore fail.
+	 */
+	report(!memcmp(0, VIRT(prefix_buf), LC_SIZE) &&
+		!memcmp(prefix_buf, VIRT(0), LC_SIZE),
+		"pmd, large, prefixing");
+
+	report_prefix_pop();
+}
+
+static void test_edat2(void)
+{
+	report_prefix_push("edat2");
+	p[0] = 42;
+
+	/* Huge pages should work */
+	install_huge_page(root, 0, mem);
+	report(p[0] == 42, "pud, huge");
+
+	/* Prefixing should not work with huge pages, just like large pages */
+	report(!memcmp(0, VIRT(prefix_buf), LC_SIZE) &&
+		!memcmp(prefix_buf, VIRT(0), LC_SIZE),
+		"pud, huge, prefixing");
+
+	report_prefix_pop();
+}
+
+static unsigned int setup(void)
+{
+	bool has_edat1 = test_facility(8);
+	bool has_edat2 = test_facility(78);
+	unsigned long pa, va;
+
+	if (has_edat2 && !has_edat1)
+		report_abort("EDAT2 available, but EDAT1 not available");
+
+	/* Setup DAT 1:1 mapping and memory management */
+	setup_vm();
+	root = (void *)(stctg(1) & PAGE_MASK);
+
+	/*
+	 * Get a pgd worth of virtual memory, so we can test things later
+	 * without interfering with the test code or the interrupt handler
+	 */
+	mem = alloc_vpages_aligned(BIT_ULL(PGD_PAGE_SHIFT), PGD_PAGE_SHIFT);
+	assert(mem);
+	va = (unsigned long)mem;
+
+	/* Map the first 1GB of real memory */
+	for (pa = 0; pa < SZ_1G; pa += PAGE_SIZE, va += PAGE_SIZE)
+		install_page(root, pa, (void *)va);
+
+	/*
+	 * Move the lowcore to a known non-zero location. This is needed
+	 * later to check whether prefixing is working with large pages.
+	 */
+	assert((unsigned long)&prefix_buf < SZ_2G);
+	memcpy(prefix_buf, 0, LC_SIZE);
+	set_prefix((uint32_t)(uintptr_t)prefix_buf);
+	/* Clear the old copy */
+	memset(prefix_buf, 0, LC_SIZE);
+
+	/* m will point to tmp through the new virtual mapping */
+	m = VIRT(&tmp);
+	/* p is the same as m but volatile */
+	p = (volatile unsigned int *)m;
+
+	return has_edat1 + has_edat2;
+}
+
+int main(void)
+{
+	unsigned int edat;
+
+	report_prefix_push("edat");
+	edat = setup();
+
+	test_dat();
+
+	if (edat)
+		test_edat1();
+	else
+		report_skip("EDAT not available");
+
+	if (edat >= 2)
+		test_edat2();
+	else
+		report_skip("EDAT2 not available");
+
+	report_prefix_pop();
+	return report_summary();
+}
diff --git a/s390x/unittests.cfg b/s390x/unittests.cfg
index 9f81a608..a0ec8864 100644
--- a/s390x/unittests.cfg
+++ b/s390x/unittests.cfg
@@ -103,3 +103,6 @@ file = sie.elf
 [mvpg]
 file = mvpg.elf
 timeout = 10
+
+[edat]
+file = edat.elf
-- 
2.31.1



* Re: [kvm-unit-tests PATCH v5 6/7] s390x: mmu: add support for large pages
From: Janosch Frank @ 2021-06-18  7:36 UTC
  To: Claudio Imbrenda, kvm; +Cc: linux-s390, david, thuth, cohuck

On 6/11/21 4:07 PM, Claudio Imbrenda wrote:
> Add support for 1M and 2G pages.
> 
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>

Acked-by: Janosch Frank <frankja@de.ibm.com>



* Re: [kvm-unit-tests PATCH v5 0/7] s390: Add support for large pages
From: Janosch Frank @ 2021-06-18  7:36 UTC
  To: Claudio Imbrenda, kvm; +Cc: linux-s390, david, thuth, cohuck

On 6/11/21 4:06 PM, Claudio Imbrenda wrote:
> Introduce support for large (1M) and huge (2G) pages.
> 
> Add a simple testcase for EDAT1 and EDAT2.


Thanks, picked



