* [PATCH v2 0/2] s390/kvm: fix MVPG when in VSIE
@ 2021-02-02 18:00 Claudio Imbrenda
2021-02-02 18:00 ` [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer Claudio Imbrenda
2021-02-02 18:00 ` [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE Claudio Imbrenda
0 siblings, 2 replies; 12+ messages in thread
From: Claudio Imbrenda @ 2021-02-02 18:00 UTC (permalink / raw)
To: linux-kernel; +Cc: borntraeger, frankja, david, kvm, linux-s390
The current handling of the MVPG instruction when executed in a nested
guest is wrong, and can lead to the nested guest hanging.
This patchset fixes the behaviour to be more architecturally correct,
and fixes the hangs observed.
v1->v2
* complete rewrite
Claudio Imbrenda (2):
s390/kvm: extend kvm_s390_shadow_fault to return entry pointer
s390/kvm: VSIE: correctly handle MVPG when in VSIE
arch/s390/kvm/gaccess.c | 26 ++++++++--
arch/s390/kvm/gaccess.h | 5 +-
arch/s390/kvm/vsie.c | 102 ++++++++++++++++++++++++++++++++++++----
3 files changed, 119 insertions(+), 14 deletions(-)
--
2.26.2
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer
2021-02-02 18:00 [PATCH v2 0/2] s390/kvm: fix MVPG when in VSIE Claudio Imbrenda
@ 2021-02-02 18:00 ` Claudio Imbrenda
2021-02-04 16:34 ` Janosch Frank
2021-02-02 18:00 ` [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE Claudio Imbrenda
1 sibling, 1 reply; 12+ messages in thread
From: Claudio Imbrenda @ 2021-02-02 18:00 UTC (permalink / raw)
To: linux-kernel; +Cc: borntraeger, frankja, david, kvm, linux-s390, stable
Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
DAT table entry, or to the invalid entry.
Also return some flags in the lower bits of the address:
DAT_PROT: indicates that DAT protection applies because of the
protection bit in the segment (or, if EDAT, region) tables
NOT_PTE: indicates that the address of the DAT table entry returned
does not refer to a PTE, but to a segment or region table.
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: stable@vger.kernel.org
---
arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
arch/s390/kvm/gaccess.h | 5 ++++-
arch/s390/kvm/vsie.c | 8 ++++----
3 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 6d6b57059493..2d7bcbfb185e 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
rfte.val = ptr;
goto shadow_r2t;
}
+ *pgt = ptr + vaddr.rfx * 8;
rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
if (rc)
return rc;
@@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
rste.val = ptr;
goto shadow_r3t;
}
+ *pgt = ptr + vaddr.rsx * 8;
rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
if (rc)
return rc;
@@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
rtte.val = ptr;
goto shadow_sgt;
}
+ *pgt = ptr + vaddr.rtx * 8;
rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
if (rc)
return rc;
@@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
ste.val = ptr;
goto shadow_pgt;
}
+ *pgt = ptr + vaddr.sx * 8;
rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
if (rc)
return rc;
@@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
* @vcpu: virtual cpu
* @sg: pointer to the shadow guest address space structure
* @saddr: faulting address in the shadow gmap
+ * @pteptr: will contain the address of the faulting DAT table entry, or of
+ * the valid leaf, plus some flags
*
* Returns: - 0 if the shadow fault was successfully resolved
* - > 0 (pgm exception code) on exceptions while faulting
@@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
* - -ENOMEM if out of memory
*/
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
- unsigned long saddr)
+ unsigned long saddr, unsigned long *pteptr)
{
union vaddress vaddr;
union page_table_entry pte;
- unsigned long pgt;
+ unsigned long pgt = 0;
int dat_protection, fake;
int rc;
@@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
pte.val = pgt + vaddr.px * PAGE_SIZE;
goto shadow_page;
}
- if (!rc)
- rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
+
+ switch (rc) {
+ case PGM_SEGMENT_TRANSLATION:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_FIRST_TRANS:
+ pgt |= NOT_PTE;
+ break;
+ case 0:
+ pgt += vaddr.px * 8;
+ rc = gmap_read_table(sg->parent, pgt, &pte.val);
+ }
+ if (*pteptr)
+ *pteptr = pgt | dat_protection * DAT_PROT;
if (!rc && pte.i)
rc = PGM_PAGE_TRANSLATION;
if (!rc && pte.z)
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index f4c51756c462..66a6e2cec97a 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
int ipte_lock_held(struct kvm_vcpu *vcpu);
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
+#define DAT_PROT 2
+#define NOT_PTE 4
+
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
- unsigned long saddr);
+ unsigned long saddr, unsigned long *pteptr);
#endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index c5d0a58b2c29..7db022141db3 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
/* with mso/msl, the prefix lies at offset *mso* */
prefix += scb_s->mso;
- rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
+ rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
if (!rc && (scb_s->ecb & ECB_TE))
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
- prefix + PAGE_SIZE);
+ prefix + PAGE_SIZE, NULL);
/*
* We don't have to mprotect, we will be called for all unshadows.
* SIE will detect if protection applies and trigger a validity.
@@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
current->thread.gmap_addr, 1);
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
- current->thread.gmap_addr);
+ current->thread.gmap_addr, NULL);
if (rc > 0) {
rc = inject_fault(vcpu, rc,
current->thread.gmap_addr,
@@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu,
{
if (vsie_page->fault_addr)
kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
- vsie_page->fault_addr);
+ vsie_page->fault_addr, NULL);
vsie_page->fault_addr = 0;
}
--
2.26.2
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE
2021-02-02 18:00 [PATCH v2 0/2] s390/kvm: fix MVPG when in VSIE Claudio Imbrenda
2021-02-02 18:00 ` [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer Claudio Imbrenda
@ 2021-02-02 18:00 ` Claudio Imbrenda
2021-02-03 10:36 ` Claudio Imbrenda
2021-02-04 17:10 ` Janosch Frank
1 sibling, 2 replies; 12+ messages in thread
From: Claudio Imbrenda @ 2021-02-02 18:00 UTC (permalink / raw)
To: linux-kernel; +Cc: borntraeger, frankja, david, kvm, linux-s390, stable
Correctly handle the MVPG instruction when issued by a VSIE guest.
Fixes: a3508fbe9dc6d ("KVM: s390: vsie: initial support for nested virtualization")
Cc: stable@vger.kernel.org
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
arch/s390/kvm/vsie.c | 94 +++++++++++++++++++++++++++++++++++++++++---
1 file changed, 89 insertions(+), 5 deletions(-)
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 7db022141db3..2db49749e27b 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -416,11 +416,6 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
memcpy((void *)((u64)scb_o + 0xc0),
(void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
break;
- case ICPT_PARTEXEC:
- /* MVPG only */
- memcpy((void *)((u64)scb_o + 0xc0),
- (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
- break;
}
if (scb_s->ihcpu != 0xffffU)
@@ -982,6 +977,91 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return 0;
}
+static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg)
+{
+ reg &= 0xf;
+ switch (reg) {
+ case 15:
+ return vsie_page->scb_s.gg15;
+ case 14:
+ return vsie_page->scb_s.gg14;
+ default:
+ return vcpu->run->s.regs.gprs[reg];
+ }
+}
+
+static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ unsigned long pei1, pei2, src, dest, mask = PAGE_MASK;
+ u64 *pei_block = &vsie_page->scb_o->mcic;
+ int edat, rc1, rc2;
+ union ctlreg0 cr0;
+
+ cr0.val = vcpu->arch.sie_block->gcr[0];
+ edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
+ if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_24BIT)
+ mask = 0xfff000;
+ else if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_31BIT)
+ mask = 0x7ffff000;
+
+ dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
+ src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask;
+
+ rc1 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei1);
+ rc2 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei2);
+ /*
+ * Either everything went well, or something non-critical went wrong
+ * e.g. because of a race. In either case, simply retry.
+ */
+ if (rc1 == -EAGAIN || rc2 == -EAGAIN || (!rc1 && !rc2)) {
+ retry_vsie_icpt(vsie_page);
+ return -EAGAIN;
+ }
+ /* Something more serious went wrong, propagate the error */
+ if (rc1 < 0)
+ return rc1;
+ if (rc2 < 0)
+ return rc2;
+
+ /* The only possible suppressing exception: just deliver it */
+ if (rc1 == PGM_TRANSLATION_SPEC || rc2 == PGM_TRANSLATION_SPEC) {
+ clear_vsie_icpt(vsie_page);
+ rc1 = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC);
+ WARN_ON_ONCE(rc1);
+ return 1;
+ }
+
+ /*
+ * Forward the PEI intercept to the guest if it was a page fault, or
+ * also for segment and region table faults if EDAT applies.
+ */
+ if (edat) {
+ rc1 = rc1 == PGM_ASCE_TYPE ? rc1 : 0;
+ rc2 = rc2 == PGM_ASCE_TYPE ? rc2 : 0;
+ }
+ if ((!rc1 || rc1 == PGM_PAGE_TRANSLATION) && (!rc2 || rc2 == PGM_PAGE_TRANSLATION)) {
+ pei_block[0] = pei1;
+ pei_block[1] = pei2;
+ return 1;
+ }
+
+ retry_vsie_icpt(vsie_page);
+
+ /*
+ * The host has edat, and the guest does not, or it was an ASCE type
+ * exception. The host needs to inject the appropriate DAT interrupts
+ * into the guest.
+ */
+ if (rc1)
+ return inject_fault(vcpu, rc1, dest, 1);
+ if (rc2)
+ return inject_fault(vcpu, rc2, src, 0);
+
+ /* This should never be reached */
+ return 0;
+}
+
/*
* Run the vsie on a shadow scb and a shadow gmap, without any further
* sanity checks, handling SIE faults.
@@ -1068,6 +1148,10 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if ((scb_s->ipa & 0xf000) != 0xf000)
scb_s->ipa += 0x1000;
break;
+ case ICPT_PARTEXEC:
+ if (scb_s->ipa == 0xb254)
+ rc = vsie_handle_mvpg(vcpu, vsie_page);
+ break;
}
return rc;
}
--
2.26.2
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE
2021-02-02 18:00 ` [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE Claudio Imbrenda
@ 2021-02-03 10:36 ` Claudio Imbrenda
2021-02-04 17:10 ` Janosch Frank
1 sibling, 0 replies; 12+ messages in thread
From: Claudio Imbrenda @ 2021-02-03 10:36 UTC (permalink / raw)
To: linux-kernel; +Cc: borntraeger, frankja, david, kvm, linux-s390, stable
On Tue, 2 Feb 2021 19:00:28 +0100
Claudio Imbrenda <imbrenda@linux.ibm.com> wrote:
> Correctly handle the MVPG instruction when issued by a VSIE guest.
>
> Fixes: a3508fbe9dc6d ("KVM: s390: vsie: initial support for nested
> virtualization") Cc: stable@vger.kernel.org
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> ---
> arch/s390/kvm/vsie.c | 94
> +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 89
> insertions(+), 5 deletions(-)
>
> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> index 7db022141db3..2db49749e27b 100644
> --- a/arch/s390/kvm/vsie.c
> +++ b/arch/s390/kvm/vsie.c
> @@ -416,11 +416,6 @@ static void unshadow_scb(struct kvm_vcpu *vcpu,
> struct vsie_page *vsie_page) memcpy((void *)((u64)scb_o + 0xc0),
> (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
> break;
> - case ICPT_PARTEXEC:
> - /* MVPG only */
> - memcpy((void *)((u64)scb_o + 0xc0),
> - (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
> - break;
> }
>
> if (scb_s->ihcpu != 0xffffU)
> @@ -982,6 +977,91 @@ static int handle_stfle(struct kvm_vcpu *vcpu,
> struct vsie_page *vsie_page) return 0;
> }
>
> +static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page
> *vsie_page, u8 reg) +{
> + reg &= 0xf;
> + switch (reg) {
> + case 15:
> + return vsie_page->scb_s.gg15;
> + case 14:
> + return vsie_page->scb_s.gg14;
> + default:
> + return vcpu->run->s.regs.gprs[reg];
> + }
> +}
> +
> +static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page
> *vsie_page) +{
> + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
> + unsigned long pei1, pei2, src, dest, mask = PAGE_MASK;
> + u64 *pei_block = &vsie_page->scb_o->mcic;
> + int edat, rc1, rc2;
> + union ctlreg0 cr0;
> +
> + cr0.val = vcpu->arch.sie_block->gcr[0];
> + edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
> + if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_24BIT)
> + mask = 0xfff000;
> + else if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_31BIT)
> + mask = 0x7ffff000;
> +
> + dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16)
> & mask;
> + src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) &
> mask; +
> + rc1 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest,
> &pei1);
> + rc2 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src,
> &pei2);
> + /*
> + * Either everything went well, or something non-critical
> went wrong
> + * e.g. because of a race. In either case, simply retry.
> + */
> + if (rc1 == -EAGAIN || rc2 == -EAGAIN || (!rc1 && !rc2)) {
> + retry_vsie_icpt(vsie_page);
> + return -EAGAIN;
> + }
> + /* Something more serious went wrong, propagate the error */
> + if (rc1 < 0)
> + return rc1;
> + if (rc2 < 0)
> + return rc2;
> +
> + /* The only possible suppressing exception: just deliver it
> */
> + if (rc1 == PGM_TRANSLATION_SPEC || rc2 ==
> PGM_TRANSLATION_SPEC) {
> + clear_vsie_icpt(vsie_page);
> + rc1 = kvm_s390_inject_program_int(vcpu,
> PGM_TRANSLATION_SPEC);
> + WARN_ON_ONCE(rc1);
> + return 1;
> + }
> +
> + /*
> + * Forward the PEI intercept to the guest if it was a page
> fault, or
> + * also for segment and region table faults if EDAT applies.
> + */
> + if (edat) {
> + rc1 = rc1 == PGM_ASCE_TYPE ? rc1 : 0;
> + rc2 = rc2 == PGM_ASCE_TYPE ? rc2 : 0;
> + }
I just noticed, this should actually be:
if (edat) {
rc1 = rc1 == PGM_ASCE_TYPE ? rc1 : 0;
rc2 = rc2 == PGM_ASCE_TYPE ? rc2 : 0;
} else {
rc1 = rc1 != PGM_PAGE_TRANSLATION ? rc1 : 0;
rc2 = rc2 != PGM_PAGE_TRANSLATION ? rc2 : 0;
}
I'll fix it in the next version
> + if ((!rc1 || rc1 == PGM_PAGE_TRANSLATION) && (!rc2 || rc2 ==
> PGM_PAGE_TRANSLATION)) {
> + pei_block[0] = pei1;
> + pei_block[1] = pei2;
> + return 1;
> + }
> +
> + retry_vsie_icpt(vsie_page);
> +
> + /*
> + * The host has edat, and the guest does not, or it was an
> ASCE type
> + * exception. The host needs to inject the appropriate DAT
> interrupts
> + * into the guest.
> + */
> + if (rc1)
> + return inject_fault(vcpu, rc1, dest, 1);
> + if (rc2)
> + return inject_fault(vcpu, rc2, src, 0);
> +
> + /* This should never be reached */
> + return 0;
> +}
> +
> /*
> * Run the vsie on a shadow scb and a shadow gmap, without any
> further
> * sanity checks, handling SIE faults.
> @@ -1068,6 +1148,10 @@ static int do_vsie_run(struct kvm_vcpu *vcpu,
> struct vsie_page *vsie_page) if ((scb_s->ipa & 0xf000) != 0xf000)
> scb_s->ipa += 0x1000;
> break;
> + case ICPT_PARTEXEC:
> + if (scb_s->ipa == 0xb254)
> + rc = vsie_handle_mvpg(vcpu, vsie_page);
> + break;
> }
> return rc;
> }
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer
2021-02-02 18:00 ` [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer Claudio Imbrenda
@ 2021-02-04 16:34 ` Janosch Frank
2021-02-04 17:05 ` Janosch Frank
2021-02-05 12:15 ` Claudio Imbrenda
0 siblings, 2 replies; 12+ messages in thread
From: Janosch Frank @ 2021-02-04 16:34 UTC (permalink / raw)
To: Claudio Imbrenda, linux-kernel
Cc: borntraeger, david, kvm, linux-s390, stable
On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
> DAT table entry, or to the invalid entry.
>
> Also return some flags in the lower bits of the address:
> DAT_PROT: indicates that DAT protection applies because of the
> protection bit in the segment (or, if EDAT, region) tables
> NOT_PTE: indicates that the address of the DAT table entry returned
> does not refer to a PTE, but to a segment or region table.
>
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> Cc: stable@vger.kernel.org
> ---
> arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
> arch/s390/kvm/gaccess.h | 5 ++++-
> arch/s390/kvm/vsie.c | 8 ++++----
> 3 files changed, 30 insertions(+), 9 deletions(-)
>
> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> index 6d6b57059493..2d7bcbfb185e 100644
> --- a/arch/s390/kvm/gaccess.c
> +++ b/arch/s390/kvm/gaccess.c
> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> rfte.val = ptr;
> goto shadow_r2t;
> }
> + *pgt = ptr + vaddr.rfx * 8;
So pgt either is a table entry if rc > 0 or a pointer to the first pte
on rc == 0 after this change?
Hrm, if it is really based on RCs then I might be able to come to terms
with having two things in a ptr with the name pgt. But it needs a
comment change.
> rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
> if (rc)
> return rc;
> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> rste.val = ptr;
> goto shadow_r3t;
> }
> + *pgt = ptr + vaddr.rsx * 8;
> rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
> if (rc)
> return rc;
> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> rtte.val = ptr;
> goto shadow_sgt;
> }
> + *pgt = ptr + vaddr.rtx * 8;
> rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
> if (rc)
> return rc;
> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> ste.val = ptr;
> goto shadow_pgt;
> }
> + *pgt = ptr + vaddr.sx * 8;
> rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
> if (rc)
> return rc;
> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> * @vcpu: virtual cpu
> * @sg: pointer to the shadow guest address space structure
> * @saddr: faulting address in the shadow gmap
> + * @pteptr: will contain the address of the faulting DAT table entry, or of
> + * the valid leaf, plus some flags
pteptr is not the right name if it can be two things
> *
> * Returns: - 0 if the shadow fault was successfully resolved
> * - > 0 (pgm exception code) on exceptions while faulting
> @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> * - -ENOMEM if out of memory
> */
> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> - unsigned long saddr)
> + unsigned long saddr, unsigned long *pteptr)
> {
> union vaddress vaddr;
> union page_table_entry pte;
> - unsigned long pgt;
> + unsigned long pgt = 0;
> int dat_protection, fake;
> int rc;
>
> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> pte.val = pgt + vaddr.px * PAGE_SIZE;
> goto shadow_page;
> }
> - if (!rc)
> - rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
> +
> + switch (rc) {
> + case PGM_SEGMENT_TRANSLATION:
> + case PGM_REGION_THIRD_TRANS:
> + case PGM_REGION_SECOND_TRANS:
> + case PGM_REGION_FIRST_TRANS:
> + pgt |= NOT_PTE;
GACC_TRANSL_ENTRY_INV ?
> + break;
> + case 0:
> + pgt += vaddr.px * 8;
> + rc = gmap_read_table(sg->parent, pgt, &pte.val);
> + }
> + if (*pteptr)
> + *pteptr = pgt | dat_protection * DAT_PROT;
> if (!rc && pte.i)
> rc = PGM_PAGE_TRANSLATION;
> if (!rc && pte.z)
> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> index f4c51756c462..66a6e2cec97a 100644
> --- a/arch/s390/kvm/gaccess.h
> +++ b/arch/s390/kvm/gaccess.h
> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
> int ipte_lock_held(struct kvm_vcpu *vcpu);
> int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
>
> +#define DAT_PROT 2
GACC_TRANSL_ENTRY_PROT
> +#define NOT_PTE 4
> +
> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
> - unsigned long saddr);
> + unsigned long saddr, unsigned long *pteptr);
>
> #endif /* __KVM_S390_GACCESS_H */
> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> index c5d0a58b2c29..7db022141db3 100644
> --- a/arch/s390/kvm/vsie.c
> +++ b/arch/s390/kvm/vsie.c
> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> /* with mso/msl, the prefix lies at offset *mso* */
> prefix += scb_s->mso;
>
> - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
> + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
> if (!rc && (scb_s->ecb & ECB_TE))
> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> - prefix + PAGE_SIZE);
> + prefix + PAGE_SIZE, NULL);
> /*
> * We don't have to mprotect, we will be called for all unshadows.
> * SIE will detect if protection applies and trigger a validity.
> @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> current->thread.gmap_addr, 1);
>
> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> - current->thread.gmap_addr);
> + current->thread.gmap_addr, NULL);
> if (rc > 0) {
> rc = inject_fault(vcpu, rc,
> current->thread.gmap_addr,
> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu,
> {
> if (vsie_page->fault_addr)
> kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> - vsie_page->fault_addr);
> + vsie_page->fault_addr, NULL);
Ok
> vsie_page->fault_addr = 0;
> }
>
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer
2021-02-04 16:34 ` Janosch Frank
@ 2021-02-04 17:05 ` Janosch Frank
2021-02-05 12:18 ` Claudio Imbrenda
2021-02-05 12:15 ` Claudio Imbrenda
1 sibling, 1 reply; 12+ messages in thread
From: Janosch Frank @ 2021-02-04 17:05 UTC (permalink / raw)
To: Claudio Imbrenda, linux-kernel
Cc: borntraeger, david, kvm, linux-s390, stable
On 2/4/21 5:34 PM, Janosch Frank wrote:
> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
>> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
>> DAT table entry, or to the invalid entry.
>>
>> Also return some flags in the lower bits of the address:
>> DAT_PROT: indicates that DAT protection applies because of the
>> protection bit in the segment (or, if EDAT, region) tables
>> NOT_PTE: indicates that the address of the DAT table entry returned
>> does not refer to a PTE, but to a segment or region table.
>>
>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>> Cc: stable@vger.kernel.org
>> ---
>> arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
>> arch/s390/kvm/gaccess.h | 5 ++++-
>> arch/s390/kvm/vsie.c | 8 ++++----
>> 3 files changed, 30 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>> index 6d6b57059493..2d7bcbfb185e 100644
>> --- a/arch/s390/kvm/gaccess.c
>> +++ b/arch/s390/kvm/gaccess.c
>> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>> rfte.val = ptr;
>> goto shadow_r2t;
>> }
>> + *pgt = ptr + vaddr.rfx * 8;
>
> So pgt either is a table entry if rc > 0 or a pointer to the first pte
> on rc == 0 after this change?
>
> Hrm, if it is really based on RCs then I might be able to come to terms
> with having two things in a ptr with the name pgt. But it needs a
> comment change.
>
>> rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
>> if (rc)
>> return rc;
>> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>> rste.val = ptr;
>> goto shadow_r3t;
>> }
>> + *pgt = ptr + vaddr.rsx * 8;
>> rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
>> if (rc)
>> return rc;
>> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>> rtte.val = ptr;
>> goto shadow_sgt;
>> }
>> + *pgt = ptr + vaddr.rtx * 8;
>> rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
>> if (rc)
>> return rc;
>> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>> ste.val = ptr;
>> goto shadow_pgt;
>> }
>> + *pgt = ptr + vaddr.sx * 8;
>> rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
>> if (rc)
>> return rc;
>> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>> * @vcpu: virtual cpu
>> * @sg: pointer to the shadow guest address space structure
>> * @saddr: faulting address in the shadow gmap
>> + * @pteptr: will contain the address of the faulting DAT table entry, or of
>> + * the valid leaf, plus some flags
>
> pteptr is not the right name if it can be two things
You use it for pei only, right?
>
>> *
>> * Returns: - 0 if the shadow fault was successfully resolved
>> * - > 0 (pgm exception code) on exceptions while faulting
>> @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>> * - -ENOMEM if out of memory
>> */
>> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
>> - unsigned long saddr)
>> + unsigned long saddr, unsigned long *pteptr)
>> {
>> union vaddress vaddr;
>> union page_table_entry pte;
>> - unsigned long pgt;
>> + unsigned long pgt = 0;
>> int dat_protection, fake;
>> int rc;
>>
>> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
>> pte.val = pgt + vaddr.px * PAGE_SIZE;
>> goto shadow_page;
>> }
>> - if (!rc)
>> - rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
>> +
>> + switch (rc) {
>> + case PGM_SEGMENT_TRANSLATION:
>> + case PGM_REGION_THIRD_TRANS:
>> + case PGM_REGION_SECOND_TRANS:
>> + case PGM_REGION_FIRST_TRANS:
>> + pgt |= NOT_PTE;
>
> GACC_TRANSL_ENTRY_INV ?
>
>> + break;
>> + case 0:
>> + pgt += vaddr.px * 8;
>> + rc = gmap_read_table(sg->parent, pgt, &pte.val);
>> + }
>> + if (*pteptr)
>> + *pteptr = pgt | dat_protection * DAT_PROT;
>> if (!rc && pte.i)
>> rc = PGM_PAGE_TRANSLATION;
>> if (!rc && pte.z)
>> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
>> index f4c51756c462..66a6e2cec97a 100644
>> --- a/arch/s390/kvm/gaccess.h
>> +++ b/arch/s390/kvm/gaccess.h
>> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
>> int ipte_lock_held(struct kvm_vcpu *vcpu);
>> int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
>>
>> +#define DAT_PROT 2
>
> GACC_TRANSL_ENTRY_PROT
Ok after a second pass that's not what's going on here.
Those basically directly correspond to the MVPG PEI indication bits, right?
Do we also need to consider bit 63?
>
>> +#define NOT_PTE 4
>> +
>> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
>> - unsigned long saddr);
>> + unsigned long saddr, unsigned long *pteptr);
>>
>> #endif /* __KVM_S390_GACCESS_H */
>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
>> index c5d0a58b2c29..7db022141db3 100644
>> --- a/arch/s390/kvm/vsie.c
>> +++ b/arch/s390/kvm/vsie.c
>> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>> /* with mso/msl, the prefix lies at offset *mso* */
>> prefix += scb_s->mso;
>>
>> - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
>> + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
>> if (!rc && (scb_s->ecb & ECB_TE))
>> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>> - prefix + PAGE_SIZE);
>> + prefix + PAGE_SIZE, NULL);
>> /*
>> * We don't have to mprotect, we will be called for all unshadows.
>> * SIE will detect if protection applies and trigger a validity.
>> @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>> current->thread.gmap_addr, 1);
>>
>> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>> - current->thread.gmap_addr);
>> + current->thread.gmap_addr, NULL);
>> if (rc > 0) {
>> rc = inject_fault(vcpu, rc,
>> current->thread.gmap_addr,
>> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu,
>> {
>> if (vsie_page->fault_addr)
>> kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>> - vsie_page->fault_addr);
>> + vsie_page->fault_addr, NULL);
>
> Ok
>
>> vsie_page->fault_addr = 0;
>> }
>>
>>
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE
2021-02-02 18:00 ` [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE Claudio Imbrenda
2021-02-03 10:36 ` Claudio Imbrenda
@ 2021-02-04 17:10 ` Janosch Frank
2021-02-05 12:20 ` Claudio Imbrenda
1 sibling, 1 reply; 12+ messages in thread
From: Janosch Frank @ 2021-02-04 17:10 UTC (permalink / raw)
To: Claudio Imbrenda, linux-kernel
Cc: borntraeger, david, kvm, linux-s390, stable
On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
> Correctly handle the MVPG instruction when issued by a VSIE guest.
>
> Fixes: a3508fbe9dc6d ("KVM: s390: vsie: initial support for nested virtualization")
> Cc: stable@vger.kernel.org
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
So far the patch looks ok to me and way better to understand than v1,
good job
> ---
> arch/s390/kvm/vsie.c | 94 +++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 89 insertions(+), 5 deletions(-)
>
> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> index 7db022141db3..2db49749e27b 100644
> --- a/arch/s390/kvm/vsie.c
> +++ b/arch/s390/kvm/vsie.c
> @@ -416,11 +416,6 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> memcpy((void *)((u64)scb_o + 0xc0),
> (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
Magic offsets being magic
Another item for my todo list.
> break;
> - case ICPT_PARTEXEC:
> - /* MVPG only */
> - memcpy((void *)((u64)scb_o + 0xc0),
> - (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
> - break;
> }
>
> if (scb_s->ihcpu != 0xffffU)
> @@ -982,6 +977,91 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> return 0;
> }
>
> +static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg)
> +{
> + reg &= 0xf;
> + switch (reg) {
> + case 15:
> + return vsie_page->scb_s.gg15;
> + case 14:
> + return vsie_page->scb_s.gg14;
> + default:
> + return vcpu->run->s.regs.gprs[reg];
> + }
> +}
> +
> +static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> +{
> + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
> + unsigned long pei1, pei2, src, dest, mask = PAGE_MASK;
> + u64 *pei_block = &vsie_page->scb_o->mcic;
> + int edat, rc1, rc2;
Can you use a src/dst prefix or suffix please?
1/2 is confusing.
> + union ctlreg0 cr0;
> +
> + cr0.val = vcpu->arch.sie_block->gcr[0];
> + edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
> + if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_24BIT)
> + mask = 0xfff000;
> + else if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_31BIT)
> + mask = 0x7ffff000;
> +
> + dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
> + src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask;
> +
> + rc1 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei1);
> + rc2 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei2);
> + /*
> + * Either everything went well, or something non-critical went wrong
> + * e.g. because of a race. In either case, simply retry.
> + */
> + if (rc1 == -EAGAIN || rc2 == -EAGAIN || (!rc1 && !rc2)) {
> + retry_vsie_icpt(vsie_page);
> + return -EAGAIN;
> + }
> + /* Something more serious went wrong, propagate the error */
> + if (rc1 < 0)
> + return rc1;
> + if (rc2 < 0)
> + return rc2;
> +
> + /* The only possible suppressing exception: just deliver it */
> + if (rc1 == PGM_TRANSLATION_SPEC || rc2 == PGM_TRANSLATION_SPEC) {
> + clear_vsie_icpt(vsie_page);
> + rc1 = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC);
> + WARN_ON_ONCE(rc1);
> + return 1;
> + }
> +
> + /*
> + * Forward the PEI intercept to the guest if it was a page fault, or
> + * also for segment and region table faults if EDAT applies.
> + */
> + if (edat) {
> + rc1 = rc1 == PGM_ASCE_TYPE ? rc1 : 0;
> + rc2 = rc2 == PGM_ASCE_TYPE ? rc2 : 0;
> + }
> + if ((!rc1 || rc1 == PGM_PAGE_TRANSLATION) && (!rc2 || rc2 == PGM_PAGE_TRANSLATION)) {
> + pei_block[0] = pei1;
> + pei_block[1] = pei2;
> + return 1;
> + }
> +
> + retry_vsie_icpt(vsie_page);
> +
> + /*
> + * The host has edat, and the guest does not, or it was an ASCE type
> + * exception. The host needs to inject the appropriate DAT interrupts
> + * into the guest.
> + */
> + if (rc1)
> + return inject_fault(vcpu, rc1, dest, 1);
> + if (rc2)
> + return inject_fault(vcpu, rc2, src, 0);
> +
> + /* This should never be reached */
BUG()?
> + return 0;
> +}
> +
> /*
> * Run the vsie on a shadow scb and a shadow gmap, without any further
> * sanity checks, handling SIE faults.
> @@ -1068,6 +1148,10 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> if ((scb_s->ipa & 0xf000) != 0xf000)
> scb_s->ipa += 0x1000;
> break;
> + case ICPT_PARTEXEC:
> + if (scb_s->ipa == 0xb254)
> + rc = vsie_handle_mvpg(vcpu, vsie_page);
> + break;
> }
> return rc;
> }
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer
2021-02-04 16:34 ` Janosch Frank
2021-02-04 17:05 ` Janosch Frank
@ 2021-02-05 12:15 ` Claudio Imbrenda
2021-02-05 12:56 ` Janosch Frank
1 sibling, 1 reply; 12+ messages in thread
From: Claudio Imbrenda @ 2021-02-05 12:15 UTC (permalink / raw)
To: Janosch Frank; +Cc: linux-kernel, borntraeger, david, kvm, linux-s390, stable
On Thu, 4 Feb 2021 17:34:00 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:
> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
> > Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
> > DAT table entry, or to the invalid entry.
> >
> > Also return some flags in the lower bits of the address:
> > DAT_PROT: indicates that DAT protection applies because of the
> > protection bit in the segment (or, if EDAT, region) tables
> > NOT_PTE: indicates that the address of the DAT table entry returned
> > does not refer to a PTE, but to a segment or region table.
> >
> > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > Cc: stable@vger.kernel.org
> > ---
> > arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
> > arch/s390/kvm/gaccess.h | 5 ++++-
> > arch/s390/kvm/vsie.c | 8 ++++----
> > 3 files changed, 30 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> > index 6d6b57059493..2d7bcbfb185e 100644
> > --- a/arch/s390/kvm/gaccess.c
> > +++ b/arch/s390/kvm/gaccess.c
> > @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap
> > *sg, unsigned long saddr, rfte.val = ptr;
> > goto shadow_r2t;
> > }
> > + *pgt = ptr + vaddr.rfx * 8;
>
> So pgt either is a table entry if rc > 0 or a pointer to the first pte
> on rc == 0 after this change?
yes
> Hrm, if it is really based on RCs then I might be able to come to
> terms with having two things in a ptr with the name pgt. But it needs
> a comment change.
will do.
> > rc = gmap_read_table(parent, ptr + vaddr.rfx * 8,
> > &rfte.val); if (rc)
> > return rc;
> > @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap
> > *sg, unsigned long saddr, rste.val = ptr;
> > goto shadow_r3t;
> > }
> > + *pgt = ptr + vaddr.rsx * 8;
> > rc = gmap_read_table(parent, ptr + vaddr.rsx * 8,
> > &rste.val); if (rc)
> > return rc;
> > @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap
> > *sg, unsigned long saddr, rtte.val = ptr;
> > goto shadow_sgt;
> > }
> > + *pgt = ptr + vaddr.rtx * 8;
> > rc = gmap_read_table(parent, ptr + vaddr.rtx * 8,
> > &rtte.val); if (rc)
> > return rc;
> > @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap
> > *sg, unsigned long saddr, ste.val = ptr;
> > goto shadow_pgt;
> > }
> > + *pgt = ptr + vaddr.sx * 8;
> > rc = gmap_read_table(parent, ptr + vaddr.sx * 8,
> > &ste.val); if (rc)
> > return rc;
> > @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap
> > *sg, unsigned long saddr,
> > * @vcpu: virtual cpu
> > * @sg: pointer to the shadow guest address space structure
> > * @saddr: faulting address in the shadow gmap
> > + * @pteptr: will contain the address of the faulting DAT table
> > entry, or of
> > + * the valid leaf, plus some flags
>
> pteptr is not the right name if it can be two things
it cannot be two things there, kvm_s390_shadow_fault always returns a
DAT _entry_ (pte, segment, region).
> > *
> > * Returns: - 0 if the shadow fault was successfully resolved
> > * - > 0 (pgm exception code) on exceptions while
> > faulting @@ -1165,11 +1171,11 @@ static int
> > kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> > * - -ENOMEM if out of memory
> > */
> > int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> > - unsigned long saddr)
> > + unsigned long saddr, unsigned long
> > *pteptr) {
> > union vaddress vaddr;
> > union page_table_entry pte;
> > - unsigned long pgt;
> > + unsigned long pgt = 0;
> > int dat_protection, fake;
> > int rc;
> >
> > @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu
> > *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE;
> > goto shadow_page;
> > }
> > - if (!rc)
> > - rc = gmap_read_table(sg->parent, pgt + vaddr.px *
> > 8, &pte.val); +
> > + switch (rc) {
> > + case PGM_SEGMENT_TRANSLATION:
> > + case PGM_REGION_THIRD_TRANS:
> > + case PGM_REGION_SECOND_TRANS:
> > + case PGM_REGION_FIRST_TRANS:
> > + pgt |= NOT_PTE;
>
> GACC_TRANSL_ENTRY_INV ?
no, this is only for non-pte entries
> > + break;
> > + case 0:
> > + pgt += vaddr.px * 8;
> > + rc = gmap_read_table(sg->parent, pgt, &pte.val);
> > + }
> > + if (*pteptr)
> > + *pteptr = pgt | dat_protection * DAT_PROT;
> > if (!rc && pte.i)
> > rc = PGM_PAGE_TRANSLATION;
> > if (!rc && pte.z)
> > diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> > index f4c51756c462..66a6e2cec97a 100644
> > --- a/arch/s390/kvm/gaccess.h
> > +++ b/arch/s390/kvm/gaccess.h
> > @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
> > int ipte_lock_held(struct kvm_vcpu *vcpu);
> > int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu,
> > unsigned long gra);
> > +#define DAT_PROT 2
>
> GACC_TRANSL_ENTRY_PROT
this is also only for non-pte entries
> > +#define NOT_PTE 4
> > +
> > int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap
> > *shadow,
> > - unsigned long saddr);
> > + unsigned long saddr, unsigned long
> > *pteptr);
> > #endif /* __KVM_S390_GACCESS_H */
> > diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> > index c5d0a58b2c29..7db022141db3 100644
> > --- a/arch/s390/kvm/vsie.c
> > +++ b/arch/s390/kvm/vsie.c
> > @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu,
> > struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at
> > offset *mso* */ prefix += scb_s->mso;
> >
> > - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
> > + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix,
> > NULL); if (!rc && (scb_s->ecb & ECB_TE))
> > rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> > - prefix + PAGE_SIZE);
> > + prefix + PAGE_SIZE,
> > NULL); /*
> > * We don't have to mprotect, we will be called for all
> > unshadows.
> > * SIE will detect if protection applies and trigger a
> > validity. @@ -913,7 +913,7 @@ static int handle_fault(struct
> > kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> > current->thread.gmap_addr, 1);
> > rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> > - current->thread.gmap_addr);
> > + current->thread.gmap_addr,
> > NULL); if (rc > 0) {
> > rc = inject_fault(vcpu, rc,
> > current->thread.gmap_addr,
> > @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu
> > *vcpu, {
> > if (vsie_page->fault_addr)
> > kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> > - vsie_page->fault_addr);
> > + vsie_page->fault_addr,
> > NULL);
>
> Ok
>
> > vsie_page->fault_addr = 0;
> > }
> >
> >
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer
2021-02-04 17:05 ` Janosch Frank
@ 2021-02-05 12:18 ` Claudio Imbrenda
0 siblings, 0 replies; 12+ messages in thread
From: Claudio Imbrenda @ 2021-02-05 12:18 UTC (permalink / raw)
To: Janosch Frank; +Cc: linux-kernel, borntraeger, david, kvm, linux-s390, stable
On Thu, 4 Feb 2021 18:05:15 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:
> On 2/4/21 5:34 PM, Janosch Frank wrote:
> > On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
> >> Extend kvm_s390_shadow_fault to return the pointer to the valid
> >> leaf DAT table entry, or to the invalid entry.
> >>
> >> Also return some flags in the lower bits of the address:
> >> DAT_PROT: indicates that DAT protection applies because of the
> >> protection bit in the segment (or, if EDAT, region)
> >> tables NOT_PTE: indicates that the address of the DAT table entry
> >> returned does not refer to a PTE, but to a segment or region table.
> >>
> >> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> >> Cc: stable@vger.kernel.org
> >> ---
> >> arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
> >> arch/s390/kvm/gaccess.h | 5 ++++-
> >> arch/s390/kvm/vsie.c | 8 ++++----
> >> 3 files changed, 30 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> >> index 6d6b57059493..2d7bcbfb185e 100644
> >> --- a/arch/s390/kvm/gaccess.c
> >> +++ b/arch/s390/kvm/gaccess.c
> >> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct
> >> gmap *sg, unsigned long saddr, rfte.val = ptr;
> >> goto shadow_r2t;
> >> }
> >> + *pgt = ptr + vaddr.rfx * 8;
> >
> > So pgt either is a table entry if rc > 0 or a pointer to the first
> > pte on rc == 0 after this change?
> >
> > Hrm, if it is really based on RCs then I might be able to come to
> > terms with having two things in a ptr with the name pgt. But it
> > needs a comment change.
> >
> >> rc = gmap_read_table(parent, ptr + vaddr.rfx * 8,
> >> &rfte.val); if (rc)
> >> return rc;
> >> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct
> >> gmap *sg, unsigned long saddr, rste.val = ptr;
> >> goto shadow_r3t;
> >> }
> >> + *pgt = ptr + vaddr.rsx * 8;
> >> rc = gmap_read_table(parent, ptr + vaddr.rsx * 8,
> >> &rste.val); if (rc)
> >> return rc;
> >> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct
> >> gmap *sg, unsigned long saddr, rtte.val = ptr;
> >> goto shadow_sgt;
> >> }
> >> + *pgt = ptr + vaddr.rtx * 8;
> >> rc = gmap_read_table(parent, ptr + vaddr.rtx * 8,
> >> &rtte.val); if (rc)
> >> return rc;
> >> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct
> >> gmap *sg, unsigned long saddr, ste.val = ptr;
> >> goto shadow_pgt;
> >> }
> >> + *pgt = ptr + vaddr.sx * 8;
> >> rc = gmap_read_table(parent, ptr + vaddr.sx * 8,
> >> &ste.val); if (rc)
> >> return rc;
> >> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct
> >> gmap *sg, unsigned long saddr,
> >> * @vcpu: virtual cpu
> >> * @sg: pointer to the shadow guest address space structure
> >> * @saddr: faulting address in the shadow gmap
> >> + * @pteptr: will contain the address of the faulting DAT table
> >> entry, or of
> >> + * the valid leaf, plus some flags
> >
> > pteptr is not the right name if it can be two things
>
> You use it for pei only, right?
yes
> >
> >> *
> >> * Returns: - 0 if the shadow fault was successfully resolved
> >> * - > 0 (pgm exception code) on exceptions while
> >> faulting @@ -1165,11 +1171,11 @@ static int
> >> kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >> * - -ENOMEM if out of memory
> >> */
> >> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> >> - unsigned long saddr)
> >> + unsigned long saddr, unsigned long
> >> *pteptr) {
> >> union vaddress vaddr;
> >> union page_table_entry pte;
> >> - unsigned long pgt;
> >> + unsigned long pgt = 0;
> >> int dat_protection, fake;
> >> int rc;
> >>
> >> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu
> >> *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE;
> >> goto shadow_page;
> >> }
> >> - if (!rc)
> >> - rc = gmap_read_table(sg->parent, pgt + vaddr.px *
> >> 8, &pte.val); +
> >> + switch (rc) {
> >> + case PGM_SEGMENT_TRANSLATION:
> >> + case PGM_REGION_THIRD_TRANS:
> >> + case PGM_REGION_SECOND_TRANS:
> >> + case PGM_REGION_FIRST_TRANS:
> >> + pgt |= NOT_PTE;
> >
> > GACC_TRANSL_ENTRY_INV ?
> >
> >> + break;
> >> + case 0:
> >> + pgt += vaddr.px * 8;
> >> + rc = gmap_read_table(sg->parent, pgt, &pte.val);
> >> + }
> >> + if (*pteptr)
> >> + *pteptr = pgt | dat_protection * DAT_PROT;
> >> if (!rc && pte.i)
> >> rc = PGM_PAGE_TRANSLATION;
> >> if (!rc && pte.z)
> >> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> >> index f4c51756c462..66a6e2cec97a 100644
> >> --- a/arch/s390/kvm/gaccess.h
> >> +++ b/arch/s390/kvm/gaccess.h
> >> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
> >> int ipte_lock_held(struct kvm_vcpu *vcpu);
> >> int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu,
> >> unsigned long gra);
> >> +#define DAT_PROT 2
> >
> > GACC_TRANSL_ENTRY_PROT
>
> Ok after a second pass that's not what's going on here.
> Those basically directly correspond to the MVPG PEI indication bits,
> right?
yes :)
> Do we also need to consider bit 63?
no, that can only happen if a specific SIE feature is used, which KVM
neither uses nor supports for VSIE, so it cannot happen
> >
> >> +#define NOT_PTE 4
> >> +
> >> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap
> >> *shadow,
> >> - unsigned long saddr);
> >> + unsigned long saddr, unsigned long
> >> *pteptr);
> >> #endif /* __KVM_S390_GACCESS_H */
> >> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> >> index c5d0a58b2c29..7db022141db3 100644
> >> --- a/arch/s390/kvm/vsie.c
> >> +++ b/arch/s390/kvm/vsie.c
> >> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu,
> >> struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at
> >> offset *mso* */ prefix += scb_s->mso;
> >>
> >> - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
> >> + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix,
> >> NULL); if (!rc && (scb_s->ecb & ECB_TE))
> >> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >> - prefix + PAGE_SIZE);
> >> + prefix + PAGE_SIZE,
> >> NULL); /*
> >> * We don't have to mprotect, we will be called for all
> >> unshadows.
> >> * SIE will detect if protection applies and trigger a
> >> validity. @@ -913,7 +913,7 @@ static int handle_fault(struct
> >> kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> >> current->thread.gmap_addr, 1);
> >> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >> - current->thread.gmap_addr);
> >> + current->thread.gmap_addr,
> >> NULL); if (rc > 0) {
> >> rc = inject_fault(vcpu, rc,
> >> current->thread.gmap_addr,
> >> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu
> >> *vcpu, {
> >> if (vsie_page->fault_addr)
> >> kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >> - vsie_page->fault_addr);
> >> + vsie_page->fault_addr,
> >> NULL);
> >
> > Ok
> >
> >> vsie_page->fault_addr = 0;
> >> }
> >>
> >>
> >
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE
2021-02-04 17:10 ` Janosch Frank
@ 2021-02-05 12:20 ` Claudio Imbrenda
0 siblings, 0 replies; 12+ messages in thread
From: Claudio Imbrenda @ 2021-02-05 12:20 UTC (permalink / raw)
To: Janosch Frank; +Cc: linux-kernel, borntraeger, david, kvm, linux-s390, stable
On Thu, 4 Feb 2021 18:10:01 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:
> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
> > Correctly handle the MVPG instruction when issued by a VSIE guest.
> >
> > Fixes: a3508fbe9dc6d ("KVM: s390: vsie: initial support for nested
> > virtualization") Cc: stable@vger.kernel.org
> > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>
> So far the patch looks ok to me and way better to understand than v1,
> good job
>
> > ---
> > arch/s390/kvm/vsie.c | 94
> > +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 89
> > insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> > index 7db022141db3..2db49749e27b 100644
> > --- a/arch/s390/kvm/vsie.c
> > +++ b/arch/s390/kvm/vsie.c
> > @@ -416,11 +416,6 @@ static void unshadow_scb(struct kvm_vcpu
> > *vcpu, struct vsie_page *vsie_page) memcpy((void *)((u64)scb_o +
> > 0xc0), (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
>
> Magic offsets being magic
> Another item for my todo list.
>
> > break;
> > - case ICPT_PARTEXEC:
> > - /* MVPG only */
> > - memcpy((void *)((u64)scb_o + 0xc0),
> > - (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
> > - break;
> > }
> >
> > if (scb_s->ihcpu != 0xffffU)
> > @@ -982,6 +977,91 @@ static int handle_stfle(struct kvm_vcpu *vcpu,
> > struct vsie_page *vsie_page) return 0;
> > }
> >
> > +static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct
> > vsie_page *vsie_page, u8 reg) +{
> > + reg &= 0xf;
> > + switch (reg) {
> > + case 15:
> > + return vsie_page->scb_s.gg15;
> > + case 14:
> > + return vsie_page->scb_s.gg14;
> > + default:
> > + return vcpu->run->s.regs.gprs[reg];
> > + }
> > +}
> > +
> > +static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct
> > vsie_page *vsie_page) +{
> > + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
> > + unsigned long pei1, pei2, src, dest, mask = PAGE_MASK;
> > + u64 *pei_block = &vsie_page->scb_o->mcic;
> > + int edat, rc1, rc2;
>
> Can use a src/dst prefix or suffix please?
> 1/2 is confusing.
will do
> > + union ctlreg0 cr0;
> > +
> > + cr0.val = vcpu->arch.sie_block->gcr[0];
> > + edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
> > + if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_24BIT)
> > + mask = 0xfff000;
> > + else if (psw_bits(scb_s->gpsw).eaba ==
> > PSW_BITS_AMODE_31BIT)
> > + mask = 0x7ffff000;
> > +
> > + dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >>
> > 16) & mask;
> > + src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20)
> > & mask; +
> > + rc1 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest,
> > &pei1);
> > + rc2 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src,
> > &pei2);
> > + /*
> > + * Either everything went well, or something non-critical
> > went wrong
> > + * e.g. because of a race. In either case, simply retry.
> > + */
> > + if (rc1 == -EAGAIN || rc2 == -EAGAIN || (!rc1 && !rc2)) {
> > + retry_vsie_icpt(vsie_page);
> > + return -EAGAIN;
> > + }
> > + /* Something more serious went wrong, propagate the error
> > */
> > + if (rc1 < 0)
> > + return rc1;
> > + if (rc2 < 0)
> > + return rc2;
> > +
> > + /* The only possible suppressing exception: just deliver
> > it */
> > + if (rc1 == PGM_TRANSLATION_SPEC || rc2 ==
> > PGM_TRANSLATION_SPEC) {
> > + clear_vsie_icpt(vsie_page);
> > + rc1 = kvm_s390_inject_program_int(vcpu,
> > PGM_TRANSLATION_SPEC);
> > + WARN_ON_ONCE(rc1);
> > + return 1;
> > + }
> > +
> > + /*
> > + * Forward the PEI intercept to the guest if it was a page
> > fault, or
> > + * also for segment and region table faults if EDAT
> > applies.
> > + */
> > + if (edat) {
> > + rc1 = rc1 == PGM_ASCE_TYPE ? rc1 : 0;
> > + rc2 = rc2 == PGM_ASCE_TYPE ? rc2 : 0;
> > + }
> > + if ((!rc1 || rc1 == PGM_PAGE_TRANSLATION) && (!rc2 || rc2
> > == PGM_PAGE_TRANSLATION)) {
> > + pei_block[0] = pei1;
> > + pei_block[1] = pei2;
> > + return 1;
> > + }
> > +
> > + retry_vsie_icpt(vsie_page);
> > +
> > + /*
> > + * The host has edat, and the guest does not, or it was an
> > ASCE type
> > + * exception. The host needs to inject the appropriate DAT
> > interrupts
> > + * into the guest.
> > + */
> > + if (rc1)
> > + return inject_fault(vcpu, rc1, dest, 1);
> > + if (rc2)
> > + return inject_fault(vcpu, rc2, src, 0);
> > +
> > + /* This should never be reached */
>
> BUG()?
look at the code, if it's reached, it's a bug in the compiler :)
maybe I should rewrite it so that there won't be any unreachable code at
all
> > + return 0;
> > +}
> > +
> > /*
> > * Run the vsie on a shadow scb and a shadow gmap, without any
> > further
> > * sanity checks, handling SIE faults.
> > @@ -1068,6 +1148,10 @@ static int do_vsie_run(struct kvm_vcpu
> > *vcpu, struct vsie_page *vsie_page) if ((scb_s->ipa & 0xf000) !=
> > 0xf000) scb_s->ipa += 0x1000;
> > break;
> > + case ICPT_PARTEXEC:
> > + if (scb_s->ipa == 0xb254)
> > + rc = vsie_handle_mvpg(vcpu, vsie_page);
> > + break;
> > }
> > return rc;
> > }
> >
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer
2021-02-05 12:15 ` Claudio Imbrenda
@ 2021-02-05 12:56 ` Janosch Frank
2021-02-05 14:05 ` Claudio Imbrenda
0 siblings, 1 reply; 12+ messages in thread
From: Janosch Frank @ 2021-02-05 12:56 UTC (permalink / raw)
To: Claudio Imbrenda
Cc: linux-kernel, borntraeger, david, kvm, linux-s390, stable
On 2/5/21 1:15 PM, Claudio Imbrenda wrote:
> On Thu, 4 Feb 2021 17:34:00 +0100
> Janosch Frank <frankja@linux.ibm.com> wrote:
>
>> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
>>> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
>>> DAT table entry, or to the invalid entry.
>>>
>>> Also return some flags in the lower bits of the address:
>>> DAT_PROT: indicates that DAT protection applies because of the
>>> protection bit in the segment (or, if EDAT, region) tables
>>> NOT_PTE: indicates that the address of the DAT table entry returned
>>> does not refer to a PTE, but to a segment or region table.
>>>
>>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>> Cc: stable@vger.kernel.org
>>> ---
>>> arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
>>> arch/s390/kvm/gaccess.h | 5 ++++-
>>> arch/s390/kvm/vsie.c | 8 ++++----
>>> 3 files changed, 30 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>>> index 6d6b57059493..2d7bcbfb185e 100644
>>> --- a/arch/s390/kvm/gaccess.c
>>> +++ b/arch/s390/kvm/gaccess.c
>>> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap
>>> *sg, unsigned long saddr, rfte.val = ptr;
>>> goto shadow_r2t;
>>> }
>>> + *pgt = ptr + vaddr.rfx * 8;
>>
>> So pgt either is a table entry if rc > 0 or a pointer to the first pte
>> on rc == 0 after this change?
>
> yes
>
>> Hrm, if it is really based on RCs then I might be able to come to
>> terms with having two things in a ptr with the name pgt. But it needs
>> a comment change.
>
> will do.
>
>>> rc = gmap_read_table(parent, ptr + vaddr.rfx * 8,
>>> &rfte.val); if (rc)
>>> return rc;
>>> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap
>>> *sg, unsigned long saddr, rste.val = ptr;
>>> goto shadow_r3t;
>>> }
>>> + *pgt = ptr + vaddr.rsx * 8;
>>> rc = gmap_read_table(parent, ptr + vaddr.rsx * 8,
>>> &rste.val); if (rc)
>>> return rc;
>>> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap
>>> *sg, unsigned long saddr, rtte.val = ptr;
>>> goto shadow_sgt;
>>> }
>>> + *pgt = ptr + vaddr.rtx * 8;
>>> rc = gmap_read_table(parent, ptr + vaddr.rtx * 8,
>>> &rtte.val); if (rc)
>>> return rc;
>>> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap
>>> *sg, unsigned long saddr, ste.val = ptr;
>>> goto shadow_pgt;
>>> }
>>> + *pgt = ptr + vaddr.sx * 8;
>>> rc = gmap_read_table(parent, ptr + vaddr.sx * 8,
>>> &ste.val); if (rc)
>>> return rc;
>>> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap
>>> *sg, unsigned long saddr,
>>> * @vcpu: virtual cpu
>>> * @sg: pointer to the shadow guest address space structure
>>> * @saddr: faulting address in the shadow gmap
>>> + * @pteptr: will contain the address of the faulting DAT table
>>> entry, or of
>>> + * the valid leaf, plus some flags
>>
>> pteptr is not the right name if it can be two things
>
> it cannot be two things there, kvm_s390_shadow_fault always returns a
> DAT _entry_ (pte, segment, region).
And that's exactly what I meant, it's not a pteptr i.e. not a (pte_t *)
as the name would suggest.
>
>>> *
>>> * Returns: - 0 if the shadow fault was successfully resolved
>>> * - > 0 (pgm exception code) on exceptions while
>>> faulting @@ -1165,11 +1171,11 @@ static int
>>> kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>> * - -ENOMEM if out of memory
>>> */
>>> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
>>> - unsigned long saddr)
>>> + unsigned long saddr, unsigned long
>>> *pteptr) {
>>> union vaddress vaddr;
>>> union page_table_entry pte;
>>> - unsigned long pgt;
>>> + unsigned long pgt = 0;
>>> int dat_protection, fake;
>>> int rc;
>>>
>>> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu
>>> *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE;
>>> goto shadow_page;
>>> }
>>> - if (!rc)
>>> - rc = gmap_read_table(sg->parent, pgt + vaddr.px *
>>> 8, &pte.val); +
>>> + switch (rc) {
>>> + case PGM_SEGMENT_TRANSLATION:
>>> + case PGM_REGION_THIRD_TRANS:
>>> + case PGM_REGION_SECOND_TRANS:
>>> + case PGM_REGION_FIRST_TRANS:
>>> + pgt |= NOT_PTE;
>>
>> GACC_TRANSL_ENTRY_INV ?
>
> no, this is only for non-pte entries
>
>>> + break;
>>> + case 0:
>>> + pgt += vaddr.px * 8;
>>> + rc = gmap_read_table(sg->parent, pgt, &pte.val);
>>> + }
>>> + if (*pteptr)
>>> + *pteptr = pgt | dat_protection * DAT_PROT;
>>> if (!rc && pte.i)
>>> rc = PGM_PAGE_TRANSLATION;
>>> if (!rc && pte.z)
>>> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
>>> index f4c51756c462..66a6e2cec97a 100644
>>> --- a/arch/s390/kvm/gaccess.h
>>> +++ b/arch/s390/kvm/gaccess.h
>>> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
>>> int ipte_lock_held(struct kvm_vcpu *vcpu);
>>> int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu,
>>> unsigned long gra);
>>> +#define DAT_PROT 2
>>
>> GACC_TRANSL_ENTRY_PROT
>
> this is also only for non-pte entries
>
>>> +#define NOT_PTE 4
>>> +
>>> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap
>>> *shadow,
>>> - unsigned long saddr);
>>> + unsigned long saddr, unsigned long
>>> *pteptr);
>>> #endif /* __KVM_S390_GACCESS_H */
>>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
>>> index c5d0a58b2c29..7db022141db3 100644
>>> --- a/arch/s390/kvm/vsie.c
>>> +++ b/arch/s390/kvm/vsie.c
>>> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu,
>>> struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at
>>> offset *mso* */ prefix += scb_s->mso;
>>>
>>> - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
>>> + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix,
>>> NULL); if (!rc && (scb_s->ecb & ECB_TE))
>>> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>>> - prefix + PAGE_SIZE);
>>> + prefix + PAGE_SIZE,
>>> NULL); /*
>>> * We don't have to mprotect, we will be called for all
>>> unshadows.
>>> * SIE will detect if protection applies and trigger a
>>> validity. @@ -913,7 +913,7 @@ static int handle_fault(struct
>>> kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>>> current->thread.gmap_addr, 1);
>>> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>>> - current->thread.gmap_addr);
>>> + current->thread.gmap_addr,
>>> NULL); if (rc > 0) {
>>> rc = inject_fault(vcpu, rc,
>>> current->thread.gmap_addr,
>>> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu
>>> *vcpu, {
>>> if (vsie_page->fault_addr)
>>> kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>>> - vsie_page->fault_addr);
>>> + vsie_page->fault_addr,
>>> NULL);
>>
>> Ok
>>
>>> vsie_page->fault_addr = 0;
>>> }
>>>
>>>
>>
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer
2021-02-05 12:56 ` Janosch Frank
@ 2021-02-05 14:05 ` Claudio Imbrenda
0 siblings, 0 replies; 12+ messages in thread
From: Claudio Imbrenda @ 2021-02-05 14:05 UTC (permalink / raw)
To: Janosch Frank; +Cc: linux-kernel, borntraeger, david, kvm, linux-s390, stable
On Fri, 5 Feb 2021 13:56:53 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:
> On 2/5/21 1:15 PM, Claudio Imbrenda wrote:
> > On Thu, 4 Feb 2021 17:34:00 +0100
> > Janosch Frank <frankja@linux.ibm.com> wrote:
> >
> >> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
> >>> Extend kvm_s390_shadow_fault to return the pointer to the valid
> >>> leaf DAT table entry, or to the invalid entry.
> >>>
> >>> Also return some flags in the lower bits of the address:
> >>> DAT_PROT: indicates that DAT protection applies because of the
> >>> protection bit in the segment (or, if EDAT, region)
> >>> tables NOT_PTE: indicates that the address of the DAT table entry
> >>> returned does not refer to a PTE, but to a segment or region
> >>> table.
> >>>
> >>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> >>> Cc: stable@vger.kernel.org
> >>> ---
> >>> arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
> >>> arch/s390/kvm/gaccess.h | 5 ++++-
> >>> arch/s390/kvm/vsie.c | 8 ++++----
> >>> 3 files changed, 30 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> >>> index 6d6b57059493..2d7bcbfb185e 100644
> >>> --- a/arch/s390/kvm/gaccess.c
> >>> +++ b/arch/s390/kvm/gaccess.c
> >>> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct
> >>> gmap *sg, unsigned long saddr, rfte.val = ptr;
> >>> goto shadow_r2t;
> >>> }
> >>> + *pgt = ptr + vaddr.rfx * 8;
> >>
> >> So pgt either is a table entry if rc > 0 or a pointer to the first
> >> pte on rc == 0 after this change?
> >
> > yes
> >
> >> Hrm, if it is really based on RCs then I might be able to come to
> >> terms with having two things in a ptr with the name pgt. But it
> >> needs a comment change.
> >
> > will do.
> >
> >>> rc = gmap_read_table(parent, ptr + vaddr.rfx * 8,
> >>> &rfte.val); if (rc)
> >>> return rc;
> >>> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct
> >>> gmap *sg, unsigned long saddr, rste.val = ptr;
> >>> goto shadow_r3t;
> >>> }
> >>> + *pgt = ptr + vaddr.rsx * 8;
> >>> rc = gmap_read_table(parent, ptr + vaddr.rsx * 8,
> >>> &rste.val); if (rc)
> >>> return rc;
> >>> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct
> >>> gmap *sg, unsigned long saddr, rtte.val = ptr;
> >>> goto shadow_sgt;
> >>> }
> >>> + *pgt = ptr + vaddr.rtx * 8;
> >>> rc = gmap_read_table(parent, ptr + vaddr.rtx * 8,
> >>> &rtte.val); if (rc)
> >>> return rc;
> >>> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct
> >>> gmap *sg, unsigned long saddr, ste.val = ptr;
> >>> goto shadow_pgt;
> >>> }
> >>> + *pgt = ptr + vaddr.sx * 8;
> >>> rc = gmap_read_table(parent, ptr + vaddr.sx * 8,
> >>> &ste.val); if (rc)
> >>> return rc;
> >>> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct
> >>> gmap *sg, unsigned long saddr,
> >>> * @vcpu: virtual cpu
> >>> * @sg: pointer to the shadow guest address space structure
> >>> * @saddr: faulting address in the shadow gmap
> >>> + * @pteptr: will contain the address of the faulting DAT table
> >>> entry, or of
> >>> + * the valid leaf, plus some flags
> >>
> >> pteptr is not the right name if it can be two things
> >
> > it cannot be two things there, kvm_s390_shadow_fault always returns
> > a DAT _entry_ (pte, segment, region).
>
> And that's exactly what I meant, it's not a pteptr i.e. not a (pte_t
> *) as the name would suggest.
Fair enough, I'll rename it to something like entryptr or so.
>
> >
> >>> *
> >>> * Returns: - 0 if the shadow fault was successfully resolved
> >>> * - > 0 (pgm exception code) on exceptions while
> >>> faulting @@ -1165,11 +1171,11 @@ static int
> >>> kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>> * - -ENOMEM if out of memory
> >>> */
> >>> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> >>> - unsigned long saddr)
> >>> + unsigned long saddr, unsigned long
> >>> *pteptr) {
> >>> union vaddress vaddr;
> >>> union page_table_entry pte;
> >>> - unsigned long pgt;
> >>> + unsigned long pgt = 0;
> >>> int dat_protection, fake;
> >>> int rc;
> >>>
> >>> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu
> >>> *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE;
> >>> goto shadow_page;
> >>> }
> >>> - if (!rc)
> >>> - rc = gmap_read_table(sg->parent, pgt + vaddr.px *
> >>> 8, &pte.val); +
> >>> + switch (rc) {
> >>> + case PGM_SEGMENT_TRANSLATION:
> >>> + case PGM_REGION_THIRD_TRANS:
> >>> + case PGM_REGION_SECOND_TRANS:
> >>> + case PGM_REGION_FIRST_TRANS:
> >>> + pgt |= NOT_PTE;
> >>
> >> GACC_TRANSL_ENTRY_INV ?
> >
> > no, this is only for non-pte entries
> >
> >>> + break;
> >>> + case 0:
> >>> + pgt += vaddr.px * 8;
> >>> + rc = gmap_read_table(sg->parent, pgt, &pte.val);
> >>> + }
> >>> + if (*pteptr)
> >>> + *pteptr = pgt | dat_protection * DAT_PROT;
> >>> if (!rc && pte.i)
> >>> rc = PGM_PAGE_TRANSLATION;
> >>> if (!rc && pte.z)
> >>> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> >>> index f4c51756c462..66a6e2cec97a 100644
> >>> --- a/arch/s390/kvm/gaccess.h
> >>> +++ b/arch/s390/kvm/gaccess.h
> >>> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
> >>> int ipte_lock_held(struct kvm_vcpu *vcpu);
> >>> int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu,
> >>> unsigned long gra);
> >>> +#define DAT_PROT 2
> >>
> >> GACC_TRANSL_ENTRY_PROT
> >
> > this is also only for non-pte entries
> >
> >>> +#define NOT_PTE 4
> >>> +
> >>> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap
> >>> *shadow,
> >>> - unsigned long saddr);
> >>> + unsigned long saddr, unsigned long
> >>> *pteptr);
> >>> #endif /* __KVM_S390_GACCESS_H */
> >>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> >>> index c5d0a58b2c29..7db022141db3 100644
> >>> --- a/arch/s390/kvm/vsie.c
> >>> +++ b/arch/s390/kvm/vsie.c
> >>> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu,
> >>> struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at
> >>> offset *mso* */ prefix += scb_s->mso;
> >>>
> >>> - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >>> prefix);
> >>> + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix,
> >>> NULL); if (!rc && (scb_s->ecb & ECB_TE))
> >>> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >>> - prefix + PAGE_SIZE);
> >>> + prefix + PAGE_SIZE,
> >>> NULL); /*
> >>> * We don't have to mprotect, we will be called for all
> >>> unshadows.
> >>> * SIE will detect if protection applies and trigger a
> >>> validity. @@ -913,7 +913,7 @@ static int handle_fault(struct
> >>> kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> >>> current->thread.gmap_addr, 1);
> >>> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >>> - current->thread.gmap_addr);
> >>> + current->thread.gmap_addr,
> >>> NULL); if (rc > 0) {
> >>> rc = inject_fault(vcpu, rc,
> >>> current->thread.gmap_addr,
> >>> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu
> >>> *vcpu, {
> >>> if (vsie_page->fault_addr)
> >>> kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >>> - vsie_page->fault_addr);
> >>> + vsie_page->fault_addr,
> >>> NULL);
> >>
> >> Ok
> >>
> >>> vsie_page->fault_addr = 0;
> >>> }
> >>>
> >>>
> >>
> >
>
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2021-02-06 0:29 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-02 18:00 [PATCH v2 0/2] s390/kvm: fix MVPG when in VSIE Claudio Imbrenda
2021-02-02 18:00 ` [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer Claudio Imbrenda
2021-02-04 16:34 ` Janosch Frank
2021-02-04 17:05 ` Janosch Frank
2021-02-05 12:18 ` Claudio Imbrenda
2021-02-05 12:15 ` Claudio Imbrenda
2021-02-05 12:56 ` Janosch Frank
2021-02-05 14:05 ` Claudio Imbrenda
2021-02-02 18:00 ` [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE Claudio Imbrenda
2021-02-03 10:36 ` Claudio Imbrenda
2021-02-04 17:10 ` Janosch Frank
2021-02-05 12:20 ` Claudio Imbrenda
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.