* [PATCH v2 0/2] s390/kvm: fix MVPG when in VSIE @ 2021-02-02 18:00 Claudio Imbrenda 2021-02-02 18:00 ` [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer Claudio Imbrenda 2021-02-02 18:00 ` [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE Claudio Imbrenda 0 siblings, 2 replies; 12+ messages in thread From: Claudio Imbrenda @ 2021-02-02 18:00 UTC (permalink / raw) To: linux-kernel; +Cc: borntraeger, frankja, david, kvm, linux-s390 The current handling of the MVPG instruction when executed in a nested guest is wrong, and can lead to the nested guest hanging. This patchset fixes the behaviour to be more architecturally correct, and fixes the hangs observed. v1->v2 * complete rewrite Claudio Imbrenda (2): s390/kvm: extend kvm_s390_shadow_fault to return entry pointer s390/kvm: VSIE: correctly handle MVPG when in VSIE arch/s390/kvm/gaccess.c | 26 ++++++++-- arch/s390/kvm/gaccess.h | 5 +- arch/s390/kvm/vsie.c | 102 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 119 insertions(+), 14 deletions(-) -- 2.26.2 ^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer 2021-02-02 18:00 [PATCH v2 0/2] s390/kvm: fix MVPG when in VSIE Claudio Imbrenda @ 2021-02-02 18:00 ` Claudio Imbrenda 2021-02-04 16:34 ` Janosch Frank 2021-02-02 18:00 ` [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE Claudio Imbrenda 1 sibling, 1 reply; 12+ messages in thread From: Claudio Imbrenda @ 2021-02-02 18:00 UTC (permalink / raw) To: linux-kernel; +Cc: borntraeger, frankja, david, kvm, linux-s390, stable Extend kvm_s390_shadow_fault to return the pointer to the valid leaf DAT table entry, or to the invalid entry. Also return some flags in the lower bits of the address: DAT_PROT: indicates that DAT protection applies because of the protection bit in the segment (or, if EDAT, region) tables NOT_PTE: indicates that the address of the DAT table entry returned does not refer to a PTE, but to a segment or region table. Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> Cc: stable@vger.kernel.org --- arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++---- arch/s390/kvm/gaccess.h | 5 ++++- arch/s390/kvm/vsie.c | 8 ++++---- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 6d6b57059493..2d7bcbfb185e 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, rfte.val = ptr; goto shadow_r2t; } + *pgt = ptr + vaddr.rfx * 8; rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val); if (rc) return rc; @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, rste.val = ptr; goto shadow_r3t; } + *pgt = ptr + vaddr.rsx * 8; rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val); if (rc) return rc; @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, rtte.val = ptr; goto shadow_sgt; } + *pgt = ptr + vaddr.rtx * 8; 
rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val); if (rc) return rc; @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, ste.val = ptr; goto shadow_pgt; } + *pgt = ptr + vaddr.sx * 8; rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val); if (rc) return rc; @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, * @vcpu: virtual cpu * @sg: pointer to the shadow guest address space structure * @saddr: faulting address in the shadow gmap + * @pteptr: will contain the address of the faulting DAT table entry, or of + * the valid leaf, plus some flags * * Returns: - 0 if the shadow fault was successfully resolved * - > 0 (pgm exception code) on exceptions while faulting @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, * - -ENOMEM if out of memory */ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, - unsigned long saddr) + unsigned long saddr, unsigned long *pteptr) { union vaddress vaddr; union page_table_entry pte; - unsigned long pgt; + unsigned long pgt = 0; int dat_protection, fake; int rc; @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE; goto shadow_page; } - if (!rc) - rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val); + + switch (rc) { + case PGM_SEGMENT_TRANSLATION: + case PGM_REGION_THIRD_TRANS: + case PGM_REGION_SECOND_TRANS: + case PGM_REGION_FIRST_TRANS: + pgt |= NOT_PTE; + break; + case 0: + pgt += vaddr.px * 8; + rc = gmap_read_table(sg->parent, pgt, &pte.val); + } + if (*pteptr) + *pteptr = pgt | dat_protection * DAT_PROT; if (!rc && pte.i) rc = PGM_PAGE_TRANSLATION; if (!rc && pte.z) diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index f4c51756c462..66a6e2cec97a 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu); 
int ipte_lock_held(struct kvm_vcpu *vcpu); int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra); +#define DAT_PROT 2 +#define NOT_PTE 4 + int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow, - unsigned long saddr); + unsigned long saddr, unsigned long *pteptr); #endif /* __KVM_S390_GACCESS_H */ diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index c5d0a58b2c29..7db022141db3 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at offset *mso* */ prefix += scb_s->mso; - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL); if (!rc && (scb_s->ecb & ECB_TE)) rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, - prefix + PAGE_SIZE); + prefix + PAGE_SIZE, NULL); /* * We don't have to mprotect, we will be called for all unshadows. * SIE will detect if protection applies and trigger a validity. @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) current->thread.gmap_addr, 1); rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, - current->thread.gmap_addr); + current->thread.gmap_addr, NULL); if (rc > 0) { rc = inject_fault(vcpu, rc, current->thread.gmap_addr, @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu, { if (vsie_page->fault_addr) kvm_s390_shadow_fault(vcpu, vsie_page->gmap, - vsie_page->fault_addr); + vsie_page->fault_addr, NULL); vsie_page->fault_addr = 0; } -- 2.26.2 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer 2021-02-02 18:00 ` [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer Claudio Imbrenda @ 2021-02-04 16:34 ` Janosch Frank 2021-02-04 17:05 ` Janosch Frank 2021-02-05 12:15 ` Claudio Imbrenda 0 siblings, 2 replies; 12+ messages in thread From: Janosch Frank @ 2021-02-04 16:34 UTC (permalink / raw) To: Claudio Imbrenda, linux-kernel Cc: borntraeger, david, kvm, linux-s390, stable On 2/2/21 7:00 PM, Claudio Imbrenda wrote: > Extend kvm_s390_shadow_fault to return the pointer to the valid leaf > DAT table entry, or to the invalid entry. > > Also return some flags in the lower bits of the address: > DAT_PROT: indicates that DAT protection applies because of the > protection bit in the segment (or, if EDAT, region) tables > NOT_PTE: indicates that the address of the DAT table entry returned > does not refer to a PTE, but to a segment or region table. > > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> > Cc: stable@vger.kernel.org > --- > arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++---- > arch/s390/kvm/gaccess.h | 5 ++++- > arch/s390/kvm/vsie.c | 8 ++++---- > 3 files changed, 30 insertions(+), 9 deletions(-) > > diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c > index 6d6b57059493..2d7bcbfb185e 100644 > --- a/arch/s390/kvm/gaccess.c > +++ b/arch/s390/kvm/gaccess.c > @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, > rfte.val = ptr; > goto shadow_r2t; > } > + *pgt = ptr + vaddr.rfx * 8; So pgt either is a table entry if rc > 0 or a pointer to the first pte on rc == 0 after this change? Hrm, if it is really based on RCs then I might be able to come to terms with having two things in a ptr with the name pgt. But it needs a comment change.
> rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val); > if (rc) > return rc; > @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, > rste.val = ptr; > goto shadow_r3t; > } > + *pgt = ptr + vaddr.rsx * 8; > rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val); > if (rc) > return rc; > @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, > rtte.val = ptr; > goto shadow_sgt; > } > + *pgt = ptr + vaddr.rtx * 8; > rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val); > if (rc) > return rc; > @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, > ste.val = ptr; > goto shadow_pgt; > } > + *pgt = ptr + vaddr.sx * 8; > rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val); > if (rc) > return rc; > @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, > * @vcpu: virtual cpu > * @sg: pointer to the shadow guest address space structure > * @saddr: faulting address in the shadow gmap > + * @pteptr: will contain the address of the faulting DAT table entry, or of > + * the valid leaf, plus some flags pteptr is not the right name if it can be two things > * > * Returns: - 0 if the shadow fault was successfully resolved > * - > 0 (pgm exception code) on exceptions while faulting > @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, > * - -ENOMEM if out of memory > */ > int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, > - unsigned long saddr) > + unsigned long saddr, unsigned long *pteptr) > { > union vaddress vaddr; > union page_table_entry pte; > - unsigned long pgt; > + unsigned long pgt = 0; > int dat_protection, fake; > int rc; > > @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, > pte.val = pgt + vaddr.px * PAGE_SIZE; > goto shadow_page; > } > - if (!rc) > - rc = 
gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val); > + > + switch (rc) { > + case PGM_SEGMENT_TRANSLATION: > + case PGM_REGION_THIRD_TRANS: > + case PGM_REGION_SECOND_TRANS: > + case PGM_REGION_FIRST_TRANS: > + pgt |= NOT_PTE; GACC_TRANSL_ENTRY_INV ? > + break; > + case 0: > + pgt += vaddr.px * 8; > + rc = gmap_read_table(sg->parent, pgt, &pte.val); > + } > + if (*pteptr) > + *pteptr = pgt | dat_protection * DAT_PROT; > if (!rc && pte.i) > rc = PGM_PAGE_TRANSLATION; > if (!rc && pte.z) > diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h > index f4c51756c462..66a6e2cec97a 100644 > --- a/arch/s390/kvm/gaccess.h > +++ b/arch/s390/kvm/gaccess.h > @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu); > int ipte_lock_held(struct kvm_vcpu *vcpu); > int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra); > > +#define DAT_PROT 2 GACC_TRANSL_ENTRY_PROT > +#define NOT_PTE 4 > + > int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow, > - unsigned long saddr); > + unsigned long saddr, unsigned long *pteptr); > > #endif /* __KVM_S390_GACCESS_H */ > diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c > index c5d0a58b2c29..7db022141db3 100644 > --- a/arch/s390/kvm/vsie.c > +++ b/arch/s390/kvm/vsie.c > @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > /* with mso/msl, the prefix lies at offset *mso* */ > prefix += scb_s->mso; > > - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); > + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL); > if (!rc && (scb_s->ecb & ECB_TE)) > rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > - prefix + PAGE_SIZE); > + prefix + PAGE_SIZE, NULL); > /* > * We don't have to mprotect, we will be called for all unshadows. > * SIE will detect if protection applies and trigger a validity. 
> @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > current->thread.gmap_addr, 1); > > rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > - current->thread.gmap_addr); > + current->thread.gmap_addr, NULL); > if (rc > 0) { > rc = inject_fault(vcpu, rc, > current->thread.gmap_addr, > @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu, > { > if (vsie_page->fault_addr) > kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > - vsie_page->fault_addr); > + vsie_page->fault_addr, NULL); Ok > vsie_page->fault_addr = 0; > } > > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer 2021-02-04 16:34 ` Janosch Frank @ 2021-02-04 17:05 ` Janosch Frank 2021-02-05 12:18 ` Claudio Imbrenda 2021-02-05 12:15 ` Claudio Imbrenda 1 sibling, 1 reply; 12+ messages in thread From: Janosch Frank @ 2021-02-04 17:05 UTC (permalink / raw) To: Claudio Imbrenda, linux-kernel Cc: borntraeger, david, kvm, linux-s390, stable On 2/4/21 5:34 PM, Janosch Frank wrote: > On 2/2/21 7:00 PM, Claudio Imbrenda wrote: >> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf >> DAT table entry, or to the invalid entry. >> >> Also return some flags in the lower bits of the address: >> DAT_PROT: indicates that DAT protection applies because of the >> protection bit in the segment (or, if EDAT, region) tables >> NOT_PTE: indicates that the address of the DAT table entry returned >> does not refer to a PTE, but to a segment or region table. >> >> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> >> Cc: stable@vger.kernel.org >> --- >> arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++---- >> arch/s390/kvm/gaccess.h | 5 ++++- >> arch/s390/kvm/vsie.c | 8 ++++---- >> 3 files changed, 30 insertions(+), 9 deletions(-) >> >> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c >> index 6d6b57059493..2d7bcbfb185e 100644 >> --- a/arch/s390/kvm/gaccess.c >> +++ b/arch/s390/kvm/gaccess.c >> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, >> rfte.val = ptr; >> goto shadow_r2t; >> } >> + *pgt = ptr + vaddr.rfx * 8; > > So pgt either is a table entry if rc > 0 or a pointer to the first pte > on rc == 0 after this change? > > Hrm, if it is really based on RCs than I might be able to come to terms > with having two things in a ptr with the name pgt. But it needs a > comment change. 
> >> rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val); >> if (rc) >> return rc; >> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, >> rste.val = ptr; >> goto shadow_r3t; >> } >> + *pgt = ptr + vaddr.rsx * 8; >> rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val); >> if (rc) >> return rc; >> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, >> rtte.val = ptr; >> goto shadow_sgt; >> } >> + *pgt = ptr + vaddr.rtx * 8; >> rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val); >> if (rc) >> return rc; >> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, >> ste.val = ptr; >> goto shadow_pgt; >> } >> + *pgt = ptr + vaddr.sx * 8; >> rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val); >> if (rc) >> return rc; >> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, >> * @vcpu: virtual cpu >> * @sg: pointer to the shadow guest address space structure >> * @saddr: faulting address in the shadow gmap >> + * @pteptr: will contain the address of the faulting DAT table entry, or of >> + * the valid leaf, plus some flags > > pteptr is not the right name if it can be two things You use it for pei only, right? 
> >> * >> * Returns: - 0 if the shadow fault was successfully resolved >> * - > 0 (pgm exception code) on exceptions while faulting >> @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, >> * - -ENOMEM if out of memory >> */ >> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, >> - unsigned long saddr) >> + unsigned long saddr, unsigned long *pteptr) >> { >> union vaddress vaddr; >> union page_table_entry pte; >> - unsigned long pgt; >> + unsigned long pgt = 0; >> int dat_protection, fake; >> int rc; >> >> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, >> pte.val = pgt + vaddr.px * PAGE_SIZE; >> goto shadow_page; >> } >> - if (!rc) >> - rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val); >> + >> + switch (rc) { >> + case PGM_SEGMENT_TRANSLATION: >> + case PGM_REGION_THIRD_TRANS: >> + case PGM_REGION_SECOND_TRANS: >> + case PGM_REGION_FIRST_TRANS: >> + pgt |= NOT_PTE; > > GACC_TRANSL_ENTRY_INV ? > >> + break; >> + case 0: >> + pgt += vaddr.px * 8; >> + rc = gmap_read_table(sg->parent, pgt, &pte.val); >> + } >> + if (*pteptr) >> + *pteptr = pgt | dat_protection * DAT_PROT; >> if (!rc && pte.i) >> rc = PGM_PAGE_TRANSLATION; >> if (!rc && pte.z) >> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h >> index f4c51756c462..66a6e2cec97a 100644 >> --- a/arch/s390/kvm/gaccess.h >> +++ b/arch/s390/kvm/gaccess.h >> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu); >> int ipte_lock_held(struct kvm_vcpu *vcpu); >> int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra); >> >> +#define DAT_PROT 2 > > GACC_TRANSL_ENTRY_PROT Ok after a second pass that's not what's going on here. Those basically directly correspond to the MVPG PEI indication bits, right? Do we also need to consider bit 63? 
> >> +#define NOT_PTE 4 >> + >> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow, >> - unsigned long saddr); >> + unsigned long saddr, unsigned long *pteptr); >> >> #endif /* __KVM_S390_GACCESS_H */ >> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c >> index c5d0a58b2c29..7db022141db3 100644 >> --- a/arch/s390/kvm/vsie.c >> +++ b/arch/s390/kvm/vsie.c >> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) >> /* with mso/msl, the prefix lies at offset *mso* */ >> prefix += scb_s->mso; >> >> - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); >> + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL); >> if (!rc && (scb_s->ecb & ECB_TE)) >> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, >> - prefix + PAGE_SIZE); >> + prefix + PAGE_SIZE, NULL); >> /* >> * We don't have to mprotect, we will be called for all unshadows. >> * SIE will detect if protection applies and trigger a validity. >> @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) >> current->thread.gmap_addr, 1); >> >> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, >> - current->thread.gmap_addr); >> + current->thread.gmap_addr, NULL); >> if (rc > 0) { >> rc = inject_fault(vcpu, rc, >> current->thread.gmap_addr, >> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu, >> { >> if (vsie_page->fault_addr) >> kvm_s390_shadow_fault(vcpu, vsie_page->gmap, >> - vsie_page->fault_addr); >> + vsie_page->fault_addr, NULL); > > Ok > >> vsie_page->fault_addr = 0; >> } >> >> > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer 2021-02-04 17:05 ` Janosch Frank @ 2021-02-05 12:18 ` Claudio Imbrenda 0 siblings, 0 replies; 12+ messages in thread From: Claudio Imbrenda @ 2021-02-05 12:18 UTC (permalink / raw) To: Janosch Frank; +Cc: linux-kernel, borntraeger, david, kvm, linux-s390, stable On Thu, 4 Feb 2021 18:05:15 +0100 Janosch Frank <frankja@linux.ibm.com> wrote: > On 2/4/21 5:34 PM, Janosch Frank wrote: > > On 2/2/21 7:00 PM, Claudio Imbrenda wrote: > >> Extend kvm_s390_shadow_fault to return the pointer to the valid > >> leaf DAT table entry, or to the invalid entry. > >> > >> Also return some flags in the lower bits of the address: > >> DAT_PROT: indicates that DAT protection applies because of the > >> protection bit in the segment (or, if EDAT, region) > >> tables NOT_PTE: indicates that the address of the DAT table entry > >> returned does not refer to a PTE, but to a segment or region table. > >> > >> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> > >> Cc: stable@vger.kernel.org > >> --- > >> arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++---- > >> arch/s390/kvm/gaccess.h | 5 ++++- > >> arch/s390/kvm/vsie.c | 8 ++++---- > >> 3 files changed, 30 insertions(+), 9 deletions(-) > >> > >> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c > >> index 6d6b57059493..2d7bcbfb185e 100644 > >> --- a/arch/s390/kvm/gaccess.c > >> +++ b/arch/s390/kvm/gaccess.c > >> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct > >> gmap *sg, unsigned long saddr, rfte.val = ptr; > >> goto shadow_r2t; > >> } > >> + *pgt = ptr + vaddr.rfx * 8; > > > > So pgt either is a table entry if rc > 0 or a pointer to the first > > pte on rc == 0 after this change? > > > > Hrm, if it is really based on RCs than I might be able to come to > > terms with having two things in a ptr with the name pgt. But it > > needs a comment change. 
> > > >> rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, > >> &rfte.val); if (rc) > >> return rc; > >> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct > >> gmap *sg, unsigned long saddr, rste.val = ptr; > >> goto shadow_r3t; > >> } > >> + *pgt = ptr + vaddr.rsx * 8; > >> rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, > >> &rste.val); if (rc) > >> return rc; > >> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct > >> gmap *sg, unsigned long saddr, rtte.val = ptr; > >> goto shadow_sgt; > >> } > >> + *pgt = ptr + vaddr.rtx * 8; > >> rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, > >> &rtte.val); if (rc) > >> return rc; > >> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct > >> gmap *sg, unsigned long saddr, ste.val = ptr; > >> goto shadow_pgt; > >> } > >> + *pgt = ptr + vaddr.sx * 8; > >> rc = gmap_read_table(parent, ptr + vaddr.sx * 8, > >> &ste.val); if (rc) > >> return rc; > >> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct > >> gmap *sg, unsigned long saddr, > >> * @vcpu: virtual cpu > >> * @sg: pointer to the shadow guest address space structure > >> * @saddr: faulting address in the shadow gmap > >> + * @pteptr: will contain the address of the faulting DAT table > >> entry, or of > >> + * the valid leaf, plus some flags > > > > pteptr is not the right name if it can be two things > > You use it for pei only, right? 
yes > > > >> * > >> * Returns: - 0 if the shadow fault was successfully resolved > >> * - > 0 (pgm exception code) on exceptions while > >> faulting @@ -1165,11 +1171,11 @@ static int > >> kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, > >> * - -ENOMEM if out of memory > >> */ > >> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, > >> - unsigned long saddr) > >> + unsigned long saddr, unsigned long > >> *pteptr) { > >> union vaddress vaddr; > >> union page_table_entry pte; > >> - unsigned long pgt; > >> + unsigned long pgt = 0; > >> int dat_protection, fake; > >> int rc; > >> > >> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu > >> *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE; > >> goto shadow_page; > >> } > >> - if (!rc) > >> - rc = gmap_read_table(sg->parent, pgt + vaddr.px * > >> 8, &pte.val); + > >> + switch (rc) { > >> + case PGM_SEGMENT_TRANSLATION: > >> + case PGM_REGION_THIRD_TRANS: > >> + case PGM_REGION_SECOND_TRANS: > >> + case PGM_REGION_FIRST_TRANS: > >> + pgt |= NOT_PTE; > > > > GACC_TRANSL_ENTRY_INV ? > > > >> + break; > >> + case 0: > >> + pgt += vaddr.px * 8; > >> + rc = gmap_read_table(sg->parent, pgt, &pte.val); > >> + } > >> + if (*pteptr) > >> + *pteptr = pgt | dat_protection * DAT_PROT; > >> if (!rc && pte.i) > >> rc = PGM_PAGE_TRANSLATION; > >> if (!rc && pte.z) > >> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h > >> index f4c51756c462..66a6e2cec97a 100644 > >> --- a/arch/s390/kvm/gaccess.h > >> +++ b/arch/s390/kvm/gaccess.h > >> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu); > >> int ipte_lock_held(struct kvm_vcpu *vcpu); > >> int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, > >> unsigned long gra); > >> +#define DAT_PROT 2 > > > > GACC_TRANSL_ENTRY_PROT > > Ok after a second pass that's not what's going on here. > Those basically directly correspond to the MVPG PEI indication bits, > right? 
yes :) > Do we also need to consider bit 63? no, that can only happen if a specific SIE feature is used, which KVM neither uses nor supports for VSIE, so it cannot happen > > > >> +#define NOT_PTE 4 > >> + > >> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap > >> *shadow, > >> - unsigned long saddr); > >> + unsigned long saddr, unsigned long > >> *pteptr); > >> #endif /* __KVM_S390_GACCESS_H */ > >> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c > >> index c5d0a58b2c29..7db022141db3 100644 > >> --- a/arch/s390/kvm/vsie.c > >> +++ b/arch/s390/kvm/vsie.c > >> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, > >> struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at > >> offset *mso* */ prefix += scb_s->mso; > >> > >> - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); > >> + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, > >> NULL); if (!rc && (scb_s->ecb & ECB_TE)) > >> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > >> - prefix + PAGE_SIZE); > >> + prefix + PAGE_SIZE, > >> NULL); /* > >> * We don't have to mprotect, we will be called for all > >> unshadows. > >> * SIE will detect if protection applies and trigger a > >> validity. @@ -913,7 +913,7 @@ static int handle_fault(struct > >> kvm_vcpu *vcpu, struct vsie_page *vsie_page) > >> current->thread.gmap_addr, 1); > >> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > >> - current->thread.gmap_addr); > >> + current->thread.gmap_addr, > >> NULL); if (rc > 0) { > >> rc = inject_fault(vcpu, rc, > >> current->thread.gmap_addr, > >> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu > >> *vcpu, { > >> if (vsie_page->fault_addr) > >> kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > >> - vsie_page->fault_addr); > >> + vsie_page->fault_addr, > >> NULL); > > > > Ok > > > >> vsie_page->fault_addr = 0; > >> } > >> > >> > > > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer 2021-02-04 16:34 ` Janosch Frank 2021-02-04 17:05 ` Janosch Frank @ 2021-02-05 12:15 ` Claudio Imbrenda 2021-02-05 12:56 ` Janosch Frank 1 sibling, 1 reply; 12+ messages in thread From: Claudio Imbrenda @ 2021-02-05 12:15 UTC (permalink / raw) To: Janosch Frank; +Cc: linux-kernel, borntraeger, david, kvm, linux-s390, stable On Thu, 4 Feb 2021 17:34:00 +0100 Janosch Frank <frankja@linux.ibm.com> wrote: > On 2/2/21 7:00 PM, Claudio Imbrenda wrote: > > Extend kvm_s390_shadow_fault to return the pointer to the valid leaf > > DAT table entry, or to the invalid entry. > > > > Also return some flags in the lower bits of the address: > > DAT_PROT: indicates that DAT protection applies because of the > > protection bit in the segment (or, if EDAT, region) tables > > NOT_PTE: indicates that the address of the DAT table entry returned > > does not refer to a PTE, but to a segment or region table. > > > > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> > > Cc: stable@vger.kernel.org > > --- > > arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++---- > > arch/s390/kvm/gaccess.h | 5 ++++- > > arch/s390/kvm/vsie.c | 8 ++++---- > > 3 files changed, 30 insertions(+), 9 deletions(-) > > > > diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c > > index 6d6b57059493..2d7bcbfb185e 100644 > > --- a/arch/s390/kvm/gaccess.c > > +++ b/arch/s390/kvm/gaccess.c > > @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap > > *sg, unsigned long saddr, rfte.val = ptr; > > goto shadow_r2t; > > } > > + *pgt = ptr + vaddr.rfx * 8; > > So pgt either is a table entry if rc > 0 or a pointer to the first pte > on rc == 0 after this change? yes > Hrm, if it is really based on RCs than I might be able to come to > terms with having two things in a ptr with the name pgt. But it needs > a comment change. will do. 
> > rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, > > &rfte.val); if (rc) > > return rc; > > @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap > > *sg, unsigned long saddr, rste.val = ptr; > > goto shadow_r3t; > > } > > + *pgt = ptr + vaddr.rsx * 8; > > rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, > > &rste.val); if (rc) > > return rc; > > @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap > > *sg, unsigned long saddr, rtte.val = ptr; > > goto shadow_sgt; > > } > > + *pgt = ptr + vaddr.rtx * 8; > > rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, > > &rtte.val); if (rc) > > return rc; > > @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap > > *sg, unsigned long saddr, ste.val = ptr; > > goto shadow_pgt; > > } > > + *pgt = ptr + vaddr.sx * 8; > > rc = gmap_read_table(parent, ptr + vaddr.sx * 8, > > &ste.val); if (rc) > > return rc; > > @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap > > *sg, unsigned long saddr, > > * @vcpu: virtual cpu > > * @sg: pointer to the shadow guest address space structure > > * @saddr: faulting address in the shadow gmap > > + * @pteptr: will contain the address of the faulting DAT table > > entry, or of > > + * the valid leaf, plus some flags > > pteptr is not the right name if it can be two things it cannot be two things there, kvm_s390_shadow_fault always returns a DAT _entry_ (pte, segment, region). 
> > * > > * Returns: - 0 if the shadow fault was successfully resolved > > * - > 0 (pgm exception code) on exceptions while > > faulting @@ -1165,11 +1171,11 @@ static int > > kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, > > * - -ENOMEM if out of memory > > */ > > int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, > > - unsigned long saddr) > > + unsigned long saddr, unsigned long > > *pteptr) { > > union vaddress vaddr; > > union page_table_entry pte; > > - unsigned long pgt; > > + unsigned long pgt = 0; > > int dat_protection, fake; > > int rc; > > > > @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu > > *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE; > > goto shadow_page; > > } > > - if (!rc) > > - rc = gmap_read_table(sg->parent, pgt + vaddr.px * > > 8, &pte.val); + > > + switch (rc) { > > + case PGM_SEGMENT_TRANSLATION: > > + case PGM_REGION_THIRD_TRANS: > > + case PGM_REGION_SECOND_TRANS: > > + case PGM_REGION_FIRST_TRANS: > > + pgt |= NOT_PTE; > > GACC_TRANSL_ENTRY_INV ? 
no, this is only for non-pte entries > > + break; > > + case 0: > > + pgt += vaddr.px * 8; > > + rc = gmap_read_table(sg->parent, pgt, &pte.val); > > + } > > + if (*pteptr) > > + *pteptr = pgt | dat_protection * DAT_PROT; > > if (!rc && pte.i) > > rc = PGM_PAGE_TRANSLATION; > > if (!rc && pte.z) > > diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h > > index f4c51756c462..66a6e2cec97a 100644 > > --- a/arch/s390/kvm/gaccess.h > > +++ b/arch/s390/kvm/gaccess.h > > @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu); > > int ipte_lock_held(struct kvm_vcpu *vcpu); > > int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, > > unsigned long gra); > > +#define DAT_PROT 2 > > GACC_TRANSL_ENTRY_PROT this is also only for non-pte entries > > +#define NOT_PTE 4 > > + > > int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap > > *shadow, > > - unsigned long saddr); > > + unsigned long saddr, unsigned long > > *pteptr); > > #endif /* __KVM_S390_GACCESS_H */ > > diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c > > index c5d0a58b2c29..7db022141db3 100644 > > --- a/arch/s390/kvm/vsie.c > > +++ b/arch/s390/kvm/vsie.c > > @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, > > struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at > > offset *mso* */ prefix += scb_s->mso; > > > > - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); > > + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, > > NULL); if (!rc && (scb_s->ecb & ECB_TE)) > > rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > > - prefix + PAGE_SIZE); > > + prefix + PAGE_SIZE, > > NULL); /* > > * We don't have to mprotect, we will be called for all > > unshadows. > > * SIE will detect if protection applies and trigger a > > validity. 
@@ -913,7 +913,7 @@ static int handle_fault(struct > > kvm_vcpu *vcpu, struct vsie_page *vsie_page) > > current->thread.gmap_addr, 1); > > rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > > - current->thread.gmap_addr); > > + current->thread.gmap_addr, > > NULL); if (rc > 0) { > > rc = inject_fault(vcpu, rc, > > current->thread.gmap_addr, > > @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu > > *vcpu, { > > if (vsie_page->fault_addr) > > kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > > - vsie_page->fault_addr); > > + vsie_page->fault_addr, > > NULL); > > Ok > > > vsie_page->fault_addr = 0; > > } > > > > > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer 2021-02-05 12:15 ` Claudio Imbrenda @ 2021-02-05 12:56 ` Janosch Frank 2021-02-05 14:05 ` Claudio Imbrenda 0 siblings, 1 reply; 12+ messages in thread From: Janosch Frank @ 2021-02-05 12:56 UTC (permalink / raw) To: Claudio Imbrenda Cc: linux-kernel, borntraeger, david, kvm, linux-s390, stable On 2/5/21 1:15 PM, Claudio Imbrenda wrote: > On Thu, 4 Feb 2021 17:34:00 +0100 > Janosch Frank <frankja@linux.ibm.com> wrote: > >> On 2/2/21 7:00 PM, Claudio Imbrenda wrote: >>> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf >>> DAT table entry, or to the invalid entry. >>> >>> Also return some flags in the lower bits of the address: >>> DAT_PROT: indicates that DAT protection applies because of the >>> protection bit in the segment (or, if EDAT, region) tables >>> NOT_PTE: indicates that the address of the DAT table entry returned >>> does not refer to a PTE, but to a segment or region table. >>> >>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> >>> Cc: stable@vger.kernel.org >>> --- >>> arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++---- >>> arch/s390/kvm/gaccess.h | 5 ++++- >>> arch/s390/kvm/vsie.c | 8 ++++---- >>> 3 files changed, 30 insertions(+), 9 deletions(-) >>> >>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c >>> index 6d6b57059493..2d7bcbfb185e 100644 >>> --- a/arch/s390/kvm/gaccess.c >>> +++ b/arch/s390/kvm/gaccess.c >>> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap >>> *sg, unsigned long saddr, rfte.val = ptr; >>> goto shadow_r2t; >>> } >>> + *pgt = ptr + vaddr.rfx * 8; >> >> So pgt either is a table entry if rc > 0 or a pointer to the first pte >> on rc == 0 after this change? > > yes > >> Hrm, if it is really based on RCs than I might be able to come to >> terms with having two things in a ptr with the name pgt. But it needs >> a comment change. > > will do. 
> >>> rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, >>> &rfte.val); if (rc) >>> return rc; >>> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap >>> *sg, unsigned long saddr, rste.val = ptr; >>> goto shadow_r3t; >>> } >>> + *pgt = ptr + vaddr.rsx * 8; >>> rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, >>> &rste.val); if (rc) >>> return rc; >>> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap >>> *sg, unsigned long saddr, rtte.val = ptr; >>> goto shadow_sgt; >>> } >>> + *pgt = ptr + vaddr.rtx * 8; >>> rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, >>> &rtte.val); if (rc) >>> return rc; >>> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap >>> *sg, unsigned long saddr, ste.val = ptr; >>> goto shadow_pgt; >>> } >>> + *pgt = ptr + vaddr.sx * 8; >>> rc = gmap_read_table(parent, ptr + vaddr.sx * 8, >>> &ste.val); if (rc) >>> return rc; >>> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap >>> *sg, unsigned long saddr, >>> * @vcpu: virtual cpu >>> * @sg: pointer to the shadow guest address space structure >>> * @saddr: faulting address in the shadow gmap >>> + * @pteptr: will contain the address of the faulting DAT table >>> entry, or of >>> + * the valid leaf, plus some flags >> >> pteptr is not the right name if it can be two things > > it cannot be two things there, kvm_s390_shadow_fault always returns a > DAT _entry_ (pte, segment, region). And that's exactly what I meant, it's not a pteptr i.e. not a (pte_t *) as the name would suggest. 
> >>> * >>> * Returns: - 0 if the shadow fault was successfully resolved >>> * - > 0 (pgm exception code) on exceptions while >>> faulting @@ -1165,11 +1171,11 @@ static int >>> kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, >>> * - -ENOMEM if out of memory >>> */ >>> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, >>> - unsigned long saddr) >>> + unsigned long saddr, unsigned long >>> *pteptr) { >>> union vaddress vaddr; >>> union page_table_entry pte; >>> - unsigned long pgt; >>> + unsigned long pgt = 0; >>> int dat_protection, fake; >>> int rc; >>> >>> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu >>> *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE; >>> goto shadow_page; >>> } >>> - if (!rc) >>> - rc = gmap_read_table(sg->parent, pgt + vaddr.px * >>> 8, &pte.val); + >>> + switch (rc) { >>> + case PGM_SEGMENT_TRANSLATION: >>> + case PGM_REGION_THIRD_TRANS: >>> + case PGM_REGION_SECOND_TRANS: >>> + case PGM_REGION_FIRST_TRANS: >>> + pgt |= NOT_PTE; >> >> GACC_TRANSL_ENTRY_INV ? 
> > no, this is only for non-pte entries > >>> + break; >>> + case 0: >>> + pgt += vaddr.px * 8; >>> + rc = gmap_read_table(sg->parent, pgt, &pte.val); >>> + } >>> + if (*pteptr) >>> + *pteptr = pgt | dat_protection * DAT_PROT; >>> if (!rc && pte.i) >>> rc = PGM_PAGE_TRANSLATION; >>> if (!rc && pte.z) >>> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h >>> index f4c51756c462..66a6e2cec97a 100644 >>> --- a/arch/s390/kvm/gaccess.h >>> +++ b/arch/s390/kvm/gaccess.h >>> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu); >>> int ipte_lock_held(struct kvm_vcpu *vcpu); >>> int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, >>> unsigned long gra); >>> +#define DAT_PROT 2 >> >> GACC_TRANSL_ENTRY_PROT > > this is also only for non-pte entries > >>> +#define NOT_PTE 4 >>> + >>> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap >>> *shadow, >>> - unsigned long saddr); >>> + unsigned long saddr, unsigned long >>> *pteptr); >>> #endif /* __KVM_S390_GACCESS_H */ >>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c >>> index c5d0a58b2c29..7db022141db3 100644 >>> --- a/arch/s390/kvm/vsie.c >>> +++ b/arch/s390/kvm/vsie.c >>> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, >>> struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at >>> offset *mso* */ prefix += scb_s->mso; >>> >>> - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); >>> + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, >>> NULL); if (!rc && (scb_s->ecb & ECB_TE)) >>> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, >>> - prefix + PAGE_SIZE); >>> + prefix + PAGE_SIZE, >>> NULL); /* >>> * We don't have to mprotect, we will be called for all >>> unshadows. >>> * SIE will detect if protection applies and trigger a >>> validity. 
@@ -913,7 +913,7 @@ static int handle_fault(struct >>> kvm_vcpu *vcpu, struct vsie_page *vsie_page) >>> current->thread.gmap_addr, 1); >>> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, >>> - current->thread.gmap_addr); >>> + current->thread.gmap_addr, >>> NULL); if (rc > 0) { >>> rc = inject_fault(vcpu, rc, >>> current->thread.gmap_addr, >>> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu >>> *vcpu, { >>> if (vsie_page->fault_addr) >>> kvm_s390_shadow_fault(vcpu, vsie_page->gmap, >>> - vsie_page->fault_addr); >>> + vsie_page->fault_addr, >>> NULL); >> >> Ok >> >>> vsie_page->fault_addr = 0; >>> } >>> >>> >> > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer 2021-02-05 12:56 ` Janosch Frank @ 2021-02-05 14:05 ` Claudio Imbrenda 0 siblings, 0 replies; 12+ messages in thread From: Claudio Imbrenda @ 2021-02-05 14:05 UTC (permalink / raw) To: Janosch Frank; +Cc: linux-kernel, borntraeger, david, kvm, linux-s390, stable On Fri, 5 Feb 2021 13:56:53 +0100 Janosch Frank <frankja@linux.ibm.com> wrote: > On 2/5/21 1:15 PM, Claudio Imbrenda wrote: > > On Thu, 4 Feb 2021 17:34:00 +0100 > > Janosch Frank <frankja@linux.ibm.com> wrote: > > > >> On 2/2/21 7:00 PM, Claudio Imbrenda wrote: > >>> Extend kvm_s390_shadow_fault to return the pointer to the valid > >>> leaf DAT table entry, or to the invalid entry. > >>> > >>> Also return some flags in the lower bits of the address: > >>> DAT_PROT: indicates that DAT protection applies because of the > >>> protection bit in the segment (or, if EDAT, region) > >>> tables NOT_PTE: indicates that the address of the DAT table entry > >>> returned does not refer to a PTE, but to a segment or region > >>> table. > >>> > >>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> > >>> Cc: stable@vger.kernel.org > >>> --- > >>> arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++---- > >>> arch/s390/kvm/gaccess.h | 5 ++++- > >>> arch/s390/kvm/vsie.c | 8 ++++---- > >>> 3 files changed, 30 insertions(+), 9 deletions(-) > >>> > >>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c > >>> index 6d6b57059493..2d7bcbfb185e 100644 > >>> --- a/arch/s390/kvm/gaccess.c > >>> +++ b/arch/s390/kvm/gaccess.c > >>> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct > >>> gmap *sg, unsigned long saddr, rfte.val = ptr; > >>> goto shadow_r2t; > >>> } > >>> + *pgt = ptr + vaddr.rfx * 8; > >> > >> So pgt either is a table entry if rc > 0 or a pointer to the first > >> pte on rc == 0 after this change? 
> > > > yes > > > >> Hrm, if it is really based on RCs than I might be able to come to > >> terms with having two things in a ptr with the name pgt. But it > >> needs a comment change. > > > > will do. > > > >>> rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, > >>> &rfte.val); if (rc) > >>> return rc; > >>> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct > >>> gmap *sg, unsigned long saddr, rste.val = ptr; > >>> goto shadow_r3t; > >>> } > >>> + *pgt = ptr + vaddr.rsx * 8; > >>> rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, > >>> &rste.val); if (rc) > >>> return rc; > >>> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct > >>> gmap *sg, unsigned long saddr, rtte.val = ptr; > >>> goto shadow_sgt; > >>> } > >>> + *pgt = ptr + vaddr.rtx * 8; > >>> rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, > >>> &rtte.val); if (rc) > >>> return rc; > >>> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct > >>> gmap *sg, unsigned long saddr, ste.val = ptr; > >>> goto shadow_pgt; > >>> } > >>> + *pgt = ptr + vaddr.sx * 8; > >>> rc = gmap_read_table(parent, ptr + vaddr.sx * 8, > >>> &ste.val); if (rc) > >>> return rc; > >>> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct > >>> gmap *sg, unsigned long saddr, > >>> * @vcpu: virtual cpu > >>> * @sg: pointer to the shadow guest address space structure > >>> * @saddr: faulting address in the shadow gmap > >>> + * @pteptr: will contain the address of the faulting DAT table > >>> entry, or of > >>> + * the valid leaf, plus some flags > >> > >> pteptr is not the right name if it can be two things > > > > it cannot be two things there, kvm_s390_shadow_fault always returns > > a DAT _entry_ (pte, segment, region). > > And that's exactly what I meant, it's not a pteptr i.e. not a (pte_t > *) as the name would suggest. 
fair enough, I'll rename it to something like entryptr or so > > > > >>> * > >>> * Returns: - 0 if the shadow fault was successfully resolved > >>> * - > 0 (pgm exception code) on exceptions while > >>> faulting @@ -1165,11 +1171,11 @@ static int > >>> kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, > >>> * - -ENOMEM if out of memory > >>> */ > >>> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, > >>> - unsigned long saddr) > >>> + unsigned long saddr, unsigned long > >>> *pteptr) { > >>> union vaddress vaddr; > >>> union page_table_entry pte; > >>> - unsigned long pgt; > >>> + unsigned long pgt = 0; > >>> int dat_protection, fake; > >>> int rc; > >>> > >>> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu > >>> *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE; > >>> goto shadow_page; > >>> } > >>> - if (!rc) > >>> - rc = gmap_read_table(sg->parent, pgt + vaddr.px * > >>> 8, &pte.val); + > >>> + switch (rc) { > >>> + case PGM_SEGMENT_TRANSLATION: > >>> + case PGM_REGION_THIRD_TRANS: > >>> + case PGM_REGION_SECOND_TRANS: > >>> + case PGM_REGION_FIRST_TRANS: > >>> + pgt |= NOT_PTE; > >> > >> GACC_TRANSL_ENTRY_INV ? 
> > > > no, this is only for non-pte entries > > > >>> + break; > >>> + case 0: > >>> + pgt += vaddr.px * 8; > >>> + rc = gmap_read_table(sg->parent, pgt, &pte.val); > >>> + } > >>> + if (*pteptr) > >>> + *pteptr = pgt | dat_protection * DAT_PROT; > >>> if (!rc && pte.i) > >>> rc = PGM_PAGE_TRANSLATION; > >>> if (!rc && pte.z) > >>> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h > >>> index f4c51756c462..66a6e2cec97a 100644 > >>> --- a/arch/s390/kvm/gaccess.h > >>> +++ b/arch/s390/kvm/gaccess.h > >>> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu); > >>> int ipte_lock_held(struct kvm_vcpu *vcpu); > >>> int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, > >>> unsigned long gra); > >>> +#define DAT_PROT 2 > >> > >> GACC_TRANSL_ENTRY_PROT > > > > this is also only for non-pte entries > > > >>> +#define NOT_PTE 4 > >>> + > >>> int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap > >>> *shadow, > >>> - unsigned long saddr); > >>> + unsigned long saddr, unsigned long > >>> *pteptr); > >>> #endif /* __KVM_S390_GACCESS_H */ > >>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c > >>> index c5d0a58b2c29..7db022141db3 100644 > >>> --- a/arch/s390/kvm/vsie.c > >>> +++ b/arch/s390/kvm/vsie.c > >>> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, > >>> struct vsie_page *vsie_page) /* with mso/msl, the prefix lies at > >>> offset *mso* */ prefix += scb_s->mso; > >>> > >>> - rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > >>> prefix); > >>> + rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, > >>> NULL); if (!rc && (scb_s->ecb & ECB_TE)) > >>> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > >>> - prefix + PAGE_SIZE); > >>> + prefix + PAGE_SIZE, > >>> NULL); /* > >>> * We don't have to mprotect, we will be called for all > >>> unshadows. > >>> * SIE will detect if protection applies and trigger a > >>> validity. 
@@ -913,7 +913,7 @@ static int handle_fault(struct > >>> kvm_vcpu *vcpu, struct vsie_page *vsie_page) > >>> current->thread.gmap_addr, 1); > >>> rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > >>> - current->thread.gmap_addr); > >>> + current->thread.gmap_addr, > >>> NULL); if (rc > 0) { > >>> rc = inject_fault(vcpu, rc, > >>> current->thread.gmap_addr, > >>> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu > >>> *vcpu, { > >>> if (vsie_page->fault_addr) > >>> kvm_s390_shadow_fault(vcpu, vsie_page->gmap, > >>> - vsie_page->fault_addr); > >>> + vsie_page->fault_addr, > >>> NULL); > >> > >> Ok > >> > >>> vsie_page->fault_addr = 0; > >>> } > >>> > >>> > >> > > > ^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE 2021-02-02 18:00 [PATCH v2 0/2] s390/kvm: fix MVPG when in VSIE Claudio Imbrenda 2021-02-02 18:00 ` [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer Claudio Imbrenda @ 2021-02-02 18:00 ` Claudio Imbrenda 2021-02-03 10:36 ` Claudio Imbrenda 2021-02-04 17:10 ` Janosch Frank 1 sibling, 2 replies; 12+ messages in thread From: Claudio Imbrenda @ 2021-02-02 18:00 UTC (permalink / raw) To: linux-kernel; +Cc: borntraeger, frankja, david, kvm, linux-s390, stable Correctly handle the MVPG instruction when issued by a VSIE guest. Fixes: a3508fbe9dc6d ("KVM: s390: vsie: initial support for nested virtualization") Cc: stable@vger.kernel.org Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> --- arch/s390/kvm/vsie.c | 94 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 5 deletions(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 7db022141db3..2db49749e27b 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -416,11 +416,6 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) memcpy((void *)((u64)scb_o + 0xc0), (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0); break; - case ICPT_PARTEXEC: - /* MVPG only */ - memcpy((void *)((u64)scb_o + 0xc0), - (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0); - break; } if (scb_s->ihcpu != 0xffffU) @@ -982,6 +977,91 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) return 0; } +static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg) +{ + reg &= 0xf; + switch (reg) { + case 15: + return vsie_page->scb_s.gg15; + case 14: + return vsie_page->scb_s.gg14; + default: + return vcpu->run->s.regs.gprs[reg]; + } +} + +static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) +{ + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; + unsigned long pei1, pei2, src, dest, mask = PAGE_MASK; + u64 
*pei_block = &vsie_page->scb_o->mcic; + int edat, rc1, rc2; + union ctlreg0 cr0; + + cr0.val = vcpu->arch.sie_block->gcr[0]; + edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8); + if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_24BIT) + mask = 0xfff000; + else if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_31BIT) + mask = 0x7ffff000; + + dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask; + src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask; + + rc1 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei1); + rc2 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei2); + /* + * Either everything went well, or something non-critical went wrong + * e.g. beause of a race. In either case, simply retry. + */ + if (rc1 == -EAGAIN || rc2 == -EAGAIN || (!rc1 && !rc2)) { + retry_vsie_icpt(vsie_page); + return -EAGAIN; + } + /* Something more serious went wrong, propagate the error */ + if (rc1 < 0) + return rc1; + if (rc2 < 0) + return rc2; + + /* The only possible suppressing exception: just deliver it */ + if (rc1 == PGM_TRANSLATION_SPEC || rc2 == PGM_TRANSLATION_SPEC) { + clear_vsie_icpt(vsie_page); + rc1 = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC); + WARN_ON_ONCE(rc1); + return 1; + } + + /* + * Forward the PEI intercept to the guest if it was a page fault, or + * also for segment and region table faults if EDAT applies. + */ + if (edat) { + rc1 = rc1 == PGM_ASCE_TYPE ? rc1 : 0; + rc2 = rc2 == PGM_ASCE_TYPE ? rc2 : 0; + } + if ((!rc1 || rc1 == PGM_PAGE_TRANSLATION) && (!rc2 || rc2 == PGM_PAGE_TRANSLATION)) { + pei_block[0] = pei1; + pei_block[1] = pei2; + return 1; + } + + retry_vsie_icpt(vsie_page); + + /* + * The host has edat, and the guest does not, or it was an ASCE type + * exception. The host needs to inject the appropriate DAT interrupts + * into the guest. 
+ */ + if (rc1) + return inject_fault(vcpu, rc1, dest, 1); + if (rc2) + return inject_fault(vcpu, rc2, src, 0); + + /* This should never be reached */ + return 0; +} + /* * Run the vsie on a shadow scb and a shadow gmap, without any further * sanity checks, handling SIE faults. @@ -1068,6 +1148,10 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) if ((scb_s->ipa & 0xf000) != 0xf000) scb_s->ipa += 0x1000; break; + case ICPT_PARTEXEC: + if (scb_s->ipa == 0xb254) + rc = vsie_handle_mvpg(vcpu, vsie_page); + break; } return rc; } -- 2.26.2 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE 2021-02-02 18:00 ` [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE Claudio Imbrenda @ 2021-02-03 10:36 ` Claudio Imbrenda 2021-02-04 17:10 ` Janosch Frank 1 sibling, 0 replies; 12+ messages in thread From: Claudio Imbrenda @ 2021-02-03 10:36 UTC (permalink / raw) To: linux-kernel; +Cc: borntraeger, frankja, david, kvm, linux-s390, stable On Tue, 2 Feb 2021 19:00:28 +0100 Claudio Imbrenda <imbrenda@linux.ibm.com> wrote: > Correctly handle the MVPG instruction when issued by a VSIE guest. > > Fixes: a3508fbe9dc6d ("KVM: s390: vsie: initial support for nested > virtualization") Cc: stable@vger.kernel.org > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> > --- > arch/s390/kvm/vsie.c | 94 > +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 89 > insertions(+), 5 deletions(-) > > diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c > index 7db022141db3..2db49749e27b 100644 > --- a/arch/s390/kvm/vsie.c > +++ b/arch/s390/kvm/vsie.c > @@ -416,11 +416,6 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, > struct vsie_page *vsie_page) memcpy((void *)((u64)scb_o + 0xc0), > (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0); > break; > - case ICPT_PARTEXEC: > - /* MVPG only */ > - memcpy((void *)((u64)scb_o + 0xc0), > - (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0); > - break; > } > > if (scb_s->ihcpu != 0xffffU) > @@ -982,6 +977,91 @@ static int handle_stfle(struct kvm_vcpu *vcpu, > struct vsie_page *vsie_page) return 0; > } > > +static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page > *vsie_page, u8 reg) +{ > + reg &= 0xf; > + switch (reg) { > + case 15: > + return vsie_page->scb_s.gg15; > + case 14: > + return vsie_page->scb_s.gg14; > + default: > + return vcpu->run->s.regs.gprs[reg]; > + } > +} > + > +static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page > *vsie_page) +{ > + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; > + unsigned long 
pei1, pei2, src, dest, mask = PAGE_MASK; > + u64 *pei_block = &vsie_page->scb_o->mcic; > + int edat, rc1, rc2; > + union ctlreg0 cr0; > + > + cr0.val = vcpu->arch.sie_block->gcr[0]; > + edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8); > + if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_24BIT) > + mask = 0xfff000; > + else if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_31BIT) > + mask = 0x7ffff000; > + > + dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) > & mask; > + src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & > mask; + > + rc1 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, > &pei1); > + rc2 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, > &pei2); > + /* > + * Either everything went well, or something non-critical > went wrong > + * e.g. beause of a race. In either case, simply retry. > + */ > + if (rc1 == -EAGAIN || rc2 == -EAGAIN || (!rc1 && !rc2)) { > + retry_vsie_icpt(vsie_page); > + return -EAGAIN; > + } > + /* Something more serious went wrong, propagate the error */ > + if (rc1 < 0) > + return rc1; > + if (rc2 < 0) > + return rc2; > + > + /* The only possible suppressing exception: just deliver it > */ > + if (rc1 == PGM_TRANSLATION_SPEC || rc2 == > PGM_TRANSLATION_SPEC) { > + clear_vsie_icpt(vsie_page); > + rc1 = kvm_s390_inject_program_int(vcpu, > PGM_TRANSLATION_SPEC); > + WARN_ON_ONCE(rc1); > + return 1; > + } > + > + /* > + * Forward the PEI intercept to the guest if it was a page > fault, or > + * also for segment and region table faults if EDAT applies. > + */ > + if (edat) { > + rc1 = rc1 == PGM_ASCE_TYPE ? rc1 : 0; > + rc2 = rc2 == PGM_ASCE_TYPE ? rc2 : 0; > + } I just noticed, this should actually be: if (edat) { rc1 = rc1 == PGM_ASCE_TYPE ? rc1 : 0; rc2 = rc2 == PGM_ASCE_TYPE ? rc2 : 0; } else { rc1 = rc1 != PGM_PAGE_TRANSLATION ? rc1 : 0; rc2 = rc2 != PGM_PAGE_TRANSLATION ? 
rc2 : 0; } I'll fix it in the next version > + if ((!rc1 || rc1 == PGM_PAGE_TRANSLATION) && (!rc2 || rc2 == > PGM_PAGE_TRANSLATION)) { > + pei_block[0] = pei1; > + pei_block[1] = pei2; > + return 1; > + } > + > + retry_vsie_icpt(vsie_page); > + > + /* > + * The host has edat, and the guest does not, or it was an > ASCE type > + * exception. The host needs to inject the appropriate DAT > interrupts > + * into the guest. > + */ > + if (rc1) > + return inject_fault(vcpu, rc1, dest, 1); > + if (rc2) > + return inject_fault(vcpu, rc2, src, 0); > + > + /* This should never be reached */ > + return 0; > +} > + > /* > * Run the vsie on a shadow scb and a shadow gmap, without any > further > * sanity checks, handling SIE faults. > @@ -1068,6 +1148,10 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, > struct vsie_page *vsie_page) if ((scb_s->ipa & 0xf000) != 0xf000) > scb_s->ipa += 0x1000; > break; > + case ICPT_PARTEXEC: > + if (scb_s->ipa == 0xb254) > + rc = vsie_handle_mvpg(vcpu, vsie_page); > + break; > } > return rc; > } ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE 2021-02-02 18:00 ` [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE Claudio Imbrenda 2021-02-03 10:36 ` Claudio Imbrenda @ 2021-02-04 17:10 ` Janosch Frank 2021-02-05 12:20 ` Claudio Imbrenda 1 sibling, 1 reply; 12+ messages in thread From: Janosch Frank @ 2021-02-04 17:10 UTC (permalink / raw) To: Claudio Imbrenda, linux-kernel Cc: borntraeger, david, kvm, linux-s390, stable On 2/2/21 7:00 PM, Claudio Imbrenda wrote: > Correctly handle the MVPG instruction when issued by a VSIE guest. > > Fixes: a3508fbe9dc6d ("KVM: s390: vsie: initial support for nested virtualization") > Cc: stable@vger.kernel.org > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> So far the patch looks ok to me and way better to understand than v1, good job > --- > arch/s390/kvm/vsie.c | 94 +++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 89 insertions(+), 5 deletions(-) > > diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c > index 7db022141db3..2db49749e27b 100644 > --- a/arch/s390/kvm/vsie.c > +++ b/arch/s390/kvm/vsie.c > @@ -416,11 +416,6 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > memcpy((void *)((u64)scb_o + 0xc0), > (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0); Magic offsets being magic Another item for my todo list. 
> break; > - case ICPT_PARTEXEC: > - /* MVPG only */ > - memcpy((void *)((u64)scb_o + 0xc0), > - (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0); > - break; > } > > if (scb_s->ihcpu != 0xffffU) > @@ -982,6 +977,91 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > return 0; > } > > +static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg) > +{ > + reg &= 0xf; > + switch (reg) { > + case 15: > + return vsie_page->scb_s.gg15; > + case 14: > + return vsie_page->scb_s.gg14; > + default: > + return vcpu->run->s.regs.gprs[reg]; > + } > +} > + > +static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > +{ > + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; > + unsigned long pei1, pei2, src, dest, mask = PAGE_MASK; > + u64 *pei_block = &vsie_page->scb_o->mcic; > + int edat, rc1, rc2; Can you use a src/dst prefix or suffix, please? 1/2 is confusing. > + union ctlreg0 cr0; > + > + cr0.val = vcpu->arch.sie_block->gcr[0]; > + edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8); > + if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_24BIT) > + mask = 0xfff000; > + else if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_31BIT) > + mask = 0x7ffff000; > + > + dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask; > + src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask; > + > + rc1 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei1); > + rc2 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei2); > + /* > + * Either everything went well, or something non-critical went wrong > + * e.g. beause of a race. In either case, simply retry. 
> + */ > + if (rc1 == -EAGAIN || rc2 == -EAGAIN || (!rc1 && !rc2)) { > + retry_vsie_icpt(vsie_page); > + return -EAGAIN; > + } > + /* Something more serious went wrong, propagate the error */ > + if (rc1 < 0) > + return rc1; > + if (rc2 < 0) > + return rc2; > + > + /* The only possible suppressing exception: just deliver it */ > + if (rc1 == PGM_TRANSLATION_SPEC || rc2 == PGM_TRANSLATION_SPEC) { > + clear_vsie_icpt(vsie_page); > + rc1 = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC); > + WARN_ON_ONCE(rc1); > + return 1; > + } > + > + /* > + * Forward the PEI intercept to the guest if it was a page fault, or > + * also for segment and region table faults if EDAT applies. > + */ > + if (edat) { > + rc1 = rc1 == PGM_ASCE_TYPE ? rc1 : 0; > + rc2 = rc2 == PGM_ASCE_TYPE ? rc2 : 0; > + } > + if ((!rc1 || rc1 == PGM_PAGE_TRANSLATION) && (!rc2 || rc2 == PGM_PAGE_TRANSLATION)) { > + pei_block[0] = pei1; > + pei_block[1] = pei2; > + return 1; > + } > + > + retry_vsie_icpt(vsie_page); > + > + /* > + * The host has edat, and the guest does not, or it was an ASCE type > + * exception. The host needs to inject the appropriate DAT interrupts > + * into the guest. > + */ > + if (rc1) > + return inject_fault(vcpu, rc1, dest, 1); > + if (rc2) > + return inject_fault(vcpu, rc2, src, 0); > + > + /* This should never be reached */ BUG()? > + return 0; > +} > + > /* > * Run the vsie on a shadow scb and a shadow gmap, without any further > * sanity checks, handling SIE faults. > @@ -1068,6 +1148,10 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > if ((scb_s->ipa & 0xf000) != 0xf000) > scb_s->ipa += 0x1000; > break; > + case ICPT_PARTEXEC: > + if (scb_s->ipa == 0xb254) > + rc = vsie_handle_mvpg(vcpu, vsie_page); > + break; > } > return rc; > } > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE 2021-02-04 17:10 ` Janosch Frank @ 2021-02-05 12:20 ` Claudio Imbrenda 0 siblings, 0 replies; 12+ messages in thread From: Claudio Imbrenda @ 2021-02-05 12:20 UTC (permalink / raw) To: Janosch Frank; +Cc: linux-kernel, borntraeger, david, kvm, linux-s390, stable On Thu, 4 Feb 2021 18:10:01 +0100 Janosch Frank <frankja@linux.ibm.com> wrote: > On 2/2/21 7:00 PM, Claudio Imbrenda wrote: > > Correctly handle the MVPG instruction when issued by a VSIE guest. > > > > Fixes: a3508fbe9dc6d ("KVM: s390: vsie: initial support for nested > > virtualization") Cc: stable@vger.kernel.org > > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com> > > So far the patch looks ok to me and way better to understand than v1, > good job > > > --- > > arch/s390/kvm/vsie.c | 94 > > +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 89 > > insertions(+), 5 deletions(-) > > > > diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c > > index 7db022141db3..2db49749e27b 100644 > > --- a/arch/s390/kvm/vsie.c > > +++ b/arch/s390/kvm/vsie.c > > @@ -416,11 +416,6 @@ static void unshadow_scb(struct kvm_vcpu > > *vcpu, struct vsie_page *vsie_page) memcpy((void *)((u64)scb_o + > > 0xc0), (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0); > > Magic offsets being magic > Another item for my todo list. 
> > > break; > > - case ICPT_PARTEXEC: > > - /* MVPG only */ > > - memcpy((void *)((u64)scb_o + 0xc0), > > - (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0); > > - break; > > } > > > > if (scb_s->ihcpu != 0xffffU) > > @@ -982,6 +977,91 @@ static int handle_stfle(struct kvm_vcpu *vcpu, > > struct vsie_page *vsie_page) return 0; > > } > > > > +static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct > > vsie_page *vsie_page, u8 reg) +{ > > + reg &= 0xf; > > + switch (reg) { > > + case 15: > > + return vsie_page->scb_s.gg15; > > + case 14: > > + return vsie_page->scb_s.gg14; > > + default: > > + return vcpu->run->s.regs.gprs[reg]; > > + } > > +} > > + > > +static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct > > vsie_page *vsie_page) +{ > > + struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; > > + unsigned long pei1, pei2, src, dest, mask = PAGE_MASK; > > + u64 *pei_block = &vsie_page->scb_o->mcic; > > + int edat, rc1, rc2; > > Can use a src/dst prefix or suffix please? > 1/2 is confusing. will do > > + union ctlreg0 cr0; > > + > > + cr0.val = vcpu->arch.sie_block->gcr[0]; > > + edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8); > > + if (psw_bits(scb_s->gpsw).eaba == PSW_BITS_AMODE_24BIT) > > + mask = 0xfff000; > > + else if (psw_bits(scb_s->gpsw).eaba == > > PSW_BITS_AMODE_31BIT) > > + mask = 0x7ffff000; > > + > > + dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> > > 16) & mask; > > + src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) > > & mask; + > > + rc1 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, > > &pei1); > > + rc2 = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, > > &pei2); > > + /* > > + * Either everything went well, or something non-critical > > went wrong > > + * e.g. beause of a race. In either case, simply retry. 
> > + */ > > + if (rc1 == -EAGAIN || rc2 == -EAGAIN || (!rc1 && !rc2)) { > > + retry_vsie_icpt(vsie_page); > > + return -EAGAIN; > > + } > > + /* Something more serious went wrong, propagate the error > > */ > > + if (rc1 < 0) > > + return rc1; > > + if (rc2 < 0) > > + return rc2; > > + > > + /* The only possible suppressing exception: just deliver > > it */ > > + if (rc1 == PGM_TRANSLATION_SPEC || rc2 == > > PGM_TRANSLATION_SPEC) { > > + clear_vsie_icpt(vsie_page); > > + rc1 = kvm_s390_inject_program_int(vcpu, > > PGM_TRANSLATION_SPEC); > > + WARN_ON_ONCE(rc1); > > + return 1; > > + } > > + > > + /* > > + * Forward the PEI intercept to the guest if it was a page > > fault, or > > + * also for segment and region table faults if EDAT > > applies. > > + */ > > + if (edat) { > > + rc1 = rc1 == PGM_ASCE_TYPE ? rc1 : 0; > > + rc2 = rc2 == PGM_ASCE_TYPE ? rc2 : 0; > > + } > > + if ((!rc1 || rc1 == PGM_PAGE_TRANSLATION) && (!rc2 || rc2 > > == PGM_PAGE_TRANSLATION)) { > > + pei_block[0] = pei1; > > + pei_block[1] = pei2; > > + return 1; > > + } > > + > > + retry_vsie_icpt(vsie_page); > > + > > + /* > > + * The host has edat, and the guest does not, or it was an > > ASCE type > > + * exception. The host needs to inject the appropriate DAT > > interrupts > > + * into the guest. > > + */ > > + if (rc1) > > + return inject_fault(vcpu, rc1, dest, 1); > > + if (rc2)> + return inject_fault(vcpu, rc2, > > src, 0); + > > + /* This should never be reached */ > > BUG()? look at the code, if it's reached, it's a bug in the compiler :) maybe I should rewrite it so that there won't be any unreachable code at all > > + return 0; > > +} > > + > > /* > > * Run the vsie on a shadow scb and a shadow gmap, without any > > further > > * sanity checks, handling SIE faults. 
> > @@ -1068,6 +1148,10 @@ static int do_vsie_run(struct kvm_vcpu > > *vcpu, struct vsie_page *vsie_page) if ((scb_s->ipa & 0xf000) != > > 0xf000) scb_s->ipa += 0x1000; > > break; > > + case ICPT_PARTEXEC: > > + if (scb_s->ipa == 0xb254) > > + rc = vsie_handle_mvpg(vcpu, vsie_page); > > + break; > > } > > return rc; > > } > > > ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2021-02-06 0:29 UTC | newest] Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2021-02-02 18:00 [PATCH v2 0/2] s390/kvm: fix MVPG when in VSIE Claudio Imbrenda 2021-02-02 18:00 ` [PATCH v2 1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer Claudio Imbrenda 2021-02-04 16:34 ` Janosch Frank 2021-02-04 17:05 ` Janosch Frank 2021-02-05 12:18 ` Claudio Imbrenda 2021-02-05 12:15 ` Claudio Imbrenda 2021-02-05 12:56 ` Janosch Frank 2021-02-05 14:05 ` Claudio Imbrenda 2021-02-02 18:00 ` [PATCH v2 2/2] s390/kvm: VSIE: correctly handle MVPG when in VSIE Claudio Imbrenda 2021-02-03 10:36 ` Claudio Imbrenda 2021-02-04 17:10 ` Janosch Frank 2021-02-05 12:20 ` Claudio Imbrenda
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).