* [PATCH 04/10] KVM: PPC: Book3S HV: recursively unmap all page table entries when unmapping
@ 2018-05-06  7:37 Nicholas Piggin
  2018-05-07  2:00 ` Paul Mackerras
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Nicholas Piggin @ 2018-05-06  7:37 UTC (permalink / raw)
  To: kvm-ppc

The radix fault handler will unmap a page table if it wants to install
a huge page there instead. It will just clear that page table entry,
free the page, and flush the pwc.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 147 +++++++++++++++++--------
 1 file changed, 100 insertions(+), 47 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index ad6675a9485a..da507b41fdd3 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -236,6 +236,104 @@ static void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte,
 	}
 }
 
+/*
+ * kvmppc_free_p?d are used to free existing page tables, and recursively
+ * descend and clear and free children.
+ * Callers are responsible for flushing the PWC.
+ */
+static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full)
+{
+	if (full) {
+		memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
+	} else {
+		pte_t *p = pte;
+		unsigned long it;
+
+		for (it = 0; it < PTRS_PER_PTE; ++it, ++p) {
+			if (pte_val(*p) == 0)
+				continue;
+			kvmppc_unmap_pte(kvm, p,
+					 pte_pfn(*p) << PAGE_SHIFT,
+					 PAGE_SHIFT);
+		}
+	}
+
+	kvmppc_pte_free(pte);
+}
+
+static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full)
+{
+	unsigned long im;
+	pmd_t *p = pmd;
+
+	for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
+		if (!pmd_present(*p))
+			continue;
+		if (pmd_is_leaf(*p)) {
+			if (full)
+				pmd_clear(p);
+			else
+				kvmppc_unmap_pte(kvm, (pte_t *)p,
+					 pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
+					 PMD_SHIFT);
+		} else {
+			pte_t *pte;
+
+			pte = pte_offset_map(p, 0);
+			kvmppc_unmap_free_pte(kvm, pte, full);
+			pmd_clear(p);
+		}
+	}
+	pmd_free(kvm->mm, pmd);
+}
+
+static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud, bool full)
+{
+	unsigned long iu;
+	pud_t *p = pud;
+
+	for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
+		if (!pud_present(*p))
+			continue;
+		if (pud_huge(*p)) {
+			if (full)
+				pud_clear(p);
+			else
+				kvmppc_unmap_pte(kvm, (pte_t *)p,
+					 pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
+					 PUD_SHIFT);
+		} else {
+			pmd_t *pmd;
+
+			pmd = pmd_offset(p, 0);
+			kvmppc_unmap_free_pmd(kvm, pmd, full);
+			pud_clear(p);
+		}
+	}
+	pud_free(kvm->mm, pud);
+}
+
+void kvmppc_free_radix(struct kvm *kvm)
+{
+	unsigned long ig;
+	pgd_t *pgd;
+
+	if (!kvm->arch.pgtable)
+		return;
+	pgd = kvm->arch.pgtable;
+	for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
+		pud_t *pud;
+
+		if (!pgd_present(*pgd))
+			continue;
+		pud = pud_offset(pgd, 0);
+		kvmppc_unmap_free_pud(kvm, pud, true);
+		pgd_clear(pgd);
+	}
+	pgd_free(kvm->mm, kvm->arch.pgtable);
+	kvm->arch.pgtable = NULL;
+}
+
 static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			     unsigned int level, unsigned long mmu_seq)
 {
@@ -303,7 +401,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			 * install a large page, so remove and free the page
 			 * table page.  new_pmd will be NULL since level = 2.
 			 */
-			new_pmd = pmd_offset(pud, 0);
+			kvmppc_unmap_free_pmd(kvm, pmd_offset(pud, 0), false);
 			pud_clear(pud);
 			kvmppc_radix_flush_pwc(kvm, gpa);
 		}
@@ -344,7 +442,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			 * install a large page, so remove and free the page
 			 * table page.  new_ptep will be NULL since level = 1.
 			 */
-			new_ptep = pte_offset_kernel(pmd, 0);
+			kvmppc_unmap_free_pte(kvm, pte_offset_kernel(pmd, 0), false);
 			pmd_clear(pmd);
 			kvmppc_radix_flush_pwc(kvm, gpa);
 		}
@@ -723,51 +821,6 @@ int kvmppc_init_vm_radix(struct kvm *kvm)
 	return 0;
 }
 
-void kvmppc_free_radix(struct kvm *kvm)
-{
-	unsigned long ig, iu, im;
-	pte_t *pte;
-	pmd_t *pmd;
-	pud_t *pud;
-	pgd_t *pgd;
-
-	if (!kvm->arch.pgtable)
-		return;
-	pgd = kvm->arch.pgtable;
-	for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
-		if (!pgd_present(*pgd))
-			continue;
-		pud = pud_offset(pgd, 0);
-		for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
-			if (!pud_present(*pud))
-				continue;
-			if (pud_huge(*pud)) {
-				pud_clear(pud);
-				continue;
-			}
-			pmd = pmd_offset(pud, 0);
-			for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
-				if (pmd_is_leaf(*pmd)) {
-					pmd_clear(pmd);
-					continue;
-				}
-				if (!pmd_present(*pmd))
-					continue;
-				pte = pte_offset_map(pmd, 0);
-				memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
-				kvmppc_pte_free(pte);
-				pmd_clear(pmd);
-			}
-			pmd_free(kvm->mm, pmd_offset(pud, 0));
-			pud_clear(pud);
-		}
-		pud_free(kvm->mm, pud_offset(pgd, 0));
-		pgd_clear(pgd);
-	}
-	pgd_free(kvm->mm, kvm->arch.pgtable);
-	kvm->arch.pgtable = NULL;
-}
-
 static void pte_ctor(void *addr)
 {
 	memset(addr, 0, PTE_TABLE_SIZE);
-- 
2.17.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 04/10] KVM: PPC: Book3S HV: recursively unmap all page table entries when unmapping
  2018-05-06  7:37 [PATCH 04/10] KVM: PPC: Book3S HV: recursively unmap all page table entries when unmapping Nicholas Piggin
@ 2018-05-07  2:00 ` Paul Mackerras
  2018-05-07  2:53 ` Nicholas Piggin
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Paul Mackerras @ 2018-05-07  2:00 UTC (permalink / raw)
  To: kvm-ppc

On Sun, May 06, 2018 at 05:37:25PM +1000, Nicholas Piggin wrote:
> The radix fault handler will unmap a page table if it wants to install
> a huge page there instead. It will just clear that page table entry,
> free the page, and flush the pwc.

This is a pretty inadequate commit description.  You are describing
what the code does before your patch, but you don't say why that is
bad or what your patch does and why that is better.

In particular, I don't see how we could ever get the situation where
we are replacing a page table page with a huge page and there are any
valid leaf PTEs in (or below) that page table page.  I accept that I
am leaking the page table pages but I don't see why tlbies would be
necessary.

What scenario did you have in mind where we would need to do the full
unmap, that is, where we would find any valid leaf PTEs in a page
table page that we are replacing?

Paul.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 04/10] KVM: PPC: Book3S HV: recursively unmap all page table entries when unmapping
  2018-05-06  7:37 [PATCH 04/10] KVM: PPC: Book3S HV: recursively unmap all page table entries when unmapping Nicholas Piggin
  2018-05-07  2:00 ` Paul Mackerras
@ 2018-05-07  2:53 ` Nicholas Piggin
  2018-05-07  3:35 ` Paul Mackerras
  2018-05-07  9:34 ` Nicholas Piggin
  3 siblings, 0 replies; 5+ messages in thread
From: Nicholas Piggin @ 2018-05-07  2:53 UTC (permalink / raw)
  To: kvm-ppc

On Mon, 7 May 2018 12:00:58 +1000
Paul Mackerras <paulus@ozlabs.org> wrote:

> On Sun, May 06, 2018 at 05:37:25PM +1000, Nicholas Piggin wrote:
> > The radix fault handler will unmap a page table if it wants to install
> > a huge page there instead. It will just clear that page table entry,
> > free the page, and flush the pwc.  
> 
> This is a pretty inadequate commit description.  You are describing
> what the code does before your patch, but you don't say why that is
> bad or what your patch does and why that is better.
> 
> In particular, I don't see how we could ever get the situation where
> we are replacing a page table page with a huge page and there are any
> valid leaf PTEs in (or below) that page table page.  I accept that I
> am leaking the page table pages but I don't see why tlbies would be
> necessary.
>
> What scenario did you have in mind where we would need to do the full
> unmap, that is, where we would find any valid leaf PTEs in a page
> table page that we are replacing?

I actually wasn't 100% sure if a fault could ever try to install a
64k pte when another could install a 2MB (without an interleaving
invalidation).

If that's not required I can take it out of this patch, I'd like to
add it back in a subsequent change to do proper range invalidates
that don't leave these stale page tables around for the fault path
to clean up, but that discussion can be left until then.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 04/10] KVM: PPC: Book3S HV: recursively unmap all page table entries when unmapping
  2018-05-06  7:37 [PATCH 04/10] KVM: PPC: Book3S HV: recursively unmap all page table entries when unmapping Nicholas Piggin
  2018-05-07  2:00 ` Paul Mackerras
  2018-05-07  2:53 ` Nicholas Piggin
@ 2018-05-07  3:35 ` Paul Mackerras
  2018-05-07  9:34 ` Nicholas Piggin
  3 siblings, 0 replies; 5+ messages in thread
From: Paul Mackerras @ 2018-05-07  3:35 UTC (permalink / raw)
  To: kvm-ppc

On Mon, May 07, 2018 at 12:53:42PM +1000, Nicholas Piggin wrote:
> On Mon, 7 May 2018 12:00:58 +1000
> Paul Mackerras <paulus@ozlabs.org> wrote:
> 
> > On Sun, May 06, 2018 at 05:37:25PM +1000, Nicholas Piggin wrote:
> > > The radix fault handler will unmap a page table if it wants to install
> > > a huge page there instead. It will just clear that page table entry,
> > > free the page, and flush the pwc.  
> > 
> > This is a pretty inadequate commit description.  You are describing
> > what the code does before your patch, but you don't say why that is
> > bad or what your patch does and why that is better.
> > 
> > In particular, I don't see how we could ever get the situation where
> > we are replacing a page table page with a huge page and there are any
> > valid leaf PTEs in (or below) that page table page.  I accept that I
> > am leaking the page table pages but I don't see why tlbies would be
> > necessary.
> >
> > What scenario did you have in mind where we would need to do the full
> > unmap, that is, where we would find any valid leaf PTEs in a page
> > table page that we are replacing?
> 
> I actually wasn't 100% sure if a fault could ever try to install a
> 64k pte when another could install a 2MB (without an interleaving
> invalidation).

I don't see how that could happen.  A THP collapse should invalidate
the range before installing the 2M PTE.  The code in
kvmppc_book3s_radix_page_fault() wouldn't make a different decision
for two subpages of the same 2M page.  That said, that code should be
including the memslot address limits in deciding whether to use a
large/huge page or not.
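
For example, a minimal sketch of that kind of check (the helper name and
placement are hypothetical, not part of the existing fault path) could
require the whole 2MB guest-physical range to be covered by the memslot
before a 2MB mapping is chosen:

static bool gpa_2m_within_memslot(struct kvm_memory_slot *memslot,
				  unsigned long gpa)
{
	unsigned long start = gpa & PMD_MASK;
	unsigned long slot_start = memslot->base_gfn << PAGE_SHIFT;
	unsigned long slot_end = slot_start +
				 (memslot->npages << PAGE_SHIFT);

	/* Use a 2MB mapping only if the whole 2MB range is in the slot */
	return start >= slot_start && start + PMD_SIZE <= slot_end;
}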

> If that's not required I can take it out of this patch, I'd like to
> add it back in a subsequent change to do proper range invalidates
> that don't leave these stale page tables around for the fault path
> to clean up, but that discussion can be left until then.

If you're using the kvmppc_unmap_pte helper to do a range invalidate,
it would be nice to avoid doing the gfn_to_memslot each time.
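
A rough sketch of that idea (not part of this patch: the
kvmppc_unmap_pte_memslot() variant is hypothetical, and the range is
assumed to lie within a single memslot) would resolve the memslot once
and then walk the range:

static void kvmppc_unmap_range(struct kvm *kvm, unsigned long gpa,
			       unsigned long nbytes)
{
	struct kvm_memory_slot *memslot;
	unsigned long end = gpa + nbytes;

	/* Caller is assumed to hold kvm->mmu_lock */

	/* One memslot lookup for the whole range, not one per pte */
	memslot = gfn_to_memslot(kvm, gpa >> PAGE_SHIFT);

	while (gpa < end) {
		unsigned int shift = 0;
		pte_t *ptep;

		ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
		if (!shift)
			shift = PAGE_SHIFT;
		if (ptep && pte_present(*ptep))
			kvmppc_unmap_pte_memslot(kvm, ptep, gpa, shift,
						 memslot);
		/* step by the size of the mapping just examined */
		gpa += 1UL << shift;
	}
}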

Paul.



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 04/10] KVM: PPC: Book3S HV: recursively unmap all page table entries when unmapping
  2018-05-06  7:37 [PATCH 04/10] KVM: PPC: Book3S HV: recursively unmap all page table entries when unmapping Nicholas Piggin
                   ` (2 preceding siblings ...)
  2018-05-07  3:35 ` Paul Mackerras
@ 2018-05-07  9:34 ` Nicholas Piggin
  3 siblings, 0 replies; 5+ messages in thread
From: Nicholas Piggin @ 2018-05-07  9:34 UTC (permalink / raw)
  To: kvm-ppc

On Mon, 7 May 2018 13:35:58 +1000
Paul Mackerras <paulus@ozlabs.org> wrote:

> On Mon, May 07, 2018 at 12:53:42PM +1000, Nicholas Piggin wrote:
> > On Mon, 7 May 2018 12:00:58 +1000
> > Paul Mackerras <paulus@ozlabs.org> wrote:
> >   
> > > On Sun, May 06, 2018 at 05:37:25PM +1000, Nicholas Piggin wrote:  
> > > > The radix fault handler will unmap a page table if it wants to install
> > > > a huge page there instead. It will just clear that page table entry,
> > > > free the page, and flush the pwc.    
> > > 
> > > This is a pretty inadequate commit description.  You are describing
> > > what the code does before your patch, but you don't say why that is
> > > bad or what your patch does and why that is better.
> > > 
> > > In particular, I don't see how we could ever get the situation where
> > > we are replacing a page table page with a huge page and there are any
> > > valid leaf PTEs in (or below) that page table page.  I accept that I
> > > am leaking the page table pages but I don't see why tlbies would be
> > > necessary.
> > >
> > > What scenario did you have in mind where we would need to do the full
> > > unmap, that is, where we would find any valid leaf PTEs in a page
> > > table page that we are replacing?  
> > 
> > I actually wasn't 100% sure if a fault could ever try to install a
> > 64k pte when another could install a 2MB (without an interleaving
> > invalidation).  
> 
> I don't see how that could happen.  A THP collapse should invalidate
> the range before installing the 2M PTE.  The code in
> kvmppc_book3s_radix_page_fault() wouldn't make a different decision
> for two subpages of the same 2M page.

Okay I'll change the patch.

>  That said, that code should be
> including the memslot address limits in deciding whether to use a
> large/huge page or not.
> 
> > If that's not required I can take it out of this patch, I'd like to
> > add it back in a subsequent change to do proper range invalidates
> > that don't leave these stale page tables around for the fault path
> > to clean up, but that discussion can be left until then.  
> 
> If you're using the kvmppc_unmap_pte helper to do a range invalidate,
> it would be nice to avoid doing the gfn_to_memslot each time.

That seems like it should be possible, I'll see if I can make it work.

Thanks,
Nick


^ permalink raw reply	[flat|nested] 5+ messages in thread
