All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] KVM: memcg: count KVM page table pages used by KVM in memcg pagetable stats
@ 2022-03-11  0:12 Yosry Ahmed
  2022-03-11 19:20 ` Shakeel Butt
  2022-03-30  0:48 ` Sean Christopherson
  0 siblings, 2 replies; 5+ messages in thread
From: Yosry Ahmed @ 2022-03-11  0:12 UTC (permalink / raw)
  To: Paolo Bonzini, Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li,
	Jim Mattson, Joerg Roedel, Johannes Weiner, Michal Hocko,
	Roman Gushchin
  Cc: Shakeel Butt, Junaid Shahid, David Matlack, kvm, Yosry Ahmed

Count the pages used by KVM for page tables in pagetable memcg stats in
memory.stat.

Most pages used for KVM page tables come from the mmu_shadow_page_cache,
in addition to a few allocations in __kvm_mmu_create() and
mmu_alloc_special_roots().

For allocations from the mmu_shadow_page_cache, the pages are counted as
pagetables when they are actually used by KVM (when
mmu_memory_cache_alloc_obj() is called), rather than when they are
allocated in the cache itself. In other words, pages sitting in the
cache are not counted as pagetables (they are still accounted as kernel
memory).

The reason for this is to avoid the complexity and confusion of
incrementing the stats in the cache layer, while decrementing them
by the cache users when they are being freed (pages are freed directly
and not returned to the cache).
For the sake of simplicity, the stats are incremented and decremented by
the users of the cache when they get the page and when they free it.

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
 arch/x86/include/asm/kvm_host.h |  7 +++++++
 arch/x86/kvm/mmu/mmu.c          | 19 +++++++++++++++++++
 arch/x86/kvm/mmu/tdp_mmu.c      |  4 ++++
 virt/kvm/kvm_main.c             | 17 +++++++++++++++++
 4 files changed, 47 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f72e80178ffc..4a1dda2f56e1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -458,6 +458,13 @@ struct kvm_mmu {
 	*/
 	u32 pkru_mask;
 
+	/*
+	 * After a page is allocated for any of these roots,
+	 * increment per-memcg pagetable stats by calling:
+	 * inc_lruvec_page_state(page, NR_PAGETABLE)
+	 * Before the page is freed, decrement the stats by calling:
+	 * dec_lruvec_page_state(page, NR_PAGETABLE).
+	 */
 	u64 *pae_root;
 	u64 *pml4_root;
 	u64 *pml5_root;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3b8da8b0745e..5f87e1b0da91 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1673,7 +1673,10 @@ static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
 	MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
 	hlist_del(&sp->hash_link);
 	list_del(&sp->link);
+
+	dec_lruvec_page_state(virt_to_page(sp->spt), NR_PAGETABLE);
 	free_page((unsigned long)sp->spt);
+
 	if (!sp->role.direct)
 		free_page((unsigned long)sp->gfns);
 	kmem_cache_free(mmu_page_header_cache, sp);
@@ -1711,7 +1714,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
 	struct kvm_mmu_page *sp;
 
 	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
+
 	sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
+	inc_lruvec_page_state(virt_to_page(sp->spt), NR_PAGETABLE);
+
 	if (!direct)
 		sp->gfns = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_gfn_array_cache);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
@@ -3602,6 +3608,10 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 	mmu->pml4_root = pml4_root;
 	mmu->pml5_root = pml5_root;
 
+	/* Update per-memcg pagetable stats */
+	inc_lruvec_page_state(virt_to_page(pae_root), NR_PAGETABLE);
+	inc_lruvec_page_state(virt_to_page(pml4_root), NR_PAGETABLE);
+	inc_lruvec_page_state(virt_to_page(pml5_root), NR_PAGETABLE);
 	return 0;
 
 #ifdef CONFIG_X86_64
@@ -5554,6 +5564,12 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
 {
 	if (!tdp_enabled && mmu->pae_root)
 		set_memory_encrypted((unsigned long)mmu->pae_root, 1);
+
+	/* Update per-memcg pagetable stats */
+	dec_lruvec_page_state(virt_to_page(mmu->pae_root), NR_PAGETABLE);
+	dec_lruvec_page_state(virt_to_page(mmu->pml4_root), NR_PAGETABLE);
+	dec_lruvec_page_state(virt_to_page(mmu->pml5_root), NR_PAGETABLE);
+
 	free_page((unsigned long)mmu->pae_root);
 	free_page((unsigned long)mmu->pml4_root);
 	free_page((unsigned long)mmu->pml5_root);
@@ -5591,6 +5607,9 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 	if (!page)
 		return -ENOMEM;
 
+	/* Update per-memcg pagetable stats */
+	inc_lruvec_page_state(page, NR_PAGETABLE);
+
 	mmu->pae_root = page_address(page);
 
 	/*
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index af60922906ef..ce8930fd0835 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -64,6 +64,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
 
 static void tdp_mmu_free_sp(struct kvm_mmu_page *sp)
 {
+	dec_lruvec_page_state(virt_to_page(sp->spt), NR_PAGETABLE);
 	free_page((unsigned long)sp->spt);
 	kmem_cache_free(mmu_page_header_cache, sp);
 }
@@ -273,7 +274,9 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
 	struct kvm_mmu_page *sp;
 
 	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
+
 	sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
+	inc_lruvec_page_state(virt_to_page(sp->spt), NR_PAGETABLE);
 
 	return sp;
 }
@@ -1410,6 +1413,7 @@ static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
 		return NULL;
 	}
 
+	inc_lruvec_page_state(virt_to_page(sp->spt), NR_PAGETABLE);
 	return sp;
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9581a24c3d17..3c8cce440c34 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -397,6 +397,23 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
 	}
 }
 
+/*
+ * After this function is called to get a page from mmu_shadow_page_cache,
+ * increment per-memcg pagetable stats by calling:
+ * inc_lruvec_page_state(page, NR_PAGETABLE)
+ * Before the page is freed, decrement the stats by calling:
+ * dec_lruvec_page_state(page, NR_PAGETABLE).
+ *
+ * Note that for the sake of per-memcg stats in memory.stat, the pages will be
+ * counted as pagetable pages only when they are allocated from the cache. This means
+ * that pages sitting in the mmu_shadow_page_cache will not be counted as
+ * pagetable pages (but will still be counted as kernel memory).
+ *
+ * Counting pages in the cache can introduce unnecessary complexity as we will
+ * need to increment the stats in the cache layer when pages are allocated, and
+ * decrement the stats outside the cache layer when pages are freed. This can be
+ * confusing for new users of the cache.
+ */
 void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 {
 	void *p;
-- 
2.35.1.723.g4982287a31-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] KVM: memcg: count KVM page table pages used by KVM in memcg pagetable stats
  2022-03-11  0:12 [PATCH] KVM: memcg: count KVM page table pages used by KVM in memcg pagetable stats Yosry Ahmed
@ 2022-03-11 19:20 ` Shakeel Butt
  2022-04-05 18:43   ` Yosry Ahmed
  2022-03-30  0:48 ` Sean Christopherson
  1 sibling, 1 reply; 5+ messages in thread
From: Shakeel Butt @ 2022-03-11 19:20 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Paolo Bonzini, Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li,
	Jim Mattson, Joerg Roedel, Johannes Weiner, Michal Hocko,
	Roman Gushchin, Junaid Shahid, David Matlack, kvm

On Fri, Mar 11, 2022 at 12:12:52AM +0000, Yosry Ahmed wrote:
> Count the pages used by KVM for page tables in pagetable memcg stats in
> memory.stat.


This is not just the memcg stats. NR_PAGETABLE is exposed in system-wide
meminfo as well as per-node meminfo. Memcg infrastructure maintaining
per-memcg (& per-node) NR_PAGETABLE stat should be transparent here.
Please update your commit message.

BTW you are only updating the stats for x86. What about other archs?
Also do those archs allocate pagetable memory from the page allocator or
a kmem cache?


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] KVM: memcg: count KVM page table pages used by KVM in memcg pagetable stats
  2022-03-11  0:12 [PATCH] KVM: memcg: count KVM page table pages used by KVM in memcg pagetable stats Yosry Ahmed
  2022-03-11 19:20 ` Shakeel Butt
@ 2022-03-30  0:48 ` Sean Christopherson
  2022-04-05 18:42   ` Yosry Ahmed
  1 sibling, 1 reply; 5+ messages in thread
From: Sean Christopherson @ 2022-03-30  0:48 UTC (permalink / raw)
  To: Yosry Ahmed
  Cc: Paolo Bonzini, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Johannes Weiner, Michal Hocko, Roman Gushchin,
	Shakeel Butt, Junaid Shahid, David Matlack, kvm

On Fri, Mar 11, 2022, Yosry Ahmed wrote:
> Count the pages used by KVM for page tables in pagetable memcg stats in
> memory.stat.

Why?  Is it problematic to count these as kernel memory as opposed to page tables?
What is gained/lost by tracking these as page table allocations?  E.g. won't this
pollute the information about the host page tables for the userpace process?

When you asked about stats, I thought you meant KVM stats :-)

> Most pages used for KVM page tables come from the mmu_shadow_page_cache,
> in addition to a few allocations in __kvm_mmu_create() and
> mmu_alloc_special_roots().
> 
> For allocations from the mmu_shadow_page_cache, the pages are counted as
> pagetables when they are actually used by KVM (when
> mmu_memory_cache_alloc_obj() is called), rather than when they are
> allocated in the cache itself. In other words, pages sitting in the
> cache are not counted as pagetables (they are still accounted as kernel
> memory).
> 
> The reason for this is to avoid the complexity and confusion of
> incrementing the stats in the cache layer, while decrementing them
> by the cache users when they are being freed (pages are freed directly
> and not returned to the cache).
> For the sake of simplicity, the stats are incremented and decremented by
> the users of the cache when they get the page and when they free it.
> 
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  7 +++++++
>  arch/x86/kvm/mmu/mmu.c          | 19 +++++++++++++++++++
>  arch/x86/kvm/mmu/tdp_mmu.c      |  4 ++++
>  virt/kvm/kvm_main.c             | 17 +++++++++++++++++
>  4 files changed, 47 insertions(+)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index f72e80178ffc..4a1dda2f56e1 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -458,6 +458,13 @@ struct kvm_mmu {
>  	*/
>  	u32 pkru_mask;
>  
> +	/*
> +	 * After a page is allocated for any of these roots,
> +	 * increment per-memcg pagetable stats by calling:
> +	 * inc_lruvec_page_state(page, NR_PAGETABLE)
> +	 * Before the page is freed, decrement the stats by calling:
> +	 * dec_lruvec_page_state(page, NR_PAGETABLE).
> +	 */
>  	u64 *pae_root;
>  	u64 *pml4_root;
>  	u64 *pml5_root;

Eh, I would much prefer we don't bother counting these.  They're barely page
tables, more like necessary evils.  And hopefully they'll be gone soon[*].

[*] https://lore.kernel.org/all/20220329153604.507475-1-jiangshanlai@gmail.com

> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 3b8da8b0745e..5f87e1b0da91 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -1673,7 +1673,10 @@ static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>  	MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
>  	hlist_del(&sp->hash_link);
>  	list_del(&sp->link);
> +
> +	dec_lruvec_page_state(virt_to_page(sp->spt), NR_PAGETABLE);

I would strongly prefer to add new helpers to combine this accounting with KVM's
existing accounting.  E.g. for the legacy (not tdp_mmu.c) MMU code

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 1361eb4599b4..c2cb642157cc 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1668,6 +1668,18 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
        percpu_counter_add(&kvm_total_used_mmu_pages, nr);
 }

+static void kvm_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+       kvm_mod_used_mmu_pages(kvm, 1);
+       inc_lruvec_page_state(..., NR_PAGETABLE);
+}
+
+static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+       kvm_mod_used_mmu_pages(kvm, -1);
+       dec_lruvec_page_state(..., NR_PAGETABLE);
+}
+
 static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
 {
        MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
@@ -1723,7 +1735,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
         */
        sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
        list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
-       kvm_mod_used_mmu_pages(vcpu->kvm, +1);
+       kvm_account_mmu_page(vcpu->kvm, sp);
        return sp;
 }

@@ -2339,7 +2351,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
                        list_add(&sp->link, invalid_list);
                else
                        list_move(&sp->link, invalid_list);
-               kvm_mod_used_mmu_pages(kvm, -1);
+               kvm_unaccount_mmu_page(kvm, sp);
        } else {
                /*
                 * Remove the active root from the active page list, the root


>  	free_page((unsigned long)sp->spt);
> +

There's a lot of spurious whitespace change in this patch.

>  	if (!sp->role.direct)
>  		free_page((unsigned long)sp->gfns);
>  	kmem_cache_free(mmu_page_header_cache, sp);
> @@ -1711,7 +1714,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
>  	struct kvm_mmu_page *sp;
>  
>  	sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
> +

More whitespace, though it should just naturally go away.

>  	sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
> +	inc_lruvec_page_state(virt_to_page(sp->spt), NR_PAGETABLE);
> +
>  	if (!direct)
>  		sp->gfns = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_gfn_array_cache);
>  	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
> @@ -3602,6 +3608,10 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
>  	mmu->pml4_root = pml4_root;
>  	mmu->pml5_root = pml5_root;
>  
> +	/* Update per-memcg pagetable stats */
> +	inc_lruvec_page_state(virt_to_page(pae_root), NR_PAGETABLE);
> +	inc_lruvec_page_state(virt_to_page(pml4_root), NR_PAGETABLE);
> +	inc_lruvec_page_state(virt_to_page(pml5_root), NR_PAGETABLE);
>  	return 0;
>  
>  #ifdef CONFIG_X86_64
> @@ -5554,6 +5564,12 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
>  {
>  	if (!tdp_enabled && mmu->pae_root)
>  		set_memory_encrypted((unsigned long)mmu->pae_root, 1);
> +
> +	/* Update per-memcg pagetable stats */
> +	dec_lruvec_page_state(virt_to_page(mmu->pae_root), NR_PAGETABLE);
> +	dec_lruvec_page_state(virt_to_page(mmu->pml4_root), NR_PAGETABLE);
> +	dec_lruvec_page_state(virt_to_page(mmu->pml5_root), NR_PAGETABLE);
> +
>  	free_page((unsigned long)mmu->pae_root);
>  	free_page((unsigned long)mmu->pml4_root);
>  	free_page((unsigned long)mmu->pml5_root);
> @@ -5591,6 +5607,9 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
>  	if (!page)
>  		return -ENOMEM;
>  
> +	/* Update per-memcg pagetable stats */
> +	inc_lruvec_page_state(page, NR_PAGETABLE);
> +
>  	mmu->pae_root = page_address(page);
>  
>  	/*
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index af60922906ef..ce8930fd0835 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -64,6 +64,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
>  
>  static void tdp_mmu_free_sp(struct kvm_mmu_page *sp)
>  {
> +	dec_lruvec_page_state(virt_to_page(sp->spt), NR_PAGETABLE);

I'd prefer to do this in tdp_mmu_{,un}link_sp(), it saves having to add calls in
all paths that allocate TDP MMU pages.

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] KVM: memcg: count KVM page table pages used by KVM in memcg pagetable stats
  2022-03-30  0:48 ` Sean Christopherson
@ 2022-04-05 18:42   ` Yosry Ahmed
  0 siblings, 0 replies; 5+ messages in thread
From: Yosry Ahmed @ 2022-04-05 18:42 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: Paolo Bonzini, Vitaly Kuznetsov, Wanpeng Li, Jim Mattson,
	Joerg Roedel, Johannes Weiner, Michal Hocko, Roman Gushchin,
	Shakeel Butt, Junaid Shahid, David Matlack, kvm

On Tue, Mar 29, 2022 at 5:48 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Fri, Mar 11, 2022, Yosry Ahmed wrote:
> > Count the pages used by KVM for page tables in pagetable memcg stats in
> > memory.stat.
>
> Why?  Is it problematic to count these as kernel memory as opposed to page tables?
> What is gained/lost by tracking these as page table allocations?  E.g. won't this
> pollute the information about the host page tables for the userspace process?
>
> When you asked about stats, I thought you meant KVM stats :-)

Thanks for taking the time to look into this. I followed up with a v2
patch series with the rationale behind this change in the cover
letter.

Basically we maintain different kernel memory stats on different
levels (global, per-node, per-memcg, ..), this includes the total kmem
usage, in addition to the significant components (page tables,
vmalloc, stack, ...). This gives users insights to what the kmem usage
is made up of.

As for polluting the information about the host page tables, the pages
used by KVM for page tables are still page tables, and the user should
be aware of the workload they are running. If I am running a VM I
should be expecting more page tables usage for the guest as well.

>
> > Most pages used for KVM page tables come from the mmu_shadow_page_cache,
> > in addition to a few allocations in __kvm_mmu_create() and
> > mmu_alloc_special_roots().
> >
> > For allocations from the mmu_shadow_page_cache, the pages are counted as
> > pagetables when they are actually used by KVM (when
> > mmu_memory_cache_alloc_obj() is called), rather than when they are
> > allocated in the cache itself. In other words, pages sitting in the
> > cache are not counted as pagetables (they are still accounted as kernel
> > memory).
> >
> > The reason for this is to avoid the complexity and confusion of
> > incrementing the stats in the cache layer, while decrementing them
> > by the cache users when they are being freed (pages are freed directly
> > and not returned to the cache).
> > For the sake of simplicity, the stats are incremented and decremented by
> > the users of the cache when they get the page and when they free it.
> >
> > Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |  7 +++++++
> >  arch/x86/kvm/mmu/mmu.c          | 19 +++++++++++++++++++
> >  arch/x86/kvm/mmu/tdp_mmu.c      |  4 ++++
> >  virt/kvm/kvm_main.c             | 17 +++++++++++++++++
> >  4 files changed, 47 insertions(+)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index f72e80178ffc..4a1dda2f56e1 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -458,6 +458,13 @@ struct kvm_mmu {
> >       */
> >       u32 pkru_mask;
> >
> > +     /*
> > +      * After a page is allocated for any of these roots,
> > +      * increment per-memcg pagetable stats by calling:
> > +      * inc_lruvec_page_state(page, NR_PAGETABLE)
> > +      * Before the page is freed, decrement the stats by calling:
> > +      * dec_lruvec_page_state(page, NR_PAGETABLE).
> > +      */
> >       u64 *pae_root;
> >       u64 *pml4_root;
> >       u64 *pml5_root;
>
> Eh, I would much prefer we don't bother counting these.  They're barely page
> tables, more like necessary evils.  And hopefully they'll be gone soon[*].

These were ignored in v2.

>
> [*] https://lore.kernel.org/all/20220329153604.507475-1-jiangshanlai@gmail.com
>
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 3b8da8b0745e..5f87e1b0da91 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -1673,7 +1673,10 @@ static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
> >       MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
> >       hlist_del(&sp->hash_link);
> >       list_del(&sp->link);
> > +
> > +     dec_lruvec_page_state(virt_to_page(sp->spt), NR_PAGETABLE);
>
> I would strongly prefer to add new helpers to combine this accounting with KVM's
> existing accounting.  E.g. for the legacy (not tdp_mmu.c) MMU code

Thanks a lot for the suggestion. I followed this in both legacy and
tdp mmu code for x86 and the diff is much cleaner.

>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 1361eb4599b4..c2cb642157cc 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -1668,6 +1668,18 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
>         percpu_counter_add(&kvm_total_used_mmu_pages, nr);
>  }
>
> +static void kvm_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> +{
> +       kvm_mod_used_mmu_pages(kvm, 1);
> +       inc_lruvec_page_state(..., NR_PAGETABLE);
> +}
> +
> +static void kvm_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
> +{
> +       kvm_mod_used_mmu_pages(kvm, -1);
> +       dec_lruvec_page_state(..., NR_PAGETABLE);
> +}
> +
>  static void kvm_mmu_free_page(struct kvm_mmu_page *sp)
>  {
>         MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
> @@ -1723,7 +1735,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
>          */
>         sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
>         list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
> -       kvm_mod_used_mmu_pages(vcpu->kvm, +1);
> +       kvm_account_mmu_page(vcpu->kvm, sp);
>         return sp;
>  }
>
> @@ -2339,7 +2351,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
>                         list_add(&sp->link, invalid_list);
>                 else
>                         list_move(&sp->link, invalid_list);
> -               kvm_mod_used_mmu_pages(kvm, -1);
> +               kvm_unaccount_mmu_page(kvm, sp);
>         } else {
>                 /*
>                  * Remove the active root from the active page list, the root
>
>
> >       free_page((unsigned long)sp->spt);
> > +
>
> There's a lot of spurious whitespace change in this patch.

Took care of this in v2. I was trying to separate the stats update
from kvm logic visually.

>
> >       if (!sp->role.direct)
> >               free_page((unsigned long)sp->gfns);
> >       kmem_cache_free(mmu_page_header_cache, sp);
> > @@ -1711,7 +1714,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct
> >       struct kvm_mmu_page *sp;
> >
> >       sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
> > +
>
> More whitespace, though it should just naturally go away.
>
> >       sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache);
> > +     inc_lruvec_page_state(virt_to_page(sp->spt), NR_PAGETABLE);
> > +
> >       if (!direct)
> >               sp->gfns = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_gfn_array_cache);
> >       set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
> > @@ -3602,6 +3608,10 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
> >       mmu->pml4_root = pml4_root;
> >       mmu->pml5_root = pml5_root;
> >
> > +     /* Update per-memcg pagetable stats */
> > +     inc_lruvec_page_state(virt_to_page(pae_root), NR_PAGETABLE);
> > +     inc_lruvec_page_state(virt_to_page(pml4_root), NR_PAGETABLE);
> > +     inc_lruvec_page_state(virt_to_page(pml5_root), NR_PAGETABLE);
> >       return 0;
> >
> >  #ifdef CONFIG_X86_64
> > @@ -5554,6 +5564,12 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
> >  {
> >       if (!tdp_enabled && mmu->pae_root)
> >               set_memory_encrypted((unsigned long)mmu->pae_root, 1);
> > +
> > +     /* Update per-memcg pagetable stats */
> > +     dec_lruvec_page_state(virt_to_page(mmu->pae_root), NR_PAGETABLE);
> > +     dec_lruvec_page_state(virt_to_page(mmu->pml4_root), NR_PAGETABLE);
> > +     dec_lruvec_page_state(virt_to_page(mmu->pml5_root), NR_PAGETABLE);
> > +
> >       free_page((unsigned long)mmu->pae_root);
> >       free_page((unsigned long)mmu->pml4_root);
> >       free_page((unsigned long)mmu->pml5_root);
> > @@ -5591,6 +5607,9 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
> >       if (!page)
> >               return -ENOMEM;
> >
> > +     /* Update per-memcg pagetable stats */
> > +     inc_lruvec_page_state(page, NR_PAGETABLE);
> > +
> >       mmu->pae_root = page_address(page);
> >
> >       /*
> > diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> > index af60922906ef..ce8930fd0835 100644
> > --- a/arch/x86/kvm/mmu/tdp_mmu.c
> > +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> > @@ -64,6 +64,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
> >
> >  static void tdp_mmu_free_sp(struct kvm_mmu_page *sp)
> >  {
> > +     dec_lruvec_page_state(virt_to_page(sp->spt), NR_PAGETABLE);
>
> I'd prefer to do this in tdp_mmu_{,un}link_sp(), it saves having to add calls in
> all paths that allocate TDP MMU pages.

Followed this in v2.

Thanks for your review, the version of this patch in v2 is indeed much
cleaner and more readable!

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] KVM: memcg: count KVM page table pages used by KVM in memcg pagetable stats
  2022-03-11 19:20 ` Shakeel Butt
@ 2022-04-05 18:43   ` Yosry Ahmed
  0 siblings, 0 replies; 5+ messages in thread
From: Yosry Ahmed @ 2022-04-05 18:43 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Paolo Bonzini, Sean Christopherson, Vitaly Kuznetsov, Wanpeng Li,
	Jim Mattson, Joerg Roedel, Johannes Weiner, Michal Hocko,
	Roman Gushchin, Junaid Shahid, David Matlack, kvm

On Fri, Mar 11, 2022 at 11:20 AM Shakeel Butt <shakeelb@google.com> wrote:
>
> On Fri, Mar 11, 2022 at 12:12:52AM +0000, Yosry Ahmed wrote:
> > Count the pages used by KVM for page tables in pagetable memcg stats in
> > memory.stat.
>
>
> This is not just the memcg stats. NR_PAGETABLE is exposed in system-wide
> meminfo as well as per-node meminfo. Memcg infrastructure maintaining
> per-memcg (& per-node) NR_PAGETABLE stat should be transparent here.
> Please update your commit message.

Thanks for pointing this out! Updated in v2 patch series.

>
> BTW you are only updating the stats for x86. What about other archs?
> Also does those arch allocated pagetable memory from page allocator or
> kmem cache?
>

v2 patch series includes patches for other archs as well. Thanks!

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-04-05 22:31 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-11  0:12 [PATCH] KVM: memcg: count KVM page table pages used by KVM in memcg pagetable stats Yosry Ahmed
2022-03-11 19:20 ` Shakeel Butt
2022-04-05 18:43   ` Yosry Ahmed
2022-03-30  0:48 ` Sean Christopherson
2022-04-05 18:42   ` Yosry Ahmed

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.