From: Ricardo Koller <ricarkol@google.com>
To: Oliver Upton <oupton@google.com>
Cc: kvmarm@lists.cs.columbia.edu, kvm@vger.kernel.org,
	Marc Zyngier <maz@kernel.org>, James Morse <james.morse@arm.com>,
	Alexandru Elisei <alexandru.elisei@arm.com>,
	Suzuki K Poulose <suzuki.poulose@arm.com>,
	linux-arm-kernel@lists.infradead.org,
	Peter Shier <pshier@google.com>,
	Reiji Watanabe <reijiw@google.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <seanjc@google.com>,
	Ben Gardon <bgardon@google.com>,
	David Matlack <dmatlack@google.com>
Subject: Re: [RFC PATCH 14/17] KVM: arm64: Punt last page reference to rcu callback for parallel walk
Date: Mon, 18 Apr 2022 19:59:04 -0700	[thread overview]
Message-ID: <Yl4leEoIg+dr/1QM@google.com> (raw)
In-Reply-To: <20220415215901.1737897-15-oupton@google.com>

On Fri, Apr 15, 2022 at 09:58:58PM +0000, Oliver Upton wrote:
> It is possible that a table page remains visible to another thread until
> the next rcu synchronization event. To that end, we cannot drop the last
> page reference synchronous with post-order traversal for a parallel
> table walk.
> 
> Schedule an rcu callback to clean up the child table page for parallel
> walks.
> 
> Signed-off-by: Oliver Upton <oupton@google.com>
> ---
>  arch/arm64/include/asm/kvm_pgtable.h |  3 ++
>  arch/arm64/kvm/hyp/pgtable.c         | 24 +++++++++++++--
>  arch/arm64/kvm/mmu.c                 | 44 +++++++++++++++++++++++++++-
>  3 files changed, 67 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index 74955aba5918..52e55e00f0ca 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -81,6 +81,8 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
>   * @put_page:			Decrement the refcount on a page. When the
>   *				refcount reaches 0 the page is automatically
>   *				freed.
> + * @free_table:			Drop the last page reference, possibly in the
> + *				next RCU sync if doing a shared walk.
>   * @page_count:			Return the refcount of a page.
>   * @phys_to_virt:		Convert a physical address into a virtual
>   *				address	mapped in the current context.
> @@ -98,6 +100,7 @@ struct kvm_pgtable_mm_ops {
>  	void		(*get_page)(void *addr);
>  	void		(*put_page)(void *addr);
>  	int		(*page_count)(void *addr);
> +	void		(*free_table)(void *addr, bool shared);
>  	void*		(*phys_to_virt)(phys_addr_t phys);
>  	phys_addr_t	(*virt_to_phys)(void *addr);
>  	void		(*dcache_clean_inval_poc)(void *addr, size_t size);
> diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
> index 121818d4c33e..a9a48edba63b 100644
> --- a/arch/arm64/kvm/hyp/pgtable.c
> +++ b/arch/arm64/kvm/hyp/pgtable.c
> @@ -147,12 +147,19 @@ static inline void kvm_pgtable_walk_end(void)
>  {}
>  
>  #define kvm_dereference_ptep	rcu_dereference_raw
> +
> +static inline void kvm_pgtable_destroy_barrier(void)
> +{}
> +
>  #else
>  #define kvm_pgtable_walk_begin	rcu_read_lock
>  
>  #define kvm_pgtable_walk_end	rcu_read_unlock
>  
>  #define kvm_dereference_ptep	rcu_dereference
> +
> +#define kvm_pgtable_destroy_barrier	rcu_barrier
> +
>  #endif
>  
>  static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops)
> @@ -1063,7 +1070,12 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
>  		childp = kvm_pte_follow(*old, mm_ops);
>  	}
>  
> -	mm_ops->put_page(childp);
> +	/*
> +	 * If we do not have exclusive access to the page tables it is possible
> +	 * the unlinked table remains visible to another thread until the next
> +	 * rcu synchronization.
> +	 */
> +	mm_ops->free_table(childp, shared);
>  	mm_ops->put_page(ptep);
>  
>  	return ret;
> @@ -1203,7 +1215,7 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
>  					       kvm_granule_size(level));
>  
>  	if (childp)
> -		mm_ops->put_page(childp);
> +		mm_ops->free_table(childp, shared);
>  
>  	return 0;
>  }
> @@ -1433,7 +1445,7 @@ static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
>  	mm_ops->put_page(ptep);
>  
>  	if (kvm_pte_table(*old, level))
> -		mm_ops->put_page(kvm_pte_follow(*old, mm_ops));
> +		mm_ops->free_table(kvm_pte_follow(*old, mm_ops), shared);
>  
>  	return 0;
>  }
> @@ -1452,4 +1464,10 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
>  	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
>  	pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
>  	pgt->pgd = NULL;
> +
> +	/*
> +	 * Guarantee that all unlinked subtrees associated with the stage2 page
> +	 * table have also been freed before returning.
> +	 */
> +	kvm_pgtable_destroy_barrier();
>  }
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index cc6ed6b06ec2..6ecf37009c21 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -98,9 +98,50 @@ static bool kvm_is_device_pfn(unsigned long pfn)
>  static void *stage2_memcache_zalloc_page(void *arg)
>  {
>  	struct kvm_mmu_caches *mmu_caches = arg;
> +	struct stage2_page_header *hdr;
> +	void *addr;
>  
>  	/* Allocated with __GFP_ZERO, so no need to zero */
> -	return kvm_mmu_memory_cache_alloc(&mmu_caches->page_cache);
> +	addr = kvm_mmu_memory_cache_alloc(&mmu_caches->page_cache);
> +	if (!addr)
> +		return NULL;
> +
> +	hdr = kvm_mmu_memory_cache_alloc(&mmu_caches->header_cache);
> +	if (!hdr) {
> +		free_page((unsigned long)addr);
> +		return NULL;
> +	}
> +
> +	hdr->page = virt_to_page(addr);
> +	set_page_private(hdr->page, (unsigned long)hdr);
> +	return addr;
> +}
> +
> +static void stage2_free_page_now(struct stage2_page_header *hdr)
> +{
> +	WARN_ON(page_ref_count(hdr->page) != 1);
> +
> +	__free_page(hdr->page);
> +	kmem_cache_free(stage2_page_header_cache, hdr);
> +}
> +
> +static void stage2_free_page_rcu_cb(struct rcu_head *head)
> +{
> +	struct stage2_page_header *hdr = container_of(head, struct stage2_page_header,
> +						      rcu_head);
> +
> +	stage2_free_page_now(hdr);
> +}
> +
> +static void stage2_free_table(void *addr, bool shared)
> +{
> +	struct page *page = virt_to_page(addr);
> +	struct stage2_page_header *hdr = (struct stage2_page_header *)page_private(page);
> +
> +	if (shared)
> +		call_rcu(&hdr->rcu_head, stage2_free_page_rcu_cb);

Can the number of callbacks grow to "dangerous" numbers? Can it be
bounded with something like the following?

if the number of readers is really high:
	synchronize_rcu()
else
	call_rcu()

Maybe the RCU API has an option for that.
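
For concreteness, here is a rough, untested sketch of the kind of bound
I have in mind. The counter and the threshold are made up, and it glosses
over the fact that free_table() is reached from inside the walker, where
synchronize_rcu() would not be legal under kvm_pgtable_walk_begin()'s
rcu_read_lock():

	/* Hypothetical: cap the number of frees queued behind call_rcu(). */
	static atomic_t stage2_pending_table_frees = ATOMIC_INIT(0);
	#define STAGE2_MAX_PENDING_TABLE_FREES	512

	static void stage2_free_page_rcu_cb(struct rcu_head *head)
	{
		struct stage2_page_header *hdr = container_of(head, struct stage2_page_header,
							      rcu_head);

		stage2_free_page_now(hdr);
		atomic_dec(&stage2_pending_table_frees);
	}

	static void stage2_free_table(void *addr, bool shared)
	{
		struct stage2_page_header *hdr;

		hdr = (struct stage2_page_header *)page_private(virt_to_page(addr));

		if (!shared) {
			stage2_free_page_now(hdr);
			return;
		}

		if (atomic_inc_return(&stage2_pending_table_frees) >
		    STAGE2_MAX_PENDING_TABLE_FREES) {
			/*
			 * Too many callbacks already in flight: wait for a
			 * grace period and free synchronously instead.
			 */
			synchronize_rcu();
			stage2_free_page_now(hdr);
			atomic_dec(&stage2_pending_table_frees);
		} else {
			call_rcu(&hdr->rcu_head, stage2_free_page_rcu_cb);
		}
	}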

> +	else
> +		stage2_free_page_now(hdr);
>  }
>  
>  static void *kvm_host_zalloc_pages_exact(size_t size)
> @@ -613,6 +654,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
>  	.free_pages_exact	= free_pages_exact,
>  	.get_page		= kvm_host_get_page,
>  	.put_page		= kvm_host_put_page,
> +	.free_table		= stage2_free_table,
>  	.page_count		= kvm_host_page_count,
>  	.phys_to_virt		= kvm_host_va,
>  	.virt_to_phys		= kvm_host_pa,
> -- 
> 2.36.0.rc0.470.gd361397f0d-goog
> 
