linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sean Christopherson <seanjc@google.com>
To: Yu Zhao <yuzhao@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Alistair Popple <apopple@nvidia.com>,
	Anup Patel <anup@brainfault.org>, Ben Gardon <bgardon@google.com>,
	Borislav Petkov <bp@alien8.de>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Chao Peng <chao.p.peng@linux.intel.com>,
	Christophe Leroy <christophe.leroy@csgroup.eu>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Fabiano Rosas <farosas@linux.ibm.com>,
	Gaosheng Cui <cuigaosheng1@huawei.com>,
	Gavin Shan <gshan@redhat.com>, "H. Peter Anvin" <hpa@zytor.com>,
	Ingo Molnar <mingo@redhat.com>, James Morse <james.morse@arm.com>,
	"Jason A. Donenfeld" <Jason@zx2c4.com>,
	Jason Gunthorpe <jgg@ziepe.ca>, Jonathan Corbet <corbet@lwn.net>,
	Marc Zyngier <maz@kernel.org>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Michael Larabel <michael@michaellarabel.com>,
	Mike Rapoport <rppt@kernel.org>,
	Nicholas Piggin <npiggin@gmail.com>,
	Oliver Upton <oliver.upton@linux.dev>,
	Paul Mackerras <paulus@ozlabs.org>, Peter Xu <peterx@redhat.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Suzuki K Poulose <suzuki.poulose@arm.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Thomas Huth <thuth@redhat.com>, Will Deacon <will@kernel.org>,
	Zenghui Yu <yuzenghui@huawei.com>,
	kvmarm@lists.linux.dev, kvm@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linuxppc-dev@lists.ozlabs.org,
	linux-trace-kernel@vger.kernel.org, x86@kernel.org,
	linux-mm@google.com
Subject: Re: [PATCH mm-unstable v2 01/10] mm/kvm: add mmu_notifier_ops->test_clear_young()
Date: Thu, 15 Jun 2023 10:42:57 -0700	[thread overview]
Message-ID: <ZItNoeWriZgLUaon@google.com> (raw)
In-Reply-To: <20230526234435.662652-2-yuzhao@google.com>

On Fri, May 26, 2023, Yu Zhao wrote:
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 0e571e973bc2..374262545f96 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -258,6 +258,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
>  #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
>  struct kvm_gfn_range {
>  	struct kvm_memory_slot *slot;
> +	void *args;

There's no reason to make this "void *", just declare "struct test_clear_young_args"
in the header.  Arch code won't be able to use it regardless.  And I vote for
something more like "test_clear_young_metadata", as there's far more information
in there than just function arguments.

And to stave off the argument that "void *" would allow reuse, take this opportunity
to unionize the test_clear_young field with the change_pte field, e.g.

	/* comment about these fields being callback specific. */
	union {
		struct test_clear_young_metadata *metadata;
		pte_t pte;
		unsigned long callback_arg; /* needs a better name */
	};

> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 51e4882d0873..31ee58754b19 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -541,6 +541,7 @@ typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
>  typedef void (*on_unlock_fn_t)(struct kvm *kvm);
>  
>  struct kvm_hva_range {
> +	void *args;

Same feedback as kvm_gfn_range.

>  	unsigned long start;
>  	unsigned long end;
>  	pte_t pte;
> @@ -549,6 +550,7 @@ struct kvm_hva_range {
>  	on_unlock_fn_t on_unlock;
>  	bool flush_on_ret;
>  	bool may_block;
> +	bool lockless;
>  };
>  
>  /*
> @@ -602,6 +604,8 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
>  			hva_end = min(range->end, slot->userspace_addr +
>  						  (slot->npages << PAGE_SHIFT));
>  
> +			gfn_range.args = range->args;

And this goes away because the generic callback_arg is what gets propagated.

> +
>  			/*
>  			 * To optimize for the likely case where the address
>  			 * range is covered by zero or one memslots, don't
> @@ -619,7 +623,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
>  			gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot);
>  			gfn_range.slot = slot;
>  
> -			if (!locked) {
> +			if (!range->lockless && !locked) {
>  				locked = true;
>  				KVM_MMU_LOCK(kvm);
>  				if (!IS_KVM_NULL_FN(range->on_lock))
> @@ -628,6 +632,9 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
>  					break;
>  			}
>  			ret |= range->handler(kvm, &gfn_range);
> +
> +			if (range->lockless && ret)

I don't like overloading "lockless" to also mean "stop on ret".  Just add another
flag, there's literally no cost for most callbacks as everything is constant at
compile time and gets optimized away.

> +		range.args = &args;
> +		range.lockless = true;

The lockless and stop_on_ret behavior needs comments.

> +		range.handler = kvm_arch_test_clear_young;
> +
> +		if (!__kvm_handle_hva_range(kvm, &range))
> +			return args.young ? MMU_NOTIFIER_RANGE_LOCKLESS : 0;
> +	}
> +
> +	if (bitmap)
> +		return 0;
> +
> +	range.args = NULL;
> +	range.lockless = false;

No need to manually clear these, they'll be zeroed by the initialization code.

E.g. all in all, something like so

---
 include/linux/kvm_host.h |  9 +++++++--
 virt/kvm/kvm_main.c      | 29 +++++++++++++++++------------
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7a0922cbc36f..e04605061f5e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -256,12 +256,17 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
 #ifdef KVM_ARCH_WANT_MMU_NOTIFIER
+struct test_clear_young_metadata;
+
 struct kvm_gfn_range {
 	struct kvm_memory_slot *slot;
-	void *args;
 	gfn_t start;
 	gfn_t end;
-	pte_t pte;
+	union {
+		struct test_clear_young_metadata *metadata;
+		pte_t pte;
+		unsigned long callback_arg;
+	};
 	bool may_block;
 };
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ac83cfb30771..8cf4fee9cd8b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -536,16 +536,20 @@ typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
 typedef void (*on_unlock_fn_t)(struct kvm *kvm);
 
 struct kvm_hva_range {
-	void *args;
 	unsigned long start;
 	unsigned long end;
-	pte_t pte;
 	hva_handler_t handler;
+	union {
+		struct test_clear_young_metadata *metadata;
+		pte_t pte;
+		unsigned long callback_arg;
+	};
 	on_lock_fn_t on_lock;
 	on_unlock_fn_t on_unlock;
 	bool flush_on_ret;
 	bool may_block;
 	bool lockless;
+	bool stop_on_ret;
 };
 
 /*
@@ -576,6 +580,9 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 	struct kvm_memslots *slots;
 	int i, idx;
 
+	BUILD_BUG_ON(sizeof(gfn_range.callback_arg) != sizeof(gfn_range.pte) ||
+		     sizeof(gfn_range.callback_arg) != sizeof(gfn_range.metadata));
+
 	if (WARN_ON_ONCE(range->end <= range->start))
 		return 0;
 
@@ -599,16 +606,14 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 			hva_end = min(range->end, slot->userspace_addr +
 						  (slot->npages << PAGE_SHIFT));
 
-			gfn_range.args = range->args;
-
 			/*
 			 * To optimize for the likely case where the address
 			 * range is covered by zero or one memslots, don't
 			 * bother making these conditional (to avoid writes on
 			 * the second or later invocation of the handler).
 			 */
-			gfn_range.pte = range->pte;
 			gfn_range.may_block = range->may_block;
+			gfn_range.callback_arg = range->callback_arg;
 
 			/*
 			 * {gfn(page) | page intersects with [hva_start, hva_end)} =
@@ -628,7 +633,8 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 			}
 			ret |= range->handler(kvm, &gfn_range);
 
-			if (range->lockless && ret)
+			/* comment goes here. */
+			if (range->stop_on_ret && ret)
 				break;
 		}
 	}
@@ -830,7 +836,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 	return kvm_handle_hva_range(mn, start, end, __pte(0), kvm_age_gfn);
 }
 
-struct test_clear_young_args {
+struct test_clear_young_metadata {
 	unsigned long *bitmap;
 	unsigned long end;
 	bool clear;
@@ -839,7 +845,7 @@ struct test_clear_young_args {
 
 bool kvm_should_clear_young(struct kvm_gfn_range *range, gfn_t gfn)
 {
-	struct test_clear_young_args *args = range->args;
+	struct test_clear_young_metadata *args = range->metadata;
 
 	VM_WARN_ON_ONCE(gfn < range->start || gfn >= range->end);
 
@@ -880,14 +886,15 @@ static int kvm_mmu_notifier_test_clear_young(struct mmu_notifier *mn, struct mm_
 	trace_kvm_age_hva(start, end);
 
 	if (kvm_test_clear_young) {
-		struct test_clear_young_args args = {
+		struct test_clear_young_metadata args = {
 			.bitmap	= bitmap,
 			.end	= end,
 			.clear	= clear,
 		};
 
-		range.args = &args;
 		range.lockless = true;
+		range.stop_on_ret = true;
+		range.metadata = &args;
 		range.handler = kvm_test_clear_young;
 
 		if (!__kvm_handle_hva_range(kvm, &range))
@@ -897,8 +904,6 @@ static int kvm_mmu_notifier_test_clear_young(struct mmu_notifier *mn, struct mm_
 	if (bitmap)
 		return 0;
 
-	range.args = NULL;
-	range.lockless = false;
 	range.handler = clear ? kvm_age_gfn : kvm_test_age_gfn;
 
 	return __kvm_handle_hva_range(kvm, &range) ? MMU_NOTIFIER_RANGE_YOUNG : 0;

base-commit: 7a5d8be2c18415b73b9380741095f439d6983a40
-- 


  parent reply	other threads:[~2023-06-15 17:43 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-26 23:44 [PATCH mm-unstable v2 00/10] mm/kvm: locklessly clear the accessed bit Yu Zhao
2023-05-26 23:44 ` [PATCH mm-unstable v2 01/10] mm/kvm: add mmu_notifier_ops->test_clear_young() Yu Zhao
2023-06-06  8:34   ` Tzung-Bi Shih
2023-06-09  1:00     ` Yu Zhao
     [not found]   ` <ZHedMX470b7EMwbe@ziepe.ca>
2023-06-09  9:04     ` Paolo Bonzini
2023-06-15 17:42   ` Sean Christopherson [this message]
2023-06-20  7:30   ` Nicholas Piggin
2023-05-26 23:44 ` [PATCH mm-unstable v2 02/10] mm/kvm: use mmu_notifier_ops->test_clear_young() Yu Zhao
2023-05-26 23:44 ` [PATCH mm-unstable v2 03/10] kvm/arm64: export stage2_try_set_pte() and macros Yu Zhao
2023-05-26 23:44 ` [PATCH mm-unstable v2 04/10] kvm/arm64: make stage2 page tables RCU safe Yu Zhao
2023-05-27 18:08   ` Oliver Upton
2023-05-27 20:13     ` Yu Zhao
2023-05-30 19:37       ` Oliver Upton
2023-05-30 20:06         ` Yu Zhao
     [not found]           ` <ZHef0VsZvZ1Vnz0u@linux.dev>
2023-05-31 23:10             ` Yu Zhao
2023-05-31 23:22               ` Oliver Upton
2023-05-31 23:41                 ` Yu Zhao
2023-05-26 23:44 ` [PATCH mm-unstable v2 05/10] kvm/arm64: add kvm_arch_test_clear_young() Yu Zhao
2023-05-26 23:44 ` [PATCH mm-unstable v2 06/10] kvm/powerpc: make radix page tables RCU safe Yu Zhao
2023-06-20  6:32   ` Nicholas Piggin
2023-06-20  8:00     ` Yu Zhao
2023-06-20 10:49       ` Nicholas Piggin
2023-05-26 23:44 ` [PATCH mm-unstable v2 07/10] kvm/powerpc: add kvm_arch_test_clear_young() Yu Zhao
2023-06-20  7:47   ` Nicholas Piggin
2023-06-21  0:38     ` Yu Zhao
2023-06-21  2:51       ` Nicholas Piggin
2023-05-26 23:44 ` [PATCH mm-unstable v2 08/10] kvm/x86: move tdp_mmu_enabled and shadow_accessed_mask Yu Zhao
2023-06-15 16:59   ` Sean Christopherson
2023-05-26 23:44 ` [PATCH mm-unstable v2 09/10] kvm/x86: add kvm_arch_test_clear_young() Yu Zhao
2023-06-09  9:06   ` Paolo Bonzini
2023-06-15 18:26   ` Sean Christopherson
2023-05-26 23:44 ` [PATCH mm-unstable v2 10/10] mm: multi-gen LRU: use mmu_notifier_test_clear_young() Yu Zhao
2023-06-09  0:59 ` kvm/arm64: Spark benchmark Yu Zhao
2023-06-09 13:04   ` Marc Zyngier
2023-06-18 20:11     ` Yu Zhao
2023-06-09  0:59 ` kvm/powerpc: memcached benchmark Yu Zhao
2023-06-09  0:59 ` kvm/x86: multichase benchmark Yu Zhao
2023-06-18 19:19   ` Yu Zhao
2023-06-09  9:07 ` [PATCH mm-unstable v2 00/10] mm/kvm: locklessly clear the accessed bit Paolo Bonzini
2023-06-20  2:19   ` Yu Zhao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZItNoeWriZgLUaon@google.com \
    --to=seanjc@google.com \
    --cc=Jason@zx2c4.com \
    --cc=akpm@linux-foundation.org \
    --cc=anup@brainfault.org \
    --cc=apopple@nvidia.com \
    --cc=bgardon@google.com \
    --cc=bp@alien8.de \
    --cc=catalin.marinas@arm.com \
    --cc=chao.p.peng@linux.intel.com \
    --cc=christophe.leroy@csgroup.eu \
    --cc=corbet@lwn.net \
    --cc=cuigaosheng1@huawei.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=farosas@linux.ibm.com \
    --cc=gshan@redhat.com \
    --cc=hpa@zytor.com \
    --cc=james.morse@arm.com \
    --cc=jgg@ziepe.ca \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.linux.dev \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@google.com \
    --cc=linux-mm@kvack.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=maz@kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=michael@michaellarabel.com \
    --cc=mingo@redhat.com \
    --cc=mpe@ellerman.id.au \
    --cc=npiggin@gmail.com \
    --cc=oliver.upton@linux.dev \
    --cc=paulus@ozlabs.org \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=rostedt@goodmis.org \
    --cc=rppt@kernel.org \
    --cc=suzuki.poulose@arm.com \
    --cc=tglx@linutronix.de \
    --cc=thuth@redhat.com \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    --cc=yuzenghui@huawei.com \
    --cc=yuzhao@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).