[20/24] kvm: x86/mmu: Add atomic option for setting SPTEs
diff mbox series

Message ID 20210112181041.356734-21-bgardon@google.com
State New, archived
Headers show
Series
  • Allow parallel page faults with TDP MMU
Related show

Commit Message

Ben Gardon Jan. 12, 2021, 6:10 p.m. UTC
In order to allow multiple TDP MMU operations to proceed in parallel,
there must be an option to modify SPTEs atomically so that changes are
not lost. Add that option to __tdp_mmu_set_spte and
__handle_changed_spte.

Reviewed-by: Peter Feiner <pfeiner@google.com>

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 67 ++++++++++++++++++++++++++++++++------
 1 file changed, 57 insertions(+), 10 deletions(-)

Comments

kernel test robot Jan. 13, 2021, 12:05 a.m. UTC | #1
Hi Ben,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.11-rc3 next-20210112]
[cannot apply to kvm/linux-next kvmarm/next kvm-ppc/kvm-ppc-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Ben-Gardon/Allow-parallel-page-faults-with-TDP-MMU/20210113-021817
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git a0d54b4f5b219fb31f0776e9f53aa137e78ae431
config: i386-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/df85de7a1c9a5a1dc43776eb0b31c39c785aed25
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Ben-Gardon/Allow-parallel-page-faults-with-TDP-MMU/20210113-021817
        git checkout df85de7a1c9a5a1dc43776eb0b31c39c785aed25
        # save the attached .config to linux build tree
        make W=1 ARCH=i386 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from arch/x86/include/asm/atomic.h:8,
                    from include/linux/atomic.h:7,
                    from include/linux/cpumask.h:13,
                    from arch/x86/include/asm/cpumask.h:5,
                    from arch/x86/include/asm/msr.h:11,
                    from arch/x86/include/asm/processor.h:22,
                    from arch/x86/include/asm/cpufeature.h:5,
                    from arch/x86/include/asm/thread_info.h:53,
                    from include/linux/thread_info.h:56,
                    from arch/x86/include/asm/preempt.h:7,
                    from include/linux/preempt.h:78,
                    from include/linux/percpu.h:6,
                    from include/linux/context_tracking_state.h:5,
                    from include/linux/hardirq.h:5,
                    from include/linux/kvm_host.h:7,
                    from arch/x86/kvm/mmu.h:5,
                    from arch/x86/kvm/mmu/tdp_mmu.c:3:
   arch/x86/kvm/mmu/tdp_mmu.c: In function 'handle_disconnected_tdp_mmu_page':
>> arch/x86/include/asm/cmpxchg.h:67:4: error: call to '__xchg_wrong_size' declared with attribute error: Bad argument size for xchg
      67 |    __ ## op ## _wrong_size();   \
         |    ^~~~~~~~~~~~~~~~~~~~~~~~~
   arch/x86/include/asm/cmpxchg.h:78:27: note: in expansion of macro '__xchg_op'
      78 | #define arch_xchg(ptr, v) __xchg_op((ptr), (v), xchg, "")
         |                           ^~~~~~~~~
   include/asm-generic/atomic-instrumented.h:1649:2: note: in expansion of macro 'arch_xchg'
    1649 |  arch_xchg(__ai_ptr, __VA_ARGS__); \
         |  ^~~~~~~~~
   arch/x86/kvm/mmu/tdp_mmu.c:353:21: note: in expansion of macro 'xchg'
     353 |    old_child_spte = xchg(sptep, 0);
         |                     ^~~~


vim +/__xchg_wrong_size +67 arch/x86/include/asm/cmpxchg.h

e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  37  
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  38  /* 
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  39   * An exchange-type operation, which takes a value and a pointer, and
7f5281ae8a8e7f86 Li Zhong            2013-04-25  40   * returns the old value.
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  41   */
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  42  #define __xchg_op(ptr, arg, op, lock)					\
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  43  	({								\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  44  	        __typeof__ (*(ptr)) __ret = (arg);			\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  45  		switch (sizeof(*(ptr))) {				\
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  46  		case __X86_CASE_B:					\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  47  			asm volatile (lock #op "b %b0, %1\n"		\
2ca052a3710fac20 Jeremy Fitzhardinge 2012-04-02  48  				      : "+q" (__ret), "+m" (*(ptr))	\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  49  				      : : "memory", "cc");		\
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  50  			break;						\
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  51  		case __X86_CASE_W:					\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  52  			asm volatile (lock #op "w %w0, %1\n"		\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  53  				      : "+r" (__ret), "+m" (*(ptr))	\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  54  				      : : "memory", "cc");		\
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  55  			break;						\
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  56  		case __X86_CASE_L:					\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  57  			asm volatile (lock #op "l %0, %1\n"		\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  58  				      : "+r" (__ret), "+m" (*(ptr))	\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  59  				      : : "memory", "cc");		\
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  60  			break;						\
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  61  		case __X86_CASE_Q:					\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  62  			asm volatile (lock #op "q %q0, %1\n"		\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  63  				      : "+r" (__ret), "+m" (*(ptr))	\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  64  				      : : "memory", "cc");		\
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  65  			break;						\
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  66  		default:						\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30 @67  			__ ## op ## _wrong_size();			\
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  68  		}							\
31a8394e069e47dc Jeremy Fitzhardinge 2011-09-30  69  		__ret;							\
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  70  	})
e9826380d83d1bda Jeremy Fitzhardinge 2011-08-18  71  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Paolo Bonzini Jan. 26, 2021, 2:21 p.m. UTC | #2
On 12/01/21 19:10, Ben Gardon wrote:
>  static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
> -				u64 old_spte, u64 new_spte, int level);
> +				u64 old_spte, u64 new_spte, int level,
> +				bool atomic);

If you don't mind, I prefer "shared" as the name for the new argument 
(i.e. "this is what you need to know", rathar than "this is what I want 
you to do").

> 
> +/*
> + * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically and handle the
> + * associated bookkeeping
> + *
> + * @kvm: kvm instance
> + * @iter: a tdp_iter instance currently on the SPTE that should be set
> + * @new_spte: The value the SPTE should be set to
> + * Returns: true if the SPTE was set, false if it was not. If false is returned,
> + *	    this function will have no side-effects.
> + */
> +static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
> +					   struct tdp_iter *iter,
> +					   u64 new_spte)
> +{
> +	u64 *root_pt = tdp_iter_root_pt(iter);
> +	struct kvm_mmu_page *root = sptep_to_sp(root_pt);
> +	int as_id = kvm_mmu_page_as_id(root);
> +
> +	kvm_mmu_lock_assert_held_shared(kvm);
> +
> +	if (cmpxchg64(iter->sptep, iter->old_spte, new_spte) != iter->old_spte)
> +		return false;
> +
> +	handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
> +			    iter->level, true);
> +
> +	return true;
> +}
> +
> +

Still unused as of this patch, so please move it where it's used.

Note that in this case, "atomic" in the name is appropriate, think of 
hypothetical code like this:

	if (!shared)
		tdp_mmu_set_spte(...)
	else if (!tdp_mmu_set_spte_atomic(...)
		

which says "if there could be concurrent changes, be careful and do 
everything with atomic operations".

Paolo

Patch
diff mbox series

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 264594947c3b..1380ed313476 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -7,6 +7,7 @@ 
 #include "tdp_mmu.h"
 #include "spte.h"
 
+#include <asm/cmpxchg.h>
 #include <trace/events/kvm.h>
 
 #ifdef CONFIG_X86_64
@@ -226,7 +227,8 @@  static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
 }
 
 static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
-				u64 old_spte, u64 new_spte, int level);
+				u64 old_spte, u64 new_spte, int level,
+				bool atomic);
 
 static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
 {
@@ -320,15 +322,19 @@  static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp,
  *
  * @kvm: kvm instance
  * @pt: the page removed from the paging structure
+ * @atomic: Use atomic operations to clear the SPTEs in any disconnected
+ *	    pages of memory.
  *
  * Given a page table that has been removed from the TDP paging structure,
  * iterates through the page table to clear SPTEs and free child page tables.
  */
-static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt)
+static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt,
+					     bool atomic)
 {
 	struct kvm_mmu_page *sp;
 	gfn_t gfn;
 	int level;
+	u64 *sptep;
 	u64 old_child_spte;
 	int i;
 
@@ -341,11 +347,17 @@  static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt)
 	tdp_mmu_unlink_page(kvm, sp, atomic);
 
 	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
-		old_child_spte = READ_ONCE(*(pt + i));
-		WRITE_ONCE(*(pt + i), 0);
+		sptep = pt + i;
+
+		if (atomic) {
+			old_child_spte = xchg(sptep, 0);
+		} else {
+			old_child_spte = READ_ONCE(*sptep);
+			WRITE_ONCE(*sptep, 0);
+		}
 		handle_changed_spte(kvm, kvm_mmu_page_as_id(sp),
 			gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
-			old_child_spte, 0, level - 1);
+			old_child_spte, 0, level - 1, atomic);
 	}
 
 	kvm_flush_remote_tlbs_with_address(kvm, gfn,
@@ -362,12 +374,15 @@  static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt)
  * @old_spte: The value of the SPTE before the change
  * @new_spte: The value of the SPTE after the change
  * @level: the level of the PT the SPTE is part of in the paging structure
+ * @atomic: Use atomic operations to clear the SPTEs in any disconnected
+ *	    pages of memory.
  *
  * Handle bookkeeping that might result from the modification of a SPTE.
  * This function must be called for all TDP SPTE modifications.
  */
 static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
-				u64 old_spte, u64 new_spte, int level)
+				  u64 old_spte, u64 new_spte, int level,
+				  bool atomic)
 {
 	bool was_present = is_shadow_present_pte(old_spte);
 	bool is_present = is_shadow_present_pte(new_spte);
@@ -439,18 +454,50 @@  static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 	 */
 	if (was_present && !was_leaf && (pfn_changed || !is_present))
 		handle_disconnected_tdp_mmu_page(kvm,
-				spte_to_child_pt(old_spte, level));
+				spte_to_child_pt(old_spte, level), atomic);
 }
 
 static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
-				u64 old_spte, u64 new_spte, int level)
+				u64 old_spte, u64 new_spte, int level,
+				bool atomic)
 {
-	__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level);
+	__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level,
+			      atomic);
 	handle_changed_spte_acc_track(old_spte, new_spte, level);
 	handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte,
 				      new_spte, level);
 }
 
+/*
+ * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically and handle the
+ * associated bookkeeping
+ *
+ * @kvm: kvm instance
+ * @iter: a tdp_iter instance currently on the SPTE that should be set
+ * @new_spte: The value the SPTE should be set to
+ * Returns: true if the SPTE was set, false if it was not. If false is returned,
+ *	    this function will have no side-effects.
+ */
+static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
+					   struct tdp_iter *iter,
+					   u64 new_spte)
+{
+	u64 *root_pt = tdp_iter_root_pt(iter);
+	struct kvm_mmu_page *root = sptep_to_sp(root_pt);
+	int as_id = kvm_mmu_page_as_id(root);
+
+	kvm_mmu_lock_assert_held_shared(kvm);
+
+	if (cmpxchg64(iter->sptep, iter->old_spte, new_spte) != iter->old_spte)
+		return false;
+
+	handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
+			    iter->level, true);
+
+	return true;
+}
+
+
 /*
  * __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
  * @kvm: kvm instance
@@ -480,7 +527,7 @@  static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 	WRITE_ONCE(*iter->sptep, new_spte);
 
 	__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
-			      iter->level);
+			      iter->level, false);
 	if (record_acc_track)
 		handle_changed_spte_acc_track(iter->old_spte, new_spte,
 					      iter->level);