All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
@ 2018-09-13 20:52 Philip Yang
       [not found] ` <1536871954-8451-1-git-send-email-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Philip Yang @ 2018-09-13 20:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Philip Yang

Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
DRM_AMDGPU_USERPTR Kconfig.

It supports both KFD userptr and gfx userptr paths.

This depends on several HMM patchset from Jérôme Glisse queued for
upstream.

Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
 drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121 ++++++++++++++-------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
 4 files changed, 56 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 9221e54..960a633 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
 config DRM_AMDGPU_USERPTR
 	bool "Always enable userptr write support"
 	depends on DRM_AMDGPU
-	select MMU_NOTIFIER
+	select HMM_MIRROR
 	help
-	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
-	  selected to enabled full userptr support.
+	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
+	  isn't already selected to enabled full userptr support.
 
 config DRM_AMDGPU_GART_DEBUGFS
 	bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 138cb78..c1e5d43 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -171,7 +171,7 @@ endif
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
 amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
 
 include $(FULL_AMD_PATH)/powerplay/Makefile
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e55508b..ad52f34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,7 +45,7 @@
 
 #include <linux/firmware.h>
 #include <linux/module.h>
-#include <linux/mmu_notifier.h>
+#include <linux/hmm.h>
 #include <linux/interval_tree.h>
 #include <drm/drmP.h>
 #include <drm/drm.h>
@@ -66,6 +66,7 @@
  * @objects: interval tree containing amdgpu_mn_nodes
  * @read_lock: mutex for recursive locking of @lock
  * @recursion: depth of recursion
+ * @mirror: HMM mirror function support
  *
  * Data for each amdgpu device and process address space.
  */
@@ -73,7 +74,6 @@ struct amdgpu_mn {
 	/* constant after initialisation */
 	struct amdgpu_device	*adev;
 	struct mm_struct	*mm;
-	struct mmu_notifier	mn;
 	enum amdgpu_mn_type	type;
 
 	/* only used on destruction */
@@ -87,6 +87,9 @@ struct amdgpu_mn {
 	struct rb_root_cached	objects;
 	struct mutex		read_lock;
 	atomic_t		recursion;
+
+	/* HMM mirror */
+	struct hmm_mirror	mirror;
 };
 
 /**
@@ -103,7 +106,7 @@ struct amdgpu_mn_node {
 };
 
 /**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
  *
  * @work: previously sheduled work item
  *
@@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 	}
 	up_write(&amn->lock);
 	mutex_unlock(&adev->mn_lock);
-	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+	hmm_mirror_unregister(&amn->mirror);
+
 	kfree(amn);
 }
 
 /**
  * amdgpu_mn_release - callback to notify about mm destruction
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
  *
- * Shedule a work item to lazy destroy our notifier.
+ * Shedule a work item to lazy destroy HMM mirror.
  */
-static void amdgpu_mn_release(struct mmu_notifier *mn,
-			      struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
 
 	INIT_WORK(&amn->work, amdgpu_mn_destroy);
 	schedule_work(&amn->work);
 }
 
-
 /**
  * amdgpu_mn_lock - take the write side lock for this notifier
  *
@@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 /**
  * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
  *
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
-						 struct mm_struct *mm,
-						 unsigned long start,
-						 unsigned long end,
-						 bool blockable)
+static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
 
 	/* notification is exclusive, but interval is inclusive */
@@ -278,28 +277,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 		amdgpu_mn_invalidate_node(node, start, end);
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
 /**
  * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
  *
  * We temporarily evict all BOs between start and end. This
  * necessitates evicting all user-mode queues of the process. The BOs
  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
  */
-static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
-						 struct mm_struct *mm,
-						 unsigned long start,
-						 unsigned long end,
-						 bool blockable)
+static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
 
 	/* notification is exclusive, but interval is inclusive */
@@ -326,59 +325,41 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 
 			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
 							 start, end))
-				amdgpu_amdkfd_evict_userptr(mem, mm);
+				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
 		}
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
-/**
- * amdgpu_mn_invalidate_range_end - callback to notify about mm change
- *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
- *
- * Release the lock again to allow new command submissions.
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
  */
-static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
-					   struct mm_struct *mm,
-					   unsigned long start,
-					   unsigned long end)
-{
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
-
-	amdgpu_mn_read_unlock(amn);
-}
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
 
-static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
 	[AMDGPU_MN_TYPE_GFX] = {
-		.release = amdgpu_mn_release,
-		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
-		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+		.sync_cpu_device_pagetables =
+				amdgpu_mn_invalidate_range_start_gfx,
+		.release = amdgpu_hmm_mirror_release
 	},
 	[AMDGPU_MN_TYPE_HSA] = {
-		.release = amdgpu_mn_release,
-		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
-		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+		.sync_cpu_device_pagetables =
+				amdgpu_mn_invalidate_range_start_hsa,
+		.release = amdgpu_hmm_mirror_release
 	},
 };
 
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
 /**
- * amdgpu_mn_get - create notifier context
+ * amdgpu_mn_get - create HMM mirror context
  *
  * @adev: amdgpu device pointer
  * @type: type of MMU notifier context
  *
- * Creates a notifier context for current->mm.
+ * Creates a HMM mirror context for current->mm.
  */
 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 				enum amdgpu_mn_type type)
@@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 	amn->mm = mm;
 	init_rwsem(&amn->lock);
 	amn->type = type;
-	amn->mn.ops = &amdgpu_mn_ops[type];
 	amn->objects = RB_ROOT_CACHED;
 	mutex_init(&amn->read_lock);
 	atomic_set(&amn->recursion, 0);
 
-	r = __mmu_notifier_register(&amn->mn, mm);
+	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
+	r = hmm_mirror_register(&amn->mirror, mm);
 	if (r)
 		goto free_amn;
 
@@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
  * @bo: amdgpu buffer object
  * @addr: userptr addr we should monitor
  *
- * Registers an MMU notifier for the given BO at the specified address.
+ * Registers an HMM mirror for the given BO at the specified address.
  * Returns 0 on success, -ERRNO if anything goes wrong.
  */
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 }
 
 /**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
  *
  * @bo: amdgpu buffer object
  *
- * Remove any registration of MMU notifier updates from the buffer object.
+ * Remove any registration of HMM mirror updates from the buffer object.
  */
 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432..0e27526 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -34,7 +34,7 @@ enum amdgpu_mn_type {
 	AMDGPU_MN_TYPE_HSA,
 };
 
-#if defined(CONFIG_MMU_NOTIFIER)
+#if defined(CONFIG_HMM)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found] ` <1536871954-8451-1-git-send-email-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-13 21:51   ` Felix Kuehling
       [not found]     ` <9d6717ac-23f0-7beb-6e41-58c6e32acdf8-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2018-09-13 21:51 UTC (permalink / raw)
  To: Philip Yang, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Christian König

On 2018-09-13 04:52 PM, Philip Yang wrote:
> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
> DRM_AMDGPU_USERPTR Kconfig.
>
> It supports both KFD userptr and gfx userptr paths.
>
> This depends on several HMM patchset from Jérôme Glisse queued for
> upstream.
>
> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>  drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121 ++++++++++++++-------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>  4 files changed, 56 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
> index 9221e54..960a633 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>  config DRM_AMDGPU_USERPTR
>  	bool "Always enable userptr write support"
>  	depends on DRM_AMDGPU
> -	select MMU_NOTIFIER
> +	select HMM_MIRROR
>  	help
> -	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
> -	  selected to enabled full userptr support.
> +	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
> +	  isn't already selected to enabled full userptr support.
>  
>  config DRM_AMDGPU_GART_DEBUGFS
>  	bool "Allow GART access through debugfs"
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 138cb78..c1e5d43 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -171,7 +171,7 @@ endif
>  amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>  amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>  amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>  
>  include $(FULL_AMD_PATH)/powerplay/Makefile
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index e55508b..ad52f34 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -45,7 +45,7 @@
>  
>  #include <linux/firmware.h>
>  #include <linux/module.h>
> -#include <linux/mmu_notifier.h>
> +#include <linux/hmm.h>
>  #include <linux/interval_tree.h>
>  #include <drm/drmP.h>
>  #include <drm/drm.h>
> @@ -66,6 +66,7 @@

Need to remove @mn documentation.

>   * @objects: interval tree containing amdgpu_mn_nodes
>   * @read_lock: mutex for recursive locking of @lock
>   * @recursion: depth of recursion
> + * @mirror: HMM mirror function support
>   *
>   * Data for each amdgpu device and process address space.
>   */
> @@ -73,7 +74,6 @@ struct amdgpu_mn {
>  	/* constant after initialisation */
>  	struct amdgpu_device	*adev;
>  	struct mm_struct	*mm;
> -	struct mmu_notifier	mn;
>  	enum amdgpu_mn_type	type;
>  
>  	/* only used on destruction */
> @@ -87,6 +87,9 @@ struct amdgpu_mn {
>  	struct rb_root_cached	objects;
>  	struct mutex		read_lock;
>  	atomic_t		recursion;
> +
> +	/* HMM mirror */
> +	struct hmm_mirror	mirror;
>  };
>  
>  /**
> @@ -103,7 +106,7 @@ struct amdgpu_mn_node {
>  };
>  
>  /**
> - * amdgpu_mn_destroy - destroy the MMU notifier
> + * amdgpu_mn_destroy - destroy the HMM mirror
>   *
>   * @work: previously sheduled work item
>   *
> @@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
>  	}
>  	up_write(&amn->lock);
>  	mutex_unlock(&adev->mn_lock);
> -	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
> +	hmm_mirror_unregister(&amn->mirror);
> +
>  	kfree(amn);
>  }
>  
>  /**
>   * amdgpu_mn_release - callback to notify about mm destruction

Update the function name in the comment.

>   *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> + * @mirror: the HMM mirror (mm) this callback is about
>   *
> - * Shedule a work item to lazy destroy our notifier.
> + * Shedule a work item to lazy destroy HMM mirror.
>   */
> -static void amdgpu_mn_release(struct mmu_notifier *mn,
> -			      struct mm_struct *mm)
> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>  {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>  
>  	INIT_WORK(&amn->work, amdgpu_mn_destroy);
>  	schedule_work(&amn->work);
>  }
>  
> -
>  /**
>   * amdgpu_mn_lock - take the write side lock for this notifier
>   *
> @@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>  /**
>   * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
>   *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> + * @mirror: the hmm_mirror (mm) is about to update
> + * @update: the update start, end address
>   *
>   * Block for operations on BOs to finish and mark pages as accessed and
>   * potentially dirty.
>   */
> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
> -						 struct mm_struct *mm,
> -						 unsigned long start,
> -						 unsigned long end,
> -						 bool blockable)
> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror *mirror,
> +			const struct hmm_update *update)
>  {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
> +	unsigned long start = update->start;
> +	unsigned long end = update->end;
> +	bool blockable = update->blockable;
>  	struct interval_tree_node *it;
>  
>  	/* notification is exclusive, but interval is inclusive */
> @@ -278,28 +277,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>  		amdgpu_mn_invalidate_node(node, start, end);
>  	}
>  
> +	amdgpu_mn_read_unlock(amn);
> +

amdgpu_mn_read_lock/unlock support recursive locking for multiple
overlapping or nested invalidation ranges. But if you'r locking and
unlocking in the same function. Is that still a concern?

>  	return 0;
>  }
>  
>  /**
>   * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
>   *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> + * @mirror: the hmm_mirror (mm) is about to update
> + * @update: the update start, end address
>   *
>   * We temporarily evict all BOs between start and end. This
>   * necessitates evicting all user-mode queues of the process. The BOs
>   * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>   */
> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
> -						 struct mm_struct *mm,
> -						 unsigned long start,
> -						 unsigned long end,
> -						 bool blockable)
> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror *mirror,
> +			const struct hmm_update *update)
>  {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
> +	unsigned long start = update->start;
> +	unsigned long end = update->end;
> +	bool blockable = update->blockable;
>  	struct interval_tree_node *it;
>  
>  	/* notification is exclusive, but interval is inclusive */
> @@ -326,59 +325,41 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>  
>  			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>  							 start, end))
> -				amdgpu_amdkfd_evict_userptr(mem, mm);
> +				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>  		}
>  	}
>  
> +	amdgpu_mn_read_unlock(amn);
> +
>  	return 0;
>  }
>  
> -/**
> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
> - *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> - *
> - * Release the lock again to allow new command submissions.
> +/* Low bits of any reasonable mm pointer will be unused due to struct
> + * alignment. Use these bits to make a unique key from the mm pointer
> + * and notifier type.
>   */
> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
> -					   struct mm_struct *mm,
> -					   unsigned long start,
> -					   unsigned long end)
> -{
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> -
> -	amdgpu_mn_read_unlock(amn);
> -}
> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>  
> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>  	[AMDGPU_MN_TYPE_GFX] = {
> -		.release = amdgpu_mn_release,
> -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
> +		.sync_cpu_device_pagetables =
> +				amdgpu_mn_invalidate_range_start_gfx,
> +		.release = amdgpu_hmm_mirror_release
>  	},
>  	[AMDGPU_MN_TYPE_HSA] = {
> -		.release = amdgpu_mn_release,
> -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
> +		.sync_cpu_device_pagetables =
> +				amdgpu_mn_invalidate_range_start_hsa,
> +		.release = amdgpu_hmm_mirror_release
>  	},
>  };
>  
> -/* Low bits of any reasonable mm pointer will be unused due to struct
> - * alignment. Use these bits to make a unique key from the mm pointer
> - * and notifier type.
> - */
> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
> -
>  /**
> - * amdgpu_mn_get - create notifier context
> + * amdgpu_mn_get - create HMM mirror context
>   *
>   * @adev: amdgpu device pointer
>   * @type: type of MMU notifier context
>   *
> - * Creates a notifier context for current->mm.
> + * Creates a HMM mirror context for current->mm.
>   */
>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>  				enum amdgpu_mn_type type)
> @@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>  	amn->mm = mm;
>  	init_rwsem(&amn->lock);
>  	amn->type = type;
> -	amn->mn.ops = &amdgpu_mn_ops[type];
>  	amn->objects = RB_ROOT_CACHED;
>  	mutex_init(&amn->read_lock);
>  	atomic_set(&amn->recursion, 0);
>  
> -	r = __mmu_notifier_register(&amn->mn, mm);
> +	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
> +	r = hmm_mirror_register(&amn->mirror, mm);
>  	if (r)
>  		goto free_amn;
>  
> @@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>   * @bo: amdgpu buffer object
>   * @addr: userptr addr we should monitor
>   *
> - * Registers an MMU notifier for the given BO at the specified address.
> + * Registers an HMM mirror for the given BO at the specified address.
>   * Returns 0 on success, -ERRNO if anything goes wrong.
>   */
>  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
> @@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>  }
>  
>  /**
> - * amdgpu_mn_unregister - unregister a BO for notifier updates
> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>   *
>   * @bo: amdgpu buffer object
>   *
> - * Remove any registration of MMU notifier updates from the buffer object.
> + * Remove any registration of HMM mirror updates from the buffer object.
>   */
>  void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>  {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> index eb0f432..0e27526 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>  	AMDGPU_MN_TYPE_HSA,
>  };
>  
> -#if defined(CONFIG_MMU_NOTIFIER)
> +#if defined(CONFIG_HMM)
>  void amdgpu_mn_lock(struct amdgpu_mn *mn);
>  void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]     ` <9d6717ac-23f0-7beb-6e41-58c6e32acdf8-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-14  7:51       ` Christian König
       [not found]         ` <58bc3bb9-b7b1-a32f-e355-c78a23d95215-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2018-09-14  7:51 UTC (permalink / raw)
  To: Felix Kuehling, Philip Yang, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Christian König

Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
> On 2018-09-13 04:52 PM, Philip Yang wrote:
>> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>> DRM_AMDGPU_USERPTR Kconfig.
>>
>> It supports both KFD userptr and gfx userptr paths.
>>
>> This depends on several HMM patchset from Jérôme Glisse queued for
>> upstream.
>>
>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>>   drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121 ++++++++++++++-------------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>   4 files changed, 56 insertions(+), 75 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> index 9221e54..960a633 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>   config DRM_AMDGPU_USERPTR
>>   	bool "Always enable userptr write support"
>>   	depends on DRM_AMDGPU
>> -	select MMU_NOTIFIER
>> +	select HMM_MIRROR
>>   	help
>> -	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
>> -	  selected to enabled full userptr support.
>> +	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>> +	  isn't already selected to enabled full userptr support.
>>   
>>   config DRM_AMDGPU_GART_DEBUGFS
>>   	bool "Allow GART access through debugfs"
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 138cb78..c1e5d43 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -171,7 +171,7 @@ endif
>>   amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>   amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>   amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>>   
>>   include $(FULL_AMD_PATH)/powerplay/Makefile
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index e55508b..ad52f34 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -45,7 +45,7 @@
>>   
>>   #include <linux/firmware.h>
>>   #include <linux/module.h>
>> -#include <linux/mmu_notifier.h>
>> +#include <linux/hmm.h>
>>   #include <linux/interval_tree.h>
>>   #include <drm/drmP.h>
>>   #include <drm/drm.h>
>> @@ -66,6 +66,7 @@
> Need to remove @mn documentation.
>
>>    * @objects: interval tree containing amdgpu_mn_nodes
>>    * @read_lock: mutex for recursive locking of @lock
>>    * @recursion: depth of recursion
>> + * @mirror: HMM mirror function support
>>    *
>>    * Data for each amdgpu device and process address space.
>>    */
>> @@ -73,7 +74,6 @@ struct amdgpu_mn {
>>   	/* constant after initialisation */
>>   	struct amdgpu_device	*adev;
>>   	struct mm_struct	*mm;
>> -	struct mmu_notifier	mn;
>>   	enum amdgpu_mn_type	type;
>>   
>>   	/* only used on destruction */
>> @@ -87,6 +87,9 @@ struct amdgpu_mn {
>>   	struct rb_root_cached	objects;
>>   	struct mutex		read_lock;
>>   	atomic_t		recursion;
>> +
>> +	/* HMM mirror */
>> +	struct hmm_mirror	mirror;
>>   };
>>   
>>   /**
>> @@ -103,7 +106,7 @@ struct amdgpu_mn_node {
>>   };
>>   
>>   /**
>> - * amdgpu_mn_destroy - destroy the MMU notifier
>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>    *
>>    * @work: previously sheduled work item
>>    *
>> @@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
>>   	}
>>   	up_write(&amn->lock);
>>   	mutex_unlock(&adev->mn_lock);
>> -	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>> +	hmm_mirror_unregister(&amn->mirror);
>> +
>>   	kfree(amn);
>>   }
>>   
>>   /**
>>    * amdgpu_mn_release - callback to notify about mm destruction
> Update the function name in the comment.
>
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> + * @mirror: the HMM mirror (mm) this callback is about
>>    *
>> - * Shedule a work item to lazy destroy our notifier.
>> + * Shedule a work item to lazy destroy HMM mirror.
>>    */
>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>> -			      struct mm_struct *mm)
>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>   {
>> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>>   
>>   	INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>   	schedule_work(&amn->work);
>>   }
>>   
>> -
>>   /**
>>    * amdgpu_mn_lock - take the write side lock for this notifier
>>    *
>> @@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>>   /**
>>    * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>>    *
>>    * Block for operations on BOs to finish and mark pages as accessed and
>>    * potentially dirty.
>>    */
>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>> -						 struct mm_struct *mm,
>> -						 unsigned long start,
>> -						 unsigned long end,
>> -						 bool blockable)
>> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror *mirror,
>> +			const struct hmm_update *update)
>>   {
>> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>> +	unsigned long start = update->start;
>> +	unsigned long end = update->end;
>> +	bool blockable = update->blockable;
>>   	struct interval_tree_node *it;
>>   
>>   	/* notification is exclusive, but interval is inclusive */
>> @@ -278,28 +277,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>   		amdgpu_mn_invalidate_node(node, start, end);
>>   	}
>>   
>> +	amdgpu_mn_read_unlock(amn);
>> +
> amdgpu_mn_read_lock/unlock support recursive locking for multiple
> overlapping or nested invalidation ranges. But if you'r locking and
> unlocking in the same function. Is that still a concern?

Well the real problem is that unlocking them here won't work.

We need to hold the lock until we are sure that the operation which 
updates the page tables is completed.

Christian.

>
>>   	return 0;
>>   }
>>   
>>   /**
>>    * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>>    *
>>    * We temporarily evict all BOs between start and end. This
>>    * necessitates evicting all user-mode queues of the process. The BOs
>>    * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>    */
>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>> -						 struct mm_struct *mm,
>> -						 unsigned long start,
>> -						 unsigned long end,
>> -						 bool blockable)
>> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror *mirror,
>> +			const struct hmm_update *update)
>>   {
>> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>> +	unsigned long start = update->start;
>> +	unsigned long end = update->end;
>> +	bool blockable = update->blockable;
>>   	struct interval_tree_node *it;
>>   
>>   	/* notification is exclusive, but interval is inclusive */
>> @@ -326,59 +325,41 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>   
>>   			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>   							 start, end))
>> -				amdgpu_amdkfd_evict_userptr(mem, mm);
>> +				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>   		}
>>   	}
>>   
>> +	amdgpu_mn_read_unlock(amn);
>> +
>>   	return 0;
>>   }
>>   
>> -/**
>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>> - *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> - *
>> - * Release the lock again to allow new command submissions.
>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>> + * alignment. Use these bits to make a unique key from the mm pointer
>> + * and notifier type.
>>    */
>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>> -					   struct mm_struct *mm,
>> -					   unsigned long start,
>> -					   unsigned long end)
>> -{
>> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> -
>> -	amdgpu_mn_read_unlock(amn);
>> -}
>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>   
>> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>   	[AMDGPU_MN_TYPE_GFX] = {
>> -		.release = amdgpu_mn_release,
>> -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
>> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> +		.sync_cpu_device_pagetables =
>> +				amdgpu_mn_invalidate_range_start_gfx,
>> +		.release = amdgpu_hmm_mirror_release
>>   	},
>>   	[AMDGPU_MN_TYPE_HSA] = {
>> -		.release = amdgpu_mn_release,
>> -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
>> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> +		.sync_cpu_device_pagetables =
>> +				amdgpu_mn_invalidate_range_start_hsa,
>> +		.release = amdgpu_hmm_mirror_release
>>   	},
>>   };
>>   
>> -/* Low bits of any reasonable mm pointer will be unused due to struct
>> - * alignment. Use these bits to make a unique key from the mm pointer
>> - * and notifier type.
>> - */
>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>> -
>>   /**
>> - * amdgpu_mn_get - create notifier context
>> + * amdgpu_mn_get - create HMM mirror context
>>    *
>>    * @adev: amdgpu device pointer
>>    * @type: type of MMU notifier context
>>    *
>> - * Creates a notifier context for current->mm.
>> + * Creates a HMM mirror context for current->mm.
>>    */
>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>   				enum amdgpu_mn_type type)
>> @@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>   	amn->mm = mm;
>>   	init_rwsem(&amn->lock);
>>   	amn->type = type;
>> -	amn->mn.ops = &amdgpu_mn_ops[type];
>>   	amn->objects = RB_ROOT_CACHED;
>>   	mutex_init(&amn->read_lock);
>>   	atomic_set(&amn->recursion, 0);
>>   
>> -	r = __mmu_notifier_register(&amn->mn, mm);
>> +	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>> +	r = hmm_mirror_register(&amn->mirror, mm);
>>   	if (r)
>>   		goto free_amn;
>>   
>> @@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>    * @bo: amdgpu buffer object
>>    * @addr: userptr addr we should monitor
>>    *
>> - * Registers an MMU notifier for the given BO at the specified address.
>> + * Registers an HMM mirror for the given BO at the specified address.
>>    * Returns 0 on success, -ERRNO if anything goes wrong.
>>    */
>>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>> @@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>   }
>>   
>>   /**
>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>    *
>>    * @bo: amdgpu buffer object
>>    *
>> - * Remove any registration of MMU notifier updates from the buffer object.
>> + * Remove any registration of HMM mirror updates from the buffer object.
>>    */
>>   void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>   {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> index eb0f432..0e27526 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>   	AMDGPU_MN_TYPE_HSA,
>>   };
>>   
>> -#if defined(CONFIG_MMU_NOTIFIER)
>> +#if defined(CONFIG_HMM)
>>   void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>   void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]         ` <58bc3bb9-b7b1-a32f-e355-c78a23d95215-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-09-14 17:47           ` Philip Yang
       [not found]             ` <383388c8-1bff-48d9-1044-f16e66bcbfa5-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Philip Yang @ 2018-09-14 17:47 UTC (permalink / raw)
  To: christian.koenig-5C7GfCeVMHo, Felix Kuehling,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2018-09-14 03:51 AM, Christian König wrote:
> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
>>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>>> DRM_AMDGPU_USERPTR Kconfig.
>>>
>>> It supports both KFD userptr and gfx userptr paths.
>>>
>>> This depends on several HMM patchset from Jérôme Glisse queued for
>>> upstream.
>>>
>>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>>>   drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121 
>>> ++++++++++++++-------------------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>>   4 files changed, 56 insertions(+), 75 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig 
>>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> index 9221e54..960a633 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>>   config DRM_AMDGPU_USERPTR
>>>       bool "Always enable userptr write support"
>>>       depends on DRM_AMDGPU
>>> -    select MMU_NOTIFIER
>>> +    select HMM_MIRROR
>>>       help
>>> -      This option selects CONFIG_MMU_NOTIFIER if it isn't already
>>> -      selected to enabled full userptr support.
>>> +      This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>>> +      isn't already selected to enabled full userptr support.
>>>     config DRM_AMDGPU_GART_DEBUGFS
>>>       bool "Allow GART access through debugfs"
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> index 138cb78..c1e5d43 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> @@ -171,7 +171,7 @@ endif
>>>   amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>>   amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>>   amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>>> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>>>     include $(FULL_AMD_PATH)/powerplay/Makefile
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> index e55508b..ad52f34 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> @@ -45,7 +45,7 @@
>>>     #include <linux/firmware.h>
>>>   #include <linux/module.h>
>>> -#include <linux/mmu_notifier.h>
>>> +#include <linux/hmm.h>
>>>   #include <linux/interval_tree.h>
>>>   #include <drm/drmP.h>
>>>   #include <drm/drm.h>
>>> @@ -66,6 +66,7 @@
>> Need to remove @mn documentation.
>>
>>>    * @objects: interval tree containing amdgpu_mn_nodes
>>>    * @read_lock: mutex for recursive locking of @lock
>>>    * @recursion: depth of recursion
>>> + * @mirror: HMM mirror function support
>>>    *
>>>    * Data for each amdgpu device and process address space.
>>>    */
>>> @@ -73,7 +74,6 @@ struct amdgpu_mn {
>>>       /* constant after initialisation */
>>>       struct amdgpu_device    *adev;
>>>       struct mm_struct    *mm;
>>> -    struct mmu_notifier    mn;
>>>       enum amdgpu_mn_type    type;
>>>         /* only used on destruction */
>>> @@ -87,6 +87,9 @@ struct amdgpu_mn {
>>>       struct rb_root_cached    objects;
>>>       struct mutex        read_lock;
>>>       atomic_t        recursion;
>>> +
>>> +    /* HMM mirror */
>>> +    struct hmm_mirror    mirror;
>>>   };
>>>     /**
>>> @@ -103,7 +106,7 @@ struct amdgpu_mn_node {
>>>   };
>>>     /**
>>> - * amdgpu_mn_destroy - destroy the MMU notifier
>>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>>    *
>>>    * @work: previously sheduled work item
>>>    *
>>> @@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct 
>>> work_struct *work)
>>>       }
>>>       up_write(&amn->lock);
>>>       mutex_unlock(&adev->mn_lock);
>>> -    mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>>> +    hmm_mirror_unregister(&amn->mirror);
>>> +
>>>       kfree(amn);
>>>   }
>>>     /**
>>>    * amdgpu_mn_release - callback to notify about mm destruction
>> Update the function name in the comment.
>>
>>>    *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> + * @mirror: the HMM mirror (mm) this callback is about
>>>    *
>>> - * Shedule a work item to lazy destroy our notifier.
>>> + * Shedule a work item to lazy destroy HMM mirror.
>>>    */
>>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>>> -                  struct mm_struct *mm)
>>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>>   {
>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>>> mirror);
>>>         INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>>       schedule_work(&amn->work);
>>>   }
>>>   -
>>>   /**
>>>    * amdgpu_mn_lock - take the write side lock for this notifier
>>>    *
>>> @@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct 
>>> amdgpu_mn_node *node,
>>>   /**
>>>    * amdgpu_mn_invalidate_range_start_gfx - callback to notify about 
>>> mm change
>>>    *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> + * @mirror: the hmm_mirror (mm) is about to update
>>> + * @update: the update start, end address
>>>    *
>>>    * Block for operations on BOs to finish and mark pages as 
>>> accessed and
>>>    * potentially dirty.
>>>    */
>>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier 
>>> *mn,
>>> -                         struct mm_struct *mm,
>>> -                         unsigned long start,
>>> -                         unsigned long end,
>>> -                         bool blockable)
>>> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror 
>>> *mirror,
>>> +            const struct hmm_update *update)
>>>   {
>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>>> mirror);
>>> +    unsigned long start = update->start;
>>> +    unsigned long end = update->end;
>>> +    bool blockable = update->blockable;
>>>       struct interval_tree_node *it;
>>>         /* notification is exclusive, but interval is inclusive */
>>> @@ -278,28 +277,28 @@ static int 
>>> amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>>           amdgpu_mn_invalidate_node(node, start, end);
>>>       }
>>>   +    amdgpu_mn_read_unlock(amn);
>>> +
>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>> overlapping or nested invalidation ranges. But if you'r locking and
>> unlocking in the same function. Is that still a concern?
>
I don't understand the possible recursive case, but 
amdgpu_mn_read_lock() still support recursive locking.
> Well the real problem is that unlocking them here won't work.
>
> We need to hold the lock until we are sure that the operation which 
> updates the page tables is completed.
>
The reason for this change is because hmm mirror has invalidate_start 
callback, no invalidate_end callback

Check mmu_notifier.c and hmm.c again, below is entire logic to update 
CPU page tables and callback:

mn lock amn->lock is used to protect interval tree access because user 
may submit/register new userptr anytime.
This is same for old and new way.

step 2 guarantee the GPU operation is done before updating CPU page table.

So I think the change is safe. We don't need hold mn lock until the CPU 
page tables update is completed.

Old:
    1. down_read_non_owner(&amn->lock)
    2. loop to handle BOs from node->bos through interval tree 
amn->object nodes
        gfx: wait for pending BOs fence operation done, mark user pages 
dirty
        kfd: evict user queues of the process, wait for queue unmap/map 
operation done
    3. update CPU page tables
    4. up_read(&amn->lock)

New, switch step 3 and 4
    1. down_read_non_owner(&amn->lock)
    2. loop to handle BOs from node->bos through interval tree 
amn->object nodes
        gfx: wait for pending BOs fence operation done, mark user pages 
dirty
        kfd: evict user queues of the process, wait for queue unmap/map 
operation done
    3. up_read(&amn->lock)
    4. update CPU page tables

Regards,
Philip
> Christian.
>
>>
>>>       return 0;
>>>   }
>>>     /**
>>>    * amdgpu_mn_invalidate_range_start_hsa - callback to notify about 
>>> mm change
>>>    *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> + * @mirror: the hmm_mirror (mm) is about to update
>>> + * @update: the update start, end address
>>>    *
>>>    * We temporarily evict all BOs between start and end. This
>>>    * necessitates evicting all user-mode queues of the process. The BOs
>>>    * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>>    */
>>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier 
>>> *mn,
>>> -                         struct mm_struct *mm,
>>> -                         unsigned long start,
>>> -                         unsigned long end,
>>> -                         bool blockable)
>>> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror 
>>> *mirror,
>>> +            const struct hmm_update *update)
>>>   {
>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>>> mirror);
>>> +    unsigned long start = update->start;
>>> +    unsigned long end = update->end;
>>> +    bool blockable = update->blockable;
>>>       struct interval_tree_node *it;
>>>         /* notification is exclusive, but interval is inclusive */
>>> @@ -326,59 +325,41 @@ static int 
>>> amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>>                 if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>>                                start, end))
>>> -                amdgpu_amdkfd_evict_userptr(mem, mm);
>>> +                amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>>           }
>>>       }
>>>   +    amdgpu_mn_read_unlock(amn);
>>> +
>>>       return 0;
>>>   }
>>>   -/**
>>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>>> - *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> - *
>>> - * Release the lock again to allow new command submissions.
>>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>>> + * alignment. Use these bits to make a unique key from the mm pointer
>>> + * and notifier type.
>>>    */
>>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>>> -                       struct mm_struct *mm,
>>> -                       unsigned long start,
>>> -                       unsigned long end)
>>> -{
>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> -
>>> -    amdgpu_mn_read_unlock(amn);
>>> -}
>>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>   -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>>       [AMDGPU_MN_TYPE_GFX] = {
>>> -        .release = amdgpu_mn_release,
>>> -        .invalidate_range_start = 
>>> amdgpu_mn_invalidate_range_start_gfx,
>>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>> +        .sync_cpu_device_pagetables =
>>> +                amdgpu_mn_invalidate_range_start_gfx,
>>> +        .release = amdgpu_hmm_mirror_release
>>>       },
>>>       [AMDGPU_MN_TYPE_HSA] = {
>>> -        .release = amdgpu_mn_release,
>>> -        .invalidate_range_start = 
>>> amdgpu_mn_invalidate_range_start_hsa,
>>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>> +        .sync_cpu_device_pagetables =
>>> +                amdgpu_mn_invalidate_range_start_hsa,
>>> +        .release = amdgpu_hmm_mirror_release
>>>       },
>>>   };
>>>   -/* Low bits of any reasonable mm pointer will be unused due to 
>>> struct
>>> - * alignment. Use these bits to make a unique key from the mm pointer
>>> - * and notifier type.
>>> - */
>>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>> -
>>>   /**
>>> - * amdgpu_mn_get - create notifier context
>>> + * amdgpu_mn_get - create HMM mirror context
>>>    *
>>>    * @adev: amdgpu device pointer
>>>    * @type: type of MMU notifier context
>>>    *
>>> - * Creates a notifier context for current->mm.
>>> + * Creates a HMM mirror context for current->mm.
>>>    */
>>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>                   enum amdgpu_mn_type type)
>>> @@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct 
>>> amdgpu_device *adev,
>>>       amn->mm = mm;
>>>       init_rwsem(&amn->lock);
>>>       amn->type = type;
>>> -    amn->mn.ops = &amdgpu_mn_ops[type];
>>>       amn->objects = RB_ROOT_CACHED;
>>>       mutex_init(&amn->read_lock);
>>>       atomic_set(&amn->recursion, 0);
>>>   -    r = __mmu_notifier_register(&amn->mn, mm);
>>> +    amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>>> +    r = hmm_mirror_register(&amn->mirror, mm);
>>>       if (r)
>>>           goto free_amn;
>>>   @@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct 
>>> amdgpu_device *adev,
>>>    * @bo: amdgpu buffer object
>>>    * @addr: userptr addr we should monitor
>>>    *
>>> - * Registers an MMU notifier for the given BO at the specified 
>>> address.
>>> + * Registers an HMM mirror for the given BO at the specified address.
>>>    * Returns 0 on success, -ERRNO if anything goes wrong.
>>>    */
>>>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>> @@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, 
>>> unsigned long addr)
>>>   }
>>>     /**
>>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>>    *
>>>    * @bo: amdgpu buffer object
>>>    *
>>> - * Remove any registration of MMU notifier updates from the buffer 
>>> object.
>>> + * Remove any registration of HMM mirror updates from the buffer 
>>> object.
>>>    */
>>>   void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>>   {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> index eb0f432..0e27526 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>>       AMDGPU_MN_TYPE_HSA,
>>>   };
>>>   -#if defined(CONFIG_MMU_NOTIFIER)
>>> +#if defined(CONFIG_HMM)
>>>   void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>>   void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]             ` <383388c8-1bff-48d9-1044-f16e66bcbfa5-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-14 17:52               ` Christian König
       [not found]                 ` <3850fbeb-5d91-9c14-43c9-45d5d058e15b-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2018-09-14 17:52 UTC (permalink / raw)
  To: Philip Yang, Felix Kuehling,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Jerome Glisse

Am 14.09.2018 um 19:47 schrieb Philip Yang:
> On 2018-09-14 03:51 AM, Christian König wrote:
>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>> Replace our MMU notifier with 
>>>> hmm_mirror_ops.sync_cpu_device_pagetables
>>>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>>>> DRM_AMDGPU_USERPTR Kconfig.
>>>>
>>>> It supports both KFD userptr and gfx userptr paths.
>>>>
>>>> This depends on several HMM patchset from Jérôme Glisse queued for
>>>> upstream.
>>>>
>>>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>>>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>>>>   drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121 
>>>> ++++++++++++++-------------------
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>>>   4 files changed, 56 insertions(+), 75 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig 
>>>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>> index 9221e54..960a633 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>>>   config DRM_AMDGPU_USERPTR
>>>>       bool "Always enable userptr write support"
>>>>       depends on DRM_AMDGPU
>>>> -    select MMU_NOTIFIER
>>>> +    select HMM_MIRROR
>>>>       help
>>>> -      This option selects CONFIG_MMU_NOTIFIER if it isn't already
>>>> -      selected to enabled full userptr support.
>>>> +      This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>>>> +      isn't already selected to enabled full userptr support.
>>>>     config DRM_AMDGPU_GART_DEBUGFS
>>>>       bool "Allow GART access through debugfs"
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> index 138cb78..c1e5d43 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> @@ -171,7 +171,7 @@ endif
>>>>   amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>>>   amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>>>   amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>>>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>>>> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>>>>     include $(FULL_AMD_PATH)/powerplay/Makefile
>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>> index e55508b..ad52f34 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>> @@ -45,7 +45,7 @@
>>>>     #include <linux/firmware.h>
>>>>   #include <linux/module.h>
>>>> -#include <linux/mmu_notifier.h>
>>>> +#include <linux/hmm.h>
>>>>   #include <linux/interval_tree.h>
>>>>   #include <drm/drmP.h>
>>>>   #include <drm/drm.h>
>>>> @@ -66,6 +66,7 @@
>>> Need to remove @mn documentation.
>>>
>>>>    * @objects: interval tree containing amdgpu_mn_nodes
>>>>    * @read_lock: mutex for recursive locking of @lock
>>>>    * @recursion: depth of recursion
>>>> + * @mirror: HMM mirror function support
>>>>    *
>>>>    * Data for each amdgpu device and process address space.
>>>>    */
>>>> @@ -73,7 +74,6 @@ struct amdgpu_mn {
>>>>       /* constant after initialisation */
>>>>       struct amdgpu_device    *adev;
>>>>       struct mm_struct    *mm;
>>>> -    struct mmu_notifier    mn;
>>>>       enum amdgpu_mn_type    type;
>>>>         /* only used on destruction */
>>>> @@ -87,6 +87,9 @@ struct amdgpu_mn {
>>>>       struct rb_root_cached    objects;
>>>>       struct mutex        read_lock;
>>>>       atomic_t        recursion;
>>>> +
>>>> +    /* HMM mirror */
>>>> +    struct hmm_mirror    mirror;
>>>>   };
>>>>     /**
>>>> @@ -103,7 +106,7 @@ struct amdgpu_mn_node {
>>>>   };
>>>>     /**
>>>> - * amdgpu_mn_destroy - destroy the MMU notifier
>>>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>>>    *
>>>>    * @work: previously sheduled work item
>>>>    *
>>>> @@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct 
>>>> work_struct *work)
>>>>       }
>>>>       up_write(&amn->lock);
>>>>       mutex_unlock(&adev->mn_lock);
>>>> -    mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>>>> +    hmm_mirror_unregister(&amn->mirror);
>>>> +
>>>>       kfree(amn);
>>>>   }
>>>>     /**
>>>>    * amdgpu_mn_release - callback to notify about mm destruction
>>> Update the function name in the comment.
>>>
>>>>    *
>>>> - * @mn: our notifier
>>>> - * @mm: the mm this callback is about
>>>> + * @mirror: the HMM mirror (mm) this callback is about
>>>>    *
>>>> - * Shedule a work item to lazy destroy our notifier.
>>>> + * Shedule a work item to lazy destroy HMM mirror.
>>>>    */
>>>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>>>> -                  struct mm_struct *mm)
>>>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>>>   {
>>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>>>> mirror);
>>>>         INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>>>       schedule_work(&amn->work);
>>>>   }
>>>>   -
>>>>   /**
>>>>    * amdgpu_mn_lock - take the write side lock for this notifier
>>>>    *
>>>> @@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct 
>>>> amdgpu_mn_node *node,
>>>>   /**
>>>>    * amdgpu_mn_invalidate_range_start_gfx - callback to notify 
>>>> about mm change
>>>>    *
>>>> - * @mn: our notifier
>>>> - * @mm: the mm this callback is about
>>>> - * @start: start of updated range
>>>> - * @end: end of updated range
>>>> + * @mirror: the hmm_mirror (mm) is about to update
>>>> + * @update: the update start, end address
>>>>    *
>>>>    * Block for operations on BOs to finish and mark pages as 
>>>> accessed and
>>>>    * potentially dirty.
>>>>    */
>>>> -static int amdgpu_mn_invalidate_range_start_gfx(struct 
>>>> mmu_notifier *mn,
>>>> -                         struct mm_struct *mm,
>>>> -                         unsigned long start,
>>>> -                         unsigned long end,
>>>> -                         bool blockable)
>>>> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror 
>>>> *mirror,
>>>> +            const struct hmm_update *update)
>>>>   {
>>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>>>> mirror);
>>>> +    unsigned long start = update->start;
>>>> +    unsigned long end = update->end;
>>>> +    bool blockable = update->blockable;
>>>>       struct interval_tree_node *it;
>>>>         /* notification is exclusive, but interval is inclusive */
>>>> @@ -278,28 +277,28 @@ static int 
>>>> amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>>>           amdgpu_mn_invalidate_node(node, start, end);
>>>>       }
>>>>   +    amdgpu_mn_read_unlock(amn);
>>>> +
>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>> overlapping or nested invalidation ranges. But if you'r locking and
>>> unlocking in the same function. Is that still a concern?
>>
> I don't understand the possible recursive case, but 
> amdgpu_mn_read_lock() still support recursive locking.
>> Well the real problem is that unlocking them here won't work.
>>
>> We need to hold the lock until we are sure that the operation which 
>> updates the page tables is completed.
>>
> The reason for this change is because hmm mirror has invalidate_start 
> callback, no invalidate_end callback
>
> Check mmu_notifier.c and hmm.c again, below is entire logic to update 
> CPU page tables and callback:
>
> mn lock amn->lock is used to protect interval tree access because user 
> may submit/register new userptr anytime.
> This is same for old and new way.
>
> step 2 guarantee the GPU operation is done before updating CPU page 
> table.
>
> So I think the change is safe. We don't need hold mn lock until the 
> CPU page tables update is completed.

No, that isn't even remotely correct. The lock doesn't protects the 
interval tree.

>
> Old:
>    1. down_read_non_owner(&amn->lock)
>    2. loop to handle BOs from node->bos through interval tree 
> amn->object nodes
>        gfx: wait for pending BOs fence operation done, mark user pages 
> dirty
>        kfd: evict user queues of the process, wait for queue unmap/map 
> operation done
>    3. update CPU page tables
>    4. up_read(&amn->lock)
>
> New, switch step 3 and 4
>    1. down_read_non_owner(&amn->lock)
>    2. loop to handle BOs from node->bos through interval tree 
> amn->object nodes
>        gfx: wait for pending BOs fence operation done, mark user pages 
> dirty
>        kfd: evict user queues of the process, wait for queue unmap/map 
> operation done
>    3. up_read(&amn->lock)
>    4. update CPU page tables

The lock is there to make sure that we serialize page table updates with 
command submission.

If HMM doesn't provide a callback for the end of the invalidating then 
it can't be used for this.

Adding Jerome as well, since we are certainly missing something here.

Regards,
Christian.

>
> Regards,
> Philip
>> Christian.
>>
>>>
>>>>       return 0;
>>>>   }
>>>>     /**
>>>>    * amdgpu_mn_invalidate_range_start_hsa - callback to notify 
>>>> about mm change
>>>>    *
>>>> - * @mn: our notifier
>>>> - * @mm: the mm this callback is about
>>>> - * @start: start of updated range
>>>> - * @end: end of updated range
>>>> + * @mirror: the hmm_mirror (mm) is about to update
>>>> + * @update: the update start, end address
>>>>    *
>>>>    * We temporarily evict all BOs between start and end. This
>>>>    * necessitates evicting all user-mode queues of the process. The 
>>>> BOs
>>>>    * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>>>    */
>>>> -static int amdgpu_mn_invalidate_range_start_hsa(struct 
>>>> mmu_notifier *mn,
>>>> -                         struct mm_struct *mm,
>>>> -                         unsigned long start,
>>>> -                         unsigned long end,
>>>> -                         bool blockable)
>>>> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror 
>>>> *mirror,
>>>> +            const struct hmm_update *update)
>>>>   {
>>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>>>> mirror);
>>>> +    unsigned long start = update->start;
>>>> +    unsigned long end = update->end;
>>>> +    bool blockable = update->blockable;
>>>>       struct interval_tree_node *it;
>>>>         /* notification is exclusive, but interval is inclusive */
>>>> @@ -326,59 +325,41 @@ static int 
>>>> amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>>>                 if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>>>                                start, end))
>>>> -                amdgpu_amdkfd_evict_userptr(mem, mm);
>>>> +                amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>>>           }
>>>>       }
>>>>   +    amdgpu_mn_read_unlock(amn);
>>>> +
>>>>       return 0;
>>>>   }
>>>>   -/**
>>>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm 
>>>> change
>>>> - *
>>>> - * @mn: our notifier
>>>> - * @mm: the mm this callback is about
>>>> - * @start: start of updated range
>>>> - * @end: end of updated range
>>>> - *
>>>> - * Release the lock again to allow new command submissions.
>>>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>>>> + * alignment. Use these bits to make a unique key from the mm pointer
>>>> + * and notifier type.
>>>>    */
>>>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>>>> -                       struct mm_struct *mm,
>>>> -                       unsigned long start,
>>>> -                       unsigned long end)
>>>> -{
>>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>> -
>>>> -    amdgpu_mn_read_unlock(amn);
>>>> -}
>>>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>>   -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>>>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>>>       [AMDGPU_MN_TYPE_GFX] = {
>>>> -        .release = amdgpu_mn_release,
>>>> -        .invalidate_range_start = 
>>>> amdgpu_mn_invalidate_range_start_gfx,
>>>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>>> +        .sync_cpu_device_pagetables =
>>>> +                amdgpu_mn_invalidate_range_start_gfx,
>>>> +        .release = amdgpu_hmm_mirror_release
>>>>       },
>>>>       [AMDGPU_MN_TYPE_HSA] = {
>>>> -        .release = amdgpu_mn_release,
>>>> -        .invalidate_range_start = 
>>>> amdgpu_mn_invalidate_range_start_hsa,
>>>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>>> +        .sync_cpu_device_pagetables =
>>>> +                amdgpu_mn_invalidate_range_start_hsa,
>>>> +        .release = amdgpu_hmm_mirror_release
>>>>       },
>>>>   };
>>>>   -/* Low bits of any reasonable mm pointer will be unused due to 
>>>> struct
>>>> - * alignment. Use these bits to make a unique key from the mm pointer
>>>> - * and notifier type.
>>>> - */
>>>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>> -
>>>>   /**
>>>> - * amdgpu_mn_get - create notifier context
>>>> + * amdgpu_mn_get - create HMM mirror context
>>>>    *
>>>>    * @adev: amdgpu device pointer
>>>>    * @type: type of MMU notifier context
>>>>    *
>>>> - * Creates a notifier context for current->mm.
>>>> + * Creates a HMM mirror context for current->mm.
>>>>    */
>>>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>>                   enum amdgpu_mn_type type)
>>>> @@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct 
>>>> amdgpu_device *adev,
>>>>       amn->mm = mm;
>>>>       init_rwsem(&amn->lock);
>>>>       amn->type = type;
>>>> -    amn->mn.ops = &amdgpu_mn_ops[type];
>>>>       amn->objects = RB_ROOT_CACHED;
>>>>       mutex_init(&amn->read_lock);
>>>>       atomic_set(&amn->recursion, 0);
>>>>   -    r = __mmu_notifier_register(&amn->mn, mm);
>>>> +    amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>>>> +    r = hmm_mirror_register(&amn->mirror, mm);
>>>>       if (r)
>>>>           goto free_amn;
>>>>   @@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct 
>>>> amdgpu_device *adev,
>>>>    * @bo: amdgpu buffer object
>>>>    * @addr: userptr addr we should monitor
>>>>    *
>>>> - * Registers an MMU notifier for the given BO at the specified 
>>>> address.
>>>> + * Registers an HMM mirror for the given BO at the specified address.
>>>>    * Returns 0 on success, -ERRNO if anything goes wrong.
>>>>    */
>>>>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>>> @@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, 
>>>> unsigned long addr)
>>>>   }
>>>>     /**
>>>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>>>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>>>    *
>>>>    * @bo: amdgpu buffer object
>>>>    *
>>>> - * Remove any registration of MMU notifier updates from the buffer 
>>>> object.
>>>> + * Remove any registration of HMM mirror updates from the buffer 
>>>> object.
>>>>    */
>>>>   void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>>>   {
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>> index eb0f432..0e27526 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>>>       AMDGPU_MN_TYPE_HSA,
>>>>   };
>>>>   -#if defined(CONFIG_MMU_NOTIFIER)
>>>> +#if defined(CONFIG_HMM)
>>>>   void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>>>   void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                 ` <3850fbeb-5d91-9c14-43c9-45d5d058e15b-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-14 20:21                   ` Felix Kuehling
       [not found]                     ` <de28cee0-3461-4f99-eeae-b793de00ca58-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Felix Kuehling @ 2018-09-14 20:21 UTC (permalink / raw)
  To: Christian König, Philip Yang,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Jerome Glisse

On 2018-09-14 01:52 PM, Christian König wrote:
> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>> On 2018-09-14 03:51 AM, Christian König wrote:
>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> Replace our MMU notifier with
>>>>> hmm_mirror_ops.sync_cpu_device_pagetables
>>>>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>>>>> DRM_AMDGPU_USERPTR Kconfig.
>>>>>
>>>>> It supports both KFD userptr and gfx userptr paths.
>>>>>
>>>>> This depends on several HMM patchset from Jérôme Glisse queued for
>>>>> upstream.
>>>>>
>>>>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>>>>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>>>>>   drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121
>>>>> ++++++++++++++-------------------
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>>>>   4 files changed, 56 insertions(+), 75 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>>> index 9221e54..960a633 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>>>>   config DRM_AMDGPU_USERPTR
>>>>>       bool "Always enable userptr write support"
>>>>>       depends on DRM_AMDGPU
>>>>> -    select MMU_NOTIFIER
>>>>> +    select HMM_MIRROR
>>>>>       help
>>>>> -      This option selects CONFIG_MMU_NOTIFIER if it isn't already
>>>>> -      selected to enabled full userptr support.
>>>>> +      This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>>>>> +      isn't already selected to enabled full userptr support.
>>>>>     config DRM_AMDGPU_GART_DEBUGFS
>>>>>       bool "Allow GART access through debugfs"
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> index 138cb78..c1e5d43 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> @@ -171,7 +171,7 @@ endif
>>>>>   amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>>>>   amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>>>>   amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>>>>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>>>>> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>>>>>     include $(FULL_AMD_PATH)/powerplay/Makefile
>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>>> index e55508b..ad52f34 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>>> @@ -45,7 +45,7 @@
>>>>>     #include <linux/firmware.h>
>>>>>   #include <linux/module.h>
>>>>> -#include <linux/mmu_notifier.h>
>>>>> +#include <linux/hmm.h>
>>>>>   #include <linux/interval_tree.h>
>>>>>   #include <drm/drmP.h>
>>>>>   #include <drm/drm.h>
>>>>> @@ -66,6 +66,7 @@
>>>> Need to remove @mn documentation.
>>>>
>>>>>    * @objects: interval tree containing amdgpu_mn_nodes
>>>>>    * @read_lock: mutex for recursive locking of @lock
>>>>>    * @recursion: depth of recursion
>>>>> + * @mirror: HMM mirror function support
>>>>>    *
>>>>>    * Data for each amdgpu device and process address space.
>>>>>    */
>>>>> @@ -73,7 +74,6 @@ struct amdgpu_mn {
>>>>>       /* constant after initialisation */
>>>>>       struct amdgpu_device    *adev;
>>>>>       struct mm_struct    *mm;
>>>>> -    struct mmu_notifier    mn;
>>>>>       enum amdgpu_mn_type    type;
>>>>>         /* only used on destruction */
>>>>> @@ -87,6 +87,9 @@ struct amdgpu_mn {
>>>>>       struct rb_root_cached    objects;
>>>>>       struct mutex        read_lock;
>>>>>       atomic_t        recursion;
>>>>> +
>>>>> +    /* HMM mirror */
>>>>> +    struct hmm_mirror    mirror;
>>>>>   };
>>>>>     /**
>>>>> @@ -103,7 +106,7 @@ struct amdgpu_mn_node {
>>>>>   };
>>>>>     /**
>>>>> - * amdgpu_mn_destroy - destroy the MMU notifier
>>>>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>>>>    *
>>>>>    * @work: previously sheduled work item
>>>>>    *
>>>>> @@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct
>>>>> work_struct *work)
>>>>>       }
>>>>>       up_write(&amn->lock);
>>>>>       mutex_unlock(&adev->mn_lock);
>>>>> -    mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>>>>> +    hmm_mirror_unregister(&amn->mirror);
>>>>> +
>>>>>       kfree(amn);
>>>>>   }
>>>>>     /**
>>>>>    * amdgpu_mn_release - callback to notify about mm destruction
>>>> Update the function name in the comment.
>>>>
>>>>>    *
>>>>> - * @mn: our notifier
>>>>> - * @mm: the mm this callback is about
>>>>> + * @mirror: the HMM mirror (mm) this callback is about
>>>>>    *
>>>>> - * Shedule a work item to lazy destroy our notifier.
>>>>> + * Shedule a work item to lazy destroy HMM mirror.
>>>>>    */
>>>>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>>>>> -                  struct mm_struct *mm)
>>>>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>>>>   {
>>>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>>> +    struct amdgpu_mn *amn = container_of(mirror, struct
>>>>> amdgpu_mn, mirror);
>>>>>         INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>>>>       schedule_work(&amn->work);
>>>>>   }
>>>>>   -
>>>>>   /**
>>>>>    * amdgpu_mn_lock - take the write side lock for this notifier
>>>>>    *
>>>>> @@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct
>>>>> amdgpu_mn_node *node,
>>>>>   /**
>>>>>    * amdgpu_mn_invalidate_range_start_gfx - callback to notify
>>>>> about mm change
>>>>>    *
>>>>> - * @mn: our notifier
>>>>> - * @mm: the mm this callback is about
>>>>> - * @start: start of updated range
>>>>> - * @end: end of updated range
>>>>> + * @mirror: the hmm_mirror (mm) is about to update
>>>>> + * @update: the update start, end address
>>>>>    *
>>>>>    * Block for operations on BOs to finish and mark pages as
>>>>> accessed and
>>>>>    * potentially dirty.
>>>>>    */
>>>>> -static int amdgpu_mn_invalidate_range_start_gfx(struct
>>>>> mmu_notifier *mn,
>>>>> -                         struct mm_struct *mm,
>>>>> -                         unsigned long start,
>>>>> -                         unsigned long end,
>>>>> -                         bool blockable)
>>>>> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror
>>>>> *mirror,
>>>>> +            const struct hmm_update *update)
>>>>>   {
>>>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>>> +    struct amdgpu_mn *amn = container_of(mirror, struct
>>>>> amdgpu_mn, mirror);
>>>>> +    unsigned long start = update->start;
>>>>> +    unsigned long end = update->end;
>>>>> +    bool blockable = update->blockable;
>>>>>       struct interval_tree_node *it;
>>>>>         /* notification is exclusive, but interval is inclusive */
>>>>> @@ -278,28 +277,28 @@ static int
>>>>> amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>>>>           amdgpu_mn_invalidate_node(node, start, end);
>>>>>       }
>>>>>   +    amdgpu_mn_read_unlock(amn);
>>>>> +
>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>> overlapping or nested invalidation ranges. But if you'r locking and
>>>> unlocking in the same function. Is that still a concern?
>>>
>> I don't understand the possible recursive case, but
>> amdgpu_mn_read_lock() still support recursive locking.
>>> Well the real problem is that unlocking them here won't work.
>>>
>>> We need to hold the lock until we are sure that the operation which
>>> updates the page tables is completed.
>>>
>> The reason for this change is because hmm mirror has invalidate_start
>> callback, no invalidate_end callback
>>
>> Check mmu_notifier.c and hmm.c again, below is entire logic to update
>> CPU page tables and callback:
>>
>> mn lock amn->lock is used to protect interval tree access because
>> user may submit/register new userptr anytime.
>> This is same for old and new way.
>>
>> step 2 guarantee the GPU operation is done before updating CPU page
>> table.
>>
>> So I think the change is safe. We don't need hold mn lock until the
>> CPU page tables update is completed.
>
> No, that isn't even remotely correct. The lock doesn't protects the
> interval tree.
>
>>
>> Old:
>>    1. down_read_non_owner(&amn->lock)
>>    2. loop to handle BOs from node->bos through interval tree
>> amn->object nodes
>>        gfx: wait for pending BOs fence operation done, mark user
>> pages dirty
>>        kfd: evict user queues of the process, wait for queue
>> unmap/map operation done
>>    3. update CPU page tables
>>    4. up_read(&amn->lock)
>>
>> New, switch step 3 and 4
>>    1. down_read_non_owner(&amn->lock)
>>    2. loop to handle BOs from node->bos through interval tree
>> amn->object nodes
>>        gfx: wait for pending BOs fence operation done, mark user
>> pages dirty
>>        kfd: evict user queues of the process, wait for queue
>> unmap/map operation done
>>    3. up_read(&amn->lock)
>>    4. update CPU page tables
>
> The lock is there to make sure that we serialize page table updates
> with command submission.

As I understand it, the idea is to prevent command submission (adding
new fences to BOs) while a page table invalidation is in progress. But
do we really need another lock for this? Wouldn't the re-validation of
userptr BOs (currently calling get_user_pages) force synchronization
with the ongoing page table invalidation through the mmap_sem or other
MM locks?

Regards,
  Felix


>
> If HMM doesn't provide a callback for the end of the invalidating then
> it can't be used for this.
>
> Adding Jerome as well, since we are certainly missing something here.
>
> Regards,
> Christian.
>
>>
>> Regards,
>> Philip
>>> Christian.
>>>
>>>>
>>>>>       return 0;
>>>>>   }
>>>>>     /**
>>>>>    * amdgpu_mn_invalidate_range_start_hsa - callback to notify
>>>>> about mm change
>>>>>    *
>>>>> - * @mn: our notifier
>>>>> - * @mm: the mm this callback is about
>>>>> - * @start: start of updated range
>>>>> - * @end: end of updated range
>>>>> + * @mirror: the hmm_mirror (mm) is about to update
>>>>> + * @update: the update start, end address
>>>>>    *
>>>>>    * We temporarily evict all BOs between start and end. This
>>>>>    * necessitates evicting all user-mode queues of the process.
>>>>> The BOs
>>>>>    * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>>>>    */
>>>>> -static int amdgpu_mn_invalidate_range_start_hsa(struct
>>>>> mmu_notifier *mn,
>>>>> -                         struct mm_struct *mm,
>>>>> -                         unsigned long start,
>>>>> -                         unsigned long end,
>>>>> -                         bool blockable)
>>>>> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror
>>>>> *mirror,
>>>>> +            const struct hmm_update *update)
>>>>>   {
>>>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>>> +    struct amdgpu_mn *amn = container_of(mirror, struct
>>>>> amdgpu_mn, mirror);
>>>>> +    unsigned long start = update->start;
>>>>> +    unsigned long end = update->end;
>>>>> +    bool blockable = update->blockable;
>>>>>       struct interval_tree_node *it;
>>>>>         /* notification is exclusive, but interval is inclusive */
>>>>> @@ -326,59 +325,41 @@ static int
>>>>> amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>>>>                 if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>>>>                                start, end))
>>>>> -                amdgpu_amdkfd_evict_userptr(mem, mm);
>>>>> +                amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>>>>           }
>>>>>       }
>>>>>   +    amdgpu_mn_read_unlock(amn);
>>>>> +
>>>>>       return 0;
>>>>>   }
>>>>>   -/**
>>>>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm
>>>>> change
>>>>> - *
>>>>> - * @mn: our notifier
>>>>> - * @mm: the mm this callback is about
>>>>> - * @start: start of updated range
>>>>> - * @end: end of updated range
>>>>> - *
>>>>> - * Release the lock again to allow new command submissions.
>>>>> +/* Low bits of any reasonable mm pointer will be unused due to
>>>>> struct
>>>>> + * alignment. Use these bits to make a unique key from the mm
>>>>> pointer
>>>>> + * and notifier type.
>>>>>    */
>>>>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>>>>> -                       struct mm_struct *mm,
>>>>> -                       unsigned long start,
>>>>> -                       unsigned long end)
>>>>> -{
>>>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>>> -
>>>>> -    amdgpu_mn_read_unlock(amn);
>>>>> -}
>>>>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>>>   -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>>>>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>>>>       [AMDGPU_MN_TYPE_GFX] = {
>>>>> -        .release = amdgpu_mn_release,
>>>>> -        .invalidate_range_start =
>>>>> amdgpu_mn_invalidate_range_start_gfx,
>>>>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>>>> +        .sync_cpu_device_pagetables =
>>>>> +                amdgpu_mn_invalidate_range_start_gfx,
>>>>> +        .release = amdgpu_hmm_mirror_release
>>>>>       },
>>>>>       [AMDGPU_MN_TYPE_HSA] = {
>>>>> -        .release = amdgpu_mn_release,
>>>>> -        .invalidate_range_start =
>>>>> amdgpu_mn_invalidate_range_start_hsa,
>>>>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>>>> +        .sync_cpu_device_pagetables =
>>>>> +                amdgpu_mn_invalidate_range_start_hsa,
>>>>> +        .release = amdgpu_hmm_mirror_release
>>>>>       },
>>>>>   };
>>>>>   -/* Low bits of any reasonable mm pointer will be unused due to
>>>>> struct
>>>>> - * alignment. Use these bits to make a unique key from the mm
>>>>> pointer
>>>>> - * and notifier type.
>>>>> - */
>>>>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>>> -
>>>>>   /**
>>>>> - * amdgpu_mn_get - create notifier context
>>>>> + * amdgpu_mn_get - create HMM mirror context
>>>>>    *
>>>>>    * @adev: amdgpu device pointer
>>>>>    * @type: type of MMU notifier context
>>>>>    *
>>>>> - * Creates a notifier context for current->mm.
>>>>> + * Creates a HMM mirror context for current->mm.
>>>>>    */
>>>>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>>>                   enum amdgpu_mn_type type)
>>>>> @@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>>>>> amdgpu_device *adev,
>>>>>       amn->mm = mm;
>>>>>       init_rwsem(&amn->lock);
>>>>>       amn->type = type;
>>>>> -    amn->mn.ops = &amdgpu_mn_ops[type];
>>>>>       amn->objects = RB_ROOT_CACHED;
>>>>>       mutex_init(&amn->read_lock);
>>>>>       atomic_set(&amn->recursion, 0);
>>>>>   -    r = __mmu_notifier_register(&amn->mn, mm);
>>>>> +    amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>>>>> +    r = hmm_mirror_register(&amn->mirror, mm);
>>>>>       if (r)
>>>>>           goto free_amn;
>>>>>   @@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>>>>> amdgpu_device *adev,
>>>>>    * @bo: amdgpu buffer object
>>>>>    * @addr: userptr addr we should monitor
>>>>>    *
>>>>> - * Registers an MMU notifier for the given BO at the specified
>>>>> address.
>>>>> + * Registers an HMM mirror for the given BO at the specified
>>>>> address.
>>>>>    * Returns 0 on success, -ERRNO if anything goes wrong.
>>>>>    */
>>>>>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>>>> @@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo,
>>>>> unsigned long addr)
>>>>>   }
>>>>>     /**
>>>>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>>>>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>>>>    *
>>>>>    * @bo: amdgpu buffer object
>>>>>    *
>>>>> - * Remove any registration of MMU notifier updates from the
>>>>> buffer object.
>>>>> + * Remove any registration of HMM mirror updates from the buffer
>>>>> object.
>>>>>    */
>>>>>   void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>>>>   {
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>>> index eb0f432..0e27526 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>>>>       AMDGPU_MN_TYPE_HSA,
>>>>>   };
>>>>>   -#if defined(CONFIG_MMU_NOTIFIER)
>>>>> +#if defined(CONFIG_HMM)
>>>>>   void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>>>>   void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>>>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>
>>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                     ` <de28cee0-3461-4f99-eeae-b793de00ca58-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-15  7:46                       ` Christian König
       [not found]                         ` <e4cf7212-4340-8639-c8c1-057e4d1083f0-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2018-09-15  7:46 UTC (permalink / raw)
  To: Felix Kuehling, Philip Yang,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Jerome Glisse

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>>> overlapping or nested invalidation ranges. But if you'r locking and
>>>>> unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has invalidate_start
>>> callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to update
>>> CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the re-validation of
> userptr BOs (currently calling get_user_pages) force synchronization
> with the ongoing page table invalidation through the mmap_sem or other
> MM locks?

No and yes. We don't hold any other locks while doing command 
submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are 
certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                         ` <e4cf7212-4340-8639-c8c1-057e4d1083f0-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-27  6:53                           ` Kuehling, Felix
       [not found]                             ` <DM5PR12MB17078469EB6D3AF1D53B788992140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Kuehling, Felix @ 2018-09-27  6:53 UTC (permalink / raw)
  To: Koenig, Christian, Yang, Philip,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Jerome Glisse

I had a chat with Jerome yesterday. He pointed out that the new blockable parameter can be used to infer whether the MMU notifier is being called  in a reclaim operation. So if blockable==true, it should even be safe to take the BO reservation lock without problems. I think with that we should be able to remove the read-write locking completely and go back to locking (or try-locking for blockable==false) the reservation locks in the MMU notifier?

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Christian König
Sent: Saturday, September 15, 2018 3:47 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com>; Yang, Philip <Philip.Yang@amd.com>; amd-gfx@lists.freedesktop.org; Jerome Glisse <j.glisse@gmail.com>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple 
>>>>> overlapping or nested invalidation ranges. But if you'r locking 
>>>>> and unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which 
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has 
>>> invalidate_start callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to 
>>> update CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because 
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page 
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the 
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the 
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user 
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue 
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user 
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue 
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates 
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding 
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the 
> re-validation of userptr BOs (currently calling get_user_pages) force 
> synchronization with the ongoing page table invalidation through the 
> mmap_sem or other MM locks?

No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                             ` <DM5PR12MB17078469EB6D3AF1D53B788992140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-09-27  6:59                               ` Koenig, Christian
       [not found]                                 ` <a76b71ac-4b5b-45d7-b48b-6d0e4a7e7524-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Koenig, Christian @ 2018-09-27  6:59 UTC (permalink / raw)
  To: Kuehling, Felix
  Cc: Jerome Glisse, Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 4751 bytes --]

No, that won't work. We would still run into lock inversion problems.

What we could do with the scheduler is to turn submissions into dummies if we find that the page tables are now outdated.

But that would be really hacky and I'm not sure if that would really work in all cases.

Christian.

Am 27.09.2018 08:53 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>:
I had a chat with Jerome yesterday. He pointed out that the new blockable parameter can be used to infer whether the MMU notifier is being called  in a reclaim operation. So if blockable==true, it should even be safe to take the BO reservation lock without problems. I think with that we should be able to remove the read-write locking completely and go back to locking (or try-locking for blockable==false) the reservation locks in the MMU notifier?

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> On Behalf Of Christian König
Sent: Saturday, September 15, 2018 3:47 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>; Yang, Philip <Philip.Yang@amd.com>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>>> overlapping or nested invalidation ranges. But if you'r locking
>>>>> and unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has
>>> invalidate_start callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to
>>> update CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the
> re-validation of userptr BOs (currently calling get_user_pages) force
> synchronization with the ongoing page table invalidation through the
> mmap_sem or other MM locks?

No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 6876 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                 ` <a76b71ac-4b5b-45d7-b48b-6d0e4a7e7524-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
@ 2018-09-27  8:47                                   ` Kuehling, Felix
       [not found]                                     ` <DM5PR12MB1707D5E46617B2936F800F1992140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Kuehling, Felix @ 2018-09-27  8:47 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: Jerome Glisse, Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 6861 bytes --]

So back to my previous question:



>> But do we really need another lock for this? Wouldn't the

>> re-validation of userptr BOs (currently calling get_user_pages) force

>> synchronization with the ongoing page table invalidation through the

>> mmap_sem or other MM locks?

>

> No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

>

> Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

The existing amdgpu_mn_lock/unlock should block the MMU notifier while a command submission is in progress. It should also block command submission while an MMU notifier is in progress.

What we lose with HMM is the ability to hold a read-lock for the entire duration of the invalidate_range_start until invalidate_range_end. As I understand it, that lock is meant to prevent new command submissions while the page tables are being updated by the kernel. But my point is, that get_user_pages or hmm_vma_fault should do the same kind of thing. Before the command submission can go ahead, it needs to update the userptr addresses. If the page tables are still being updated, it will block there even without holding the amdgpu_mn_read_lock.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 3:00 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

No, that won't work. We would still run into lock inversion problems.

What we could do with the scheduler is to turn submissions into dummies if we find that the page tables are now outdated.

But that would be really hacky and I'm not sure if that would really work in all cases.

Christian.

Am 27.09.2018 08:53 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
I had a chat with Jerome yesterday. He pointed out that the new blockable parameter can be used to infer whether the MMU notifier is being called  in a reclaim operation. So if blockable==true, it should even be safe to take the BO reservation lock without problems. I think with that we should be able to remove the read-write locking completely and go back to locking (or try-locking for blockable==false) the reservation locks in the MMU notifier?

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-bounces@lists.freedesktop.org>> On Behalf Of Christian König
Sent: Saturday, September 15, 2018 3:47 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>>> overlapping or nested invalidation ranges. But if you'r locking
>>>>> and unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has
>>> invalidate_start callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to
>>> update CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the
> re-validation of userptr BOs (currently calling get_user_pages) force
> synchronization with the ongoing page table invalidation through the
> mmap_sem or other MM locks?

No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 12377 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                     ` <DM5PR12MB1707D5E46617B2936F800F1992140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-09-27  9:27                                       ` Christian König
       [not found]                                         ` <d752c19b-6d2e-c7c1-1cd7-651e25b8f708-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2018-09-27  9:27 UTC (permalink / raw)
  To: Kuehling, Felix
  Cc: Jerome Glisse, Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 8861 bytes --]

That is correct, but take a look what we do when after calling the 
amdgpu_mn_read_lock():

>         /* No memory allocation is allowed while holding the mn lock */
>         amdgpu_mn_lock(p->mn);
>         amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>                 struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>
>                 if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
>                         r = -ERESTARTSYS;
>                         goto error_abort;
>                 }
>         }

We double check that there wasn't any page table modification while we 
prepared the submission and restart the whole process when there 
actually was some update.

The reason why we need to do this is here:
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>         amdgpu_mn_unlock(p->mn);

Only after the new fence is added to the buffer object we can release 
the lock so that any invalidation will now block on our command 
submission to finish before it modifies the page table.

The only other option would be to add the fence first and then check if 
there was any update to the page tables.

The issue with that approach is that adding a fence can't be made 
undone, so if we find that there actually was an update to the page 
tables we would need to somehow turn the CS into a dummy (e.g. overwrite 
all IBs with NOPs or something like that) and still submit it.

Not sure if that is actually possible.

Regards,
Christian.

Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
>
> So back to my previous question:
>
> >> But do we really need another lock for this? Wouldn't the
>
> >> re-validation of userptr BOs (currently calling get_user_pages) force
>
> >> synchronization with the ongoing page table invalidation through the
>
> >> mmap_sem or other MM locks?
>
> >
>
> > No and yes. We don't hold any other locks while doing command 
> submission, but I expect that HMM has its own mechanism to prevent that.
>
> >
>
> > Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are 
> certainly not using this mechanism correctly.
>
> The existing amdgpu_mn_lock/unlock should block the MMU notifier while 
> a command submission is in progress. It should also block command 
> submission while an MMU notifier is in progress.
>
> What we lose with HMM is the ability to hold a read-lock for the 
> entire duration of the invalidate_range_start until 
> invalidate_range_end. As I understand it, that lock is meant to 
> prevent new command submissions while the page tables are being 
> updated by the kernel. But my point is, that get_user_pages or 
> hmm_vma_fault should do the same kind of thing. Before the command 
> submission can go ahead, it needs to update the userptr addresses. If 
> the page tables are still being updated, it will block there even 
> without holding the amdgpu_mn_read_lock.
>
> Regards,
>
>   Felix
>
> *From:* Koenig, Christian
> *Sent:* Thursday, September 27, 2018 3:00 AM
> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; 
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace 
> mmu notifier v4
>
> No, that won't work. We would still run into lock inversion problems.
>
> What we could do with the scheduler is to turn submissions into 
> dummies if we find that the page tables are now outdated.
>
> But that would be really hacky and I'm not sure if that would really 
> work in all cases.
>
> Christian.
>
> Am 27.09.2018 08:53 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>
> I had a chat with Jerome yesterday. He pointed out that the new 
> blockable parameter can be used to infer whether the MMU notifier is 
> being called  in a reclaim operation. So if blockable==true, it should 
> even be safe to take the BO reservation lock without problems. I think 
> with that we should be able to remove the read-write locking 
> completely and go back to locking (or try-locking for 
> blockable==false) the reservation locks in the MMU notifier?
>
> Regards,
>   Felix
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
> <mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>> On Behalf Of Christian 
> König
> Sent: Saturday, September 15, 2018 3:47 AM
> To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org 
> <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
> <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse 
> <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
> Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace 
> mmu notifier v4
>
> Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> > On 2018-09-14 01:52 PM, Christian König wrote:
> >> Am 14.09.2018 um 19:47 schrieb Philip Yang:
> >>> On 2018-09-14 03:51 AM, Christian König wrote:
> >>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
> >>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
> >>>>> [SNIP]
> >>>>>>    +    amdgpu_mn_read_unlock(amn);
> >>>>>> +
> >>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
> >>>>> overlapping or nested invalidation ranges. But if you'r locking
> >>>>> and unlocking in the same function. Is that still a concern?
> >>> I don't understand the possible recursive case, but
> >>> amdgpu_mn_read_lock() still support recursive locking.
> >>>> Well the real problem is that unlocking them here won't work.
> >>>>
> >>>> We need to hold the lock until we are sure that the operation which
> >>>> updates the page tables is completed.
> >>>>
> >>> The reason for this change is because hmm mirror has
> >>> invalidate_start callback, no invalidate_end callback
> >>>
> >>> Check mmu_notifier.c and hmm.c again, below is entire logic to
> >>> update CPU page tables and callback:
> >>>
> >>> mn lock amn->lock is used to protect interval tree access because
> >>> user may submit/register new userptr anytime.
> >>> This is same for old and new way.
> >>>
> >>> step 2 guarantee the GPU operation is done before updating CPU page
> >>> table.
> >>>
> >>> So I think the change is safe. We don't need hold mn lock until the
> >>> CPU page tables update is completed.
> >> No, that isn't even remotely correct. The lock doesn't protects the
> >> interval tree.
> >>
> >>> Old:
> >>>     1. down_read_non_owner(&amn->lock)
> >>>     2. loop to handle BOs from node->bos through interval tree
> >>> amn->object nodes
> >>>         gfx: wait for pending BOs fence operation done, mark user
> >>> pages dirty
> >>>         kfd: evict user queues of the process, wait for queue
> >>> unmap/map operation done
> >>>     3. update CPU page tables
> >>>     4. up_read(&amn->lock)
> >>>
> >>> New, switch step 3 and 4
> >>>     1. down_read_non_owner(&amn->lock)
> >>>     2. loop to handle BOs from node->bos through interval tree
> >>> amn->object nodes
> >>>         gfx: wait for pending BOs fence operation done, mark user
> >>> pages dirty
> >>>         kfd: evict user queues of the process, wait for queue
> >>> unmap/map operation done
> >>>     3. up_read(&amn->lock)
> >>>     4. update CPU page tables
> >> The lock is there to make sure that we serialize page table updates
> >> with command submission.
> > As I understand it, the idea is to prevent command submission (adding
> > new fences to BOs) while a page table invalidation is in progress.
>
> Yes, exactly.
>
> > But do we really need another lock for this? Wouldn't the
> > re-validation of userptr BOs (currently calling get_user_pages) force
> > synchronization with the ongoing page table invalidation through the
> > mmap_sem or other MM locks?
>
> No and yes. We don't hold any other locks while doing command 
> submission, but I expect that HMM has its own mechanism to prevent that.
>
> Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are 
> certainly not using this mechanism correctly.
>
> Regards,
> Christian.
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>


[-- Attachment #1.2: Type: text/html, Size: 17500 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                         ` <d752c19b-6d2e-c7c1-1cd7-651e25b8f708-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-27 11:08                                           ` Kuehling, Felix
       [not found]                                             ` <DM5PR12MB17077A78E0F95BFBA57F2E1A92140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Kuehling, Felix @ 2018-09-27 11:08 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: Jerome Glisse, Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 9183 bytes --]

> We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.
>
> The reason why we need to do this is here:
>
>        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>        amdgpu_mn_unlock(p->mn);
>
> Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

I don’t see why this requires holding the read-lock until invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets called while the mn read-lock is held in invalidate_range_start notifier.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 5:27 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com>
Cc: Yang, Philip <Philip.Yang@amd.com>; amd-gfx@lists.freedesktop.org; Jerome Glisse <j.glisse@gmail.com>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

That is correct, but take a look what we do when after calling the amdgpu_mn_read_lock():


        /* No memory allocation is allowed while holding the mn lock */
        amdgpu_mn_lock(p->mn);
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
                        r = -ERESTARTSYS;
                        goto error_abort;
                }
        }

We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.

The reason why we need to do this is here:

        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
        amdgpu_mn_unlock(p->mn);

Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

The only other option would be to add the fence first and then check if there was any update to the page tables.

The issue with that approach is that adding a fence can't be made undone, so if we find that there actually was an update to the page tables we would need to somehow turn the CS into a dummy (e.g. overwrite all IBs with NOPs or something like that) and still submit it.

Not sure if that is actually possible.

Regards,
Christian.

Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
So back to my previous question:



>> But do we really need another lock for this? Wouldn't the

>> re-validation of userptr BOs (currently calling get_user_pages) force

>> synchronization with the ongoing page table invalidation through the

>> mmap_sem or other MM locks?

>

> No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

>

> Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

The existing amdgpu_mn_lock/unlock should block the MMU notifier while a command submission is in progress. It should also block command submission while an MMU notifier is in progress.

What we lose with HMM is the ability to hold a read-lock for the entire duration of the invalidate_range_start until invalidate_range_end. As I understand it, that lock is meant to prevent new command submissions while the page tables are being updated by the kernel. But my point is, that get_user_pages or hmm_vma_fault should do the same kind of thing. Before the command submission can go ahead, it needs to update the userptr addresses. If the page tables are still being updated, it will block there even without holding the amdgpu_mn_read_lock.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 3:00 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
Cc: Yang, Philip <Philip.Yang@amd.com><mailto:Philip.Yang@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Jerome Glisse <j.glisse@gmail.com><mailto:j.glisse@gmail.com>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

No, that won't work. We would still run into lock inversion problems.

What we could do with the scheduler is to turn submissions into dummies if we find that the page tables are now outdated.

But that would be really hacky and I'm not sure if that would really work in all cases.

Christian.

Am 27.09.2018 08:53 schrieb "Kuehling, Felix" <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>:
I had a chat with Jerome yesterday. He pointed out that the new blockable parameter can be used to infer whether the MMU notifier is being called  in a reclaim operation. So if blockable==true, it should even be safe to take the BO reservation lock without problems. I think with that we should be able to remove the read-write locking completely and go back to locking (or try-locking for blockable==false) the reservation locks in the MMU notifier?

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org<mailto:amd-gfx-bounces@lists.freedesktop.org>> On Behalf Of Christian König
Sent: Saturday, September 15, 2018 3:47 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>; Yang, Philip <Philip.Yang@amd.com<mailto:Philip.Yang@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Jerome Glisse <j.glisse@gmail.com<mailto:j.glisse@gmail.com>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>>> overlapping or nested invalidation ranges. But if you'r locking
>>>>> and unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has
>>> invalidate_start callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to
>>> update CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the
> re-validation of userptr BOs (currently calling get_user_pages) force
> synchronization with the ongoing page table invalidation through the
> mmap_sem or other MM locks?

No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[-- Attachment #1.2: Type: text/html, Size: 17441 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                             ` <DM5PR12MB17077A78E0F95BFBA57F2E1A92140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-09-27 11:23                                               ` Christian König
       [not found]                                                 ` <425fe859-c780-48a5-a2c6-c3bf2b9abb38-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2018-09-27 11:23 UTC (permalink / raw)
  To: Kuehling, Felix
  Cc: Jerome Glisse, Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 11583 bytes --]

Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
>
> > We double check that there wasn't any page table modification while 
> we prepared the submission and restart the whole process when there 
> actually was some update.
> >
> > The reason why we need to do this is here:
> >
>
> > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> >        amdgpu_mn_unlock(p->mn);
>
> >
> > Only after the new fence is added to the buffer object we can 
> release the lock so that any invalidation will now block on our 
> command submission to finish before it modifies the page table.
>
> I don’t see why this requires holding the read-lock until 
> invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets called while 
> the mn read-lock is held in invalidate_range_start notifier.
>

That's not related to amdgpu_ttm_tt_affect_userptr(), this function 
could actually be called outside the lock.

The problem is here:
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> amdgpu_mn_unlock(p->mn);

We need to hold the lock until the fence is added to the reservation object.

Otherwise somebody could have changed the page tables just in the moment 
between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the 
fence to the reservation object.

Regards,
Christian.


> Regards,
>
>   Felix
>
> *From:*Koenig, Christian
> *Sent:* Thursday, September 27, 2018 5:27 AM
> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; 
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace 
> mmu notifier v4
>
> That is correct, but take a look what we do when after calling the 
> amdgpu_mn_read_lock():
>
>
>             /* No memory allocation is allowed while holding the mn
>     lock */
>             amdgpu_mn_lock(p->mn);
>             amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>                     struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>
>                     if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
>                             r = -ERESTARTSYS;
>                             goto error_abort;
>                     }
>             }
>
>
> We double check that there wasn't any page table modification while we 
> prepared the submission and restart the whole process when there 
> actually was some update.
>
> The reason why we need to do this is here:
>
>     ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>             amdgpu_mn_unlock(p->mn);
>
>
> Only after the new fence is added to the buffer object we can release 
> the lock so that any invalidation will now block on our command 
> submission to finish before it modifies the page table.
>
> The only other option would be to add the fence first and then check 
> if there was any update to the page tables.
>
> The issue with that approach is that adding a fence can't be made 
> undone, so if we find that there actually was an update to the page 
> tables we would need to somehow turn the CS into a dummy (e.g. 
> overwrite all IBs with NOPs or something like that) and still submit it.
>
> Not sure if that is actually possible.
>
> Regards,
> Christian.
>
> Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
>
>     So back to my previous question:
>
>     >> But do we really need another lock for this? Wouldn't the
>
>     >> re-validation of userptr BOs (currently calling get_user_pages)
>     force
>
>     >> synchronization with the ongoing page table invalidation
>     through the
>
>     >> mmap_sem or other MM locks?
>
>     >
>
>     > No and yes. We don't hold any other locks while doing command
>     submission, but I expect that HMM has its own mechanism to prevent
>     that.
>
>     >
>
>     > Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are
>     certainly not using this mechanism correctly.
>
>     The existing amdgpu_mn_lock/unlock should block the MMU notifier
>     while a command submission is in progress. It should also block
>     command submission while an MMU notifier is in progress.
>
>     What we lose with HMM is the ability to hold a read-lock for the
>     entire duration of the invalidate_range_start until
>     invalidate_range_end. As I understand it, that lock is meant to
>     prevent new command submissions while the page tables are being
>     updated by the kernel. But my point is, that get_user_pages or
>     hmm_vma_fault should do the same kind of thing. Before the command
>     submission can go ahead, it needs to update the userptr addresses.
>     If the page tables are still being updated, it will block there
>     even without holding the amdgpu_mn_read_lock.
>
>     Regards,
>
>       Felix
>
>     *From:* Koenig, Christian
>     *Sent:* Thursday, September 27, 2018 3:00 AM
>     *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>     <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>     *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>     <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>     <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>     <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>     *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to
>     replace mmu notifier v4
>
>     No, that won't work. We would still run into lock inversion problems.
>
>     What we could do with the scheduler is to turn submissions into
>     dummies if we find that the page tables are now outdated.
>
>     But that would be really hacky and I'm not sure if that would
>     really work in all cases.
>
>     Christian.
>
>     Am 27.09.2018 08:53 schrieb "Kuehling, Felix"
>     <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>
>     I had a chat with Jerome yesterday. He pointed out that the new
>     blockable parameter can be used to infer whether the MMU notifier
>     is being called  in a reclaim operation. So if blockable==true, it
>     should even be safe to take the BO reservation lock without
>     problems. I think with that we should be able to remove the
>     read-write locking completely and go back to locking (or
>     try-locking for blockable==false) the reservation locks in the MMU
>     notifier?
>
>     Regards,
>       Felix
>
>     -----Original Message-----
>     From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>     <mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>> On Behalf Of
>     Christian König
>     Sent: Saturday, September 15, 2018 3:47 AM
>     To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org
>     <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip
>     <Philip.Yang-5C7GfCeVMHo@public.gmane.org <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>;
>     amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>     <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>     <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>     Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to
>     replace mmu notifier v4
>
>     Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
>     > On 2018-09-14 01:52 PM, Christian König wrote:
>     >> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>     >>> On 2018-09-14 03:51 AM, Christian König wrote:
>     >>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>     >>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>     >>>>> [SNIP]
>     >>>>>>    + amdgpu_mn_read_unlock(amn);
>     >>>>>> +
>     >>>>> amdgpu_mn_read_lock/unlock support recursive locking for
>     multiple
>     >>>>> overlapping or nested invalidation ranges. But if you'r locking
>     >>>>> and unlocking in the same function. Is that still a concern?
>     >>> I don't understand the possible recursive case, but
>     >>> amdgpu_mn_read_lock() still support recursive locking.
>     >>>> Well the real problem is that unlocking them here won't work.
>     >>>>
>     >>>> We need to hold the lock until we are sure that the operation
>     which
>     >>>> updates the page tables is completed.
>     >>>>
>     >>> The reason for this change is because hmm mirror has
>     >>> invalidate_start callback, no invalidate_end callback
>     >>>
>     >>> Check mmu_notifier.c and hmm.c again, below is entire logic to
>     >>> update CPU page tables and callback:
>     >>>
>     >>> mn lock amn->lock is used to protect interval tree access because
>     >>> user may submit/register new userptr anytime.
>     >>> This is same for old and new way.
>     >>>
>     >>> step 2 guarantee the GPU operation is done before updating CPU
>     page
>     >>> table.
>     >>>
>     >>> So I think the change is safe. We don't need hold mn lock
>     until the
>     >>> CPU page tables update is completed.
>     >> No, that isn't even remotely correct. The lock doesn't protects
>     the
>     >> interval tree.
>     >>
>     >>> Old:
>     >>>     1. down_read_non_owner(&amn->lock)
>     >>>     2. loop to handle BOs from node->bos through interval tree
>     >>> amn->object nodes
>     >>>         gfx: wait for pending BOs fence operation done, mark user
>     >>> pages dirty
>     >>>         kfd: evict user queues of the process, wait for queue
>     >>> unmap/map operation done
>     >>>     3. update CPU page tables
>     >>>     4. up_read(&amn->lock)
>     >>>
>     >>> New, switch step 3 and 4
>     >>>     1. down_read_non_owner(&amn->lock)
>     >>>     2. loop to handle BOs from node->bos through interval tree
>     >>> amn->object nodes
>     >>>         gfx: wait for pending BOs fence operation done, mark user
>     >>> pages dirty
>     >>>         kfd: evict user queues of the process, wait for queue
>     >>> unmap/map operation done
>     >>>     3. up_read(&amn->lock)
>     >>>     4. update CPU page tables
>     >> The lock is there to make sure that we serialize page table
>     updates
>     >> with command submission.
>     > As I understand it, the idea is to prevent command submission
>     (adding
>     > new fences to BOs) while a page table invalidation is in progress.
>
>     Yes, exactly.
>
>     > But do we really need another lock for this? Wouldn't the
>     > re-validation of userptr BOs (currently calling get_user_pages)
>     force
>     > synchronization with the ongoing page table invalidation through
>     the
>     > mmap_sem or other MM locks?
>
>     No and yes. We don't hold any other locks while doing command
>     submission, but I expect that HMM has its own mechanism to prevent
>     that.
>
>     Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are
>     certainly not using this mechanism correctly.
>
>     Regards,
>     Christian.
>     _______________________________________________
>     amd-gfx mailing list
>     amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
>     https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>


[-- Attachment #1.2: Type: text/html, Size: 23316 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                 ` <425fe859-c780-48a5-a2c6-c3bf2b9abb38-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-27 13:18                                                   ` Kuehling, Felix
       [not found]                                                     ` <DM5PR12MB17072AE77EB0DE3AD22B8D7892140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Kuehling, Felix @ 2018-09-27 13:18 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: Jerome Glisse, Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 10697 bytes --]

> The problem is here:
>
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> amdgpu_mn_unlock(p->mn);
>
> We need to hold the lock until the fence is added to the reservation object.
>
> Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.

I’m not planning to change that. I don’t think there is any need to change it.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 7:24 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com>
Cc: Yang, Philip <Philip.Yang@amd.com>; amd-gfx@lists.freedesktop.org; Jerome Glisse <j.glisse@gmail.com>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
> We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.
>
> The reason why we need to do this is here:
>
>        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>        amdgpu_mn_unlock(p->mn);
>
> Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.


I don’t see why this requires holding the read-lock until invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets called while the mn read-lock is held in invalidate_range_start notifier.

That's not related to amdgpu_ttm_tt_affect_userptr(), this function could actually be called outside the lock.

The problem is here:

ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);

We need to hold the lock until the fence is added to the reservation object.

Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.

Regards,
Christian.




Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 5:27 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
Cc: Yang, Philip <Philip.Yang@amd.com><mailto:Philip.Yang@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Jerome Glisse <j.glisse@gmail.com><mailto:j.glisse@gmail.com>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

That is correct, but take a look what we do when after calling the amdgpu_mn_read_lock():



        /* No memory allocation is allowed while holding the mn lock */
        amdgpu_mn_lock(p->mn);
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
                        r = -ERESTARTSYS;
                        goto error_abort;
                }
        }

We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.

The reason why we need to do this is here:


        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
        amdgpu_mn_unlock(p->mn);

Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

The only other option would be to add the fence first and then check if there was any update to the page tables.

The issue with that approach is that adding a fence can't be made undone, so if we find that there actually was an update to the page tables we would need to somehow turn the CS into a dummy (e.g. overwrite all IBs with NOPs or something like that) and still submit it.

Not sure if that is actually possible.

Regards,
Christian.

Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
So back to my previous question:



>> But do we really need another lock for this? Wouldn't the

>> re-validation of userptr BOs (currently calling get_user_pages) force

>> synchronization with the ongoing page table invalidation through the

>> mmap_sem or other MM locks?

>

> No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

>

> Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

The existing amdgpu_mn_lock/unlock should block the MMU notifier while a command submission is in progress. It should also block command submission while an MMU notifier is in progress.

What we lose with HMM is the ability to hold a read-lock for the entire duration of the invalidate_range_start until invalidate_range_end. As I understand it, that lock is meant to prevent new command submissions while the page tables are being updated by the kernel. But my point is, that get_user_pages or hmm_vma_fault should do the same kind of thing. Before the command submission can go ahead, it needs to update the userptr addresses. If the page tables are still being updated, it will block there even without holding the amdgpu_mn_read_lock.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 3:00 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
Cc: Yang, Philip <Philip.Yang@amd.com><mailto:Philip.Yang@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Jerome Glisse <j.glisse@gmail.com><mailto:j.glisse@gmail.com>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

No, that won't work. We would still run into lock inversion problems.

What we could do with the scheduler is to turn submissions into dummies if we find that the page tables are now outdated.

But that would be really hacky and I'm not sure if that would really work in all cases.

Christian.

Am 27.09.2018 08:53 schrieb "Kuehling, Felix" <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>:
I had a chat with Jerome yesterday. He pointed out that the new blockable parameter can be used to infer whether the MMU notifier is being called  in a reclaim operation. So if blockable==true, it should even be safe to take the BO reservation lock without problems. I think with that we should be able to remove the read-write locking completely and go back to locking (or try-locking for blockable==false) the reservation locks in the MMU notifier?

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org<mailto:amd-gfx-bounces@lists.freedesktop.org>> On Behalf Of Christian König
Sent: Saturday, September 15, 2018 3:47 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>; Yang, Philip <Philip.Yang@amd.com<mailto:Philip.Yang@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Jerome Glisse <j.glisse@gmail.com<mailto:j.glisse@gmail.com>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>>> overlapping or nested invalidation ranges. But if you'r locking
>>>>> and unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has
>>> invalidate_start callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to
>>> update CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the
> re-validation of userptr BOs (currently calling get_user_pages) force
> synchronization with the ongoing page table invalidation through the
> mmap_sem or other MM locks?

No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



[-- Attachment #1.2: Type: text/html, Size: 20677 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                     ` <DM5PR12MB17072AE77EB0DE3AD22B8D7892140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-09-27 13:21                                                       ` Christian König
       [not found]                                                         ` <9b427976-f2ff-8ba1-6ebf-588ca95aef80-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2018-09-27 13:21 UTC (permalink / raw)
  To: Kuehling, Felix
  Cc: Jerome Glisse, Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 14005 bytes --]

Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
>
> > The problem is here:
> >
>
> > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>
> > amdgpu_mn_unlock(p->mn);
>
> >
> > We need to hold the lock until the fence is added to the reservation 
> object.
> >
> > Otherwise somebody could have changed the page tables just in the 
> moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and 
> adding the fence to the reservation object.
>
> I’m not planning to change that. I don’t think there is any need to 
> change it.
>

Yeah, but when HMM doesn't provide both the start and the end hock of 
the invalidation this way won't work any more.

So we need to find a solution for this,
Christian.

> Regards,
>
>   Felix
>
> *From:*Koenig, Christian
> *Sent:* Thursday, September 27, 2018 7:24 AM
> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; 
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace 
> mmu notifier v4
>
> Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
>
>     > We double check that there wasn't any page table modification
>     while we prepared the submission and restart the whole process
>     when there actually was some update.
>     >
>     > The reason why we need to do this is here:
>     >
>
>     > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>     >        amdgpu_mn_unlock(p->mn);
>
>     >
>     > Only after the new fence is added to the buffer object we can
>     release the lock so that any invalidation will now block on our
>     command submission to finish before it modifies the page table.
>
>
>     I don’t see why this requires holding the read-lock until
>     invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets called
>     while the mn read-lock is held in invalidate_range_start notifier.
>
>
> That's not related to amdgpu_ttm_tt_affect_userptr(), this function 
> could actually be called outside the lock.
>
> The problem is here:
>
>     ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>
>     amdgpu_mn_unlock(p->mn);
>
>
> We need to hold the lock until the fence is added to the reservation 
> object.
>
> Otherwise somebody could have changed the page tables just in the 
> moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and 
> adding the fence to the reservation object.
>
> Regards,
> Christian.
>
>
>
>     Regards,
>
>       Felix
>
>     *From:*Koenig, Christian
>     *Sent:* Thursday, September 27, 2018 5:27 AM
>     *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>     <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>     *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>     <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>     <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>     <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>     *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to
>     replace mmu notifier v4
>
>     That is correct, but take a look what we do when after calling the
>     amdgpu_mn_read_lock():
>
>
>
>                 /* No memory allocation is allowed while holding the
>         mn lock */
>                 amdgpu_mn_lock(p->mn);
>                 amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>                         struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>
>                         if
>         (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
>                                 r = -ERESTARTSYS;
>                                 goto error_abort;
>                         }
>                 }
>
>
>     We double check that there wasn't any page table modification
>     while we prepared the submission and restart the whole process
>     when there actually was some update.
>
>     The reason why we need to do this is here:
>
>
>         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>                 amdgpu_mn_unlock(p->mn);
>
>
>     Only after the new fence is added to the buffer object we can
>     release the lock so that any invalidation will now block on our
>     command submission to finish before it modifies the page table.
>
>     The only other option would be to add the fence first and then
>     check if there was any update to the page tables.
>
>     The issue with that approach is that adding a fence can't be made
>     undone, so if we find that there actually was an update to the
>     page tables we would need to somehow turn the CS into a dummy
>     (e.g. overwrite all IBs with NOPs or something like that) and
>     still submit it.
>
>     Not sure if that is actually possible.
>
>     Regards,
>     Christian.
>
>     Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
>
>         So back to my previous question:
>
>         >> But do we really need another lock for this? Wouldn't the
>
>         >> re-validation of userptr BOs (currently calling
>         get_user_pages) force
>
>         >> synchronization with the ongoing page table invalidation
>         through the
>
>         >> mmap_sem or other MM locks?
>
>         >
>
>         > No and yes. We don't hold any other locks while doing
>         command submission, but I expect that HMM has its own
>         mechanism to prevent that.
>
>         >
>
>         > Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we
>         are certainly not using this mechanism correctly.
>
>         The existing amdgpu_mn_lock/unlock should block the MMU
>         notifier while a command submission is in progress. It should
>         also block command submission while an MMU notifier is in
>         progress.
>
>         What we lose with HMM is the ability to hold a read-lock for
>         the entire duration of the invalidate_range_start until
>         invalidate_range_end. As I understand it, that lock is meant
>         to prevent new command submissions while the page tables are
>         being updated by the kernel. But my point is, that
>         get_user_pages or hmm_vma_fault should do the same kind of
>         thing. Before the command submission can go ahead, it needs to
>         update the userptr addresses. If the page tables are still
>         being updated, it will block there even without holding the
>         amdgpu_mn_read_lock.
>
>         Regards,
>
>           Felix
>
>         *From:* Koenig, Christian
>         *Sent:* Thursday, September 27, 2018 3:00 AM
>         *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>         <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>         *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>         <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>         <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>         <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>         *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to
>         replace mmu notifier v4
>
>         No, that won't work. We would still run into lock inversion
>         problems.
>
>         What we could do with the scheduler is to turn submissions
>         into dummies if we find that the page tables are now outdated.
>
>         But that would be really hacky and I'm not sure if that would
>         really work in all cases.
>
>         Christian.
>
>         Am 27.09.2018 08:53 schrieb "Kuehling, Felix"
>         <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>
>         I had a chat with Jerome yesterday. He pointed out that the
>         new blockable parameter can be used to infer whether the MMU
>         notifier is being called in a reclaim operation. So if
>         blockable==true, it should even be safe to take the BO
>         reservation lock without problems. I think with that we should
>         be able to remove the read-write locking completely and go
>         back to locking (or try-locking for blockable==false) the
>         reservation locks in the MMU notifier?
>
>         Regards,
>           Felix
>
>         -----Original Message-----
>         From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>         <mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>> On Behalf Of
>         Christian König
>         Sent: Saturday, September 15, 2018 3:47 AM
>         To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org
>         <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip
>         <Philip.Yang-5C7GfCeVMHo@public.gmane.org <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>;
>         amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>         <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>         <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>         Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to
>         replace mmu notifier v4
>
>         Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
>         > On 2018-09-14 01:52 PM, Christian König wrote:
>         >> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>         >>> On 2018-09-14 03:51 AM, Christian König wrote:
>         >>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>         >>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>         >>>>> [SNIP]
>         >>>>>>    + amdgpu_mn_read_unlock(amn);
>         >>>>>> +
>         >>>>> amdgpu_mn_read_lock/unlock support recursive locking for
>         multiple
>         >>>>> overlapping or nested invalidation ranges. But if you'r
>         locking
>         >>>>> and unlocking in the same function. Is that still a concern?
>         >>> I don't understand the possible recursive case, but
>         >>> amdgpu_mn_read_lock() still support recursive locking.
>         >>>> Well the real problem is that unlocking them here won't work.
>         >>>>
>         >>>> We need to hold the lock until we are sure that the
>         operation which
>         >>>> updates the page tables is completed.
>         >>>>
>         >>> The reason for this change is because hmm mirror has
>         >>> invalidate_start callback, no invalidate_end callback
>         >>>
>         >>> Check mmu_notifier.c and hmm.c again, below is entire
>         logic to
>         >>> update CPU page tables and callback:
>         >>>
>         >>> mn lock amn->lock is used to protect interval tree access
>         because
>         >>> user may submit/register new userptr anytime.
>         >>> This is same for old and new way.
>         >>>
>         >>> step 2 guarantee the GPU operation is done before updating
>         CPU page
>         >>> table.
>         >>>
>         >>> So I think the change is safe. We don't need hold mn lock
>         until the
>         >>> CPU page tables update is completed.
>         >> No, that isn't even remotely correct. The lock doesn't
>         protects the
>         >> interval tree.
>         >>
>         >>> Old:
>         >>>     1. down_read_non_owner(&amn->lock)
>         >>>     2. loop to handle BOs from node->bos through interval tree
>         >>> amn->object nodes
>         >>>         gfx: wait for pending BOs fence operation done,
>         mark user
>         >>> pages dirty
>         >>>         kfd: evict user queues of the process, wait for queue
>         >>> unmap/map operation done
>         >>>     3. update CPU page tables
>         >>>     4. up_read(&amn->lock)
>         >>>
>         >>> New, switch step 3 and 4
>         >>>     1. down_read_non_owner(&amn->lock)
>         >>>     2. loop to handle BOs from node->bos through interval tree
>         >>> amn->object nodes
>         >>>         gfx: wait for pending BOs fence operation done,
>         mark user
>         >>> pages dirty
>         >>>         kfd: evict user queues of the process, wait for queue
>         >>> unmap/map operation done
>         >>>     3. up_read(&amn->lock)
>         >>>     4. update CPU page tables
>         >> The lock is there to make sure that we serialize page table
>         updates
>         >> with command submission.
>         > As I understand it, the idea is to prevent command
>         submission (adding
>         > new fences to BOs) while a page table invalidation is in
>         progress.
>
>         Yes, exactly.
>
>         > But do we really need another lock for this? Wouldn't the
>         > re-validation of userptr BOs (currently calling
>         get_user_pages) force
>         > synchronization with the ongoing page table invalidation
>         through the
>         > mmap_sem or other MM locks?
>
>         No and yes. We don't hold any other locks while doing command
>         submission, but I expect that HMM has its own mechanism to
>         prevent that.
>
>         Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we
>         are certainly not using this mechanism correctly.
>
>         Regards,
>         Christian.
>         _______________________________________________
>         amd-gfx mailing list
>         amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>         <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
>         https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>


[-- Attachment #1.2: Type: text/html, Size: 27992 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                         ` <9b427976-f2ff-8ba1-6ebf-588ca95aef80-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-27 13:41                                                           ` Kuehling, Felix
       [not found]                                                             ` <DM5PR12MB17072879EC907150027D6F7892140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Kuehling, Felix @ 2018-09-27 13:41 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: Jerome Glisse, Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 11837 bytes --]

> I’m not planning to change that. I don’t think there is any need to change it.
>
> Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.
>
> So we need to find a solution for this,
> Christian.

My whole argument is that you don’t need to hold the read lock until the invalidate_range_end. Just read_lock and read_unlock in the invalidate_range_start function.

Regards,
  Felix


From: Koenig, Christian
Sent: Thursday, September 27, 2018 9:22 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com>
Cc: Yang, Philip <Philip.Yang@amd.com>; amd-gfx@lists.freedesktop.org; Jerome Glisse <j.glisse@gmail.com>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
> The problem is here:
>
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> amdgpu_mn_unlock(p->mn);
>
> We need to hold the lock until the fence is added to the reservation object.
>
> Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.


I’m not planning to change that. I don’t think there is any need to change it.

Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.

So we need to find a solution for this,
Christian.



Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 7:24 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
Cc: Yang, Philip <Philip.Yang@amd.com><mailto:Philip.Yang@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Jerome Glisse <j.glisse@gmail.com><mailto:j.glisse@gmail.com>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
> We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.
>
> The reason why we need to do this is here:
>
>        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>        amdgpu_mn_unlock(p->mn);
>
> Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.



I don’t see why this requires holding the read-lock until invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets called while the mn read-lock is held in invalidate_range_start notifier.

That's not related to amdgpu_ttm_tt_affect_userptr(), this function could actually be called outside the lock.

The problem is here:


ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);

We need to hold the lock until the fence is added to the reservation object.

Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.

Regards,
Christian.





Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 5:27 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
Cc: Yang, Philip <Philip.Yang@amd.com><mailto:Philip.Yang@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Jerome Glisse <j.glisse@gmail.com><mailto:j.glisse@gmail.com>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

That is correct, but take a look what we do when after calling the amdgpu_mn_read_lock():




        /* No memory allocation is allowed while holding the mn lock */
        amdgpu_mn_lock(p->mn);
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
                        r = -ERESTARTSYS;
                        goto error_abort;
                }
        }

We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.

The reason why we need to do this is here:



        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
        amdgpu_mn_unlock(p->mn);

Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

The only other option would be to add the fence first and then check if there was any update to the page tables.

The issue with that approach is that adding a fence can't be made undone, so if we find that there actually was an update to the page tables we would need to somehow turn the CS into a dummy (e.g. overwrite all IBs with NOPs or something like that) and still submit it.

Not sure if that is actually possible.

Regards,
Christian.

Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
So back to my previous question:



>> But do we really need another lock for this? Wouldn't the

>> re-validation of userptr BOs (currently calling get_user_pages) force

>> synchronization with the ongoing page table invalidation through the

>> mmap_sem or other MM locks?

>

> No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

>

> Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

The existing amdgpu_mn_lock/unlock should block the MMU notifier while a command submission is in progress. It should also block command submission while an MMU notifier is in progress.

What we lose with HMM is the ability to hold a read-lock for the entire duration of the invalidate_range_start until invalidate_range_end. As I understand it, that lock is meant to prevent new command submissions while the page tables are being updated by the kernel. But my point is, that get_user_pages or hmm_vma_fault should do the same kind of thing. Before the command submission can go ahead, it needs to update the userptr addresses. If the page tables are still being updated, it will block there even without holding the amdgpu_mn_read_lock.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 3:00 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com><mailto:Felix.Kuehling@amd.com>
Cc: Yang, Philip <Philip.Yang@amd.com><mailto:Philip.Yang@amd.com>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Jerome Glisse <j.glisse@gmail.com><mailto:j.glisse@gmail.com>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

No, that won't work. We would still run into lock inversion problems.

What we could do with the scheduler is to turn submissions into dummies if we find that the page tables are now outdated.

But that would be really hacky and I'm not sure if that would really work in all cases.

Christian.

Am 27.09.2018 08:53 schrieb "Kuehling, Felix" <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>:
I had a chat with Jerome yesterday. He pointed out that the new blockable parameter can be used to infer whether the MMU notifier is being called  in a reclaim operation. So if blockable==true, it should even be safe to take the BO reservation lock without problems. I think with that we should be able to remove the read-write locking completely and go back to locking (or try-locking for blockable==false) the reservation locks in the MMU notifier?

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org<mailto:amd-gfx-bounces@lists.freedesktop.org>> On Behalf Of Christian König
Sent: Saturday, September 15, 2018 3:47 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com<mailto:Felix.Kuehling@amd.com>>; Yang, Philip <Philip.Yang@amd.com<mailto:Philip.Yang@amd.com>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>; Jerome Glisse <j.glisse@gmail.com<mailto:j.glisse@gmail.com>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>>> overlapping or nested invalidation ranges. But if you'r locking
>>>>> and unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has
>>> invalidate_start callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to
>>> update CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the
> re-validation of userptr BOs (currently calling get_user_pages) force
> synchronization with the ongoing page table invalidation through the
> mmap_sem or other MM locks?

No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[-- Attachment #1.2: Type: text/html, Size: 23069 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                             ` <DM5PR12MB17072879EC907150027D6F7892140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-09-27 13:58                                                               ` Koenig, Christian
       [not found]                                                                 ` <58199419-e20f-4ab0-ac1d-a7eb79f5c6f7-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Koenig, Christian @ 2018-09-27 13:58 UTC (permalink / raw)
  To: Kuehling, Felix
  Cc: Jerome Glisse, Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 13003 bytes --]

Yeah I understand that, but again that won't work.

In this case you can end up accessing pages which are invalidated while get_user_pages is in process.

Christian.

Am 27.09.2018 15:41 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>:
> I’m not planning to change that. I don’t think there is any need to change it.
>
> Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.
>
> So we need to find a solution for this,
> Christian.

My whole argument is that you don’t need to hold the read lock until the invalidate_range_end. Just read_lock and read_unlock in the invalidate_range_start function.

Regards,
  Felix


From: Koenig, Christian
Sent: Thursday, September 27, 2018 9:22 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
> The problem is here:
>
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> amdgpu_mn_unlock(p->mn);
>
> We need to hold the lock until the fence is added to the reservation object.
>
> Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.


I’m not planning to change that. I don’t think there is any need to change it.

Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.

So we need to find a solution for this,
Christian.



Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 7:24 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
> We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.
>
> The reason why we need to do this is here:
>
>        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>        amdgpu_mn_unlock(p->mn);
>
> Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.



I don’t see why this requires holding the read-lock until invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets called while the mn read-lock is held in invalidate_range_start notifier.

That's not related to amdgpu_ttm_tt_affect_userptr(), this function could actually be called outside the lock.

The problem is here:


ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);

We need to hold the lock until the fence is added to the reservation object.

Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.

Regards,
Christian.





Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 5:27 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

That is correct, but take a look what we do when after calling the amdgpu_mn_read_lock():




        /* No memory allocation is allowed while holding the mn lock */
        amdgpu_mn_lock(p->mn);
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
                        r = -ERESTARTSYS;
                        goto error_abort;
                }
        }

We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.

The reason why we need to do this is here:



        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
        amdgpu_mn_unlock(p->mn);

Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

The only other option would be to add the fence first and then check if there was any update to the page tables.

The issue with that approach is that adding a fence can't be made undone, so if we find that there actually was an update to the page tables we would need to somehow turn the CS into a dummy (e.g. overwrite all IBs with NOPs or something like that) and still submit it.

Not sure if that is actually possible.

Regards,
Christian.

Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
So back to my previous question:



>> But do we really need another lock for this? Wouldn't the

>> re-validation of userptr BOs (currently calling get_user_pages) force

>> synchronization with the ongoing page table invalidation through the

>> mmap_sem or other MM locks?

>

> No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

>

> Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

The existing amdgpu_mn_lock/unlock should block the MMU notifier while a command submission is in progress. It should also block command submission while an MMU notifier is in progress.

What we lose with HMM is the ability to hold a read-lock for the entire duration of the invalidate_range_start until invalidate_range_end. As I understand it, that lock is meant to prevent new command submissions while the page tables are being updated by the kernel. But my point is, that get_user_pages or hmm_vma_fault should do the same kind of thing. Before the command submission can go ahead, it needs to update the userptr addresses. If the page tables are still being updated, it will block there even without holding the amdgpu_mn_read_lock.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 3:00 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

No, that won't work. We would still run into lock inversion problems.

What we could do with the scheduler is to turn submissions into dummies if we find that the page tables are now outdated.

But that would be really hacky and I'm not sure if that would really work in all cases.

Christian.

Am 27.09.2018 08:53 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
I had a chat with Jerome yesterday. He pointed out that the new blockable parameter can be used to infer whether the MMU notifier is being called  in a reclaim operation. So if blockable==true, it should even be safe to take the BO reservation lock without problems. I think with that we should be able to remove the read-write locking completely and go back to locking (or try-locking for blockable==false) the reservation locks in the MMU notifier?

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-bounces@lists.freedesktop.org>> On Behalf Of Christian König
Sent: Saturday, September 15, 2018 3:47 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>>> overlapping or nested invalidation ranges. But if you'r locking
>>>>> and unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has
>>> invalidate_start callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to
>>> update CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the
> re-validation of userptr BOs (currently calling get_user_pages) force
> synchronization with the ongoing page table invalidation through the
> mmap_sem or other MM locks?

No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[-- Attachment #1.2: Type: text/html, Size: 22652 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                                 ` <58199419-e20f-4ab0-ac1d-a7eb79f5c6f7-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
@ 2018-09-27 14:27                                                                   ` Kuehling, Felix
       [not found]                                                                     ` <DM5PR12MB1707273AC4B0C03A3BEDF73092140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Kuehling, Felix @ 2018-09-27 14:27 UTC (permalink / raw)
  To: Koenig, Christian, Jerome Glisse
  Cc: Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 14484 bytes --]

> In this case you can end up accessing pages which are invalidated while get_user_pages is in process.

What's the sequence of events you have in mind? Something like this?


  *   Page table is updated and triggers MMU notifier
  *   amdgpu MMU notifier runs and waits for pending CS to finish while holding the read lock
  *   New CS starts just after invalidate_range_start MMU notifier finishes but before the page table update is done
  *   get_user_pages returns outdated physical addresses

I hope that's not actually possible and that get_user_pages or hmm_vma_fault would block until the page table update is done. That is, invalidate_range_start marks the start of a page table update, and while that update is in progress, get_user_pages or hmm_vma_fault block. Jerome, can you comment on that?

Thanks,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 9:59 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Yeah I understand that, but again that won't work.

In this case you can end up accessing pages which are invalidated while get_user_pages is in process.

Christian.

Am 27.09.2018 15:41 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
> I'm not planning to change that. I don't think there is any need to change it.
>
> Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.
>
> So we need to find a solution for this,
> Christian.

My whole argument is that you don't need to hold the read lock until the invalidate_range_end. Just read_lock and read_unlock in the invalidate_range_start function.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 9:22 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
> The problem is here:
>
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> amdgpu_mn_unlock(p->mn);
>
> We need to hold the lock until the fence is added to the reservation object.
>
> Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.

I'm not planning to change that. I don't think there is any need to change it.

Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.

So we need to find a solution for this,
Christian.


Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 7:24 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
> We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.
>
> The reason why we need to do this is here:
>
>        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>        amdgpu_mn_unlock(p->mn);
>
> Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.


I don't see why this requires holding the read-lock until invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets called while the mn read-lock is held in invalidate_range_start notifier.

That's not related to amdgpu_ttm_tt_affect_userptr(), this function could actually be called outside the lock.

The problem is here:

ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);

We need to hold the lock until the fence is added to the reservation object.

Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.

Regards,
Christian.




Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 5:27 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

That is correct, but take a look what we do when after calling the amdgpu_mn_read_lock():



        /* No memory allocation is allowed while holding the mn lock */
        amdgpu_mn_lock(p->mn);
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
                        r = -ERESTARTSYS;
                        goto error_abort;
                }
        }

We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.

The reason why we need to do this is here:


        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
        amdgpu_mn_unlock(p->mn);

Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

The only other option would be to add the fence first and then check if there was any update to the page tables.

The issue with that approach is that adding a fence can't be made undone, so if we find that there actually was an update to the page tables we would need to somehow turn the CS into a dummy (e.g. overwrite all IBs with NOPs or something like that) and still submit it.

Not sure if that is actually possible.

Regards,
Christian.

Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
So back to my previous question:



>> But do we really need another lock for this? Wouldn't the

>> re-validation of userptr BOs (currently calling get_user_pages) force

>> synchronization with the ongoing page table invalidation through the

>> mmap_sem or other MM locks?

>

> No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

>

> Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

The existing amdgpu_mn_lock/unlock should block the MMU notifier while a command submission is in progress. It should also block command submission while an MMU notifier is in progress.

What we lose with HMM is the ability to hold a read-lock for the entire duration of the invalidate_range_start until invalidate_range_end. As I understand it, that lock is meant to prevent new command submissions while the page tables are being updated by the kernel. But my point is, that get_user_pages or hmm_vma_fault should do the same kind of thing. Before the command submission can go ahead, it needs to update the userptr addresses. If the page tables are still being updated, it will block there even without holding the amdgpu_mn_read_lock.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 3:00 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

No, that won't work. We would still run into lock inversion problems.

What we could do with the scheduler is to turn submissions into dummies if we find that the page tables are now outdated.

But that would be really hacky and I'm not sure if that would really work in all cases.

Christian.

Am 27.09.2018 08:53 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
I had a chat with Jerome yesterday. He pointed out that the new blockable parameter can be used to infer whether the MMU notifier is being called  in a reclaim operation. So if blockable==true, it should even be safe to take the BO reservation lock without problems. I think with that we should be able to remove the read-write locking completely and go back to locking (or try-locking for blockable==false) the reservation locks in the MMU notifier?

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-bounces@lists.freedesktop.org>> On Behalf Of Christian König
Sent: Saturday, September 15, 2018 3:47 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>>> overlapping or nested invalidation ranges. But if you'r locking
>>>>> and unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has
>>> invalidate_start callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to
>>> update CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the
> re-validation of userptr BOs (currently calling get_user_pages) force
> synchronization with the ongoing page table invalidation through the
> mmap_sem or other MM locks?

No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[-- Attachment #1.2: Type: text/html, Size: 31136 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                                     ` <DM5PR12MB1707273AC4B0C03A3BEDF73092140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-09-27 14:29                                                                       ` Koenig, Christian
       [not found]                                                                         ` <1068c389-56fc-4125-ac40-b1ef2d60eabd-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Koenig, Christian @ 2018-09-27 14:29 UTC (permalink / raw)
  To: Kuehling, Felix
  Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w, Yang, Philip,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 14709 bytes --]

At least with get_user_pages() that is perfectly possible.

For HMM it could be that this is prevented somehow.

Christian.

Am 27.09.2018 16:27 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>:
> In this case you can end up accessing pages which are invalidated while get_user_pages is in process.

What’s the sequence of events you have in mind? Something like this?


  *   Page table is updated and triggers MMU notifier
  *   amdgpu MMU notifier runs and waits for pending CS to finish while holding the read lock
  *   New CS starts just after invalidate_range_start MMU notifier finishes but before the page table update is done
  *   get_user_pages returns outdated physical addresses

I hope that’s not actually possible and that get_user_pages or hmm_vma_fault would block until the page table update is done. That is, invalidate_range_start marks the start of a page table update, and while that update is in progress, get_user_pages or hmm_vma_fault block. Jerome, can you comment on that?

Thanks,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 9:59 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Yeah I understand that, but again that won't work.

In this case you can end up accessing pages which are invalidated while get_user_pages is in process.

Christian.

Am 27.09.2018 15:41 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
> I’m not planning to change that. I don’t think there is any need to change it.
>
> Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.
>
> So we need to find a solution for this,
> Christian.

My whole argument is that you don’t need to hold the read lock until the invalidate_range_end. Just read_lock and read_unlock in the invalidate_range_start function.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 9:22 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
> The problem is here:
>
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> amdgpu_mn_unlock(p->mn);
>
> We need to hold the lock until the fence is added to the reservation object.
>
> Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.

I’m not planning to change that. I don’t think there is any need to change it.

Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.

So we need to find a solution for this,
Christian.


Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 7:24 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
> We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.
>
> The reason why we need to do this is here:
>
>        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>        amdgpu_mn_unlock(p->mn);
>
> Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.


I don’t see why this requires holding the read-lock until invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets called while the mn read-lock is held in invalidate_range_start notifier.

That's not related to amdgpu_ttm_tt_affect_userptr(), this function could actually be called outside the lock.

The problem is here:

ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);

We need to hold the lock until the fence is added to the reservation object.

Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.

Regards,
Christian.




Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 5:27 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

That is correct, but take a look what we do when after calling the amdgpu_mn_read_lock():



        /* No memory allocation is allowed while holding the mn lock */
        amdgpu_mn_lock(p->mn);
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
                        r = -ERESTARTSYS;
                        goto error_abort;
                }
        }

We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.

The reason why we need to do this is here:


        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
        amdgpu_mn_unlock(p->mn);

Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

The only other option would be to add the fence first and then check if there was any update to the page tables.

The issue with that approach is that adding a fence can't be made undone, so if we find that there actually was an update to the page tables we would need to somehow turn the CS into a dummy (e.g. overwrite all IBs with NOPs or something like that) and still submit it.

Not sure if that is actually possible.

Regards,
Christian.

Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
So back to my previous question:



>> But do we really need another lock for this? Wouldn't the

>> re-validation of userptr BOs (currently calling get_user_pages) force

>> synchronization with the ongoing page table invalidation through the

>> mmap_sem or other MM locks?

>

> No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

>

> Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

The existing amdgpu_mn_lock/unlock should block the MMU notifier while a command submission is in progress. It should also block command submission while an MMU notifier is in progress.

What we lose with HMM is the ability to hold a read-lock for the entire duration of the invalidate_range_start until invalidate_range_end. As I understand it, that lock is meant to prevent new command submissions while the page tables are being updated by the kernel. But my point is, that get_user_pages or hmm_vma_fault should do the same kind of thing. Before the command submission can go ahead, it needs to update the userptr addresses. If the page tables are still being updated, it will block there even without holding the amdgpu_mn_read_lock.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 3:00 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

No, that won't work. We would still run into lock inversion problems.

What we could do with the scheduler is to turn submissions into dummies if we find that the page tables are now outdated.

But that would be really hacky and I'm not sure if that would really work in all cases.

Christian.

Am 27.09.2018 08:53 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
I had a chat with Jerome yesterday. He pointed out that the new blockable parameter can be used to infer whether the MMU notifier is being called  in a reclaim operation. So if blockable==true, it should even be safe to take the BO reservation lock without problems. I think with that we should be able to remove the read-write locking completely and go back to locking (or try-locking for blockable==false) the reservation locks in the MMU notifier?

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-bounces@lists.freedesktop.org>> On Behalf Of Christian König
Sent: Saturday, September 15, 2018 3:47 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>>> overlapping or nested invalidation ranges. But if you'r locking
>>>>> and unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has
>>> invalidate_start callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to
>>> update CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the
> re-validation of userptr BOs (currently calling get_user_pages) force
> synchronization with the ongoing page table invalidation through the
> mmap_sem or other MM locks?

No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[-- Attachment #1.2: Type: text/html, Size: 26976 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                                         ` <1068c389-56fc-4125-ac40-b1ef2d60eabd-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
@ 2018-09-27 14:50                                                                           ` Kuehling, Felix
       [not found]                                                                             ` <DM5PR12MB1707BCD7BFC10EDD594FE90B92140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Kuehling, Felix @ 2018-09-27 14:50 UTC (permalink / raw)
  To: Koenig, Christian, Yang, Philip
  Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 15463 bytes --]

I think the answer is here: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/vm/hmm.rst#n216

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 10:30 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

At least with get_user_pages() that is perfectly possible.

For HMM it could be that this is prevented somehow.

Christian.

Am 27.09.2018 16:27 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
> In this case you can end up accessing pages which are invalidated while get_user_pages is in process.

What's the sequence of events you have in mind? Something like this?


  *   Page table is updated and triggers MMU notifier
  *   amdgpu MMU notifier runs and waits for pending CS to finish while holding the read lock
  *   New CS starts just after invalidate_range_start MMU notifier finishes but before the page table update is done
  *   get_user_pages returns outdated physical addresses

I hope that's not actually possible and that get_user_pages or hmm_vma_fault would block until the page table update is done. That is, invalidate_range_start marks the start of a page table update, and while that update is in progress, get_user_pages or hmm_vma_fault block. Jerome, can you comment on that?

Thanks,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 9:59 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Yeah I understand that, but again that won't work.

In this case you can end up accessing pages which are invalidated while get_user_pages is in process.

Christian.

Am 27.09.2018 15:41 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
> I'm not planning to change that. I don't think there is any need to change it.
>
> Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.
>
> So we need to find a solution for this,
> Christian.

My whole argument is that you don't need to hold the read lock until the invalidate_range_end. Just read_lock and read_unlock in the invalidate_range_start function.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 9:22 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
> The problem is here:
>
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> amdgpu_mn_unlock(p->mn);
>
> We need to hold the lock until the fence is added to the reservation object.
>
> Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.
I'm not planning to change that. I don't think there is any need to change it.

Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.

So we need to find a solution for this,
Christian.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 7:24 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
> We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.
>
> The reason why we need to do this is here:
>
>        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>        amdgpu_mn_unlock(p->mn);
>
> Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

I don't see why this requires holding the read-lock until invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets called while the mn read-lock is held in invalidate_range_start notifier.

That's not related to amdgpu_ttm_tt_affect_userptr(), this function could actually be called outside the lock.

The problem is here:
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);

We need to hold the lock until the fence is added to the reservation object.

Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.

Regards,
Christian.



Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 5:27 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

That is correct, but take a look what we do when after calling the amdgpu_mn_read_lock():


        /* No memory allocation is allowed while holding the mn lock */
        amdgpu_mn_lock(p->mn);
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
                        r = -ERESTARTSYS;
                        goto error_abort;
                }
        }

We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.

The reason why we need to do this is here:

        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
        amdgpu_mn_unlock(p->mn);

Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

The only other option would be to add the fence first and then check if there was any update to the page tables.

The issue with that approach is that adding a fence can't be made undone, so if we find that there actually was an update to the page tables we would need to somehow turn the CS into a dummy (e.g. overwrite all IBs with NOPs or something like that) and still submit it.

Not sure if that is actually possible.

Regards,
Christian.

Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
So back to my previous question:



>> But do we really need another lock for this? Wouldn't the

>> re-validation of userptr BOs (currently calling get_user_pages) force

>> synchronization with the ongoing page table invalidation through the

>> mmap_sem or other MM locks?

>

> No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

>

> Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

The existing amdgpu_mn_lock/unlock should block the MMU notifier while a command submission is in progress. It should also block command submission while an MMU notifier is in progress.

What we lose with HMM is the ability to hold a read-lock for the entire duration of the invalidate_range_start until invalidate_range_end. As I understand it, that lock is meant to prevent new command submissions while the page tables are being updated by the kernel. But my point is, that get_user_pages or hmm_vma_fault should do the same kind of thing. Before the command submission can go ahead, it needs to update the userptr addresses. If the page tables are still being updated, it will block there even without holding the amdgpu_mn_read_lock.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 3:00 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

No, that won't work. We would still run into lock inversion problems.

What we could do with the scheduler is to turn submissions into dummies if we find that the page tables are now outdated.

But that would be really hacky and I'm not sure if that would really work in all cases.

Christian.

Am 27.09.2018 08:53 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
I had a chat with Jerome yesterday. He pointed out that the new blockable parameter can be used to infer whether the MMU notifier is being called  in a reclaim operation. So if blockable==true, it should even be safe to take the BO reservation lock without problems. I think with that we should be able to remove the read-write locking completely and go back to locking (or try-locking for blockable==false) the reservation locks in the MMU notifier?

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-bounces@lists.freedesktop.org>> On Behalf Of Christian König
Sent: Saturday, September 15, 2018 3:47 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>>> overlapping or nested invalidation ranges. But if you'r locking
>>>>> and unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has
>>> invalidate_start callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to
>>> update CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the
> re-validation of userptr BOs (currently calling get_user_pages) force
> synchronization with the ongoing page table invalidation through the
> mmap_sem or other MM locks?

No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[-- Attachment #1.2: Type: text/html, Size: 34048 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                                             ` <DM5PR12MB1707BCD7BFC10EDD594FE90B92140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-09-27 15:36                                                                               ` Christian König
       [not found]                                                                                 ` <11ba3857-9bb0-648e-2806-0533090d9a0a-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2018-09-27 15:36 UTC (permalink / raw)
  To: Kuehling, Felix, Yang, Philip
  Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 19660 bytes --]

Yeah, I've read that as well.

My best guess is that we just need to add a call to hmm_vma_range_done() 
after taking the lock and also replace get_user_pages() with 
hmm_vma_get_pfns().

But I'm still not 100% sure how all of that is supposed to work together.

Regards,
Christian.

Am 27.09.2018 um 16:50 schrieb Kuehling, Felix:
>
> I think the answer is here: 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/vm/hmm.rst#n216
>
> Regards,
>
>   Felix
>
> *From:*Koenig, Christian
> *Sent:* Thursday, September 27, 2018 10:30 AM
> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
> *Cc:* j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; 
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace 
> mmu notifier v4
>
> At least with get_user_pages() that is perfectly possible.
>
> For HMM it could be that this is prevented somehow.
>
> Christian.
>
> Am 27.09.2018 16:27 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>
> > In this case you can end up accessing pages which are invalidated while 
> get_user_pages is in process.
>
> What’s the sequence of events you have in mind? Something like this?
>
>   * Page table is updated and triggers MMU notifier
>   * amdgpu MMU notifier runs and waits for pending CS to finish while
>     holding the read lock
>   * New CS starts just after invalidate_range_start MMU notifier
>     finishes but before the page table update is done
>   * get_user_pages returns outdated physical addresses
>
> I hope that’s not actually possible and that get_user_pages or 
> hmm_vma_fault would block until the page table update is done. That 
> is, invalidate_range_start marks the start of a page table update, and 
> while that update is in progress, get_user_pages or hmm_vma_fault 
> block. Jerome, can you comment on that?
>
> Thanks,
>   Felix
>
> *From:*Koenig, Christian
> *Sent:* Thursday, September 27, 2018 9:59 AM
> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; 
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; 
> Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace 
> mmu notifier v4
>
> Yeah I understand that, but again that won't work.
>
> In this case you can end up accessing pages which are invalidated 
> while get_user_pages is in process.
>
> Christian.
>
> Am 27.09.2018 15:41 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>
> > I’m not planning to change that. I don’t think there is any need to 
> change it.
>
> >
> > Yeah, but when HMM doesn't provide both the start and the end hock 
> of the invalidation this way won't work any more.
> >
> > So we need to find a solution for this,
> > Christian.
>
> My whole argument is that you don’t need to hold the read lock until 
> the invalidate_range_end. Just read_lock and read_unlock in the 
> invalidate_range_start function.
>
> Regards,
>
> Felix
>
> *From:*Koenig, Christian
> *Sent:* Thursday, September 27, 2018 9:22 AM
> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; 
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; 
> Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
> *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace 
> mmu notifier v4
>
> Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
>
>     > The problem is here:
>     >
>
>     > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>
>     > amdgpu_mn_unlock(p->mn);
>
>     >
>     > We need to hold the lock until the fence is added to the
>     reservation object.
>     >
>     > Otherwise somebody could have changed the page tables just in
>     the moment between the check of
>     amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the
>     reservation object.
>
>     I’m not planning to change that. I don’t think there is any need
>     to change it.
>
>
> Yeah, but when HMM doesn't provide both the start and the end hock of 
> the invalidation this way won't work any more.
>
> So we need to find a solution for this,
> Christian.
>
>     Regards,
>
>     Felix
>
>     *From:*Koenig, Christian
>     *Sent:* Thursday, September 27, 2018 7:24 AM
>     *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>     <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>     *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>     <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>     <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>     <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>     *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to
>     replace mmu notifier v4
>
>     Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
>
>         > We double check that there wasn't any page table
>         modification while we prepared the submission and restart the
>         whole process when there actually was some update.
>         >
>         > The reason why we need to do this is here:
>         >
>
>         > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>         p->fence);
>         >        amdgpu_mn_unlock(p->mn);
>
>         >
>         > Only after the new fence is added to the buffer object we
>         can release the lock so that any invalidation will now block
>         on our command submission to finish before it modifies the
>         page table.
>
>         I don’t see why this requires holding the read-lock until
>         invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets called
>         while the mn read-lock is held in invalidate_range_start notifier.
>
>
>     That's not related to amdgpu_ttm_tt_affect_userptr(), this
>     function could actually be called outside the lock.
>
>     The problem is here:
>
>         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>
>         amdgpu_mn_unlock(p->mn);
>
>
>     We need to hold the lock until the fence is added to the
>     reservation object.
>
>     Otherwise somebody could have changed the page tables just in the
>     moment between the check of amdgpu_ttm_tt_userptr_needs_pages()
>     and adding the fence to the reservation object.
>
>     Regards,
>     Christian.
>
>
>         Regards,
>
>         Felix
>
>         *From:*Koenig, Christian
>         *Sent:* Thursday, September 27, 2018 5:27 AM
>         *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>         <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>         *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>         <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>         <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>         <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>         *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to
>         replace mmu notifier v4
>
>         That is correct, but take a look what we do when after calling
>         the amdgpu_mn_read_lock():
>
>
>                     /* No memory allocation is allowed while holding
>             the mn lock */
>                     amdgpu_mn_lock(p->mn);
>             amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>                             struct amdgpu_bo *bo =
>             ttm_to_amdgpu_bo(e->tv.bo);
>
>                             if
>             (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
>                                     r = -ERESTARTSYS;
>                                     goto error_abort;
>                             }
>                     }
>
>
>         We double check that there wasn't any page table modification
>         while we prepared the submission and restart the whole process
>         when there actually was some update.
>
>         The reason why we need to do this is here:
>
>             ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>             p->fence);
>                     amdgpu_mn_unlock(p->mn);
>
>
>         Only after the new fence is added to the buffer object we can
>         release the lock so that any invalidation will now block on
>         our command submission to finish before it modifies the page
>         table.
>
>         The only other option would be to add the fence first and then
>         check if there was any update to the page tables.
>
>         The issue with that approach is that adding a fence can't be
>         made undone, so if we find that there actually was an update
>         to the page tables we would need to somehow turn the CS into a
>         dummy (e.g. overwrite all IBs with NOPs or something like
>         that) and still submit it.
>
>         Not sure if that is actually possible.
>
>         Regards,
>         Christian.
>
>         Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
>
>             So back to my previous question:
>
>             >> But do we really need another lock for this? Wouldn't the
>
>             >> re-validation of userptr BOs (currently calling
>             get_user_pages) force
>
>             >> synchronization with the ongoing page table
>             invalidation through the
>
>             >> mmap_sem or other MM locks?
>
>             >
>
>             > No and yes. We don't hold any other locks while doing
>             command submission, but I expect that HMM has its own
>             mechanism to prevent that.
>
>             >
>
>             > Since we don't modify
>             amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not
>             using this mechanism correctly.
>
>             The existing amdgpu_mn_lock/unlock should block the MMU
>             notifier while a command submission is in progress. It
>             should also block command submission while an MMU notifier
>             is in progress.
>
>             What we lose with HMM is the ability to hold a read-lock
>             for the entire duration of the invalidate_range_start
>             until invalidate_range_end. As I understand it, that lock
>             is meant to prevent new command submissions while the page
>             tables are being updated by the kernel. But my point is,
>             that get_user_pages or hmm_vma_fault should do the same
>             kind of thing. Before the command submission can go ahead,
>             it needs to update the userptr addresses. If the page
>             tables are still being updated, it will block there even
>             without holding the amdgpu_mn_read_lock.
>
>             Regards,
>
>               Felix
>
>             *From:* Koenig, Christian
>             *Sent:* Thursday, September 27, 2018 3:00 AM
>             *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>             <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>             *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>             <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>;
>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>             <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>             *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback
>             to replace mmu notifier v4
>
>             No, that won't work. We would still run into lock
>             inversion problems.
>
>             What we could do with the scheduler is to turn submissions
>             into dummies if we find that the page tables are now outdated.
>
>             But that would be really hacky and I'm not sure if that
>             would really work in all cases.
>
>             Christian.
>
>             Am 27.09.2018 08:53 schrieb "Kuehling, Felix"
>             <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>
>             I had a chat with Jerome yesterday. He pointed out that
>             the new blockable parameter can be used to infer whether
>             the MMU notifier is being called  in a reclaim operation.
>             So if blockable==true, it should even be safe to take the
>             BO reservation lock without problems. I think with that we
>             should be able to remove the read-write locking completely
>             and go back to locking (or try-locking for
>             blockable==false) the reservation locks in the MMU notifier?
>
>             Regards,
>               Felix
>
>             -----Original Message-----
>             From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>             <mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>> On Behalf
>             Of Christian König
>             Sent: Saturday, September 15, 2018 3:47 AM
>             To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org
>             <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip
>             <Philip.Yang-5C7GfCeVMHo@public.gmane.org <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>;
>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>             <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>             Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback
>             to replace mmu notifier v4
>
>             Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
>             > On 2018-09-14 01:52 PM, Christian König wrote:
>             >> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>             >>> On 2018-09-14 03:51 AM, Christian König wrote:
>             >>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>             >>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>             >>>>> [SNIP]
>             >>>>>>    + amdgpu_mn_read_unlock(amn);
>             >>>>>> +
>             >>>>> amdgpu_mn_read_lock/unlock support recursive locking
>             for multiple
>             >>>>> overlapping or nested invalidation ranges. But if
>             you'r locking
>             >>>>> and unlocking in the same function. Is that still a
>             concern?
>             >>> I don't understand the possible recursive case, but
>             >>> amdgpu_mn_read_lock() still support recursive locking.
>             >>>> Well the real problem is that unlocking them here
>             won't work.
>             >>>>
>             >>>> We need to hold the lock until we are sure that the
>             operation which
>             >>>> updates the page tables is completed.
>             >>>>
>             >>> The reason for this change is because hmm mirror has
>             >>> invalidate_start callback, no invalidate_end callback
>             >>>
>             >>> Check mmu_notifier.c and hmm.c again, below is entire
>             logic to
>             >>> update CPU page tables and callback:
>             >>>
>             >>> mn lock amn->lock is used to protect interval tree
>             access because
>             >>> user may submit/register new userptr anytime.
>             >>> This is same for old and new way.
>             >>>
>             >>> step 2 guarantee the GPU operation is done before
>             updating CPU page
>             >>> table.
>             >>>
>             >>> So I think the change is safe. We don't need hold mn
>             lock until the
>             >>> CPU page tables update is completed.
>             >> No, that isn't even remotely correct. The lock doesn't
>             protects the
>             >> interval tree.
>             >>
>             >>> Old:
>             >>>     1. down_read_non_owner(&amn->lock)
>             >>>     2. loop to handle BOs from node->bos through
>             interval tree
>             >>> amn->object nodes
>             >>>         gfx: wait for pending BOs fence operation
>             done, mark user
>             >>> pages dirty
>             >>>         kfd: evict user queues of the process, wait
>             for queue
>             >>> unmap/map operation done
>             >>>     3. update CPU page tables
>             >>>     4. up_read(&amn->lock)
>             >>>
>             >>> New, switch step 3 and 4
>             >>>     1. down_read_non_owner(&amn->lock)
>             >>>     2. loop to handle BOs from node->bos through
>             interval tree
>             >>> amn->object nodes
>             >>>         gfx: wait for pending BOs fence operation
>             done, mark user
>             >>> pages dirty
>             >>>         kfd: evict user queues of the process, wait
>             for queue
>             >>> unmap/map operation done
>             >>>     3. up_read(&amn->lock)
>             >>>     4. update CPU page tables
>             >> The lock is there to make sure that we serialize page
>             table updates
>             >> with command submission.
>             > As I understand it, the idea is to prevent command
>             submission (adding
>             > new fences to BOs) while a page table invalidation is in
>             progress.
>
>             Yes, exactly.
>
>             > But do we really need another lock for this? Wouldn't the
>             > re-validation of userptr BOs (currently calling
>             get_user_pages) force
>             > synchronization with the ongoing page table invalidation
>             through the
>             > mmap_sem or other MM locks?
>
>             No and yes. We don't hold any other locks while doing
>             command submission, but I expect that HMM has its own
>             mechanism to prevent that.
>
>             Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock()
>             we are certainly not using this mechanism correctly.
>
>             Regards,
>             Christian.
>             _______________________________________________
>             amd-gfx mailing list
>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
>             https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>


[-- Attachment #1.2: Type: text/html, Size: 49532 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                                                 ` <11ba3857-9bb0-648e-2806-0533090d9a0a-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-27 20:17                                                                                   ` Philip Yang
       [not found]                                                                                     ` <16d1faf6-80a4-dc46-bd2a-9cd475808e98-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Philip Yang @ 2018-09-27 20:17 UTC (permalink / raw)
  To: Christian König, Kuehling, Felix
  Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 21496 bytes --]

I was trying to understand the way how HMM handle this concurrent issue 
and how we handle it in amdgpu_ttm_tt_userptr_needs_pages() and 
amdgpu_ttm_tt_affect_userptr(). HMM uses range->valid flag, we use 
gtt->mmu_invalidations and gtt->last_set_pages. Both use the same lock 
plus flag idea actually.

Thanks for the information, now I understand fence 
ttm_eu_fence_buffer_objects() put to BOs will block CPU page table 
update. This is another side of this concurrent issue I didn't know.

I had same worry that it has issue without invalidate_range_end() 
callback as the calling sequence Felix lists. Now I think it's fine 
after taking a look again today because of mm->mmap_sem usage, this is 
my understanding:

A path:

down_write(&mm->mmap_sem);
mmu_notifier_invalidate_range_start()
     take_lock()
     release_lock()
CPU page table update
mmu_notifier_invalidate_range_end()
up_write(&mm->mmap_sem);

B path:

again:
down_read(&mm->mmap_sem);
hmm_vma_get_pfns()
up_read(&mm->mmap_sem);
....
....
take_lock()
if (!hmm_vma_range_done()) {
    release_lock()
    goto again
}
submit command job...
release_lock()

If you agree, I will submit patch v5 with some minor changes, and submit 
another patch to replace get_user_page() with HMM.

Regards,
Philip

On 2018-09-27 11:36 AM, Christian König wrote:
> Yeah, I've read that as well.
>
> My best guess is that we just need to add a call to 
> hmm_vma_range_done() after taking the lock and also replace 
> get_user_pages() with hmm_vma_get_pfns().
>
> But I'm still not 100% sure how all of that is supposed to work together.
>
> Regards,
> Christian.
>
> Am 27.09.2018 um 16:50 schrieb Kuehling, Felix:
>>
>> I think the answer is here: 
>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/vm/hmm.rst#n216
>>
>> Regards,
>>
>>   Felix
>>
>> *From:*Koenig, Christian
>> *Sent:* Thursday, September 27, 2018 10:30 AM
>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>> *Cc:* j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; 
>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace 
>> mmu notifier v4
>>
>> At least with get_user_pages() that is perfectly possible.
>>
>> For HMM it could be that this is prevented somehow.
>>
>> Christian.
>>
>> Am 27.09.2018 16:27 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
>> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>
>> > In this case you can end up accessing pages which are invalidated 
>> while get_user_pages is in process.
>>
>> What’s the sequence of events you have in mind? Something like this?
>>
>>   * Page table is updated and triggers MMU notifier
>>   * amdgpu MMU notifier runs and waits for pending CS to finish while
>>     holding the read lock
>>   * New CS starts just after invalidate_range_start MMU notifier
>>     finishes but before the page table update is done
>>   * get_user_pages returns outdated physical addresses
>>
>> I hope that’s not actually possible and that get_user_pages or 
>> hmm_vma_fault would block until the page table update is done. That 
>> is, invalidate_range_start marks the start of a page table update, 
>> and while that update is in progress, get_user_pages or hmm_vma_fault 
>> block. Jerome, can you comment on that?
>>
>> Thanks,
>>   Felix
>>
>> *From:*Koenig, Christian
>> *Sent:* Thursday, September 27, 2018 9:59 AM
>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
>> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
>> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org 
>> <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
>> <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse 
>> <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace 
>> mmu notifier v4
>>
>> Yeah I understand that, but again that won't work.
>>
>> In this case you can end up accessing pages which are invalidated 
>> while get_user_pages is in process.
>>
>> Christian.
>>
>> Am 27.09.2018 15:41 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
>> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>
>> > I’m not planning to change that. I don’t think there is any need to 
>> change it.
>>
>> >
>> > Yeah, but when HMM doesn't provide both the start and the end hock 
>> of the invalidation this way won't work any more.
>> >
>> > So we need to find a solution for this,
>> > Christian.
>>
>> My whole argument is that you don’t need to hold the read lock until 
>> the invalidate_range_end. Just read_lock and read_unlock in the 
>> invalidate_range_start function.
>>
>> Regards,
>>
>> Felix
>>
>> *From:*Koenig, Christian
>> *Sent:* Thursday, September 27, 2018 9:22 AM
>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
>> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
>> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org 
>> <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
>> <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse 
>> <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>> *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace 
>> mmu notifier v4
>>
>> Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
>>
>>     > The problem is here:
>>     >
>>
>>     > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>>
>>     > amdgpu_mn_unlock(p->mn);
>>
>>     >
>>     > We need to hold the lock until the fence is added to the
>>     reservation object.
>>     >
>>     > Otherwise somebody could have changed the page tables just in
>>     the moment between the check of
>>     amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the
>>     reservation object.
>>
>>     I’m not planning to change that. I don’t think there is any need
>>     to change it.
>>
>>
>> Yeah, but when HMM doesn't provide both the start and the end hock of 
>> the invalidation this way won't work any more.
>>
>> So we need to find a solution for this,
>> Christian.
>>
>>     Regards,
>>
>>     Felix
>>
>>     *From:*Koenig, Christian
>>     *Sent:* Thursday, September 27, 2018 7:24 AM
>>     *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>     <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>     *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>     <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>     <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>     <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>     *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to
>>     replace mmu notifier v4
>>
>>     Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
>>
>>         > We double check that there wasn't any page table
>>         modification while we prepared the submission and restart the
>>         whole process when there actually was some update.
>>         >
>>         > The reason why we need to do this is here:
>>         >
>>
>>         > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>         p->fence);
>>         >        amdgpu_mn_unlock(p->mn);
>>
>>         >
>>         > Only after the new fence is added to the buffer object we
>>         can release the lock so that any invalidation will now block
>>         on our command submission to finish before it modifies the
>>         page table.
>>
>>         I don’t see why this requires holding the read-lock until
>>         invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets
>>         called while the mn read-lock is held in
>>         invalidate_range_start notifier.
>>
>>
>>     That's not related to amdgpu_ttm_tt_affect_userptr(), this
>>     function could actually be called outside the lock.
>>
>>     The problem is here:
>>
>>         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>>
>>         amdgpu_mn_unlock(p->mn);
>>
>>
>>     We need to hold the lock until the fence is added to the
>>     reservation object.
>>
>>     Otherwise somebody could have changed the page tables just in the
>>     moment between the check of amdgpu_ttm_tt_userptr_needs_pages()
>>     and adding the fence to the reservation object.
>>
>>     Regards,
>>     Christian.
>>
>>
>>         Regards,
>>
>>           Felix
>>
>>         *From:*Koenig, Christian
>>         *Sent:* Thursday, September 27, 2018 5:27 AM
>>         *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>         <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>         *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>         <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>         <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>         <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>         *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to
>>         replace mmu notifier v4
>>
>>         That is correct, but take a look what we do when after
>>         calling the amdgpu_mn_read_lock():
>>
>>
>>                     /* No memory allocation is allowed while holding
>>             the mn lock */
>>                     amdgpu_mn_lock(p->mn);
>>             amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>>                             struct amdgpu_bo *bo =
>>             ttm_to_amdgpu_bo(e->tv.bo);
>>
>>                             if
>>             (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
>>                                     r = -ERESTARTSYS;
>>                                     goto error_abort;
>>                             }
>>                     }
>>
>>
>>         We double check that there wasn't any page table modification
>>         while we prepared the submission and restart the whole
>>         process when there actually was some update.
>>
>>         The reason why we need to do this is here:
>>
>>             ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>             p->fence);
>>                     amdgpu_mn_unlock(p->mn);
>>
>>
>>         Only after the new fence is added to the buffer object we can
>>         release the lock so that any invalidation will now block on
>>         our command submission to finish before it modifies the page
>>         table.
>>
>>         The only other option would be to add the fence first and
>>         then check if there was any update to the page tables.
>>
>>         The issue with that approach is that adding a fence can't be
>>         made undone, so if we find that there actually was an update
>>         to the page tables we would need to somehow turn the CS into
>>         a dummy (e.g. overwrite all IBs with NOPs or something like
>>         that) and still submit it.
>>
>>         Not sure if that is actually possible.
>>
>>         Regards,
>>         Christian.
>>
>>         Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
>>
>>             So back to my previous question:
>>
>>             >> But do we really need another lock for this? Wouldn't the
>>
>>             >> re-validation of userptr BOs (currently calling
>>             get_user_pages) force
>>
>>             >> synchronization with the ongoing page table
>>             invalidation through the
>>
>>             >> mmap_sem or other MM locks?
>>
>>             >
>>
>>             > No and yes. We don't hold any other locks while doing
>>             command submission, but I expect that HMM has its own
>>             mechanism to prevent that.
>>
>>             >
>>
>>             > Since we don't modify
>>             amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not
>>             using this mechanism correctly.
>>
>>             The existing amdgpu_mn_lock/unlock should block the MMU
>>             notifier while a command submission is in progress. It
>>             should also block command submission while an MMU
>>             notifier is in progress.
>>
>>             What we lose with HMM is the ability to hold a read-lock
>>             for the entire duration of the invalidate_range_start
>>             until invalidate_range_end. As I understand it, that lock
>>             is meant to prevent new command submissions while the
>>             page tables are being updated by the kernel. But my point
>>             is, that get_user_pages or hmm_vma_fault should do the
>>             same kind of thing. Before the command submission can go
>>             ahead, it needs to update the userptr addresses. If the
>>             page tables are still being updated, it will block there
>>             even without holding the amdgpu_mn_read_lock.
>>
>>             Regards,
>>
>>               Felix
>>
>>             *From:* Koenig, Christian
>>             *Sent:* Thursday, September 27, 2018 3:00 AM
>>             *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>             <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>             *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>             <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>;
>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>             <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>             *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror
>>             callback to replace mmu notifier v4
>>
>>             No, that won't work. We would still run into lock
>>             inversion problems.
>>
>>             What we could do with the scheduler is to turn
>>             submissions into dummies if we find that the page tables
>>             are now outdated.
>>
>>             But that would be really hacky and I'm not sure if that
>>             would really work in all cases.
>>
>>             Christian.
>>
>>             Am 27.09.2018 08:53 schrieb "Kuehling, Felix"
>>             <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>
>>             I had a chat with Jerome yesterday. He pointed out that
>>             the new blockable parameter can be used to infer whether
>>             the MMU notifier is being called  in a reclaim operation.
>>             So if blockable==true, it should even be safe to take the
>>             BO reservation lock without problems. I think with that
>>             we should be able to remove the read-write locking
>>             completely and go back to locking (or try-locking for
>>             blockable==false) the reservation locks in the MMU notifier?
>>
>>             Regards,
>>               Felix
>>
>>             -----Original Message-----
>>             From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>             <mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>> On Behalf
>>             Of Christian König
>>             Sent: Saturday, September 15, 2018 3:47 AM
>>             To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org
>>             <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip
>>             <Philip.Yang-5C7GfCeVMHo@public.gmane.org <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>;
>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>             <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>             Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback
>>             to replace mmu notifier v4
>>
>>             Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
>>             > On 2018-09-14 01:52 PM, Christian König wrote:
>>             >> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>             >>> On 2018-09-14 03:51 AM, Christian König wrote:
>>             >>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>             >>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>             >>>>> [SNIP]
>>             >>>>>>    + amdgpu_mn_read_unlock(amn);
>>             >>>>>> +
>>             >>>>> amdgpu_mn_read_lock/unlock support recursive
>>             locking for multiple
>>             >>>>> overlapping or nested invalidation ranges. But if
>>             you'r locking
>>             >>>>> and unlocking in the same function. Is that still a
>>             concern?
>>             >>> I don't understand the possible recursive case, but
>>             >>> amdgpu_mn_read_lock() still support recursive locking.
>>             >>>> Well the real problem is that unlocking them here
>>             won't work.
>>             >>>>
>>             >>>> We need to hold the lock until we are sure that the
>>             operation which
>>             >>>> updates the page tables is completed.
>>             >>>>
>>             >>> The reason for this change is because hmm mirror has
>>             >>> invalidate_start callback, no invalidate_end callback
>>             >>>
>>             >>> Check mmu_notifier.c and hmm.c again, below is entire
>>             logic to
>>             >>> update CPU page tables and callback:
>>             >>>
>>             >>> mn lock amn->lock is used to protect interval tree
>>             access because
>>             >>> user may submit/register new userptr anytime.
>>             >>> This is same for old and new way.
>>             >>>
>>             >>> step 2 guarantee the GPU operation is done before
>>             updating CPU page
>>             >>> table.
>>             >>>
>>             >>> So I think the change is safe. We don't need hold mn
>>             lock until the
>>             >>> CPU page tables update is completed.
>>             >> No, that isn't even remotely correct. The lock doesn't
>>             protects the
>>             >> interval tree.
>>             >>
>>             >>> Old:
>>             >>>     1. down_read_non_owner(&amn->lock)
>>             >>>     2. loop to handle BOs from node->bos through
>>             interval tree
>>             >>> amn->object nodes
>>             >>>         gfx: wait for pending BOs fence operation
>>             done, mark user
>>             >>> pages dirty
>>             >>>         kfd: evict user queues of the process, wait
>>             for queue
>>             >>> unmap/map operation done
>>             >>>     3. update CPU page tables
>>             >>>     4. up_read(&amn->lock)
>>             >>>
>>             >>> New, switch step 3 and 4
>>             >>>     1. down_read_non_owner(&amn->lock)
>>             >>>     2. loop to handle BOs from node->bos through
>>             interval tree
>>             >>> amn->object nodes
>>             >>>         gfx: wait for pending BOs fence operation
>>             done, mark user
>>             >>> pages dirty
>>             >>>         kfd: evict user queues of the process, wait
>>             for queue
>>             >>> unmap/map operation done
>>             >>>     3. up_read(&amn->lock)
>>             >>>     4. update CPU page tables
>>             >> The lock is there to make sure that we serialize page
>>             table updates
>>             >> with command submission.
>>             > As I understand it, the idea is to prevent command
>>             submission (adding
>>             > new fences to BOs) while a page table invalidation is
>>             in progress.
>>
>>             Yes, exactly.
>>
>>             > But do we really need another lock for this? Wouldn't the
>>             > re-validation of userptr BOs (currently calling
>>             get_user_pages) force
>>             > synchronization with the ongoing page table
>>             invalidation through the
>>             > mmap_sem or other MM locks?
>>
>>             No and yes. We don't hold any other locks while doing
>>             command submission, but I expect that HMM has its own
>>             mechanism to prevent that.
>>
>>             Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock()
>>             we are certainly not using this mechanism correctly.
>>
>>             Regards,
>>             Christian.
>>             _______________________________________________
>>             amd-gfx mailing list
>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
>>             https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>


[-- Attachment #1.2: Type: text/html, Size: 53608 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                                                     ` <16d1faf6-80a4-dc46-bd2a-9cd475808e98-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-28  5:25                                                                                       ` Koenig, Christian
       [not found]                                                                                         ` <8f9f5703-214f-488d-9cfe-ccc64e8cd009-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Koenig, Christian @ 2018-09-28  5:25 UTC (permalink / raw)
  To: Yang, Philip
  Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 17711 bytes --]

No, that is incorrect as well :)

The mmap_sem isn't necessary taken during page table updates.

What you could do is replace get_user_pages() directly with HMM. If I'm not completely mistaken that should work as expected.

Christian.

Am 27.09.2018 22:18 schrieb "Yang, Philip" <Philip.Yang-5C7GfCeVMHo@public.gmane.org>:
I was trying to understand the way how HMM handle this concurrent issue and how we handle it in amdgpu_ttm_tt_userptr_needs_pages() and  amdgpu_ttm_tt_affect_userptr(). HMM uses range->valid flag, we use gtt->mmu_invalidations and gtt->last_set_pages. Both use the same lock plus flag idea actually.

Thanks for the information, now I understand fence ttm_eu_fence_buffer_objects() put to BOs will block CPU page table update. This is another side of this concurrent issue I didn't know.

I had same worry that it has issue without invalidate_range_end() callback as the calling sequence Felix lists. Now I think it's fine after taking a look again today because of mm->mmap_sem usage, this is my understanding:

A path:

down_write(&mm->mmap_sem);
mmu_notifier_invalidate_range_start()
    take_lock()
    release_lock()
CPU page table update
mmu_notifier_invalidate_range_end()
up_write(&mm->mmap_sem);

B path:

again:
down_read(&mm->mmap_sem);
hmm_vma_get_pfns()
up_read(&mm->mmap_sem);
....
....
take_lock()
if (!hmm_vma_range_done()) {
   release_lock()
   goto again
}
submit command job...
release_lock()

If you agree, I will submit patch v5 with some minor changes, and submit another patch to replace get_user_page() with HMM.

Regards,
Philip

On 2018-09-27 11:36 AM, Christian König wrote:
Yeah, I've read that as well.

My best guess is that we just need to add a call to hmm_vma_range_done() after taking the lock and also replace get_user_pages() with hmm_vma_get_pfns().

But I'm still not 100% sure how all of that is supposed to work together.

Regards,
Christian.

Am 27.09.2018 um 16:50 schrieb Kuehling, Felix:
I think the answer is here: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/vm/hmm.rst#n216

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 10:30 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>; Yang, Philip <Philip.Yang@amd.com><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

At least with get_user_pages() that is perfectly possible.

For HMM it could be that this is prevented somehow.

Christian.

Am 27.09.2018 16:27 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
> In this case you can end up accessing pages which are invalidated while get_user_pages is in process.

What’s the sequence of events you have in mind? Something like this?


  *   Page table is updated and triggers MMU notifier
  *   amdgpu MMU notifier runs and waits for pending CS to finish while holding the read lock
  *   New CS starts just after invalidate_range_start MMU notifier finishes but before the page table update is done
  *   get_user_pages returns outdated physical addresses

I hope that’s not actually possible and that get_user_pages or hmm_vma_fault would block until the page table update is done. That is, invalidate_range_start marks the start of a page table update, and while that update is in progress, get_user_pages or hmm_vma_fault block. Jerome, can you comment on that?

Thanks,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 9:59 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Yeah I understand that, but again that won't work.

In this case you can end up accessing pages which are invalidated while get_user_pages is in process.

Christian.

Am 27.09.2018 15:41 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
> I’m not planning to change that. I don’t think there is any need to change it.
>
> Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.
>
> So we need to find a solution for this,
> Christian.

My whole argument is that you don’t need to hold the read lock until the invalidate_range_end. Just read_lock and read_unlock in the invalidate_range_start function.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 9:22 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
> The problem is here:
>
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> amdgpu_mn_unlock(p->mn);
>
> We need to hold the lock until the fence is added to the reservation object.
>
> Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.
I’m not planning to change that. I don’t think there is any need to change it.

Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.

So we need to find a solution for this,
Christian.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 7:24 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
> We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.
>
> The reason why we need to do this is here:
>
>        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>        amdgpu_mn_unlock(p->mn);
>
> Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

I don’t see why this requires holding the read-lock until invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets called while the mn read-lock is held in invalidate_range_start notifier.

That's not related to amdgpu_ttm_tt_affect_userptr(), this function could actually be called outside the lock.

The problem is here:
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);

We need to hold the lock until the fence is added to the reservation object.

Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.

Regards,
Christian.



Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 5:27 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

That is correct, but take a look what we do when after calling the amdgpu_mn_read_lock():


        /* No memory allocation is allowed while holding the mn lock */
        amdgpu_mn_lock(p->mn);
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
                        r = -ERESTARTSYS;
                        goto error_abort;
                }
        }

We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.

The reason why we need to do this is here:

        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
        amdgpu_mn_unlock(p->mn);

Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

The only other option would be to add the fence first and then check if there was any update to the page tables.

The issue with that approach is that adding a fence can't be made undone, so if we find that there actually was an update to the page tables we would need to somehow turn the CS into a dummy (e.g. overwrite all IBs with NOPs or something like that) and still submit it.

Not sure if that is actually possible.

Regards,
Christian.

Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
So back to my previous question:



>> But do we really need another lock for this? Wouldn't the

>> re-validation of userptr BOs (currently calling get_user_pages) force

>> synchronization with the ongoing page table invalidation through the

>> mmap_sem or other MM locks?

>

> No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

>

> Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

The existing amdgpu_mn_lock/unlock should block the MMU notifier while a command submission is in progress. It should also block command submission while an MMU notifier is in progress.

What we lose with HMM is the ability to hold a read-lock for the entire duration of the invalidate_range_start until invalidate_range_end. As I understand it, that lock is meant to prevent new command submissions while the page tables are being updated by the kernel. But my point is, that get_user_pages or hmm_vma_fault should do the same kind of thing. Before the command submission can go ahead, it needs to update the userptr addresses. If the page tables are still being updated, it will block there even without holding the amdgpu_mn_read_lock.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 3:00 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

No, that won't work. We would still run into lock inversion problems.

What we could do with the scheduler is to turn submissions into dummies if we find that the page tables are now outdated.

But that would be really hacky and I'm not sure if that would really work in all cases.

Christian.

Am 27.09.2018 08:53 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
I had a chat with Jerome yesterday. He pointed out that the new blockable parameter can be used to infer whether the MMU notifier is being called  in a reclaim operation. So if blockable==true, it should even be safe to take the BO reservation lock without problems. I think with that we should be able to remove the read-write locking completely and go back to locking (or try-locking for blockable==false) the reservation locks in the MMU notifier?

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-bounces@lists.freedesktop.org>> On Behalf Of Christian König
Sent: Saturday, September 15, 2018 3:47 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>>> overlapping or nested invalidation ranges. But if you'r locking
>>>>> and unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has
>>> invalidate_start callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to
>>> update CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the
> re-validation of userptr BOs (currently calling get_user_pages) force
> synchronization with the ongoing page table invalidation through the
> mmap_sem or other MM locks?

No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx






[-- Attachment #1.2: Type: text/html, Size: 32397 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                                                         ` <8f9f5703-214f-488d-9cfe-ccc64e8cd009-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
@ 2018-09-28 15:07                                                                                           ` Philip Yang
       [not found]                                                                                             ` <fe0d429b-5038-a297-e02e-423302544477-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Philip Yang @ 2018-09-28 15:07 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 23451 bytes --]

For B path, we take mm->mmap_sem, then call hmm_vma_get_pfns() or 
get_user_pages(). This is obvious.

For A path, mmu notifier 
mmu_notifier_invalidate_range_start()/mmu_notifier_invalidate_range_end() 
is called in many places, and the calling path is quit complicated 
inside mm, it's not obvious. I checked many of the them, for example:

do_munmap()
   down_write(&mm->mmap_sem)
   arch_unmap()
     mpx_notify_unmap()...
        zap_bt_entries_mapping()
          zap_page_range()
  up_write(&mm->mmap_sem)

void zap_page_range(struct vm_area_struct *vma, unsigned long start,
         unsigned long size)
{
     struct mm_struct *mm = vma->vm_mm;
     struct mmu_gather tlb;
     unsigned long end = start + size;

     lru_add_drain();
     tlb_gather_mmu(&tlb, mm, start, end);
     update_hiwater_rss(mm);
     mmu_notifier_invalidate_range_start(mm, start, end);
     for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
         unmap_single_vma(&tlb, vma, start, end, NULL);
     mmu_notifier_invalidate_range_end(mm, start, end);
     tlb_finish_mmu(&tlb, start, end);
}

So AFAIK it's okay without invalidate_range_end() callback.

Regards,
Philip

On 2018-09-28 01:25 AM, Koenig, Christian wrote:
> No, that is incorrect as well :)
>
> The mmap_sem isn't necessary taken during page table updates.
>
> What you could do is replace get_user_pages() directly with HMM. If 
> I'm not completely mistaken that should work as expected.
>
> Christian.
>
> Am 27.09.2018 22:18 schrieb "Yang, Philip" <Philip.Yang-5C7GfCeVMHo@public.gmane.org>:
> I was trying to understand the way how HMM handle this concurrent 
> issue and how we handle it in amdgpu_ttm_tt_userptr_needs_pages() and 
> amdgpu_ttm_tt_affect_userptr(). HMM uses range->valid flag, we use 
> gtt->mmu_invalidations and gtt->last_set_pages. Both use the same lock 
> plus flag idea actually.
>
> Thanks for the information, now I understand fence 
> ttm_eu_fence_buffer_objects() put to BOs will block CPU page table 
> update. This is another side of this concurrent issue I didn't know.
>
> I had same worry that it has issue without invalidate_range_end() 
> callback as the calling sequence Felix lists. Now I think it's fine 
> after taking a look again today because of mm->mmap_sem usage, this is 
> my understanding:
>
> A path:
>
> down_write(&mm->mmap_sem);
> mmu_notifier_invalidate_range_start()
>     take_lock()
>     release_lock()
> CPU page table update
> mmu_notifier_invalidate_range_end()
> up_write(&mm->mmap_sem);
>
> B path:
>
> again:
> down_read(&mm->mmap_sem);
> hmm_vma_get_pfns()
> up_read(&mm->mmap_sem);
> ....
> ....
> take_lock()
> if (!hmm_vma_range_done()) {
>    release_lock()
>    goto again
> }
> submit command job...
> release_lock()
>
> If you agree, I will submit patch v5 with some minor changes, and 
> submit another patch to replace get_user_page() with HMM.
>
> Regards,
> Philip
>
> On 2018-09-27 11:36 AM, Christian König wrote:
>> Yeah, I've read that as well.
>>
>> My best guess is that we just need to add a call to 
>> hmm_vma_range_done() after taking the lock and also replace 
>> get_user_pages() with hmm_vma_get_pfns().
>>
>> But I'm still not 100% sure how all of that is supposed to work together.
>>
>> Regards,
>> Christian.
>>
>> Am 27.09.2018 um 16:50 schrieb Kuehling, Felix:
>>>
>>> I think the answer is here: 
>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/vm/hmm.rst#n216
>>>
>>> Regards,
>>>
>>> Felix
>>>
>>> *From:*Koenig, Christian
>>> *Sent:* Thursday, September 27, 2018 10:30 AM
>>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>> *Cc:* j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; 
>>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to 
>>> replace mmu notifier v4
>>>
>>> At least with get_user_pages() that is perfectly possible.
>>>
>>> For HMM it could be that this is prevented somehow.
>>>
>>> Christian.
>>>
>>> Am 27.09.2018 16:27 schrieb "Kuehling, Felix" 
>>> <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>>
>>> > In this case you can end up accessing pages which are invalidated 
>>> while get_user_pages is in process.
>>>
>>> What’s the sequence of events you have in mind? Something like this?
>>>
>>>   * Page table is updated and triggers MMU notifier
>>>   * amdgpu MMU notifier runs and waits for pending CS to finish
>>>     while holding the read lock
>>>   * New CS starts just after invalidate_range_start MMU notifier
>>>     finishes but before the page table update is done
>>>   * get_user_pages returns outdated physical addresses
>>>
>>> I hope that’s not actually possible and that get_user_pages or 
>>> hmm_vma_fault would block until the page table update is done. That 
>>> is, invalidate_range_start marks the start of a page table update, 
>>> and while that update is in progress, get_user_pages or 
>>> hmm_vma_fault block. Jerome, can you comment on that?
>>>
>>> Thanks,
>>>   Felix
>>>
>>> *From:*Koenig, Christian
>>> *Sent:* Thursday, September 27, 2018 9:59 AM
>>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
>>> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
>>> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org 
>>> <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
>>> <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse 
>>> <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to 
>>> replace mmu notifier v4
>>>
>>> Yeah I understand that, but again that won't work.
>>>
>>> In this case you can end up accessing pages which are invalidated 
>>> while get_user_pages is in process.
>>>
>>> Christian.
>>>
>>> Am 27.09.2018 15:41 schrieb "Kuehling, Felix" 
>>> <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>>
>>> > I’m not planning to change that. I don’t think there is any need to change it.
>>>
>>> >
>>> > Yeah, but when HMM doesn't provide both the start and the end hock 
>>> of the invalidation this way won't work any more.
>>> >
>>> > So we need to find a solution for this,
>>> > Christian.
>>>
>>> My whole argument is that you don’t need to hold the read lock until 
>>> the invalidate_range_end. Just read_lock and read_unlock in the 
>>> invalidate_range_start function.
>>>
>>> Regards,
>>>
>>> Felix
>>>
>>> *From:*Koenig, Christian
>>> *Sent:* Thursday, September 27, 2018 9:22 AM
>>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
>>> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
>>> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org 
>>> <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
>>> <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse 
>>> <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>> *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to 
>>> replace mmu notifier v4
>>>
>>> Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
>>>
>>>     > The problem is here:
>>>     >
>>>
>>>     > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>>>
>>>     > amdgpu_mn_unlock(p->mn);
>>>
>>>     >
>>>     > We need to hold the lock until the fence is added to the
>>>     reservation object.
>>>     >
>>>     > Otherwise somebody could have changed the page tables just in
>>>     the moment between the check of
>>>     amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the
>>>     reservation object.
>>>
>>>     I’m not planning to change that. I don’t think there is any need
>>>     to change it.
>>>
>>>
>>> Yeah, but when HMM doesn't provide both the start and the end hock 
>>> of the invalidation this way won't work any more.
>>>
>>> So we need to find a solution for this,
>>> Christian.
>>>
>>>     Regards,
>>>
>>>       Felix
>>>
>>>     *From:*Koenig, Christian
>>>     *Sent:* Thursday, September 27, 2018 7:24 AM
>>>     *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>     <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>     *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>>     <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>     <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>     <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>     *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to
>>>     replace mmu notifier v4
>>>
>>>     Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
>>>
>>>         > We double check that there wasn't any page table
>>>         modification while we prepared the submission and restart
>>>         the whole process when there actually was some update.
>>>         >
>>>         > The reason why we need to do this is here:
>>>         >
>>>
>>>         > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>         p->fence);
>>>         >        amdgpu_mn_unlock(p->mn);
>>>
>>>         >
>>>         > Only after the new fence is added to the buffer object we
>>>         can release the lock so that any invalidation will now block
>>>         on our command submission to finish before it modifies the
>>>         page table.
>>>
>>>         I don’t see why this requires holding the read-lock until
>>>         invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets
>>>         called while the mn read-lock is held in
>>>         invalidate_range_start notifier.
>>>
>>>
>>>     That's not related to amdgpu_ttm_tt_affect_userptr(), this
>>>     function could actually be called outside the lock.
>>>
>>>     The problem is here:
>>>
>>>         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>         p->fence);
>>>
>>>         amdgpu_mn_unlock(p->mn);
>>>
>>>
>>>     We need to hold the lock until the fence is added to the
>>>     reservation object.
>>>
>>>     Otherwise somebody could have changed the page tables just in
>>>     the moment between the check of
>>>     amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the
>>>     reservation object.
>>>
>>>     Regards,
>>>     Christian.
>>>
>>>
>>>         Regards,
>>>
>>>           Felix
>>>
>>>         *From:*Koenig, Christian
>>>         *Sent:* Thursday, September 27, 2018 5:27 AM
>>>         *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>         <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>         *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>>         <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>         <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>         <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>         *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback
>>>         to replace mmu notifier v4
>>>
>>>         That is correct, but take a look what we do when after
>>>         calling the amdgpu_mn_read_lock():
>>>
>>>
>>>                     /* No memory allocation is allowed while holding
>>>             the mn lock */
>>>                     amdgpu_mn_lock(p->mn);
>>>             amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>>>                             struct amdgpu_bo *bo =
>>>             ttm_to_amdgpu_bo(e->tv.bo);
>>>
>>>                             if
>>>             (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
>>>                                     r = -ERESTARTSYS;
>>>                                     goto error_abort;
>>>                             }
>>>                     }
>>>
>>>
>>>         We double check that there wasn't any page table
>>>         modification while we prepared the submission and restart
>>>         the whole process when there actually was some update.
>>>
>>>         The reason why we need to do this is here:
>>>
>>>             ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>             p->fence);
>>>                     amdgpu_mn_unlock(p->mn);
>>>
>>>
>>>         Only after the new fence is added to the buffer object we
>>>         can release the lock so that any invalidation will now block
>>>         on our command submission to finish before it modifies the
>>>         page table.
>>>
>>>         The only other option would be to add the fence first and
>>>         then check if there was any update to the page tables.
>>>
>>>         The issue with that approach is that adding a fence can't be
>>>         made undone, so if we find that there actually was an update
>>>         to the page tables we would need to somehow turn the CS into
>>>         a dummy (e.g. overwrite all IBs with NOPs or something like
>>>         that) and still submit it.
>>>
>>>         Not sure if that is actually possible.
>>>
>>>         Regards,
>>>         Christian.
>>>
>>>         Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
>>>
>>>             So back to my previous question:
>>>
>>>             >> But do we really need another lock for this? Wouldn't
>>>             the
>>>
>>>             >> re-validation of userptr BOs (currently calling
>>>             get_user_pages) force
>>>
>>>             >> synchronization with the ongoing page table
>>>             invalidation through the
>>>
>>>             >> mmap_sem or other MM locks?
>>>
>>>             >
>>>
>>>             > No and yes. We don't hold any other locks while doing
>>>             command submission, but I expect that HMM has its own
>>>             mechanism to prevent that.
>>>
>>>             >
>>>
>>>             > Since we don't modify
>>>             amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not
>>>             using this mechanism correctly.
>>>
>>>             The existing amdgpu_mn_lock/unlock should block the MMU
>>>             notifier while a command submission is in progress. It
>>>             should also block command submission while an MMU
>>>             notifier is in progress.
>>>
>>>             What we lose with HMM is the ability to hold a read-lock
>>>             for the entire duration of the invalidate_range_start
>>>             until invalidate_range_end. As I understand it, that
>>>             lock is meant to prevent new command submissions while
>>>             the page tables are being updated by the kernel. But my
>>>             point is, that get_user_pages or hmm_vma_fault should do
>>>             the same kind of thing. Before the command submission
>>>             can go ahead, it needs to update the userptr addresses.
>>>             If the page tables are still being updated, it will
>>>             block there even without holding the amdgpu_mn_read_lock.
>>>
>>>             Regards,
>>>
>>>               Felix
>>>
>>>             *From:* Koenig, Christian
>>>             *Sent:* Thursday, September 27, 2018 3:00 AM
>>>             *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>             <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>             *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>>             <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>;
>>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>             <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>             *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror
>>>             callback to replace mmu notifier v4
>>>
>>>             No, that won't work. We would still run into lock
>>>             inversion problems.
>>>
>>>             What we could do with the scheduler is to turn
>>>             submissions into dummies if we find that the page tables
>>>             are now outdated.
>>>
>>>             But that would be really hacky and I'm not sure if that
>>>             would really work in all cases.
>>>
>>>             Christian.
>>>
>>>             Am 27.09.2018 08:53 schrieb "Kuehling, Felix"
>>>             <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>>
>>>             I had a chat with Jerome yesterday. He pointed out that
>>>             the new blockable parameter can be used to infer whether
>>>             the MMU notifier is being called  in a reclaim
>>>             operation. So if blockable==true, it should even be safe
>>>             to take the BO reservation lock without problems. I
>>>             think with that we should be able to remove the
>>>             read-write locking completely and go back to locking (or
>>>             try-locking for blockable==false) the reservation locks
>>>             in the MMU notifier?
>>>
>>>             Regards,
>>>               Felix
>>>
>>>             -----Original Message-----
>>>             From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>             <mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>> On
>>>             Behalf Of Christian König
>>>             Sent: Saturday, September 15, 2018 3:47 AM
>>>             To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org
>>>             <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip
>>>             <Philip.Yang-5C7GfCeVMHo@public.gmane.org <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>;
>>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>             <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>>             Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback
>>>             to replace mmu notifier v4
>>>
>>>             Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
>>>             > On 2018-09-14 01:52 PM, Christian König wrote:
>>>             >> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>>             >>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>             >>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>             >>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>             >>>>> [SNIP]
>>>             >>>>>>    + amdgpu_mn_read_unlock(amn);
>>>             >>>>>> +
>>>             >>>>> amdgpu_mn_read_lock/unlock support recursive
>>>             locking for multiple
>>>             >>>>> overlapping or nested invalidation ranges. But if
>>>             you'r locking
>>>             >>>>> and unlocking in the same function. Is that still
>>>             a concern?
>>>             >>> I don't understand the possible recursive case, but
>>>             >>> amdgpu_mn_read_lock() still support recursive locking.
>>>             >>>> Well the real problem is that unlocking them here
>>>             won't work.
>>>             >>>>
>>>             >>>> We need to hold the lock until we are sure that the
>>>             operation which
>>>             >>>> updates the page tables is completed.
>>>             >>>>
>>>             >>> The reason for this change is because hmm mirror has
>>>             >>> invalidate_start callback, no invalidate_end callback
>>>             >>>
>>>             >>> Check mmu_notifier.c and hmm.c again, below is
>>>             entire logic to
>>>             >>> update CPU page tables and callback:
>>>             >>>
>>>             >>> mn lock amn->lock is used to protect interval tree
>>>             access because
>>>             >>> user may submit/register new userptr anytime.
>>>             >>> This is same for old and new way.
>>>             >>>
>>>             >>> step 2 guarantee the GPU operation is done before
>>>             updating CPU page
>>>             >>> table.
>>>             >>>
>>>             >>> So I think the change is safe. We don't need hold mn
>>>             lock until the
>>>             >>> CPU page tables update is completed.
>>>             >> No, that isn't even remotely correct. The lock
>>>             doesn't protects the
>>>             >> interval tree.
>>>             >>
>>>             >>> Old:
>>>             >>>     1. down_read_non_owner(&amn->lock)
>>>             >>>     2. loop to handle BOs from node->bos through
>>>             interval tree
>>>             >>> amn->object nodes
>>>             >>>         gfx: wait for pending BOs fence operation
>>>             done, mark user
>>>             >>> pages dirty
>>>             >>>         kfd: evict user queues of the process, wait
>>>             for queue
>>>             >>> unmap/map operation done
>>>             >>>     3. update CPU page tables
>>>             >>>     4. up_read(&amn->lock)
>>>             >>>
>>>             >>> New, switch step 3 and 4
>>>             >>>     1. down_read_non_owner(&amn->lock)
>>>             >>>     2. loop to handle BOs from node->bos through
>>>             interval tree
>>>             >>> amn->object nodes
>>>             >>>         gfx: wait for pending BOs fence operation
>>>             done, mark user
>>>             >>> pages dirty
>>>             >>>         kfd: evict user queues of the process, wait
>>>             for queue
>>>             >>> unmap/map operation done
>>>             >>>     3. up_read(&amn->lock)
>>>             >>>     4. update CPU page tables
>>>             >> The lock is there to make sure that we serialize page
>>>             table updates
>>>             >> with command submission.
>>>             > As I understand it, the idea is to prevent command
>>>             submission (adding
>>>             > new fences to BOs) while a page table invalidation is
>>>             in progress.
>>>
>>>             Yes, exactly.
>>>
>>>             > But do we really need another lock for this? Wouldn't the
>>>             > re-validation of userptr BOs (currently calling
>>>             get_user_pages) force
>>>             > synchronization with the ongoing page table
>>>             invalidation through the
>>>             > mmap_sem or other MM locks?
>>>
>>>             No and yes. We don't hold any other locks while doing
>>>             command submission, but I expect that HMM has its own
>>>             mechanism to prevent that.
>>>
>>>             Since we don't modify
>>>             amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not
>>>             using this mechanism correctly.
>>>
>>>             Regards,
>>>             Christian.
>>>             _______________________________________________
>>>             amd-gfx mailing list
>>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
>>>             https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>
>>
>


[-- Attachment #1.2: Type: text/html, Size: 56230 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                                                             ` <fe0d429b-5038-a297-e02e-423302544477-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-28 15:13                                                                                               ` Koenig, Christian
       [not found]                                                                                                 ` <b8686e6b-0c3e-4feb-afbd-80397aac31a0-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Koenig, Christian @ 2018-09-28 15:13 UTC (permalink / raw)
  To: Yang, Philip
  Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 19405 bytes --]

No it definitely isn't.

We have literally worked month on this with the core MM developers.

Making sure that we have a consistent page array is absolutely vital for correct operation.

Please also check Jerome's presentation from XDC it also perfectly explains why this approach won't work correctly.

Christian.

Am 28.09.2018 17:07 schrieb "Yang, Philip" <Philip.Yang-5C7GfCeVMHo@public.gmane.org>:
For B path, we take mm->mmap_sem, then call hmm_vma_get_pfns() or get_user_pages(). This is obvious.

For A path, mmu notifier mmu_notifier_invalidate_range_start()/mmu_notifier_invalidate_range_end() is called in many places, and the calling path is quit complicated inside mm, it's not obvious. I checked many of the them, for example:

do_munmap()
  down_write(&mm->mmap_sem)
  arch_unmap()
    mpx_notify_unmap()...
       zap_bt_entries_mapping()
         zap_page_range()
 up_write(&mm->mmap_sem)

void zap_page_range(struct vm_area_struct *vma, unsigned long start,
        unsigned long size)
{
    struct mm_struct *mm = vma->vm_mm;
    struct mmu_gather tlb;
    unsigned long end = start + size;

    lru_add_drain();
    tlb_gather_mmu(&tlb, mm, start, end);
    update_hiwater_rss(mm);
    mmu_notifier_invalidate_range_start(mm, start, end);
    for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
        unmap_single_vma(&tlb, vma, start, end, NULL);
    mmu_notifier_invalidate_range_end(mm, start, end);
    tlb_finish_mmu(&tlb, start, end);
}

So AFAIK it's okay without invalidate_range_end() callback.

Regards,
Philip

On 2018-09-28 01:25 AM, Koenig, Christian wrote:
No, that is incorrect as well :)

The mmap_sem isn't necessary taken during page table updates.

What you could do is replace get_user_pages() directly with HMM. If I'm not completely mistaken that should work as expected.

Christian.

Am 27.09.2018 22:18 schrieb "Yang, Philip" <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>:
I was trying to understand the way how HMM handle this concurrent issue and how we handle it in amdgpu_ttm_tt_userptr_needs_pages() and  amdgpu_ttm_tt_affect_userptr(). HMM uses range->valid flag, we use gtt->mmu_invalidations and gtt->last_set_pages. Both use the same lock plus flag idea actually.

Thanks for the information, now I understand fence ttm_eu_fence_buffer_objects() put to BOs will block CPU page table update. This is another side of this concurrent issue I didn't know.

I had same worry that it has issue without invalidate_range_end() callback as the calling sequence Felix lists. Now I think it's fine after taking a look again today because of mm->mmap_sem usage, this is my understanding:

A path:

down_write(&mm->mmap_sem);
mmu_notifier_invalidate_range_start()
    take_lock()
    release_lock()
CPU page table update
mmu_notifier_invalidate_range_end()
up_write(&mm->mmap_sem);

B path:

again:
down_read(&mm->mmap_sem);
hmm_vma_get_pfns()
up_read(&mm->mmap_sem);
....
....
take_lock()
if (!hmm_vma_range_done()) {
   release_lock()
   goto again
}
submit command job...
release_lock()

If you agree, I will submit patch v5 with some minor changes, and submit another patch to replace get_user_page() with HMM.

Regards,
Philip

On 2018-09-27 11:36 AM, Christian König wrote:
Yeah, I've read that as well.

My best guess is that we just need to add a call to hmm_vma_range_done() after taking the lock and also replace get_user_pages() with hmm_vma_get_pfns().

But I'm still not 100% sure how all of that is supposed to work together.

Regards,
Christian.

Am 27.09.2018 um 16:50 schrieb Kuehling, Felix:
I think the answer is here: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/vm/hmm.rst#n216

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 10:30 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>; Yang, Philip <Philip.Yang@amd.com><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

At least with get_user_pages() that is perfectly possible.

For HMM it could be that this is prevented somehow.

Christian.

Am 27.09.2018 16:27 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
> In this case you can end up accessing pages which are invalidated while get_user_pages is in process.

What’s the sequence of events you have in mind? Something like this?


  *   Page table is updated and triggers MMU notifier
  *   amdgpu MMU notifier runs and waits for pending CS to finish while holding the read lock
  *   New CS starts just after invalidate_range_start MMU notifier finishes but before the page table update is done
  *   get_user_pages returns outdated physical addresses

I hope that’s not actually possible and that get_user_pages or hmm_vma_fault would block until the page table update is done. That is, invalidate_range_start marks the start of a page table update, and while that update is in progress, get_user_pages or hmm_vma_fault block. Jerome, can you comment on that?

Thanks,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 9:59 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Yeah I understand that, but again that won't work.

In this case you can end up accessing pages which are invalidated while get_user_pages is in process.

Christian.

Am 27.09.2018 15:41 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
> I’m not planning to change that. I don’t think there is any need to change it.
>
> Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.
>
> So we need to find a solution for this,
> Christian.

My whole argument is that you don’t need to hold the read lock until the invalidate_range_end. Just read_lock and read_unlock in the invalidate_range_start function.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 9:22 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
> The problem is here:
>
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> amdgpu_mn_unlock(p->mn);
>
> We need to hold the lock until the fence is added to the reservation object.
>
> Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.
I’m not planning to change that. I don’t think there is any need to change it.

Yeah, but when HMM doesn't provide both the start and the end hock of the invalidation this way won't work any more.

So we need to find a solution for this,
Christian.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 7:24 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
> We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.
>
> The reason why we need to do this is here:
>
>        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>        amdgpu_mn_unlock(p->mn);
>
> Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

I don’t see why this requires holding the read-lock until invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets called while the mn read-lock is held in invalidate_range_start notifier.

That's not related to amdgpu_ttm_tt_affect_userptr(), this function could actually be called outside the lock.

The problem is here:
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);

We need to hold the lock until the fence is added to the reservation object.

Otherwise somebody could have changed the page tables just in the moment between the check of amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the reservation object.

Regards,
Christian.



Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 5:27 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

That is correct, but take a look what we do when after calling the amdgpu_mn_read_lock():


        /* No memory allocation is allowed while holding the mn lock */
        amdgpu_mn_lock(p->mn);
        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
                        r = -ERESTARTSYS;
                        goto error_abort;
                }
        }

We double check that there wasn't any page table modification while we prepared the submission and restart the whole process when there actually was some update.

The reason why we need to do this is here:

        ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
        amdgpu_mn_unlock(p->mn);

Only after the new fence is added to the buffer object we can release the lock so that any invalidation will now block on our command submission to finish before it modifies the page table.

The only other option would be to add the fence first and then check if there was any update to the page tables.

The issue with that approach is that adding a fence can't be made undone, so if we find that there actually was an update to the page tables we would need to somehow turn the CS into a dummy (e.g. overwrite all IBs with NOPs or something like that) and still submit it.

Not sure if that is actually possible.

Regards,
Christian.

Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
So back to my previous question:



>> But do we really need another lock for this? Wouldn't the

>> re-validation of userptr BOs (currently calling get_user_pages) force

>> synchronization with the ongoing page table invalidation through the

>> mmap_sem or other MM locks?

>

> No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

>

> Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

The existing amdgpu_mn_lock/unlock should block the MMU notifier while a command submission is in progress. It should also block command submission while an MMU notifier is in progress.

What we lose with HMM is the ability to hold a read-lock for the entire duration of the invalidate_range_start until invalidate_range_end. As I understand it, that lock is meant to prevent new command submissions while the page tables are being updated by the kernel. But my point is, that get_user_pages or hmm_vma_fault should do the same kind of thing. Before the command submission can go ahead, it needs to update the userptr addresses. If the page tables are still being updated, it will block there even without holding the amdgpu_mn_read_lock.

Regards,
  Felix

From: Koenig, Christian
Sent: Thursday, September 27, 2018 3:00 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org><mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
Cc: Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org><mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org><mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: RE: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

No, that won't work. We would still run into lock inversion problems.

What we could do with the scheduler is to turn submissions into dummies if we find that the page tables are now outdated.

But that would be really hacky and I'm not sure if that would really work in all cases.

Christian.

Am 27.09.2018 08:53 schrieb "Kuehling, Felix" <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
I had a chat with Jerome yesterday. He pointed out that the new blockable parameter can be used to infer whether the MMU notifier is being called  in a reclaim operation. So if blockable==true, it should even be safe to take the BO reservation lock without problems. I think with that we should be able to remove the read-write locking completely and go back to locking (or try-locking for blockable==false) the reservation locks in the MMU notifier?

Regards,
  Felix

-----Original Message-----
From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-bounces@lists.freedesktop.org>> On Behalf Of Christian König
Sent: Saturday, September 15, 2018 3:47 AM
To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org<mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org<mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx@lists.freedesktop.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org<mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
Subject: Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
> On 2018-09-14 01:52 PM, Christian König wrote:
>> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> [SNIP]
>>>>>>    +    amdgpu_mn_read_unlock(amn);
>>>>>> +
>>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>>> overlapping or nested invalidation ranges. But if you'r locking
>>>>> and unlocking in the same function. Is that still a concern?
>>> I don't understand the possible recursive case, but
>>> amdgpu_mn_read_lock() still support recursive locking.
>>>> Well the real problem is that unlocking them here won't work.
>>>>
>>>> We need to hold the lock until we are sure that the operation which
>>>> updates the page tables is completed.
>>>>
>>> The reason for this change is because hmm mirror has
>>> invalidate_start callback, no invalidate_end callback
>>>
>>> Check mmu_notifier.c and hmm.c again, below is entire logic to
>>> update CPU page tables and callback:
>>>
>>> mn lock amn->lock is used to protect interval tree access because
>>> user may submit/register new userptr anytime.
>>> This is same for old and new way.
>>>
>>> step 2 guarantee the GPU operation is done before updating CPU page
>>> table.
>>>
>>> So I think the change is safe. We don't need hold mn lock until the
>>> CPU page tables update is completed.
>> No, that isn't even remotely correct. The lock doesn't protects the
>> interval tree.
>>
>>> Old:
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. update CPU page tables
>>>     4. up_read(&amn->lock)
>>>
>>> New, switch step 3 and 4
>>>     1. down_read_non_owner(&amn->lock)
>>>     2. loop to handle BOs from node->bos through interval tree
>>> amn->object nodes
>>>         gfx: wait for pending BOs fence operation done, mark user
>>> pages dirty
>>>         kfd: evict user queues of the process, wait for queue
>>> unmap/map operation done
>>>     3. up_read(&amn->lock)
>>>     4. update CPU page tables
>> The lock is there to make sure that we serialize page table updates
>> with command submission.
> As I understand it, the idea is to prevent command submission (adding
> new fences to BOs) while a page table invalidation is in progress.

Yes, exactly.

> But do we really need another lock for this? Wouldn't the
> re-validation of userptr BOs (currently calling get_user_pages) force
> synchronization with the ongoing page table invalidation through the
> mmap_sem or other MM locks?

No and yes. We don't hold any other locks while doing command submission, but I expect that HMM has its own mechanism to prevent that.

Since we don't modify amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly not using this mechanism correctly.

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org<mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx







[-- Attachment #1.2: Type: text/html, Size: 35207 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                                                                 ` <b8686e6b-0c3e-4feb-afbd-80397aac31a0-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
@ 2018-10-02 15:05                                                                                                   ` Christian König
       [not found]                                                                                                     ` <09916f9a-3f5f-27ab-01e6-6d77303cf052-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Christian König @ 2018-10-02 15:05 UTC (permalink / raw)
  To: Koenig, Christian, Yang, Philip
  Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 25624 bytes --]

Checking more code and documentation and thinking about it over my 
vacation I think I have some new conclusions here.

Currently we are using get_user_pages() together with an MMU notifier to 
guarantee coherent address space view, because get_user_pages() works by 
grabbing a reference to the pages and ignoring concurrent page table 
updates.

But HMM uses a different approach by checking the address space for 
modifications using hmm_vma_range_done() and re-trying when the address 
space has changed.

Now what you are trying to do is to change that into get_user_pages() 
and HMM callback and this is what won't work. We can either use 
get_user_pages() with MMU notifier or we can use HMM for the work, but 
we can't mix and match.

So my initial guess was correct that we just need to change both sides 
of the implementation at the same time.

Regards,
Christian.

Am 28.09.2018 um 17:13 schrieb Koenig, Christian:
> No it definitely isn't.
>
> We have literally worked month on this with the core MM developers.
>
> Making sure that we have a consistent page array is absolutely vital 
> for correct operation.
>
> Please also check Jerome's presentation from XDC it also perfectly 
> explains why this approach won't work correctly.
>
> Christian.
>
> Am 28.09.2018 17:07 schrieb "Yang, Philip" <Philip.Yang-5C7GfCeVMHo@public.gmane.org>:
> For B path, we take mm->mmap_sem, then call hmm_vma_get_pfns() or 
> get_user_pages(). This is obvious.
>
> For A path, mmu notifier 
> mmu_notifier_invalidate_range_start()/mmu_notifier_invalidate_range_end() 
> is called in many places, and the calling path is quit complicated 
> inside mm, it's not obvious. I checked many of the them, for example:
>
> do_munmap()
>   down_write(&mm->mmap_sem)
>   arch_unmap()
>     mpx_notify_unmap()...
>        zap_bt_entries_mapping()
>          zap_page_range()
>  up_write(&mm->mmap_sem)
>
> void zap_page_range(struct vm_area_struct *vma, unsigned long start,
>         unsigned long size)
> {
>     struct mm_struct *mm = vma->vm_mm;
>     struct mmu_gather tlb;
>     unsigned long end = start + size;
>
>     lru_add_drain();
>     tlb_gather_mmu(&tlb, mm, start, end);
>     update_hiwater_rss(mm);
>     mmu_notifier_invalidate_range_start(mm, start, end);
>     for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
>         unmap_single_vma(&tlb, vma, start, end, NULL);
>     mmu_notifier_invalidate_range_end(mm, start, end);
>     tlb_finish_mmu(&tlb, start, end);
> }
>
> So AFAIK it's okay without invalidate_range_end() callback.
>
> Regards,
> Philip
>
> On 2018-09-28 01:25 AM, Koenig, Christian wrote:
>> No, that is incorrect as well :)
>>
>> The mmap_sem isn't necessary taken during page table updates.
>>
>> What you could do is replace get_user_pages() directly with HMM. If 
>> I'm not completely mistaken that should work as expected.
>>
>> Christian.
>>
>> Am 27.09.2018 22:18 schrieb "Yang, Philip" <Philip.Yang-5C7GfCeVMHo@public.gmane.org>:
>> I was trying to understand the way how HMM handle this concurrent 
>> issue and how we handle it in amdgpu_ttm_tt_userptr_needs_pages() and 
>> amdgpu_ttm_tt_affect_userptr(). HMM uses range->valid flag, we use 
>> gtt->mmu_invalidations and gtt->last_set_pages. Both use the same 
>> lock plus flag idea actually.
>>
>> Thanks for the information, now I understand fence 
>> ttm_eu_fence_buffer_objects() put to BOs will block CPU page table 
>> update. This is another side of this concurrent issue I didn't know.
>>
>> I had same worry that it has issue without invalidate_range_end() 
>> callback as the calling sequence Felix lists. Now I think it's fine 
>> after taking a look again today because of mm->mmap_sem usage, this 
>> is my understanding:
>>
>> A path:
>>
>> down_write(&mm->mmap_sem);
>> mmu_notifier_invalidate_range_start()
>>     take_lock()
>>     release_lock()
>> CPU page table update
>> mmu_notifier_invalidate_range_end()
>> up_write(&mm->mmap_sem);
>>
>> B path:
>>
>> again:
>> down_read(&mm->mmap_sem);
>> hmm_vma_get_pfns()
>> up_read(&mm->mmap_sem);
>> ....
>> ....
>> take_lock()
>> if (!hmm_vma_range_done()) {
>>    release_lock()
>>    goto again
>> }
>> submit command job...
>> release_lock()
>>
>> If you agree, I will submit patch v5 with some minor changes, and 
>> submit another patch to replace get_user_page() with HMM.
>>
>> Regards,
>> Philip
>>
>> On 2018-09-27 11:36 AM, Christian König wrote:
>>> Yeah, I've read that as well.
>>>
>>> My best guess is that we just need to add a call to 
>>> hmm_vma_range_done() after taking the lock and also replace 
>>> get_user_pages() with hmm_vma_get_pfns().
>>>
>>> But I'm still not 100% sure how all of that is supposed to work 
>>> together.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 27.09.2018 um 16:50 schrieb Kuehling, Felix:
>>>>
>>>> I think the answer is here: 
>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/vm/hmm.rst#n216
>>>>
>>>> Regards,
>>>>
>>>> Felix
>>>>
>>>> *From:*Koenig, Christian
>>>> *Sent:* Thursday, September 27, 2018 10:30 AM
>>>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>> *Cc:* j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; 
>>>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to 
>>>> replace mmu notifier v4
>>>>
>>>> At least with get_user_pages() that is perfectly possible.
>>>>
>>>> For HMM it could be that this is prevented somehow.
>>>>
>>>> Christian.
>>>>
>>>> Am 27.09.2018 16:27 schrieb "Kuehling, Felix" 
>>>> <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>>>
>>>> > In this case you can end up accessing pages which are invalidated while 
>>>> get_user_pages is in process.
>>>>
>>>> What’s the sequence of events you have in mind? Something like this?
>>>>
>>>>   * Page table is updated and triggers MMU notifier
>>>>   * amdgpu MMU notifier runs and waits for pending CS to finish
>>>>     while holding the read lock
>>>>   * New CS starts just after invalidate_range_start MMU notifier
>>>>     finishes but before the page table update is done
>>>>   * get_user_pages returns outdated physical addresses
>>>>
>>>> I hope that’s not actually possible and that get_user_pages or 
>>>> hmm_vma_fault would block until the page table update is done. That 
>>>> is, invalidate_range_start marks the start of a page table update, 
>>>> and while that update is in progress, get_user_pages or 
>>>> hmm_vma_fault block. Jerome, can you comment on that?
>>>>
>>>> Thanks,
>>>>   Felix
>>>>
>>>> *From:*Koenig, Christian
>>>> *Sent:* Thursday, September 27, 2018 9:59 AM
>>>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
>>>> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
>>>> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org 
>>>> <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
>>>> <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse 
>>>> <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>>> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to 
>>>> replace mmu notifier v4
>>>>
>>>> Yeah I understand that, but again that won't work.
>>>>
>>>> In this case you can end up accessing pages which are invalidated 
>>>> while get_user_pages is in process.
>>>>
>>>> Christian.
>>>>
>>>> Am 27.09.2018 15:41 schrieb "Kuehling, Felix" 
>>>> <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>>>
>>>> > I’m not planning to change that. I don’t think there is any need to change it.
>>>>
>>>> >
>>>> > Yeah, but when HMM doesn't provide both the start and the end 
>>>> hock of the invalidation this way won't work any more.
>>>> >
>>>> > So we need to find a solution for this,
>>>> > Christian.
>>>>
>>>> My whole argument is that you don’t need to hold the read lock 
>>>> until the invalidate_range_end. Just read_lock and read_unlock in 
>>>> the invalidate_range_start function.
>>>>
>>>> Regards,
>>>>
>>>>   Felix
>>>>
>>>> *From:*Koenig, Christian
>>>> *Sent:* Thursday, September 27, 2018 9:22 AM
>>>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
>>>> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
>>>> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org 
>>>> <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
>>>> <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse 
>>>> <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>>> *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to 
>>>> replace mmu notifier v4
>>>>
>>>> Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
>>>>
>>>>     > The problem is here:
>>>>     >
>>>>
>>>>     > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>>>>
>>>>     > amdgpu_mn_unlock(p->mn);
>>>>
>>>>     >
>>>>     > We need to hold the lock until the fence is added to the
>>>>     reservation object.
>>>>     >
>>>>     > Otherwise somebody could have changed the page tables just in
>>>>     the moment between the check of
>>>>     amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the
>>>>     reservation object.
>>>>
>>>>     I’m not planning to change that. I don’t think there is any
>>>>     need to change it.
>>>>
>>>>
>>>> Yeah, but when HMM doesn't provide both the start and the end hock 
>>>> of the invalidation this way won't work any more.
>>>>
>>>> So we need to find a solution for this,
>>>> Christian.
>>>>
>>>>     Regards,
>>>>
>>>>       Felix
>>>>
>>>>     *From:*Koenig, Christian
>>>>     *Sent:* Thursday, September 27, 2018 7:24 AM
>>>>     *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>     <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>     *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>>>     <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>     <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>>     <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>>     *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to
>>>>     replace mmu notifier v4
>>>>
>>>>     Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
>>>>
>>>>         > We double check that there wasn't any page table
>>>>         modification while we prepared the submission and restart
>>>>         the whole process when there actually was some update.
>>>>         >
>>>>         > The reason why we need to do this is here:
>>>>         >
>>>>
>>>>         > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>>         p->fence);
>>>>         >        amdgpu_mn_unlock(p->mn);
>>>>
>>>>         >
>>>>         > Only after the new fence is added to the buffer object we
>>>>         can release the lock so that any invalidation will now
>>>>         block on our command submission to finish before it
>>>>         modifies the page table.
>>>>
>>>>         I don’t see why this requires holding the read-lock until
>>>>         invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets
>>>>         called while the mn read-lock is held in
>>>>         invalidate_range_start notifier.
>>>>
>>>>
>>>>     That's not related to amdgpu_ttm_tt_affect_userptr(), this
>>>>     function could actually be called outside the lock.
>>>>
>>>>     The problem is here:
>>>>
>>>>         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>>         p->fence);
>>>>
>>>>         amdgpu_mn_unlock(p->mn);
>>>>
>>>>
>>>>     We need to hold the lock until the fence is added to the
>>>>     reservation object.
>>>>
>>>>     Otherwise somebody could have changed the page tables just in
>>>>     the moment between the check of
>>>>     amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to the
>>>>     reservation object.
>>>>
>>>>     Regards,
>>>>     Christian.
>>>>
>>>>
>>>>         Regards,
>>>>
>>>>           Felix
>>>>
>>>>         *From:*Koenig, Christian
>>>>         *Sent:* Thursday, September 27, 2018 5:27 AM
>>>>         *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>         <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>         *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>>>         <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>         <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>>         <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>>         *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback
>>>>         to replace mmu notifier v4
>>>>
>>>>         That is correct, but take a look what we do when after
>>>>         calling the amdgpu_mn_read_lock():
>>>>
>>>>
>>>>                     /* No memory allocation is allowed while
>>>>             holding the mn lock */
>>>>                     amdgpu_mn_lock(p->mn);
>>>>             amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>>>>                             struct amdgpu_bo *bo =
>>>>             ttm_to_amdgpu_bo(e->tv.bo);
>>>>
>>>>                             if
>>>>             (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
>>>>                                     r = -ERESTARTSYS;
>>>>                                     goto error_abort;
>>>>                             }
>>>>                     }
>>>>
>>>>
>>>>         We double check that there wasn't any page table
>>>>         modification while we prepared the submission and restart
>>>>         the whole process when there actually was some update.
>>>>
>>>>         The reason why we need to do this is here:
>>>>
>>>>             ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>>             p->fence);
>>>>                     amdgpu_mn_unlock(p->mn);
>>>>
>>>>
>>>>         Only after the new fence is added to the buffer object we
>>>>         can release the lock so that any invalidation will now
>>>>         block on our command submission to finish before it
>>>>         modifies the page table.
>>>>
>>>>         The only other option would be to add the fence first and
>>>>         then check if there was any update to the page tables.
>>>>
>>>>         The issue with that approach is that adding a fence can't
>>>>         be made undone, so if we find that there actually was an
>>>>         update to the page tables we would need to somehow turn the
>>>>         CS into a dummy (e.g. overwrite all IBs with NOPs or
>>>>         something like that) and still submit it.
>>>>
>>>>         Not sure if that is actually possible.
>>>>
>>>>         Regards,
>>>>         Christian.
>>>>
>>>>         Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
>>>>
>>>>             So back to my previous question:
>>>>
>>>>             >> But do we really need another lock for this?
>>>>             Wouldn't the
>>>>
>>>>             >> re-validation of userptr BOs (currently calling
>>>>             get_user_pages) force
>>>>
>>>>             >> synchronization with the ongoing page table
>>>>             invalidation through the
>>>>
>>>>             >> mmap_sem or other MM locks?
>>>>
>>>>             >
>>>>
>>>>             > No and yes. We don't hold any other locks while doing
>>>>             command submission, but I expect that HMM has its own
>>>>             mechanism to prevent that.
>>>>
>>>>             >
>>>>
>>>>             > Since we don't modify
>>>>             amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly
>>>>             not using this mechanism correctly.
>>>>
>>>>             The existing amdgpu_mn_lock/unlock should block the MMU
>>>>             notifier while a command submission is in progress. It
>>>>             should also block command submission while an MMU
>>>>             notifier is in progress.
>>>>
>>>>             What we lose with HMM is the ability to hold a
>>>>             read-lock for the entire duration of the
>>>>             invalidate_range_start until invalidate_range_end. As I
>>>>             understand it, that lock is meant to prevent new
>>>>             command submissions while the page tables are being
>>>>             updated by the kernel. But my point is, that
>>>>             get_user_pages or hmm_vma_fault should do the same kind
>>>>             of thing. Before the command submission can go ahead,
>>>>             it needs to update the userptr addresses. If the page
>>>>             tables are still being updated, it will block there
>>>>             even without holding the amdgpu_mn_read_lock.
>>>>
>>>>             Regards,
>>>>
>>>>               Felix
>>>>
>>>>             *From:* Koenig, Christian
>>>>             *Sent:* Thursday, September 27, 2018 3:00 AM
>>>>             *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>             <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>             *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>>>             <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>;
>>>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>>             <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>>             *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror
>>>>             callback to replace mmu notifier v4
>>>>
>>>>             No, that won't work. We would still run into lock
>>>>             inversion problems.
>>>>
>>>>             What we could do with the scheduler is to turn
>>>>             submissions into dummies if we find that the page
>>>>             tables are now outdated.
>>>>
>>>>             But that would be really hacky and I'm not sure if that
>>>>             would really work in all cases.
>>>>
>>>>             Christian.
>>>>
>>>>             Am 27.09.2018 08:53 schrieb "Kuehling, Felix"
>>>>             <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>>>
>>>>             I had a chat with Jerome yesterday. He pointed out that
>>>>             the new blockable parameter can be used to infer
>>>>             whether the MMU notifier is being called  in a reclaim
>>>>             operation. So if blockable==true, it should even be
>>>>             safe to take the BO reservation lock without problems.
>>>>             I think with that we should be able to remove the
>>>>             read-write locking completely and go back to locking
>>>>             (or try-locking for blockable==false) the reservation
>>>>             locks in the MMU notifier?
>>>>
>>>>             Regards,
>>>>               Felix
>>>>
>>>>             -----Original Message-----
>>>>             From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>             <mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>> On
>>>>             Behalf Of Christian König
>>>>             Sent: Saturday, September 15, 2018 3:47 AM
>>>>             To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org
>>>>             <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip
>>>>             <Philip.Yang-5C7GfCeVMHo@public.gmane.org <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>;
>>>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>>             <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>>>             Subject: Re: [PATCH] drm/amdgpu: use HMM mirror
>>>>             callback to replace mmu notifier v4
>>>>
>>>>             Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
>>>>             > On 2018-09-14 01:52 PM, Christian König wrote:
>>>>             >> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>>>             >>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>>             >>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>             >>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>             >>>>> [SNIP]
>>>>             >>>>>>    + amdgpu_mn_read_unlock(amn);
>>>>             >>>>>> +
>>>>             >>>>> amdgpu_mn_read_lock/unlock support recursive
>>>>             locking for multiple
>>>>             >>>>> overlapping or nested invalidation ranges. But if
>>>>             you'r locking
>>>>             >>>>> and unlocking in the same function. Is that still
>>>>             a concern?
>>>>             >>> I don't understand the possible recursive case, but
>>>>             >>> amdgpu_mn_read_lock() still support recursive locking.
>>>>             >>>> Well the real problem is that unlocking them here
>>>>             won't work.
>>>>             >>>>
>>>>             >>>> We need to hold the lock until we are sure that
>>>>             the operation which
>>>>             >>>> updates the page tables is completed.
>>>>             >>>>
>>>>             >>> The reason for this change is because hmm mirror has
>>>>             >>> invalidate_start callback, no invalidate_end callback
>>>>             >>>
>>>>             >>> Check mmu_notifier.c and hmm.c again, below is
>>>>             entire logic to
>>>>             >>> update CPU page tables and callback:
>>>>             >>>
>>>>             >>> mn lock amn->lock is used to protect interval tree
>>>>             access because
>>>>             >>> user may submit/register new userptr anytime.
>>>>             >>> This is same for old and new way.
>>>>             >>>
>>>>             >>> step 2 guarantee the GPU operation is done before
>>>>             updating CPU page
>>>>             >>> table.
>>>>             >>>
>>>>             >>> So I think the change is safe. We don't need hold
>>>>             mn lock until the
>>>>             >>> CPU page tables update is completed.
>>>>             >> No, that isn't even remotely correct. The lock
>>>>             doesn't protects the
>>>>             >> interval tree.
>>>>             >>
>>>>             >>> Old:
>>>>             >>>     1. down_read_non_owner(&amn->lock)
>>>>             >>>     2. loop to handle BOs from node->bos through
>>>>             interval tree
>>>>             >>> amn->object nodes
>>>>             >>>         gfx: wait for pending BOs fence operation
>>>>             done, mark user
>>>>             >>> pages dirty
>>>>             >>>         kfd: evict user queues of the process, wait
>>>>             for queue
>>>>             >>> unmap/map operation done
>>>>             >>>     3. update CPU page tables
>>>>             >>>     4. up_read(&amn->lock)
>>>>             >>>
>>>>             >>> New, switch step 3 and 4
>>>>             >>>     1. down_read_non_owner(&amn->lock)
>>>>             >>>     2. loop to handle BOs from node->bos through
>>>>             interval tree
>>>>             >>> amn->object nodes
>>>>             >>>         gfx: wait for pending BOs fence operation
>>>>             done, mark user
>>>>             >>> pages dirty
>>>>             >>>         kfd: evict user queues of the process, wait
>>>>             for queue
>>>>             >>> unmap/map operation done
>>>>             >>>     3. up_read(&amn->lock)
>>>>             >>>     4. update CPU page tables
>>>>             >> The lock is there to make sure that we serialize
>>>>             page table updates
>>>>             >> with command submission.
>>>>             > As I understand it, the idea is to prevent command
>>>>             submission (adding
>>>>             > new fences to BOs) while a page table invalidation is
>>>>             in progress.
>>>>
>>>>             Yes, exactly.
>>>>
>>>>             > But do we really need another lock for this? Wouldn't
>>>>             the
>>>>             > re-validation of userptr BOs (currently calling
>>>>             get_user_pages) force
>>>>             > synchronization with the ongoing page table
>>>>             invalidation through the
>>>>             > mmap_sem or other MM locks?
>>>>
>>>>             No and yes. We don't hold any other locks while doing
>>>>             command submission, but I expect that HMM has its own
>>>>             mechanism to prevent that.
>>>>
>>>>             Since we don't modify
>>>>             amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly
>>>>             not using this mechanism correctly.
>>>>
>>>>             Regards,
>>>>             Christian.
>>>>             _______________________________________________
>>>>             amd-gfx mailing list
>>>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
>>>>             https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>
>>>
>>
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[-- Attachment #1.2: Type: text/html, Size: 64963 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                                                                     ` <09916f9a-3f5f-27ab-01e6-6d77303cf052-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-10-03 20:31                                                                                                       ` Philip Yang
       [not found]                                                                                                         ` <bf806477-06b3-61de-fea3-5ad260d92cdd-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 28+ messages in thread
From: Philip Yang @ 2018-10-03 20:31 UTC (permalink / raw)
  To: christian.koenig-5C7GfCeVMHo
  Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 26855 bytes --]

Hi Christian,

Yes, I agree. I am working on patch 2 to replace get_user_page with HMM. 
One problem is in current gfx path, we check if mmu_invalidation 
multiple times in amdgpu_cs_ioctl() path after get_user_page(), 
amdgpu_cs_parser_bos(), amdgpu_cs_list_validate(), and 
amdgpu_cs_submit(). For HMM, hmm_vma_range_done() has to be called once 
and only once after hmm_vma_get_pfns()/hmm_vma_fault(), so I will call 
hmm_vma_range_done() inside amdgpu_cs_submit after holding the mn lock. 
Is my understanding correct?

Philip

On 2018-10-02 11:05 AM, Christian König wrote:
> Checking more code and documentation and thinking about it over my 
> vacation I think I have some new conclusions here.
>
> Currently we are using get_user_pages() together with an MMU notifier 
> to guarantee coherent address space view, because get_user_pages() 
> works by grabbing a reference to the pages and ignoring concurrent 
> page table updates.
>
> But HMM uses a different approach by checking the address space for 
> modifications using hmm_vma_range_done() and re-trying when the 
> address space has changed.
>
> Now what you are trying to do is to change that into get_user_pages() 
> and HMM callback and this is what won't work. We can either use 
> get_user_pages() with MMU notifier or we can use HMM for the work, but 
> we can't mix and match.
>
> So my initial guess was correct that we just need to change both sides 
> of the implementation at the same time.
>
> Regards,
> Christian.
>
> Am 28.09.2018 um 17:13 schrieb Koenig, Christian:
>> No it definitely isn't.
>>
>> We have literally worked month on this with the core MM developers.
>>
>> Making sure that we have a consistent page array is absolutely vital 
>> for correct operation.
>>
>> Please also check Jerome's presentation from XDC it also perfectly 
>> explains why this approach won't work correctly.
>>
>> Christian.
>>
>> Am 28.09.2018 17:07 schrieb "Yang, Philip" <Philip.Yang-5C7GfCeVMHo@public.gmane.org>:
>> For B path, we take mm->mmap_sem, then call hmm_vma_get_pfns() or 
>> get_user_pages(). This is obvious.
>>
>> For A path, mmu notifier 
>> mmu_notifier_invalidate_range_start()/mmu_notifier_invalidate_range_end() 
>> is called in many places, and the calling path is quit complicated 
>> inside mm, it's not obvious. I checked many of the them, for example:
>>
>> do_munmap()
>>   down_write(&mm->mmap_sem)
>>   arch_unmap()
>>     mpx_notify_unmap()...
>>        zap_bt_entries_mapping()
>>          zap_page_range()
>>  up_write(&mm->mmap_sem)
>>
>> void zap_page_range(struct vm_area_struct *vma, unsigned long start,
>>         unsigned long size)
>> {
>>     struct mm_struct *mm = vma->vm_mm;
>>     struct mmu_gather tlb;
>>     unsigned long end = start + size;
>>
>>     lru_add_drain();
>>     tlb_gather_mmu(&tlb, mm, start, end);
>>     update_hiwater_rss(mm);
>>     mmu_notifier_invalidate_range_start(mm, start, end);
>>     for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
>>         unmap_single_vma(&tlb, vma, start, end, NULL);
>>     mmu_notifier_invalidate_range_end(mm, start, end);
>>     tlb_finish_mmu(&tlb, start, end);
>> }
>>
>> So AFAIK it's okay without invalidate_range_end() callback.
>>
>> Regards,
>> Philip
>>
>> On 2018-09-28 01:25 AM, Koenig, Christian wrote:
>>> No, that is incorrect as well :)
>>>
>>> The mmap_sem isn't necessary taken during page table updates.
>>>
>>> What you could do is replace get_user_pages() directly with HMM. If 
>>> I'm not completely mistaken that should work as expected.
>>>
>>> Christian.
>>>
>>> Am 27.09.2018 22:18 schrieb "Yang, Philip" <Philip.Yang-5C7GfCeVMHo@public.gmane.org>:
>>> I was trying to understand the way how HMM handle this concurrent 
>>> issue and how we handle it in amdgpu_ttm_tt_userptr_needs_pages() 
>>> and amdgpu_ttm_tt_affect_userptr(). HMM uses range->valid flag, we 
>>> use gtt->mmu_invalidations and gtt->last_set_pages. Both use the 
>>> same lock plus flag idea actually.
>>>
>>> Thanks for the information, now I understand fence 
>>> ttm_eu_fence_buffer_objects() put to BOs will block CPU page table 
>>> update. This is another side of this concurrent issue I didn't know.
>>>
>>> I had same worry that it has issue without invalidate_range_end() 
>>> callback as the calling sequence Felix lists. Now I think it's fine 
>>> after taking a look again today because of mm->mmap_sem usage, this 
>>> is my understanding:
>>>
>>> A path:
>>>
>>> down_write(&mm->mmap_sem);
>>> mmu_notifier_invalidate_range_start()
>>>     take_lock()
>>>     release_lock()
>>> CPU page table update
>>> mmu_notifier_invalidate_range_end()
>>> up_write(&mm->mmap_sem);
>>>
>>> B path:
>>>
>>> again:
>>> down_read(&mm->mmap_sem);
>>> hmm_vma_get_pfns()
>>> up_read(&mm->mmap_sem);
>>> ....
>>> ....
>>> take_lock()
>>> if (!hmm_vma_range_done()) {
>>>    release_lock()
>>>    goto again
>>> }
>>> submit command job...
>>> release_lock()
>>>
>>> If you agree, I will submit patch v5 with some minor changes, and 
>>> submit another patch to replace get_user_page() with HMM.
>>>
>>> Regards,
>>> Philip
>>>
>>> On 2018-09-27 11:36 AM, Christian König wrote:
>>>> Yeah, I've read that as well.
>>>>
>>>> My best guess is that we just need to add a call to 
>>>> hmm_vma_range_done() after taking the lock and also replace 
>>>> get_user_pages() with hmm_vma_get_pfns().
>>>>
>>>> But I'm still not 100% sure how all of that is supposed to work 
>>>> together.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>> Am 27.09.2018 um 16:50 schrieb Kuehling, Felix:
>>>>>
>>>>> I think the answer is here: 
>>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/vm/hmm.rst#n216
>>>>>
>>>>> Regards,
>>>>>
>>>>> Felix
>>>>>
>>>>> *From:*Koenig, Christian
>>>>> *Sent:* Thursday, September 27, 2018 10:30 AM
>>>>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>> *Cc:* j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; 
>>>>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to 
>>>>> replace mmu notifier v4
>>>>>
>>>>> At least with get_user_pages() that is perfectly possible.
>>>>>
>>>>> For HMM it could be that this is prevented somehow.
>>>>>
>>>>> Christian.
>>>>>
>>>>> Am 27.09.2018 16:27 schrieb "Kuehling, Felix" 
>>>>> <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>>>>
>>>>> > In this case you can end up accessing pages which are invalidated while 
>>>>> get_user_pages is in process.
>>>>>
>>>>> What’s the sequence of events you have in mind? Something like this?
>>>>>
>>>>>   * Page table is updated and triggers MMU notifier
>>>>>   * amdgpu MMU notifier runs and waits for pending CS to finish
>>>>>     while holding the read lock
>>>>>   * New CS starts just after invalidate_range_start MMU notifier
>>>>>     finishes but before the page table update is done
>>>>>   * get_user_pages returns outdated physical addresses
>>>>>
>>>>> I hope that’s not actually possible and that get_user_pages or 
>>>>> hmm_vma_fault would block until the page table update is done. 
>>>>> That is, invalidate_range_start marks the start of a page table 
>>>>> update, and while that update is in progress, get_user_pages or 
>>>>> hmm_vma_fault block. Jerome, can you comment on that?
>>>>>
>>>>> Thanks,
>>>>>   Felix
>>>>>
>>>>> *From:*Koenig, Christian
>>>>> *Sent:* Thursday, September 27, 2018 9:59 AM
>>>>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
>>>>> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
>>>>> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org 
>>>>> <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
>>>>> <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse 
>>>>> <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>>>> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to 
>>>>> replace mmu notifier v4
>>>>>
>>>>> Yeah I understand that, but again that won't work.
>>>>>
>>>>> In this case you can end up accessing pages which are invalidated 
>>>>> while get_user_pages is in process.
>>>>>
>>>>> Christian.
>>>>>
>>>>> Am 27.09.2018 15:41 schrieb "Kuehling, Felix" 
>>>>> <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>>>>
>>>>> > I’m not planning to change that. I don’t think there is any need to 
>>>>> change it.
>>>>>
>>>>> >
>>>>> > Yeah, but when HMM doesn't provide both the start and the end 
>>>>> hock of the invalidation this way won't work any more.
>>>>> >
>>>>> > So we need to find a solution for this,
>>>>> > Christian.
>>>>>
>>>>> My whole argument is that you don’t need to hold the read lock 
>>>>> until the invalidate_range_end. Just read_lock and read_unlock in 
>>>>> the invalidate_range_start function.
>>>>>
>>>>> Regards,
>>>>>
>>>>>   Felix
>>>>>
>>>>> *From:*Koenig, Christian
>>>>> *Sent:* Thursday, September 27, 2018 9:22 AM
>>>>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
>>>>> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
>>>>> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org 
>>>>> <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
>>>>> <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse 
>>>>> <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>>>> *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to 
>>>>> replace mmu notifier v4
>>>>>
>>>>> Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
>>>>>
>>>>>     > The problem is here:
>>>>>     >
>>>>>
>>>>>     > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>>>     p->fence);
>>>>>
>>>>>     > amdgpu_mn_unlock(p->mn);
>>>>>
>>>>>     >
>>>>>     > We need to hold the lock until the fence is added to the
>>>>>     reservation object.
>>>>>     >
>>>>>     > Otherwise somebody could have changed the page tables just
>>>>>     in the moment between the check of
>>>>>     amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to
>>>>>     the reservation object.
>>>>>
>>>>>     I’m not planning to change that. I don’t think there is any
>>>>>     need to change it.
>>>>>
>>>>>
>>>>> Yeah, but when HMM doesn't provide both the start and the end hock 
>>>>> of the invalidation this way won't work any more.
>>>>>
>>>>> So we need to find a solution for this,
>>>>> Christian.
>>>>>
>>>>>     Regards,
>>>>>
>>>>>       Felix
>>>>>
>>>>>     *From:*Koenig, Christian
>>>>>     *Sent:* Thursday, September 27, 2018 7:24 AM
>>>>>     *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>     <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>     *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>>>>     <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>     <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>>>     <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>>>     *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to
>>>>>     replace mmu notifier v4
>>>>>
>>>>>     Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
>>>>>
>>>>>         > We double check that there wasn't any page table
>>>>>         modification while we prepared the submission and restart
>>>>>         the whole process when there actually was some update.
>>>>>         >
>>>>>         > The reason why we need to do this is here:
>>>>>         >
>>>>>
>>>>>         > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>>>         p->fence);
>>>>>         > amdgpu_mn_unlock(p->mn);
>>>>>
>>>>>         >
>>>>>         > Only after the new fence is added to the buffer object
>>>>>         we can release the lock so that any invalidation will now
>>>>>         block on our command submission to finish before it
>>>>>         modifies the page table.
>>>>>
>>>>>         I don’t see why this requires holding the read-lock until
>>>>>         invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets
>>>>>         called while the mn read-lock is held in
>>>>>         invalidate_range_start notifier.
>>>>>
>>>>>
>>>>>     That's not related to amdgpu_ttm_tt_affect_userptr(), this
>>>>>     function could actually be called outside the lock.
>>>>>
>>>>>     The problem is here:
>>>>>
>>>>>         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>>>         p->fence);
>>>>>
>>>>>         amdgpu_mn_unlock(p->mn);
>>>>>
>>>>>
>>>>>     We need to hold the lock until the fence is added to the
>>>>>     reservation object.
>>>>>
>>>>>     Otherwise somebody could have changed the page tables just in
>>>>>     the moment between the check of
>>>>>     amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to
>>>>>     the reservation object.
>>>>>
>>>>>     Regards,
>>>>>     Christian.
>>>>>
>>>>>
>>>>>         Regards,
>>>>>
>>>>>           Felix
>>>>>
>>>>>         *From:*Koenig, Christian
>>>>>         *Sent:* Thursday, September 27, 2018 5:27 AM
>>>>>         *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>         <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>         *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>>>>         <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>;
>>>>>         amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>         <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>>>         <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>>>         *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback
>>>>>         to replace mmu notifier v4
>>>>>
>>>>>         That is correct, but take a look what we do when after
>>>>>         calling the amdgpu_mn_read_lock():
>>>>>
>>>>>
>>>>>                     /* No memory allocation is allowed while
>>>>>             holding the mn lock */
>>>>>                     amdgpu_mn_lock(p->mn);
>>>>>             amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>>>>>                             struct amdgpu_bo *bo =
>>>>>             ttm_to_amdgpu_bo(e->tv.bo);
>>>>>
>>>>>                             if
>>>>>             (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
>>>>>                                     r = -ERESTARTSYS;
>>>>>                                     goto error_abort;
>>>>>                             }
>>>>>                     }
>>>>>
>>>>>
>>>>>         We double check that there wasn't any page table
>>>>>         modification while we prepared the submission and restart
>>>>>         the whole process when there actually was some update.
>>>>>
>>>>>         The reason why we need to do this is here:
>>>>>
>>>>>             ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>>>             p->fence);
>>>>>             amdgpu_mn_unlock(p->mn);
>>>>>
>>>>>
>>>>>         Only after the new fence is added to the buffer object we
>>>>>         can release the lock so that any invalidation will now
>>>>>         block on our command submission to finish before it
>>>>>         modifies the page table.
>>>>>
>>>>>         The only other option would be to add the fence first and
>>>>>         then check if there was any update to the page tables.
>>>>>
>>>>>         The issue with that approach is that adding a fence can't
>>>>>         be made undone, so if we find that there actually was an
>>>>>         update to the page tables we would need to somehow turn
>>>>>         the CS into a dummy (e.g. overwrite all IBs with NOPs or
>>>>>         something like that) and still submit it.
>>>>>
>>>>>         Not sure if that is actually possible.
>>>>>
>>>>>         Regards,
>>>>>         Christian.
>>>>>
>>>>>         Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
>>>>>
>>>>>             So back to my previous question:
>>>>>
>>>>>             >> But do we really need another lock for this?
>>>>>             Wouldn't the
>>>>>
>>>>>             >> re-validation of userptr BOs (currently calling
>>>>>             get_user_pages) force
>>>>>
>>>>>             >> synchronization with the ongoing page table
>>>>>             invalidation through the
>>>>>
>>>>>             >> mmap_sem or other MM locks?
>>>>>
>>>>>             >
>>>>>
>>>>>             > No and yes. We don't hold any other locks while
>>>>>             doing command submission, but I expect that HMM has
>>>>>             its own mechanism to prevent that.
>>>>>
>>>>>             >
>>>>>
>>>>>             > Since we don't modify
>>>>>             amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly
>>>>>             not using this mechanism correctly.
>>>>>
>>>>>             The existing amdgpu_mn_lock/unlock should block the
>>>>>             MMU notifier while a command submission is in
>>>>>             progress. It should also block command submission
>>>>>             while an MMU notifier is in progress.
>>>>>
>>>>>             What we lose with HMM is the ability to hold a
>>>>>             read-lock for the entire duration of the
>>>>>             invalidate_range_start until invalidate_range_end. As
>>>>>             I understand it, that lock is meant to prevent new
>>>>>             command submissions while the page tables are being
>>>>>             updated by the kernel. But my point is, that
>>>>>             get_user_pages or hmm_vma_fault should do the same
>>>>>             kind of thing. Before the command submission can go
>>>>>             ahead, it needs to update the userptr addresses. If
>>>>>             the page tables are still being updated, it will block
>>>>>             there even without holding the amdgpu_mn_read_lock.
>>>>>
>>>>>             Regards,
>>>>>
>>>>>               Felix
>>>>>
>>>>>             *From:* Koenig, Christian
>>>>>             *Sent:* Thursday, September 27, 2018 3:00 AM
>>>>>             *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>             <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>             *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>>>>             <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>;
>>>>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>>>             <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>>>             *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror
>>>>>             callback to replace mmu notifier v4
>>>>>
>>>>>             No, that won't work. We would still run into lock
>>>>>             inversion problems.
>>>>>
>>>>>             What we could do with the scheduler is to turn
>>>>>             submissions into dummies if we find that the page
>>>>>             tables are now outdated.
>>>>>
>>>>>             But that would be really hacky and I'm not sure if
>>>>>             that would really work in all cases.
>>>>>
>>>>>             Christian.
>>>>>
>>>>>             Am 27.09.2018 08:53 schrieb "Kuehling, Felix"
>>>>>             <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>>>>
>>>>>             I had a chat with Jerome yesterday. He pointed out
>>>>>             that the new blockable parameter can be used to infer
>>>>>             whether the MMU notifier is being called  in a reclaim
>>>>>             operation. So if blockable==true, it should even be
>>>>>             safe to take the BO reservation lock without problems.
>>>>>             I think with that we should be able to remove the
>>>>>             read-write locking completely and go back to locking
>>>>>             (or try-locking for blockable==false) the reservation
>>>>>             locks in the MMU notifier?
>>>>>
>>>>>             Regards,
>>>>>               Felix
>>>>>
>>>>>             -----Original Message-----
>>>>>             From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>             <mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>> On
>>>>>             Behalf Of Christian König
>>>>>             Sent: Saturday, September 15, 2018 3:47 AM
>>>>>             To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org
>>>>>             <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip
>>>>>             <Philip.Yang-5C7GfCeVMHo@public.gmane.org <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>;
>>>>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>>>             <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>>>>             Subject: Re: [PATCH] drm/amdgpu: use HMM mirror
>>>>>             callback to replace mmu notifier v4
>>>>>
>>>>>             Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
>>>>>             > On 2018-09-14 01:52 PM, Christian König wrote:
>>>>>             >> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>>>>             >>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>>>             >>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>>             >>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>>             >>>>> [SNIP]
>>>>>             >>>>>>    + amdgpu_mn_read_unlock(amn);
>>>>>             >>>>>> +
>>>>>             >>>>> amdgpu_mn_read_lock/unlock support recursive
>>>>>             locking for multiple
>>>>>             >>>>> overlapping or nested invalidation ranges. But
>>>>>             if you'r locking
>>>>>             >>>>> and unlocking in the same function. Is that
>>>>>             still a concern?
>>>>>             >>> I don't understand the possible recursive case, but
>>>>>             >>> amdgpu_mn_read_lock() still support recursive locking.
>>>>>             >>>> Well the real problem is that unlocking them here
>>>>>             won't work.
>>>>>             >>>>
>>>>>             >>>> We need to hold the lock until we are sure that
>>>>>             the operation which
>>>>>             >>>> updates the page tables is completed.
>>>>>             >>>>
>>>>>             >>> The reason for this change is because hmm mirror has
>>>>>             >>> invalidate_start callback, no invalidate_end callback
>>>>>             >>>
>>>>>             >>> Check mmu_notifier.c and hmm.c again, below is
>>>>>             entire logic to
>>>>>             >>> update CPU page tables and callback:
>>>>>             >>>
>>>>>             >>> mn lock amn->lock is used to protect interval tree
>>>>>             access because
>>>>>             >>> user may submit/register new userptr anytime.
>>>>>             >>> This is same for old and new way.
>>>>>             >>>
>>>>>             >>> step 2 guarantee the GPU operation is done before
>>>>>             updating CPU page
>>>>>             >>> table.
>>>>>             >>>
>>>>>             >>> So I think the change is safe. We don't need hold
>>>>>             mn lock until the
>>>>>             >>> CPU page tables update is completed.
>>>>>             >> No, that isn't even remotely correct. The lock
>>>>>             doesn't protects the
>>>>>             >> interval tree.
>>>>>             >>
>>>>>             >>> Old:
>>>>>             >>>     1. down_read_non_owner(&amn->lock)
>>>>>             >>>     2. loop to handle BOs from node->bos through
>>>>>             interval tree
>>>>>             >>> amn->object nodes
>>>>>             >>>         gfx: wait for pending BOs fence operation
>>>>>             done, mark user
>>>>>             >>> pages dirty
>>>>>             >>>         kfd: evict user queues of the process,
>>>>>             wait for queue
>>>>>             >>> unmap/map operation done
>>>>>             >>>     3. update CPU page tables
>>>>>             >>>     4. up_read(&amn->lock)
>>>>>             >>>
>>>>>             >>> New, switch step 3 and 4
>>>>>             >>>     1. down_read_non_owner(&amn->lock)
>>>>>             >>>     2. loop to handle BOs from node->bos through
>>>>>             interval tree
>>>>>             >>> amn->object nodes
>>>>>             >>>         gfx: wait for pending BOs fence operation
>>>>>             done, mark user
>>>>>             >>> pages dirty
>>>>>             >>>         kfd: evict user queues of the process,
>>>>>             wait for queue
>>>>>             >>> unmap/map operation done
>>>>>             >>>     3. up_read(&amn->lock)
>>>>>             >>>     4. update CPU page tables
>>>>>             >> The lock is there to make sure that we serialize
>>>>>             page table updates
>>>>>             >> with command submission.
>>>>>             > As I understand it, the idea is to prevent command
>>>>>             submission (adding
>>>>>             > new fences to BOs) while a page table invalidation
>>>>>             is in progress.
>>>>>
>>>>>             Yes, exactly.
>>>>>
>>>>>             > But do we really need another lock for this?
>>>>>             Wouldn't the
>>>>>             > re-validation of userptr BOs (currently calling
>>>>>             get_user_pages) force
>>>>>             > synchronization with the ongoing page table
>>>>>             invalidation through the
>>>>>             > mmap_sem or other MM locks?
>>>>>
>>>>>             No and yes. We don't hold any other locks while doing
>>>>>             command submission, but I expect that HMM has its own
>>>>>             mechanism to prevent that.
>>>>>
>>>>>             Since we don't modify
>>>>>             amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly
>>>>>             not using this mechanism correctly.
>>>>>
>>>>>             Regards,
>>>>>             Christian.
>>>>>             _______________________________________________
>>>>>             amd-gfx mailing list
>>>>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
>>>>>             https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>
>>>>
>>>
>>
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>


[-- Attachment #1.2: Type: text/html, Size: 68292 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
       [not found]                                                                                                         ` <bf806477-06b3-61de-fea3-5ad260d92cdd-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-04  7:20                                                                                                           ` Christian König
  0 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2018-10-04  7:20 UTC (permalink / raw)
  To: Philip Yang, christian.koenig-5C7GfCeVMHo
  Cc: j.glisse-Re5JQEeQqe8AvxtiuMwx3w, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 27932 bytes --]

> Is my understanding correct?
Yes, of hand that sounds correct to me.

The other occasions should just be early bail out to optimize things 
under memory pressure.

Christian.

Am 03.10.2018 um 22:31 schrieb Philip Yang:
> Hi Christian,
>
> Yes, I agree. I am working on patch 2 to replace get_user_page with 
> HMM. One problem is in current gfx path, we check if mmu_invalidation 
> multiple times in amdgpu_cs_ioctl() path after get_user_page(), 
> amdgpu_cs_parser_bos(), amdgpu_cs_list_validate(), and 
> amdgpu_cs_submit(). For HMM, hmm_vma_range_done() has to be called 
> once and only once after hmm_vma_get_pfns()/hmm_vma_fault(), so I will 
> call hmm_vma_range_done() inside amdgpu_cs_submit after holding the mn 
> lock. Is my understanding correct?
>
> Philip
>
> On 2018-10-02 11:05 AM, Christian König wrote:
>> Checking more code and documentation and thinking about it over my 
>> vacation I think I have some new conclusions here.
>>
>> Currently we are using get_user_pages() together with an MMU notifier 
>> to guarantee coherent address space view, because get_user_pages() 
>> works by grabbing a reference to the pages and ignoring concurrent 
>> page table updates.
>>
>> But HMM uses a different approach by checking the address space for 
>> modifications using hmm_vma_range_done() and re-trying when the 
>> address space has changed.
>>
>> Now what you are trying to do is to change that into get_user_pages() 
>> and HMM callback and this is what won't work. We can either use 
>> get_user_pages() with MMU notifier or we can use HMM for the work, 
>> but we can't mix and match.
>>
>> So my initial guess was correct that we just need to change both 
>> sides of the implementation at the same time.
>>
>> Regards,
>> Christian.
>>
>> Am 28.09.2018 um 17:13 schrieb Koenig, Christian:
>>> No it definitely isn't.
>>>
>>> We have literally worked month on this with the core MM developers.
>>>
>>> Making sure that we have a consistent page array is absolutely vital 
>>> for correct operation.
>>>
>>> Please also check Jerome's presentation from XDC it also perfectly 
>>> explains why this approach won't work correctly.
>>>
>>> Christian.
>>>
>>> Am 28.09.2018 17:07 schrieb "Yang, Philip" <Philip.Yang-5C7GfCeVMHo@public.gmane.org>:
>>> For B path, we take mm->mmap_sem, then call hmm_vma_get_pfns() or 
>>> get_user_pages(). This is obvious.
>>>
>>> For A path, mmu notifier 
>>> mmu_notifier_invalidate_range_start()/mmu_notifier_invalidate_range_end() 
>>> is called in many places, and the calling path is quit complicated 
>>> inside mm, it's not obvious. I checked many of the them, for example:
>>>
>>> do_munmap()
>>>   down_write(&mm->mmap_sem)
>>>   arch_unmap()
>>>     mpx_notify_unmap()...
>>>        zap_bt_entries_mapping()
>>>          zap_page_range()
>>>  up_write(&mm->mmap_sem)
>>>
>>> void zap_page_range(struct vm_area_struct *vma, unsigned long start,
>>>         unsigned long size)
>>> {
>>>     struct mm_struct *mm = vma->vm_mm;
>>>     struct mmu_gather tlb;
>>>     unsigned long end = start + size;
>>>
>>>     lru_add_drain();
>>>     tlb_gather_mmu(&tlb, mm, start, end);
>>>     update_hiwater_rss(mm);
>>>     mmu_notifier_invalidate_range_start(mm, start, end);
>>>     for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
>>>         unmap_single_vma(&tlb, vma, start, end, NULL);
>>>     mmu_notifier_invalidate_range_end(mm, start, end);
>>>     tlb_finish_mmu(&tlb, start, end);
>>> }
>>>
>>> So AFAIK it's okay without invalidate_range_end() callback.
>>>
>>> Regards,
>>> Philip
>>>
>>> On 2018-09-28 01:25 AM, Koenig, Christian wrote:
>>>> No, that is incorrect as well :)
>>>>
>>>> The mmap_sem isn't necessary taken during page table updates.
>>>>
>>>> What you could do is replace get_user_pages() directly with HMM. If 
>>>> I'm not completely mistaken that should work as expected.
>>>>
>>>> Christian.
>>>>
>>>> Am 27.09.2018 22:18 schrieb "Yang, Philip" <Philip.Yang-5C7GfCeVMHo@public.gmane.org>:
>>>> I was trying to understand the way how HMM handle this concurrent 
>>>> issue and how we handle it in amdgpu_ttm_tt_userptr_needs_pages() 
>>>> and  amdgpu_ttm_tt_affect_userptr(). HMM uses range->valid flag, we 
>>>> use gtt->mmu_invalidations and gtt->last_set_pages. Both use the 
>>>> same lock plus flag idea actually.
>>>>
>>>> Thanks for the information, now I understand fence 
>>>> ttm_eu_fence_buffer_objects() put to BOs will block CPU page table 
>>>> update. This is another side of this concurrent issue I didn't know.
>>>>
>>>> I had same worry that it has issue without invalidate_range_end() 
>>>> callback as the calling sequence Felix lists. Now I think it's fine 
>>>> after taking a look again today because of mm->mmap_sem usage, this 
>>>> is my understanding:
>>>>
>>>> A path:
>>>>
>>>> down_write(&mm->mmap_sem);
>>>> mmu_notifier_invalidate_range_start()
>>>>     take_lock()
>>>>     release_lock()
>>>> CPU page table update
>>>> mmu_notifier_invalidate_range_end()
>>>> up_write(&mm->mmap_sem);
>>>>
>>>> B path:
>>>>
>>>> again:
>>>> down_read(&mm->mmap_sem);
>>>> hmm_vma_get_pfns()
>>>> up_read(&mm->mmap_sem);
>>>> ....
>>>> ....
>>>> take_lock()
>>>> if (!hmm_vma_range_done()) {
>>>>    release_lock()
>>>>    goto again
>>>> }
>>>> submit command job...
>>>> release_lock()
>>>>
>>>> If you agree, I will submit patch v5 with some minor changes, and 
>>>> submit another patch to replace get_user_page() with HMM.
>>>>
>>>> Regards,
>>>> Philip
>>>>
>>>> On 2018-09-27 11:36 AM, Christian König wrote:
>>>>> Yeah, I've read that as well.
>>>>>
>>>>> My best guess is that we just need to add a call to 
>>>>> hmm_vma_range_done() after taking the lock and also replace 
>>>>> get_user_pages() with hmm_vma_get_pfns().
>>>>>
>>>>> But I'm still not 100% sure how all of that is supposed to work 
>>>>> together.
>>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>> Am 27.09.2018 um 16:50 schrieb Kuehling, Felix:
>>>>>>
>>>>>> I think the answer is here: 
>>>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/vm/hmm.rst#n216
>>>>>>
>>>>>> Regards,
>>>>>>
>>>>>>   Felix
>>>>>>
>>>>>> *From:*Koenig, Christian
>>>>>> *Sent:* Thursday, September 27, 2018 10:30 AM
>>>>>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>> *Cc:* j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org; Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>; 
>>>>>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to 
>>>>>> replace mmu notifier v4
>>>>>>
>>>>>> At least with get_user_pages() that is perfectly possible.
>>>>>>
>>>>>> For HMM it could be that this is prevented somehow.
>>>>>>
>>>>>> Christian.
>>>>>>
>>>>>> Am 27.09.2018 16:27 schrieb "Kuehling, Felix" 
>>>>>> <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>>>>>
>>>>>> > In this case you can end up accessing pages which are invalidated while 
>>>>>> get_user_pages is in process.
>>>>>>
>>>>>> What’s the sequence of events you have in mind? Something like this?
>>>>>>
>>>>>>   * Page table is updated and triggers MMU notifier
>>>>>>   * amdgpu MMU notifier runs and waits for pending CS to finish
>>>>>>     while holding the read lock
>>>>>>   * New CS starts just after invalidate_range_start MMU notifier
>>>>>>     finishes but before the page table update is done
>>>>>>   * get_user_pages returns outdated physical addresses
>>>>>>
>>>>>> I hope that’s not actually possible and that get_user_pages or 
>>>>>> hmm_vma_fault would block until the page table update is done. 
>>>>>> That is, invalidate_range_start marks the start of a page table 
>>>>>> update, and while that update is in progress, get_user_pages or 
>>>>>> hmm_vma_fault block. Jerome, can you comment on that?
>>>>>>
>>>>>> Thanks,
>>>>>>   Felix
>>>>>>
>>>>>> *From:*Koenig, Christian
>>>>>> *Sent:* Thursday, September 27, 2018 9:59 AM
>>>>>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
>>>>>> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
>>>>>> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org 
>>>>>> <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
>>>>>> <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse 
>>>>>> <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>>>>> *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror callback to 
>>>>>> replace mmu notifier v4
>>>>>>
>>>>>> Yeah I understand that, but again that won't work.
>>>>>>
>>>>>> In this case you can end up accessing pages which are invalidated 
>>>>>> while get_user_pages is in process.
>>>>>>
>>>>>> Christian.
>>>>>>
>>>>>> Am 27.09.2018 15:41 schrieb "Kuehling, Felix" 
>>>>>> <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>>>>>
>>>>>> > I’m not planning to change that. I don’t think there is any need to 
>>>>>> change it.
>>>>>>
>>>>>> >
>>>>>> > Yeah, but when HMM doesn't provide both the start and the end 
>>>>>> hock of the invalidation this way won't work any more.
>>>>>> >
>>>>>> > So we need to find a solution for this,
>>>>>> > Christian.
>>>>>>
>>>>>> My whole argument is that you don’t need to hold the read lock 
>>>>>> until the invalidate_range_end. Just read_lock and read_unlock in 
>>>>>> the invalidate_range_start function.
>>>>>>
>>>>>> Regards,
>>>>>>
>>>>>>   Felix
>>>>>>
>>>>>> *From:*Koenig, Christian
>>>>>> *Sent:* Thursday, September 27, 2018 9:22 AM
>>>>>> *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org 
>>>>>> <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>
>>>>>> *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org 
>>>>>> <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
>>>>>> <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse 
>>>>>> <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>>>>> *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to 
>>>>>> replace mmu notifier v4
>>>>>>
>>>>>> Am 27.09.2018 um 15:18 schrieb Kuehling, Felix:
>>>>>>
>>>>>>     > The problem is here:
>>>>>>     >
>>>>>>
>>>>>>     > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>>>>     p->fence);
>>>>>>
>>>>>>     > amdgpu_mn_unlock(p->mn);
>>>>>>
>>>>>>     >
>>>>>>     > We need to hold the lock until the fence is added to the
>>>>>>     reservation object.
>>>>>>     >
>>>>>>     > Otherwise somebody could have changed the page tables just
>>>>>>     in the moment between the check of
>>>>>>     amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to
>>>>>>     the reservation object.
>>>>>>
>>>>>>     I’m not planning to change that. I don’t think there is any
>>>>>>     need to change it.
>>>>>>
>>>>>>
>>>>>> Yeah, but when HMM doesn't provide both the start and the end 
>>>>>> hock of the invalidation this way won't work any more.
>>>>>>
>>>>>> So we need to find a solution for this,
>>>>>> Christian.
>>>>>>
>>>>>>     Regards,
>>>>>>
>>>>>>       Felix
>>>>>>
>>>>>>     *From:*Koenig, Christian
>>>>>>     *Sent:* Thursday, September 27, 2018 7:24 AM
>>>>>>     *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>>     <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>>     *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>>>>>     <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>>     <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>>>>     <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>>>>     *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror callback to
>>>>>>     replace mmu notifier v4
>>>>>>
>>>>>>     Am 27.09.2018 um 13:08 schrieb Kuehling, Felix:
>>>>>>
>>>>>>         > We double check that there wasn't any page table
>>>>>>         modification while we prepared the submission and restart
>>>>>>         the whole process when there actually was some update.
>>>>>>         >
>>>>>>         > The reason why we need to do this is here:
>>>>>>         >
>>>>>>
>>>>>>         > ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>>>>         p->fence);
>>>>>>         > amdgpu_mn_unlock(p->mn);
>>>>>>
>>>>>>         >
>>>>>>         > Only after the new fence is added to the buffer object
>>>>>>         we can release the lock so that any invalidation will now
>>>>>>         block on our command submission to finish before it
>>>>>>         modifies the page table.
>>>>>>
>>>>>>         I don’t see why this requires holding the read-lock until
>>>>>>         invalidate_range_end. amdgpu_ttm_tt_affect_userptr gets
>>>>>>         called while the mn read-lock is held in
>>>>>>         invalidate_range_start notifier.
>>>>>>
>>>>>>
>>>>>>     That's not related to amdgpu_ttm_tt_affect_userptr(), this
>>>>>>     function could actually be called outside the lock.
>>>>>>
>>>>>>     The problem is here:
>>>>>>
>>>>>>         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated,
>>>>>>         p->fence);
>>>>>>
>>>>>>         amdgpu_mn_unlock(p->mn);
>>>>>>
>>>>>>
>>>>>>     We need to hold the lock until the fence is added to the
>>>>>>     reservation object.
>>>>>>
>>>>>>     Otherwise somebody could have changed the page tables just in
>>>>>>     the moment between the check of
>>>>>>     amdgpu_ttm_tt_userptr_needs_pages() and adding the fence to
>>>>>>     the reservation object.
>>>>>>
>>>>>>     Regards,
>>>>>>     Christian.
>>>>>>
>>>>>>
>>>>>>         Regards,
>>>>>>
>>>>>>           Felix
>>>>>>
>>>>>>         *From:*Koenig, Christian
>>>>>>         *Sent:* Thursday, September 27, 2018 5:27 AM
>>>>>>         *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>>         <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>>         *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>>>>>         <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>;
>>>>>>         amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>>         <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>>>>         <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>>>>         *Subject:* Re: [PATCH] drm/amdgpu: use HMM mirror
>>>>>>         callback to replace mmu notifier v4
>>>>>>
>>>>>>         That is correct, but take a look what we do when after
>>>>>>         calling the amdgpu_mn_read_lock():
>>>>>>
>>>>>>
>>>>>>                     /* No memory allocation is allowed while
>>>>>>             holding the mn lock */
>>>>>>             amdgpu_mn_lock(p->mn);
>>>>>>             amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>>>>>>                             struct amdgpu_bo *bo =
>>>>>>             ttm_to_amdgpu_bo(e->tv.bo);
>>>>>>
>>>>>>                             if
>>>>>>             (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
>>>>>>                                     r = -ERESTARTSYS;
>>>>>>                                     goto error_abort;
>>>>>>                             }
>>>>>>                     }
>>>>>>
>>>>>>
>>>>>>         We double check that there wasn't any page table
>>>>>>         modification while we prepared the submission and restart
>>>>>>         the whole process when there actually was some update.
>>>>>>
>>>>>>         The reason why we need to do this is here:
>>>>>>
>>>>>>             ttm_eu_fence_buffer_objects(&p->ticket,
>>>>>>             &p->validated, p->fence);
>>>>>>             amdgpu_mn_unlock(p->mn);
>>>>>>
>>>>>>
>>>>>>         Only after the new fence is added to the buffer object we
>>>>>>         can release the lock so that any invalidation will now
>>>>>>         block on our command submission to finish before it
>>>>>>         modifies the page table.
>>>>>>
>>>>>>         The only other option would be to add the fence first and
>>>>>>         then check if there was any update to the page tables.
>>>>>>
>>>>>>         The issue with that approach is that adding a fence can't
>>>>>>         be made undone, so if we find that there actually was an
>>>>>>         update to the page tables we would need to somehow turn
>>>>>>         the CS into a dummy (e.g. overwrite all IBs with NOPs or
>>>>>>         something like that) and still submit it.
>>>>>>
>>>>>>         Not sure if that is actually possible.
>>>>>>
>>>>>>         Regards,
>>>>>>         Christian.
>>>>>>
>>>>>>         Am 27.09.2018 um 10:47 schrieb Kuehling, Felix:
>>>>>>
>>>>>>             So back to my previous question:
>>>>>>
>>>>>>             >> But do we really need another lock for this?
>>>>>>             Wouldn't the
>>>>>>
>>>>>>             >> re-validation of userptr BOs (currently calling
>>>>>>             get_user_pages) force
>>>>>>
>>>>>>             >> synchronization with the ongoing page table
>>>>>>             invalidation through the
>>>>>>
>>>>>>             >> mmap_sem or other MM locks?
>>>>>>
>>>>>>             >
>>>>>>
>>>>>>             > No and yes. We don't hold any other locks while
>>>>>>             doing command submission, but I expect that HMM has
>>>>>>             its own mechanism to prevent that.
>>>>>>
>>>>>>             >
>>>>>>
>>>>>>             > Since we don't modify
>>>>>>             amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly
>>>>>>             not using this mechanism correctly.
>>>>>>
>>>>>>             The existing amdgpu_mn_lock/unlock should block the
>>>>>>             MMU notifier while a command submission is in
>>>>>>             progress. It should also block command submission
>>>>>>             while an MMU notifier is in progress.
>>>>>>
>>>>>>             What we lose with HMM is the ability to hold a
>>>>>>             read-lock for the entire duration of the
>>>>>>             invalidate_range_start until invalidate_range_end. As
>>>>>>             I understand it, that lock is meant to prevent new
>>>>>>             command submissions while the page tables are being
>>>>>>             updated by the kernel. But my point is, that
>>>>>>             get_user_pages or hmm_vma_fault should do the same
>>>>>>             kind of thing. Before the command submission can go
>>>>>>             ahead, it needs to update the userptr addresses. If
>>>>>>             the page tables are still being updated, it will
>>>>>>             block there even without holding the amdgpu_mn_read_lock.
>>>>>>
>>>>>>             Regards,
>>>>>>
>>>>>>               Felix
>>>>>>
>>>>>>             *From:* Koenig, Christian
>>>>>>             *Sent:* Thursday, September 27, 2018 3:00 AM
>>>>>>             *To:* Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>>             <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>>>>>             *Cc:* Yang, Philip <Philip.Yang-5C7GfCeVMHo@public.gmane.org>
>>>>>>             <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>;
>>>>>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>>>>             <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>>>>             *Subject:* RE: [PATCH] drm/amdgpu: use HMM mirror
>>>>>>             callback to replace mmu notifier v4
>>>>>>
>>>>>>             No, that won't work. We would still run into lock
>>>>>>             inversion problems.
>>>>>>
>>>>>>             What we could do with the scheduler is to turn
>>>>>>             submissions into dummies if we find that the page
>>>>>>             tables are now outdated.
>>>>>>
>>>>>>             But that would be really hacky and I'm not sure if
>>>>>>             that would really work in all cases.
>>>>>>
>>>>>>             Christian.
>>>>>>
>>>>>>             Am 27.09.2018 08:53 schrieb "Kuehling, Felix"
>>>>>>             <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>:
>>>>>>
>>>>>>             I had a chat with Jerome yesterday. He pointed out
>>>>>>             that the new blockable parameter can be used to infer
>>>>>>             whether the MMU notifier is being called  in a
>>>>>>             reclaim operation. So if blockable==true, it should
>>>>>>             even be safe to take the BO reservation lock without
>>>>>>             problems. I think with that we should be able to
>>>>>>             remove the read-write locking completely and go back
>>>>>>             to locking (or try-locking for blockable==false) the
>>>>>>             reservation locks in the MMU notifier?
>>>>>>
>>>>>>             Regards,
>>>>>>               Felix
>>>>>>
>>>>>>             -----Original Message-----
>>>>>>             From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>>             <mailto:amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>> On
>>>>>>             Behalf Of Christian König
>>>>>>             Sent: Saturday, September 15, 2018 3:47 AM
>>>>>>             To: Kuehling, Felix <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org
>>>>>>             <mailto:Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>>; Yang, Philip
>>>>>>             <Philip.Yang-5C7GfCeVMHo@public.gmane.org <mailto:Philip.Yang-5C7GfCeVMHo@public.gmane.org>>;
>>>>>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>; Jerome Glisse
>>>>>>             <j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org <mailto:j.glisse-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>>>>>>             Subject: Re: [PATCH] drm/amdgpu: use HMM mirror
>>>>>>             callback to replace mmu notifier v4
>>>>>>
>>>>>>             Am 14.09.2018 um 22:21 schrieb Felix Kuehling:
>>>>>>             > On 2018-09-14 01:52 PM, Christian König wrote:
>>>>>>             >> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>>>>>>             >>> On 2018-09-14 03:51 AM, Christian König wrote:
>>>>>>             >>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>>>>             >>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>>>             >>>>> [SNIP]
>>>>>>             >>>>>>    + amdgpu_mn_read_unlock(amn);
>>>>>>             >>>>>> +
>>>>>>             >>>>> amdgpu_mn_read_lock/unlock support recursive
>>>>>>             locking for multiple
>>>>>>             >>>>> overlapping or nested invalidation ranges. But
>>>>>>             if you'r locking
>>>>>>             >>>>> and unlocking in the same function. Is that
>>>>>>             still a concern?
>>>>>>             >>> I don't understand the possible recursive case, but
>>>>>>             >>> amdgpu_mn_read_lock() still support recursive
>>>>>>             locking.
>>>>>>             >>>> Well the real problem is that unlocking them
>>>>>>             here won't work.
>>>>>>             >>>>
>>>>>>             >>>> We need to hold the lock until we are sure that
>>>>>>             the operation which
>>>>>>             >>>> updates the page tables is completed.
>>>>>>             >>>>
>>>>>>             >>> The reason for this change is because hmm mirror has
>>>>>>             >>> invalidate_start callback, no invalidate_end callback
>>>>>>             >>>
>>>>>>             >>> Check mmu_notifier.c and hmm.c again, below is
>>>>>>             entire logic to
>>>>>>             >>> update CPU page tables and callback:
>>>>>>             >>>
>>>>>>             >>> mn lock amn->lock is used to protect interval
>>>>>>             tree access because
>>>>>>             >>> user may submit/register new userptr anytime.
>>>>>>             >>> This is same for old and new way.
>>>>>>             >>>
>>>>>>             >>> step 2 guarantee the GPU operation is done before
>>>>>>             updating CPU page
>>>>>>             >>> table.
>>>>>>             >>>
>>>>>>             >>> So I think the change is safe. We don't need hold
>>>>>>             mn lock until the
>>>>>>             >>> CPU page tables update is completed.
>>>>>>             >> No, that isn't even remotely correct. The lock
>>>>>>             doesn't protects the
>>>>>>             >> interval tree.
>>>>>>             >>
>>>>>>             >>> Old:
>>>>>>             >>>     1. down_read_non_owner(&amn->lock)
>>>>>>             >>>     2. loop to handle BOs from node->bos through
>>>>>>             interval tree
>>>>>>             >>> amn->object nodes
>>>>>>             >>>         gfx: wait for pending BOs fence operation
>>>>>>             done, mark user
>>>>>>             >>> pages dirty
>>>>>>             >>>         kfd: evict user queues of the process,
>>>>>>             wait for queue
>>>>>>             >>> unmap/map operation done
>>>>>>             >>>     3. update CPU page tables
>>>>>>             >>>     4. up_read(&amn->lock)
>>>>>>             >>>
>>>>>>             >>> New, switch step 3 and 4
>>>>>>             >>>     1. down_read_non_owner(&amn->lock)
>>>>>>             >>>     2. loop to handle BOs from node->bos through
>>>>>>             interval tree
>>>>>>             >>> amn->object nodes
>>>>>>             >>>         gfx: wait for pending BOs fence operation
>>>>>>             done, mark user
>>>>>>             >>> pages dirty
>>>>>>             >>>         kfd: evict user queues of the process,
>>>>>>             wait for queue
>>>>>>             >>> unmap/map operation done
>>>>>>             >>>     3. up_read(&amn->lock)
>>>>>>             >>>     4. update CPU page tables
>>>>>>             >> The lock is there to make sure that we serialize
>>>>>>             page table updates
>>>>>>             >> with command submission.
>>>>>>             > As I understand it, the idea is to prevent command
>>>>>>             submission (adding
>>>>>>             > new fences to BOs) while a page table invalidation
>>>>>>             is in progress.
>>>>>>
>>>>>>             Yes, exactly.
>>>>>>
>>>>>>             > But do we really need another lock for this?
>>>>>>             Wouldn't the
>>>>>>             > re-validation of userptr BOs (currently calling
>>>>>>             get_user_pages) force
>>>>>>             > synchronization with the ongoing page table
>>>>>>             invalidation through the
>>>>>>             > mmap_sem or other MM locks?
>>>>>>
>>>>>>             No and yes. We don't hold any other locks while doing
>>>>>>             command submission, but I expect that HMM has its own
>>>>>>             mechanism to prevent that.
>>>>>>
>>>>>>             Since we don't modify
>>>>>>             amdgpu_mn_lock()/amdgpu_mn_unlock() we are certainly
>>>>>>             not using this mechanism correctly.
>>>>>>
>>>>>>             Regards,
>>>>>>             Christian.
>>>>>>             _______________________________________________
>>>>>>             amd-gfx mailing list
>>>>>>             amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>>             <mailto:amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
>>>>>>             https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>>
>>>>>
>>>>
>>>
>>>
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[-- Attachment #1.2: Type: text/html, Size: 72393 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2018-10-04  7:20 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-13 20:52 [PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4 Philip Yang
     [not found] ` <1536871954-8451-1-git-send-email-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2018-09-13 21:51   ` Felix Kuehling
     [not found]     ` <9d6717ac-23f0-7beb-6e41-58c6e32acdf8-5C7GfCeVMHo@public.gmane.org>
2018-09-14  7:51       ` Christian König
     [not found]         ` <58bc3bb9-b7b1-a32f-e355-c78a23d95215-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-09-14 17:47           ` Philip Yang
     [not found]             ` <383388c8-1bff-48d9-1044-f16e66bcbfa5-5C7GfCeVMHo@public.gmane.org>
2018-09-14 17:52               ` Christian König
     [not found]                 ` <3850fbeb-5d91-9c14-43c9-45d5d058e15b-5C7GfCeVMHo@public.gmane.org>
2018-09-14 20:21                   ` Felix Kuehling
     [not found]                     ` <de28cee0-3461-4f99-eeae-b793de00ca58-5C7GfCeVMHo@public.gmane.org>
2018-09-15  7:46                       ` Christian König
     [not found]                         ` <e4cf7212-4340-8639-c8c1-057e4d1083f0-5C7GfCeVMHo@public.gmane.org>
2018-09-27  6:53                           ` Kuehling, Felix
     [not found]                             ` <DM5PR12MB17078469EB6D3AF1D53B788992140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-09-27  6:59                               ` Koenig, Christian
     [not found]                                 ` <a76b71ac-4b5b-45d7-b48b-6d0e4a7e7524-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
2018-09-27  8:47                                   ` Kuehling, Felix
     [not found]                                     ` <DM5PR12MB1707D5E46617B2936F800F1992140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-09-27  9:27                                       ` Christian König
     [not found]                                         ` <d752c19b-6d2e-c7c1-1cd7-651e25b8f708-5C7GfCeVMHo@public.gmane.org>
2018-09-27 11:08                                           ` Kuehling, Felix
     [not found]                                             ` <DM5PR12MB17077A78E0F95BFBA57F2E1A92140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-09-27 11:23                                               ` Christian König
     [not found]                                                 ` <425fe859-c780-48a5-a2c6-c3bf2b9abb38-5C7GfCeVMHo@public.gmane.org>
2018-09-27 13:18                                                   ` Kuehling, Felix
     [not found]                                                     ` <DM5PR12MB17072AE77EB0DE3AD22B8D7892140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-09-27 13:21                                                       ` Christian König
     [not found]                                                         ` <9b427976-f2ff-8ba1-6ebf-588ca95aef80-5C7GfCeVMHo@public.gmane.org>
2018-09-27 13:41                                                           ` Kuehling, Felix
     [not found]                                                             ` <DM5PR12MB17072879EC907150027D6F7892140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-09-27 13:58                                                               ` Koenig, Christian
     [not found]                                                                 ` <58199419-e20f-4ab0-ac1d-a7eb79f5c6f7-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
2018-09-27 14:27                                                                   ` Kuehling, Felix
     [not found]                                                                     ` <DM5PR12MB1707273AC4B0C03A3BEDF73092140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-09-27 14:29                                                                       ` Koenig, Christian
     [not found]                                                                         ` <1068c389-56fc-4125-ac40-b1ef2d60eabd-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
2018-09-27 14:50                                                                           ` Kuehling, Felix
     [not found]                                                                             ` <DM5PR12MB1707BCD7BFC10EDD594FE90B92140-2J9CzHegvk9TCtO+SvGBKwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-09-27 15:36                                                                               ` Christian König
     [not found]                                                                                 ` <11ba3857-9bb0-648e-2806-0533090d9a0a-5C7GfCeVMHo@public.gmane.org>
2018-09-27 20:17                                                                                   ` Philip Yang
     [not found]                                                                                     ` <16d1faf6-80a4-dc46-bd2a-9cd475808e98-5C7GfCeVMHo@public.gmane.org>
2018-09-28  5:25                                                                                       ` Koenig, Christian
     [not found]                                                                                         ` <8f9f5703-214f-488d-9cfe-ccc64e8cd009-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
2018-09-28 15:07                                                                                           ` Philip Yang
     [not found]                                                                                             ` <fe0d429b-5038-a297-e02e-423302544477-5C7GfCeVMHo@public.gmane.org>
2018-09-28 15:13                                                                                               ` Koenig, Christian
     [not found]                                                                                                 ` <b8686e6b-0c3e-4feb-afbd-80397aac31a0-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
2018-10-02 15:05                                                                                                   ` Christian König
     [not found]                                                                                                     ` <09916f9a-3f5f-27ab-01e6-6d77303cf052-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-10-03 20:31                                                                                                       ` Philip Yang
     [not found]                                                                                                         ` <bf806477-06b3-61de-fea3-5ad260d92cdd-5C7GfCeVMHo@public.gmane.org>
2018-10-04  7:20                                                                                                           ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.