From: Rob Clark <robdclark@gmail.com>
To: dri-devel@lists.freedesktop.org
Cc: Daniel Vetter <daniel@ffwll.ch>,
	freedreno@lists.freedesktop.org, linux-arm-msm@vger.kernel.org,
	Rob Clark <robdclark@chromium.org>,
	Rob Clark <robdclark@gmail.com>,
	Abhinav Kumar <quic_abhinavk@quicinc.com>,
	Dmitry Baryshkov <dmitry.baryshkov@linaro.org>,
	Sean Paul <sean@poorly.run>, David Airlie <airlied@gmail.com>,
	linux-kernel@vger.kernel.org (open list)
Subject: [PATCH v2 04/23] drm/msm: Decouple vma tracking from obj lock
Date: Mon, 20 Mar 2023 07:43:26 -0700
Message-ID: <20230320144356.803762-5-robdclark@gmail.com>
In-Reply-To: <20230320144356.803762-1-robdclark@gmail.com>

From: Rob Clark <robdclark@chromium.org>

We need the inuse count to track that a BO is pinned until we have
the hw_fence.  But we want to remove the obj lock from the job_run()
path, as holding it there could deadlock against reclaim/shrinker
(the lock would block the hw_fence from eventually being signaled).
So split that tracking out into a per-vma lock with narrower scope.
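
To illustrate the pattern, here is a minimal userspace analog using
pthread spinlocks (a sketch for illustration only, not code from this
series; the vma/inuse names simply mirror the kernel code):

	#include <pthread.h>
	#include <stdio.h>

	struct vma {
		pthread_spinlock_t lock;	/* analog of vma->lock */
		int inuse;			/* analog of vma->inuse */
	};

	static void vma_pin(struct vma *vma)
	{
		/* Only the counter update is under the narrow lock;
		 * any allocating work (like the iommu map) stays
		 * outside it, for the reasons noted in the patch.
		 */
		pthread_spin_lock(&vma->lock);
		vma->inuse++;
		pthread_spin_unlock(&vma->lock);
	}

	static void vma_unpin(struct vma *vma)
	{
		pthread_spin_lock(&vma->lock);
		if (vma->inuse > 0)
			vma->inuse--;
		pthread_spin_unlock(&vma->lock);
	}

	int main(void)
	{
		struct vma vma = { .inuse = 0 };

		pthread_spin_init(&vma.lock, PTHREAD_PROCESS_PRIVATE);
		vma_pin(&vma);
		vma_unpin(&vma);
		printf("inuse: %d\n", vma.inuse);	/* inuse: 0 */
		return 0;
	}

Because this lock is never held across anything that can recurse into
reclaim, it is safe to take from the fence-signaling job_run() path,
unlike the obj lock.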

Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_gem.h        |  1 +
 drivers/gpu/drm/msm/msm_gem_vma.c    | 44 ++++++++++++++++++++++++----
 drivers/gpu/drm/msm/msm_ringbuffer.c |  2 +-
 3 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index d3219c523034..1929f09c5b0d 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -59,6 +59,7 @@ struct msm_fence_context;
 
 struct msm_gem_vma {
 	struct drm_mm_node node;
+	spinlock_t lock;
 	uint64_t iova;
 	struct msm_gem_address_space *aspace;
 	struct list_head list;    /* node in msm_gem_object::vmas */
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index 2827679dc39a..98287ed99960 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -40,19 +40,28 @@ msm_gem_address_space_get(struct msm_gem_address_space *aspace)
 
 bool msm_gem_vma_inuse(struct msm_gem_vma *vma)
 {
+	bool ret = true;
+
+	spin_lock(&vma->lock);
+
 	if (vma->inuse > 0)
-		return true;
+		goto out;
 
 	while (vma->fence_mask) {
 		unsigned idx = ffs(vma->fence_mask) - 1;
 
 		if (!msm_fence_completed(vma->fctx[idx], vma->fence[idx]))
-			return true;
+			goto out;
 
 		vma->fence_mask &= ~BIT(idx);
 	}
 
-	return false;
+	ret = false;
+
+out:
+	spin_unlock(&vma->lock);
+
+	return ret;
 }
 
 /* Actually unmap memory for the vma */
@@ -73,8 +82,7 @@ void msm_gem_vma_purge(struct msm_gem_vma *vma)
 	vma->mapped = false;
 }
 
-/* Remove reference counts for the mapping */
-void msm_gem_vma_unpin(struct msm_gem_vma *vma)
+static void vma_unpin_locked(struct msm_gem_vma *vma)
 {
 	if (GEM_WARN_ON(!vma->inuse))
 		return;
@@ -82,13 +90,23 @@ void msm_gem_vma_unpin(struct msm_gem_vma *vma)
 		vma->inuse--;
 }
 
+/* Remove reference counts for the mapping */
+void msm_gem_vma_unpin(struct msm_gem_vma *vma)
+{
+	spin_lock(&vma->lock);
+	vma_unpin_locked(vma);
+	spin_unlock(&vma->lock);
+}
+
 /* Replace pin reference with fence: */
 void msm_gem_vma_unpin_fenced(struct msm_gem_vma *vma, struct msm_fence_context *fctx)
 {
+	spin_lock(&vma->lock);
 	vma->fctx[fctx->index] = fctx;
 	vma->fence[fctx->index] = fctx->last_fence;
 	vma->fence_mask |= BIT(fctx->index);
-	msm_gem_vma_unpin(vma);
+	vma_unpin_locked(vma);
+	spin_unlock(&vma->lock);
 }
 
 /* Map and pin vma: */
@@ -103,7 +121,9 @@ msm_gem_vma_map(struct msm_gem_vma *vma, int prot,
 		return -EINVAL;
 
 	/* Increase the usage counter */
+	spin_lock(&vma->lock);
 	vma->inuse++;
+	spin_unlock(&vma->lock);
 
 	if (vma->mapped)
 		return 0;
@@ -113,11 +133,22 @@ msm_gem_vma_map(struct msm_gem_vma *vma, int prot,
 	if (!aspace)
 		return 0;
 
+	/*
+	 * NOTE: iommu/io-pgtable can allocate pages, so we cannot hold
+	 * a lock across map/unmap which is also used in the job_run()
+	 * path, as this can cause deadlock in job_run() vs shrinker/
+	 * reclaim.
+	 *
+	 * Revisit this if we can come up with a scheme to pre-alloc pages
+	 * for the pgtable in map/unmap ops.
+	 */
 	ret = aspace->mmu->funcs->map(aspace->mmu, vma->iova, sgt, size, prot);
 
 	if (ret) {
 		vma->mapped = false;
+		spin_lock(&vma->lock);
 		vma->inuse--;
+		spin_unlock(&vma->lock);
 	}
 
 	return ret;
@@ -148,6 +179,7 @@ struct msm_gem_vma *msm_gem_vma_new(struct msm_gem_address_space *aspace)
 	if (!vma)
 		return NULL;
 
+	spin_lock_init(&vma->lock);
 	vma->aspace = aspace;
 
 	return vma;
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 44a22b283730..31b4fbf96c36 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -23,8 +23,8 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct drm_gem_object *obj = &submit->bos[i].obj->base;
 
-		msm_gem_lock(obj);
 		msm_gem_vma_unpin_fenced(submit->bos[i].vma, fctx);
+		msm_gem_lock(obj);
 		msm_gem_unpin_locked(obj);
 		msm_gem_unlock(obj);
 		submit->bos[i].flags &= ~(BO_VMA_PINNED | BO_OBJ_PINNED);
-- 
2.39.2
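
As an aside on the fence_mask walk in msm_gem_vma_inuse() above: the
loop retires each pending fence whose bit is set once that fence has
completed, using ffs() to find the lowest set bit.  A runnable
userspace sketch of just that walk (fence_completed() is a made-up
stand-in for msm_fence_completed(), and which fences count as
signaled is invented for the example):

	#include <stdbool.h>
	#include <stdio.h>
	#include <strings.h>	/* ffs() */

	#define BIT(n) (1u << (n))

	static bool fence_completed(unsigned idx)
	{
		/* pretend fences 0 and 2 have signaled */
		return idx == 0 || idx == 2;
	}

	static bool vma_inuse(unsigned *fence_mask)
	{
		while (*fence_mask) {
			unsigned idx = ffs(*fence_mask) - 1;

			if (!fence_completed(idx))
				return true;	/* still waiting on fence idx */

			*fence_mask &= ~BIT(idx);	/* retire completed fence */
		}
		return false;
	}

	int main(void)
	{
		unsigned mask = BIT(0) | BIT(1) | BIT(2);
		bool busy = vma_inuse(&mask);

		/* prints "inuse: 1 mask: 0x6": fence 0 was retired,
		 * then the walk stopped at the incomplete fence 1
		 */
		printf("inuse: %d mask: 0x%x\n", busy, mask);
		return 0;
	}

A nice property, preserved by this patch, is that the walk prunes
fence_mask as a side effect, so later calls get cheaper as fences
retire.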

