All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Christian König" <deathsimple@vodafone.de>
To: dri-devel@lists.freedesktop.org
Cc: "Christian König" <deathsimple@vodafone.de>
Subject: [PATCH 05/13] drm/radeon: rework gpu lockup detection and processing
Date: Fri, 20 Apr 2012 00:39:12 +0200	[thread overview]
Message-ID: <1334875160-5454-6-git-send-email-deathsimple@vodafone.de> (raw)
In-Reply-To: <1334875160-5454-1-git-send-email-deathsimple@vodafone.de>

Previusly multiple ring could trigger multiple GPU
resets at the same time.

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon.h       |    3 +-
 drivers/gpu/drm/radeon/radeon_fence.c |  146 +++++++++++++++++----------------
 2 files changed, 75 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8801657..85a3aa9 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -255,8 +255,7 @@ struct radeon_fence_driver {
 	volatile uint32_t		*cpu_addr;
 	atomic_t			seq;
 	uint32_t			last_seq;
-	unsigned long			last_jiffies;
-	unsigned long			last_timeout;
+	unsigned long			last_activity;
 	wait_queue_head_t		queue;
 	struct list_head		created;
 	struct list_head		emitted;
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 36c411f..1a9765a 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -74,6 +74,10 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 	radeon_fence_ring_emit(rdev, fence->ring, fence);
 	trace_radeon_fence_emit(rdev->ddev, fence->seq);
 	fence->emitted = true;
+	/* are we the first fence on a previusly idle ring? */
+	if (list_empty(&rdev->fence_drv[fence->ring].emitted)) {
+		rdev->fence_drv[fence->ring].last_activity = jiffies;
+	}
 	list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
 	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
@@ -85,34 +89,14 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
 	struct list_head *i, *n;
 	uint32_t seq;
 	bool wake = false;
-	unsigned long cjiffies;
 
 	seq = radeon_fence_read(rdev, ring);
-	if (seq != rdev->fence_drv[ring].last_seq) {
-		rdev->fence_drv[ring].last_seq = seq;
-		rdev->fence_drv[ring].last_jiffies = jiffies;
-		rdev->fence_drv[ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-	} else {
-		cjiffies = jiffies;
-		if (time_after(cjiffies, rdev->fence_drv[ring].last_jiffies)) {
-			cjiffies -= rdev->fence_drv[ring].last_jiffies;
-			if (time_after(rdev->fence_drv[ring].last_timeout, cjiffies)) {
-				/* update the timeout */
-				rdev->fence_drv[ring].last_timeout -= cjiffies;
-			} else {
-				/* the 500ms timeout is elapsed we should test
-				 * for GPU lockup
-				 */
-				rdev->fence_drv[ring].last_timeout = 1;
-			}
-		} else {
-			/* wrap around update last jiffies, we will just wait
-			 * a little longer
-			 */
-			rdev->fence_drv[ring].last_jiffies = cjiffies;
-		}
+	if (seq == rdev->fence_drv[ring].last_seq)
 		return false;
-	}
+
+	rdev->fence_drv[ring].last_seq = seq;
+	rdev->fence_drv[ring].last_activity = jiffies;
+
 	n = NULL;
 	list_for_each(i, &rdev->fence_drv[ring].emitted) {
 		fence = list_entry(i, struct radeon_fence, list);
@@ -207,66 +191,84 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 	struct radeon_device *rdev;
 	unsigned long irq_flags, timeout;
 	u32 seq;
-	int r;
+	int i, r;
+	bool signaled;
 
 	if (fence == NULL) {
 		WARN(1, "Querying an invalid fence : %p !\n", fence);
-		return 0;
+		return -EINVAL;
 	}
+
 	rdev = fence->rdev;
-	if (radeon_fence_signaled(fence)) {
-		return 0;
-	}
-	timeout = rdev->fence_drv[fence->ring].last_timeout;
-retry:
-	/* save current sequence used to check for GPU lockup */
-	seq = rdev->fence_drv[fence->ring].last_seq;
-	trace_radeon_fence_wait_begin(rdev->ddev, seq);
-	if (intr) {
+	signaled = radeon_fence_signaled(fence);
+	while (!signaled) {
+		read_lock_irqsave(&rdev->fence_lock, irq_flags);
+		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
+		if (time_after(rdev->fence_drv[fence->ring].last_activity, timeout)) {
+			/* the normal case, timeout is somewhere before last_activity */
+			timeout = rdev->fence_drv[fence->ring].last_activity - timeout;
+		} else {
+			/* either jiffies wrapped around, or no fence was signaled in the last 500ms
+			 * anyway we will just wait for the minimum amount and then check for a lockup */
+			timeout = 1;
+		}
+		/* save current sequence value used to check for GPU lockups */
+		seq = rdev->fence_drv[fence->ring].last_seq;
+		read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+
+		trace_radeon_fence_wait_begin(rdev->ddev, seq);
 		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
-		r = wait_event_interruptible_timeout(rdev->fence_drv[fence->ring].queue,
-				radeon_fence_signaled(fence), timeout);
+		if (intr) {
+			r = wait_event_interruptible_timeout(
+				rdev->fence_drv[fence->ring].queue,
+				(signaled = radeon_fence_signaled(fence)), timeout);
+		} else {
+			r = wait_event_timeout(
+				rdev->fence_drv[fence->ring].queue,
+				(signaled = radeon_fence_signaled(fence)), timeout);
+		}
 		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
 		if (unlikely(r < 0)) {
 			return r;
 		}
-	} else {
-		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
-		r = wait_event_timeout(rdev->fence_drv[fence->ring].queue,
-			 radeon_fence_signaled(fence), timeout);
-		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
-	}
-	trace_radeon_fence_wait_end(rdev->ddev, seq);
-	if (unlikely(!radeon_fence_signaled(fence))) {
-		/* we were interrupted for some reason and fence isn't
-		 * isn't signaled yet, resume wait
-		 */
-		if (r) {
-			timeout = r;
-			goto retry;
-		}
-		/* don't protect read access to rdev->fence_drv[t].last_seq
-		 * if we experiencing a lockup the value doesn't change
-		 */
-		if (seq == rdev->fence_drv[fence->ring].last_seq &&
-		    radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
-
-			/* good news we believe it's a lockup */
-			printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
-			     fence->seq, seq);
-
-			/* mark the ring as not ready any more */
-			rdev->ring[fence->ring].ready = false;
-			r = radeon_gpu_reset(rdev);
-			if (r)
-				return r;
+		trace_radeon_fence_wait_end(rdev->ddev, seq);
+
+		if (unlikely(!signaled)) {
+			/* we were interrupted for some reason and fence
+			 * isn't signaled yet, resume waiting */
+			if (r) {
+				continue;
+			}
+
+			write_lock_irqsave(&rdev->fence_lock, irq_flags);
+			/* check if sequence value has changed since last_activity */
+			if (seq != rdev->fence_drv[fence->ring].last_seq) {
+				write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+				continue;
+			}
+
+			/* change sequence value on all rings, so nobody else things there is a lockup */
+			for (i = 0; i < RADEON_NUM_RINGS; ++i)
+				rdev->fence_drv[i].last_seq -= 0x10000;
+			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+
+			if (radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
+
+				/* good news we believe it's a lockup */
+				printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
+				     fence->seq, seq);
+
+				/* mark the ring as not ready any more */
+				rdev->ring[fence->ring].ready = false;
+				r = radeon_gpu_reset(rdev);
+				if (r)
+					return r;
+
+				write_lock_irqsave(&rdev->fence_lock, irq_flags);
+				rdev->fence_drv[fence->ring].last_activity = jiffies;
+				write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+			}
 		}
-		timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-		write_lock_irqsave(&rdev->fence_lock, irq_flags);
-		rdev->fence_drv[fence->ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-		rdev->fence_drv[fence->ring].last_jiffies = jiffies;
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		goto retry;
 	}
 	return 0;
 }
-- 
1.7.5.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

  parent reply	other threads:[~2012-04-19 22:39 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-04-19 22:39 Reworking of GPU reset logic Christian König
2012-04-19 22:39 ` [PATCH 01/13] drm/radeon: make radeon_gpu_is_lockup a per ring function Christian König
2012-04-19 22:39 ` [PATCH 02/13] drm/radeon: replace gpu_lockup with ring->ready flag Christian König
2012-04-19 22:39 ` [PATCH 03/13] drm/radeon: register ring debugfs handlers on init Christian König
2012-04-19 22:39 ` [PATCH 04/13] drm/radeon: use central function for IB testing Christian König
2012-04-19 22:39 ` Christian König [this message]
2012-04-19 22:39 ` [PATCH 06/13] drm/radeon: improve sub allocator Christian König
2012-04-20  7:24   ` Michel Dänzer
2012-04-20  9:11     ` Christian König
2012-04-19 22:39 ` [PATCH 07/13] drm/radeon: add sub allocator debugfs file Christian König
2012-04-19 22:39 ` [PATCH 08/13] drm/radeon: add biggest hole tracking and wakequeue to the sa Christian König
2012-04-19 22:39 ` [PATCH 09/13] drm/radeon: simplify semaphore handling Christian König
2012-04-19 22:39 ` [PATCH 10/13] drm/radeon: return -ENOENT in fence_wait_* Christian König
2012-04-20  7:20   ` Michel Dänzer
2012-04-20  8:49     ` Christian König
2012-04-20  9:15       ` Michel Dänzer
2012-04-20 10:24         ` Christian König
2012-04-20 11:30           ` Michel Dänzer
2012-04-19 22:39 ` [PATCH 11/13] drm/radeon: rip out the ib pool Christian König
2012-04-19 22:39 ` [PATCH 12/13] drm/radeon: fix a bug with the ring syncing code Christian König
2012-04-24 14:04   ` Dave Airlie
2012-04-24 14:23     ` Christian König
2012-04-19 22:39 ` [PATCH 13/13] drm/radeon: rework recursive gpu reset handling Christian König
2012-04-20  6:57   ` Dave Airlie
2012-04-20  7:50     ` Daniel Vetter
2012-04-20  9:38       ` Christian König
2012-04-19 23:47 ` Reworking of GPU reset logic Jerome Glisse
2012-04-21  9:42   ` Christian König
2012-04-21 14:14     ` Jerome Glisse
2012-04-25 13:01       ` Christian König
2012-04-25 13:30         ` Dave Airlie
2012-04-25 13:46           ` Alex Deucher
2012-04-25 14:26             ` Jerome Glisse
2012-04-23  7:40     ` Michel Dänzer
2012-04-25 12:50       ` Christian König
2012-05-02  4:00 Include request for reset-rework branch v3 j.glisse
2012-05-02  4:00 ` [PATCH 05/13] drm/radeon: rework gpu lockup detection and processing j.glisse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1334875160-5454-6-git-send-email-deathsimple@vodafone.de \
    --to=deathsimple@vodafone.de \
    --cc=dri-devel@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.