* Reworking of GPU reset logic
@ 2012-04-19 22:39 Christian König
  2012-04-19 22:39 ` [PATCH 01/13] drm/radeon: make radeon_gpu_is_lockup a per ring function Christian König
                   ` (13 more replies)
  0 siblings, 14 replies; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel

This mostly includes fixes for multi-ring lockups and GPU resets, but it should generally improve the behavior of the kernel mode driver when something goes badly wrong.

On the other hand, it completely rewrites the IB pool and semaphore handling, so I think there are still a couple of problems in it.

The first four patches were already sent to the list, but the current set depends on them, so I am resending them here.

Cheers,
Christian.


* [PATCH 01/13] drm/radeon: make radeon_gpu_is_lockup a per ring function
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-19 22:39 ` [PATCH 02/13] drm/radeon: replace gpu_lockup with ring->ready flag Christian König
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

Different rings have different criteria for testing
whether they are stuck.
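
As a rough illustration of the callback move (not the actual radeon structures; all names below are simplified stand-ins), the lockup check goes from one device-wide function pointer to one pointer per ring, so each ring type can plug in its own test, dispatched like the new radeon_ring_is_lockup(rdev, r, cp) macro:

#include <stdbool.h>
#include <stdio.h>

#define NUM_RINGS 3

struct device;                      /* opaque stand-in for the device struct */
struct ring { int id; };            /* stand-in for a ring */

/* per-ring callback table, mirroring the asic->ring[r].is_lockup idea */
struct ring_funcs {
	bool (*is_lockup)(struct device *dev, struct ring *ring);
};

static bool gfx_is_lockup(struct device *dev, struct ring *ring)
{
	(void)dev; (void)ring;
	return false;               /* e.g. check read pointer progress */
}

static bool other_is_lockup(struct device *dev, struct ring *ring)
{
	(void)dev; (void)ring;
	return false;               /* another ring can use different criteria */
}

static const struct ring_funcs funcs[NUM_RINGS] = {
	{ .is_lockup = gfx_is_lockup },
	{ .is_lockup = other_is_lockup },
	{ .is_lockup = other_is_lockup },
};

/* dispatch through the ring index, as the new macro does */
static bool ring_is_lockup(struct device *dev, int r, struct ring *ring)
{
	return funcs[r].is_lockup(dev, ring);
}

int main(void)
{
	struct ring rings[NUM_RINGS] = { { 0 }, { 1 }, { 2 } };

	for (int r = 0; r < NUM_RINGS; ++r)
		printf("ring %d locked up: %d\n", r, ring_is_lockup(NULL, r, &rings[r]));
	return 0;
}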

v2: rebased on current drm-next

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon.h       |    4 +-
 drivers/gpu/drm/radeon/radeon_asic.c  |   44 ++++++++++++++++++--------------
 drivers/gpu/drm/radeon/radeon_fence.c |    2 +-
 3 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 138b952..bea99e3 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1144,7 +1144,6 @@ struct radeon_asic {
 	int (*resume)(struct radeon_device *rdev);
 	int (*suspend)(struct radeon_device *rdev);
 	void (*vga_set_state)(struct radeon_device *rdev, bool state);
-	bool (*gpu_is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp);
 	int (*asic_reset)(struct radeon_device *rdev);
 	/* ioctl hw specific callback. Some hw might want to perform special
 	 * operation on specific ioctl. For instance on wait idle some hw
@@ -1173,6 +1172,7 @@ struct radeon_asic {
 		void (*ring_start)(struct radeon_device *rdev, struct radeon_ring *cp);
 		int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
 		int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp);
+		bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp);
 	} ring[RADEON_NUM_RINGS];
 	/* irqs */
 	struct {
@@ -1730,7 +1730,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_suspend(rdev) (rdev)->asic->suspend((rdev))
 #define radeon_cs_parse(rdev, r, p) (rdev)->asic->ring[(r)].cs_parse((p))
 #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state))
-#define radeon_gpu_is_lockup(rdev, cp) (rdev)->asic->gpu_is_lockup((rdev), (cp))
 #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev))
 #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev))
 #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p))
@@ -1739,6 +1738,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp))
 #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib))
 #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)].ib_parse((rdev), (ib))
+#define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)].is_lockup((rdev), (cp))
 #define radeon_irq_set(rdev) (rdev)->asic->irq.set((rdev))
 #define radeon_irq_process(rdev) (rdev)->asic->irq.process((rdev))
 #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->display.get_vblank_counter((rdev), (crtc))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index be4dc2f..958b9ea 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -134,7 +134,6 @@ static struct radeon_asic r100_asic = {
 	.suspend = &r100_suspend,
 	.resume = &r100_resume,
 	.vga_set_state = &r100_vga_set_state,
-	.gpu_is_lockup = &r100_gpu_is_lockup,
 	.asic_reset = &r100_asic_reset,
 	.ioctl_wait_idle = NULL,
 	.gui_idle = &r100_gui_idle,
@@ -152,6 +151,7 @@ static struct radeon_asic r100_asic = {
 			.ring_start = &r100_ring_start,
 			.ring_test = &r100_ring_test,
 			.ib_test = &r100_ib_test,
+			.is_lockup = &r100_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -208,7 +208,6 @@ static struct radeon_asic r200_asic = {
 	.suspend = &r100_suspend,
 	.resume = &r100_resume,
 	.vga_set_state = &r100_vga_set_state,
-	.gpu_is_lockup = &r100_gpu_is_lockup,
 	.asic_reset = &r100_asic_reset,
 	.ioctl_wait_idle = NULL,
 	.gui_idle = &r100_gui_idle,
@@ -226,6 +225,7 @@ static struct radeon_asic r200_asic = {
 			.ring_start = &r100_ring_start,
 			.ring_test = &r100_ring_test,
 			.ib_test = &r100_ib_test,
+			.is_lockup = &r100_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -282,7 +282,6 @@ static struct radeon_asic r300_asic = {
 	.suspend = &r300_suspend,
 	.resume = &r300_resume,
 	.vga_set_state = &r100_vga_set_state,
-	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.asic_reset = &r300_asic_reset,
 	.ioctl_wait_idle = NULL,
 	.gui_idle = &r100_gui_idle,
@@ -300,6 +299,7 @@ static struct radeon_asic r300_asic = {
 			.ring_start = &r300_ring_start,
 			.ring_test = &r100_ring_test,
 			.ib_test = &r100_ib_test,
+			.is_lockup = &r300_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -356,7 +356,6 @@ static struct radeon_asic r300_asic_pcie = {
 	.suspend = &r300_suspend,
 	.resume = &r300_resume,
 	.vga_set_state = &r100_vga_set_state,
-	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.asic_reset = &r300_asic_reset,
 	.ioctl_wait_idle = NULL,
 	.gui_idle = &r100_gui_idle,
@@ -374,6 +373,7 @@ static struct radeon_asic r300_asic_pcie = {
 			.ring_start = &r300_ring_start,
 			.ring_test = &r100_ring_test,
 			.ib_test = &r100_ib_test,
+			.is_lockup = &r300_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -430,7 +430,6 @@ static struct radeon_asic r420_asic = {
 	.suspend = &r420_suspend,
 	.resume = &r420_resume,
 	.vga_set_state = &r100_vga_set_state,
-	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.asic_reset = &r300_asic_reset,
 	.ioctl_wait_idle = NULL,
 	.gui_idle = &r100_gui_idle,
@@ -448,6 +447,7 @@ static struct radeon_asic r420_asic = {
 			.ring_start = &r300_ring_start,
 			.ring_test = &r100_ring_test,
 			.ib_test = &r100_ib_test,
+			.is_lockup = &r300_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -504,7 +504,6 @@ static struct radeon_asic rs400_asic = {
 	.suspend = &rs400_suspend,
 	.resume = &rs400_resume,
 	.vga_set_state = &r100_vga_set_state,
-	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.asic_reset = &r300_asic_reset,
 	.ioctl_wait_idle = NULL,
 	.gui_idle = &r100_gui_idle,
@@ -522,6 +521,7 @@ static struct radeon_asic rs400_asic = {
 			.ring_start = &r300_ring_start,
 			.ring_test = &r100_ring_test,
 			.ib_test = &r100_ib_test,
+			.is_lockup = &r300_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -578,7 +578,6 @@ static struct radeon_asic rs600_asic = {
 	.suspend = &rs600_suspend,
 	.resume = &rs600_resume,
 	.vga_set_state = &r100_vga_set_state,
-	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.asic_reset = &rs600_asic_reset,
 	.ioctl_wait_idle = NULL,
 	.gui_idle = &r100_gui_idle,
@@ -596,6 +595,7 @@ static struct radeon_asic rs600_asic = {
 			.ring_start = &r300_ring_start,
 			.ring_test = &r100_ring_test,
 			.ib_test = &r100_ib_test,
+			.is_lockup = &r300_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -652,7 +652,6 @@ static struct radeon_asic rs690_asic = {
 	.suspend = &rs690_suspend,
 	.resume = &rs690_resume,
 	.vga_set_state = &r100_vga_set_state,
-	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.asic_reset = &rs600_asic_reset,
 	.ioctl_wait_idle = NULL,
 	.gui_idle = &r100_gui_idle,
@@ -670,6 +669,7 @@ static struct radeon_asic rs690_asic = {
 			.ring_start = &r300_ring_start,
 			.ring_test = &r100_ring_test,
 			.ib_test = &r100_ib_test,
+			.is_lockup = &r300_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -726,7 +726,6 @@ static struct radeon_asic rv515_asic = {
 	.suspend = &rv515_suspend,
 	.resume = &rv515_resume,
 	.vga_set_state = &r100_vga_set_state,
-	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.asic_reset = &rs600_asic_reset,
 	.ioctl_wait_idle = NULL,
 	.gui_idle = &r100_gui_idle,
@@ -744,6 +743,7 @@ static struct radeon_asic rv515_asic = {
 			.ring_start = &rv515_ring_start,
 			.ring_test = &r100_ring_test,
 			.ib_test = &r100_ib_test,
+			.is_lockup = &r300_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -800,7 +800,6 @@ static struct radeon_asic r520_asic = {
 	.suspend = &rv515_suspend,
 	.resume = &r520_resume,
 	.vga_set_state = &r100_vga_set_state,
-	.gpu_is_lockup = &r300_gpu_is_lockup,
 	.asic_reset = &rs600_asic_reset,
 	.ioctl_wait_idle = NULL,
 	.gui_idle = &r100_gui_idle,
@@ -818,6 +817,7 @@ static struct radeon_asic r520_asic = {
 			.ring_start = &rv515_ring_start,
 			.ring_test = &r100_ring_test,
 			.ib_test = &r100_ib_test,
+			.is_lockup = &r300_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -874,7 +874,6 @@ static struct radeon_asic r600_asic = {
 	.suspend = &r600_suspend,
 	.resume = &r600_resume,
 	.vga_set_state = &r600_vga_set_state,
-	.gpu_is_lockup = &r600_gpu_is_lockup,
 	.asic_reset = &r600_asic_reset,
 	.ioctl_wait_idle = r600_ioctl_wait_idle,
 	.gui_idle = &r600_gui_idle,
@@ -891,6 +890,7 @@ static struct radeon_asic r600_asic = {
 			.cs_parse = &r600_cs_parse,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &r600_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -946,7 +946,6 @@ static struct radeon_asic rs780_asic = {
 	.fini = &r600_fini,
 	.suspend = &r600_suspend,
 	.resume = &r600_resume,
-	.gpu_is_lockup = &r600_gpu_is_lockup,
 	.vga_set_state = &r600_vga_set_state,
 	.asic_reset = &r600_asic_reset,
 	.ioctl_wait_idle = r600_ioctl_wait_idle,
@@ -964,6 +963,7 @@ static struct radeon_asic rs780_asic = {
 			.cs_parse = &r600_cs_parse,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &r600_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -1020,7 +1020,6 @@ static struct radeon_asic rv770_asic = {
 	.suspend = &rv770_suspend,
 	.resume = &rv770_resume,
 	.asic_reset = &r600_asic_reset,
-	.gpu_is_lockup = &r600_gpu_is_lockup,
 	.vga_set_state = &r600_vga_set_state,
 	.ioctl_wait_idle = r600_ioctl_wait_idle,
 	.gui_idle = &r600_gui_idle,
@@ -1037,6 +1036,7 @@ static struct radeon_asic rv770_asic = {
 			.cs_parse = &r600_cs_parse,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &r600_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -1092,7 +1092,6 @@ static struct radeon_asic evergreen_asic = {
 	.fini = &evergreen_fini,
 	.suspend = &evergreen_suspend,
 	.resume = &evergreen_resume,
-	.gpu_is_lockup = &evergreen_gpu_is_lockup,
 	.asic_reset = &evergreen_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
 	.ioctl_wait_idle = r600_ioctl_wait_idle,
@@ -1110,6 +1109,7 @@ static struct radeon_asic evergreen_asic = {
 			.cs_parse = &evergreen_cs_parse,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &evergreen_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -1165,7 +1165,6 @@ static struct radeon_asic sumo_asic = {
 	.fini = &evergreen_fini,
 	.suspend = &evergreen_suspend,
 	.resume = &evergreen_resume,
-	.gpu_is_lockup = &evergreen_gpu_is_lockup,
 	.asic_reset = &evergreen_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
 	.ioctl_wait_idle = r600_ioctl_wait_idle,
@@ -1183,6 +1182,7 @@ static struct radeon_asic sumo_asic = {
 			.cs_parse = &evergreen_cs_parse,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &evergreen_gpu_is_lockup,
 		},
 	},
 	.irq = {
@@ -1238,7 +1238,6 @@ static struct radeon_asic btc_asic = {
 	.fini = &evergreen_fini,
 	.suspend = &evergreen_suspend,
 	.resume = &evergreen_resume,
-	.gpu_is_lockup = &evergreen_gpu_is_lockup,
 	.asic_reset = &evergreen_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
 	.ioctl_wait_idle = r600_ioctl_wait_idle,
@@ -1256,6 +1255,7 @@ static struct radeon_asic btc_asic = {
 			.cs_parse = &evergreen_cs_parse,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &evergreen_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -1321,7 +1321,6 @@ static struct radeon_asic cayman_asic = {
 	.fini = &cayman_fini,
 	.suspend = &cayman_suspend,
 	.resume = &cayman_resume,
-	.gpu_is_lockup = &cayman_gpu_is_lockup,
 	.asic_reset = &cayman_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
 	.ioctl_wait_idle = r600_ioctl_wait_idle,
@@ -1340,6 +1339,7 @@ static struct radeon_asic cayman_asic = {
 			.cs_parse = &evergreen_cs_parse,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &cayman_gpu_is_lockup,
 		},
 		[CAYMAN_RING_TYPE_CP1_INDEX] = {
 			.ib_execute = &cayman_ring_ib_execute,
@@ -1349,6 +1349,7 @@ static struct radeon_asic cayman_asic = {
 			.cs_parse = &evergreen_cs_parse,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &cayman_gpu_is_lockup,
 		},
 		[CAYMAN_RING_TYPE_CP2_INDEX] = {
 			.ib_execute = &cayman_ring_ib_execute,
@@ -1358,6 +1359,7 @@ static struct radeon_asic cayman_asic = {
 			.cs_parse = &evergreen_cs_parse,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &cayman_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -1413,7 +1415,6 @@ static struct radeon_asic trinity_asic = {
 	.fini = &cayman_fini,
 	.suspend = &cayman_suspend,
 	.resume = &cayman_resume,
-	.gpu_is_lockup = &cayman_gpu_is_lockup,
 	.asic_reset = &cayman_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
 	.ioctl_wait_idle = r600_ioctl_wait_idle,
@@ -1432,6 +1433,7 @@ static struct radeon_asic trinity_asic = {
 			.cs_parse = &evergreen_cs_parse,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &cayman_gpu_is_lockup,
 		},
 		[CAYMAN_RING_TYPE_CP1_INDEX] = {
 			.ib_execute = &cayman_ring_ib_execute,
@@ -1441,6 +1443,7 @@ static struct radeon_asic trinity_asic = {
 			.cs_parse = &evergreen_cs_parse,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &cayman_gpu_is_lockup,
 		},
 		[CAYMAN_RING_TYPE_CP2_INDEX] = {
 			.ib_execute = &cayman_ring_ib_execute,
@@ -1450,6 +1453,7 @@ static struct radeon_asic trinity_asic = {
 			.cs_parse = &evergreen_cs_parse,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &cayman_gpu_is_lockup,
 		}
 	},
 	.irq = {
@@ -1515,7 +1519,6 @@ static struct radeon_asic si_asic = {
 	.fini = &si_fini,
 	.suspend = &si_suspend,
 	.resume = &si_resume,
-	.gpu_is_lockup = &si_gpu_is_lockup,
 	.asic_reset = &si_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
 	.ioctl_wait_idle = r600_ioctl_wait_idle,
@@ -1534,6 +1537,7 @@ static struct radeon_asic si_asic = {
 			.cs_parse = NULL,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &si_gpu_is_lockup,
 		},
 		[CAYMAN_RING_TYPE_CP1_INDEX] = {
 			.ib_execute = &si_ring_ib_execute,
@@ -1543,6 +1547,7 @@ static struct radeon_asic si_asic = {
 			.cs_parse = NULL,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &si_gpu_is_lockup,
 		},
 		[CAYMAN_RING_TYPE_CP2_INDEX] = {
 			.ib_execute = &si_ring_ib_execute,
@@ -1552,6 +1557,7 @@ static struct radeon_asic si_asic = {
 			.cs_parse = NULL,
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
+			.is_lockup = &si_gpu_is_lockup,
 		}
 	},
 	.irq = {
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 4bd36a3..66b2229 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -259,7 +259,7 @@ retry:
 		 * if we experiencing a lockup the value doesn't change
 		 */
 		if (seq == rdev->fence_drv[fence->ring].last_seq &&
-		    radeon_gpu_is_lockup(rdev, &rdev->ring[fence->ring])) {
+		    radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
 			/* good news we believe it's a lockup */
 			printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
 			     fence->seq, seq);
-- 
1.7.5.4


* [PATCH 02/13] drm/radeon: replace gpu_lockup with ring->ready flag
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
  2012-04-19 22:39 ` [PATCH 01/13] drm/radeon: make radeon_gpu_is_lockup a per ring function Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-19 22:39 ` [PATCH 03/13] drm/radeon: register ring debugfs handlers on init Christian König
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

It makes no sense at all to have more than one flag.
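
A minimal userspace sketch of the idea (simplified names, not the driver code): instead of one global gpu_lockup flag, each ring carries its own ready flag, and waiting on a ring that is not ready fails for that ring only, as the -EBUSY returns in the diff below do:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define NUM_RINGS 3

struct ring { bool ready; };                    /* per-ring state */
struct device { struct ring ring[NUM_RINGS]; }; /* no global gpu_lockup */

static int fence_wait_next(struct device *dev, int r)
{
	if (!dev->ring[r].ready)
		return -EBUSY;      /* only this ring is considered dead */
	/* ... wait for the next fence emitted on this ring ... */
	return 0;
}

int main(void)
{
	struct device dev = { .ring = { { true }, { false }, { true } } };

	for (int r = 0; r < NUM_RINGS; ++r)
		printf("ring %d: wait -> %d\n", r, fence_wait_next(&dev, r));
	return 0;
}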

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/r100.c          |    1 -
 drivers/gpu/drm/radeon/r300.c          |    1 -
 drivers/gpu/drm/radeon/radeon.h        |    1 -
 drivers/gpu/drm/radeon/radeon_device.c |    1 -
 drivers/gpu/drm/radeon/radeon_fence.c  |   36 +++++++++++--------------------
 drivers/gpu/drm/radeon/rs600.c         |    1 -
 6 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index e11df77..4b677fc 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -2300,7 +2300,6 @@ int r100_asic_reset(struct radeon_device *rdev)
 	if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
 		G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
 		dev_err(rdev->dev, "failed to reset GPU\n");
-		rdev->gpu_lockup = true;
 		ret = -1;
 	} else
 		dev_info(rdev->dev, "GPU reset succeed\n");
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index fa14383..a63f432 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -449,7 +449,6 @@ int r300_asic_reset(struct radeon_device *rdev)
 	/* Check if GPU is idle */
 	if (G_000E40_GA_BUSY(status) || G_000E40_VAP_BUSY(status)) {
 		dev_err(rdev->dev, "failed to reset GPU\n");
-		rdev->gpu_lockup = true;
 		ret = -1;
 	} else
 		dev_info(rdev->dev, "GPU reset succeed\n");
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index bea99e3..365334b 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1529,7 +1529,6 @@ struct radeon_device {
 	struct radeon_mutex		cs_mutex;
 	struct radeon_wb		wb;
 	struct radeon_dummy_page	dummy_page;
-	bool				gpu_lockup;
 	bool				shutdown;
 	bool				suspend;
 	bool				need_dma32;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 0fb4f89..dedb398 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -714,7 +714,6 @@ int radeon_device_init(struct radeon_device *rdev,
 	rdev->is_atom_bios = false;
 	rdev->usec_timeout = RADEON_MAX_USEC_TIMEOUT;
 	rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
-	rdev->gpu_lockup = false;
 	rdev->accel_working = false;
 
 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n",
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 66b2229..36c411f 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -71,14 +71,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 		return 0;
 	}
 	fence->seq = atomic_add_return(1, &rdev->fence_drv[fence->ring].seq);
-	if (!rdev->ring[fence->ring].ready)
-		/* FIXME: cp is not running assume everythings is done right
-		 * away
-		 */
-		radeon_fence_write(rdev, fence->seq, fence->ring);
-	else
-		radeon_fence_ring_emit(rdev, fence->ring, fence);
-
+	radeon_fence_ring_emit(rdev, fence->ring, fence);
 	trace_radeon_fence_emit(rdev->ddev, fence->seq);
 	fence->emitted = true;
 	list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
@@ -191,9 +184,6 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
 	if (!fence)
 		return true;
 
-	if (fence->rdev->gpu_lockup)
-		return true;
-
 	write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
 	signaled = fence->signaled;
 	/* if we are shuting down report all fence as signaled */
@@ -260,18 +250,16 @@ retry:
 		 */
 		if (seq == rdev->fence_drv[fence->ring].last_seq &&
 		    radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
+
 			/* good news we believe it's a lockup */
 			printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
 			     fence->seq, seq);
-			/* FIXME: what should we do ? marking everyone
-			 * as signaled for now
-			 */
-			rdev->gpu_lockup = true;
+
+			/* mark the ring as not ready any more */
+			rdev->ring[fence->ring].ready = false;
 			r = radeon_gpu_reset(rdev);
 			if (r)
 				return r;
-			radeon_fence_write(rdev, fence->seq, fence->ring);
-			rdev->gpu_lockup = false;
 		}
 		timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
 		write_lock_irqsave(&rdev->fence_lock, irq_flags);
@@ -289,10 +277,11 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 	struct radeon_fence *fence;
 	int r;
 
-	if (rdev->gpu_lockup) {
-		return 0;
-	}
 	write_lock_irqsave(&rdev->fence_lock, irq_flags);
+	if (!rdev->ring[ring].ready) {
+		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+		return -EBUSY;
+	}
 	if (list_empty(&rdev->fence_drv[ring].emitted)) {
 		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		return 0;
@@ -312,10 +301,11 @@ int radeon_fence_wait_last(struct radeon_device *rdev, int ring)
 	struct radeon_fence *fence;
 	int r;
 
-	if (rdev->gpu_lockup) {
-		return 0;
-	}
 	write_lock_irqsave(&rdev->fence_lock, irq_flags);
+	if (!rdev->ring[ring].ready) {
+		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+		return -EBUSY;
+	}
 	if (list_empty(&rdev->fence_drv[ring].emitted)) {
 		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 		return 0;
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index d25cf86..f709fe8 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -396,7 +396,6 @@ int rs600_asic_reset(struct radeon_device *rdev)
 	/* Check if GPU is idle */
 	if (G_000E40_GA_BUSY(status) || G_000E40_VAP_BUSY(status)) {
 		dev_err(rdev->dev, "failed to reset GPU\n");
-		rdev->gpu_lockup = true;
 		ret = -1;
 	} else
 		dev_info(rdev->dev, "GPU reset succeed\n");
-- 
1.7.5.4


* [PATCH 03/13] drm/radeon: register ring debugfs handlers on init
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
  2012-04-19 22:39 ` [PATCH 01/13] drm/radeon: make radeon_gpu_is_lockup a per ring function Christian König
  2012-04-19 22:39 ` [PATCH 02/13] drm/radeon: replace gpu_lockup with ring->ready flag Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-19 22:39 ` [PATCH 04/13] drm/radeon: use central function for IB testing Christian König
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

Just register the debugfs files on init instead of
checking the chipset type multiple times.

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_ring.c |   31 +++++++++++++++++++------------
 1 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index cc33b3d..b6eb1d2 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -34,7 +34,7 @@
 #include "atom.h"
 
 int radeon_debugfs_ib_init(struct radeon_device *rdev);
-int radeon_debugfs_ring_init(struct radeon_device *rdev);
+int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring);
 
 u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
 {
@@ -237,9 +237,6 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
 	if (radeon_debugfs_ib_init(rdev)) {
 		DRM_ERROR("Failed to register debugfs file for IB !\n");
 	}
-	if (radeon_debugfs_ring_init(rdev)) {
-		DRM_ERROR("Failed to register debugfs file for rings !\n");
-	}
 	radeon_mutex_unlock(&rdev->ib_pool.mutex);
 	return 0;
 }
@@ -411,6 +408,9 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 	}
 	ring->ptr_mask = (ring->ring_size / 4) - 1;
 	ring->ring_free_dw = ring->ring_size / 4;
+	if (radeon_debugfs_ring_init(rdev, ring)) {
+		DRM_ERROR("Failed to register debugfs file for rings !\n");
+	}
 	return 0;
 }
 
@@ -501,17 +501,24 @@ static char radeon_debugfs_ib_names[RADEON_IB_POOL_SIZE][32];
 static unsigned radeon_debugfs_ib_idx[RADEON_IB_POOL_SIZE];
 #endif
 
-int radeon_debugfs_ring_init(struct radeon_device *rdev)
+int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 #if defined(CONFIG_DEBUG_FS)
-	if (rdev->family >= CHIP_CAYMAN)
-		return radeon_debugfs_add_files(rdev, radeon_debugfs_ring_info_list,
-						ARRAY_SIZE(radeon_debugfs_ring_info_list));
-	else
-		return radeon_debugfs_add_files(rdev, radeon_debugfs_ring_info_list, 1);
-#else
-	return 0;
+	unsigned i;
+	for (i = 0; i < ARRAY_SIZE(radeon_debugfs_ring_info_list); ++i) {
+		struct drm_info_list *info = &radeon_debugfs_ring_info_list[i];
+		int ridx = *(int*)radeon_debugfs_ring_info_list[i].data;
+		unsigned r;
+
+		if (&rdev->ring[ridx] != ring)
+			continue;
+
+		r = radeon_debugfs_add_files(rdev, info, 1);
+		if (r)
+			return r;
+	}
 #endif
+	return 0;
 }
 
 int radeon_debugfs_ib_init(struct radeon_device *rdev)
-- 
1.7.5.4


* [PATCH 04/13] drm/radeon: use central function for IB testing
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
                   ` (2 preceding siblings ...)
  2012-04-19 22:39 ` [PATCH 03/13] drm/radeon: register ring debugfs handlers on init Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-19 22:39 ` [PATCH 05/13] drm/radeon: rework gpu lockup detection and processing Christian König
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

Remove all the different error messages and
have just one standard behaviour across all
chipset generations.
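
The loop below restates the central helper added in the diff (radeon_ib_ring_tests) as a small standalone sketch with simplified names: every ready ring runs its IB test, a failing ring is marked not ready, and only a failure on the GFX ring disables acceleration entirely:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define NUM_RINGS      3
#define GFX_RING_INDEX 0

struct ring { bool ready; };
struct device {
	struct ring ring[NUM_RINGS];
	bool accel_working;
};

/* stand-in for the per-ring IB test; pretend ring 2 fails */
static int ib_test(struct device *dev, int i)
{
	(void)dev;
	return i == 2 ? -EINVAL : 0;
}

static int ib_ring_tests(struct device *dev)
{
	for (int i = 0; i < NUM_RINGS; ++i) {
		int r;

		if (!dev->ring[i].ready)
			continue;

		r = ib_test(dev, i);
		if (r) {
			dev->ring[i].ready = false;
			if (i == GFX_RING_INDEX) {
				/* really bad: no acceleration at all */
				dev->accel_working = false;
				return r;
			}
			/* still not good, but we can live with it */
			fprintf(stderr, "IB test failed on ring %d (%d)\n", i, r);
		}
	}
	return 0;
}

int main(void)
{
	struct device dev = {
		.ring = { { true }, { true }, { true } },
		.accel_working = true,
	};

	printf("result %d, accel_working %d\n", ib_ring_tests(&dev), dev.accel_working);
	return 0;
}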

Signed-off-by: Christian König <deathsimple@vodafone.de>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/evergreen.c   |    7 ++-----
 drivers/gpu/drm/radeon/ni.c          |    7 ++-----
 drivers/gpu/drm/radeon/r100.c        |    7 ++-----
 drivers/gpu/drm/radeon/r300.c        |    7 ++-----
 drivers/gpu/drm/radeon/r420.c        |    7 ++-----
 drivers/gpu/drm/radeon/r520.c        |    8 +++-----
 drivers/gpu/drm/radeon/r600.c        |    7 ++-----
 drivers/gpu/drm/radeon/radeon.h      |    1 +
 drivers/gpu/drm/radeon/radeon_ring.c |   30 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/rs400.c       |    7 ++-----
 drivers/gpu/drm/radeon/rs600.c       |    7 ++-----
 drivers/gpu/drm/radeon/rs690.c       |    7 ++-----
 drivers/gpu/drm/radeon/rv515.c       |    8 +++-----
 drivers/gpu/drm/radeon/rv770.c       |    7 ++-----
 14 files changed, 57 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index cfa372c..ca47f52 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3248,12 +3248,9 @@ static int evergreen_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	if (r) {
-		DRM_ERROR("radeon: failed testing IB (%d).\n", r);
-		rdev->accel_working = false;
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
 		return r;
-	}
 
 	r = r600_audio_init(rdev);
 	if (r) {
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index a48ca53..0146428 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1601,12 +1601,9 @@ static int cayman_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	if (r) {
-		DRM_ERROR("radeon: failed testing IB (%d).\n", r);
-		rdev->accel_working = false;
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
 		return r;
-	}
 
 	r = radeon_vm_manager_start(rdev);
 	if (r)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 4b677fc..62f9dab 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -3968,12 +3968,9 @@ static int r100_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	if (r) {
-		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-		rdev->accel_working = false;
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
 		return r;
-	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index a63f432..26e0db8 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -1417,12 +1417,9 @@ static int r300_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	if (r) {
-		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-		rdev->accel_working = false;
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
 		return r;
-	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index f3fcaac..99137be 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -279,12 +279,9 @@ static int r420_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	if (r) {
-		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-		rdev->accel_working = false;
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
 		return r;
-	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c
index ebcc15b..b5cf837 100644
--- a/drivers/gpu/drm/radeon/r520.c
+++ b/drivers/gpu/drm/radeon/r520.c
@@ -207,12 +207,10 @@ static int r520_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	if (r) {
-		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-		rdev->accel_working = false;
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
 		return r;
-	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 4added1..c749e00 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2494,12 +2494,9 @@ int r600_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	if (r) {
-		DRM_ERROR("radeon: failed testing IB (%d).\n", r);
-		rdev->accel_working = false;
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
 		return r;
-	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 365334b..8801657 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -793,6 +793,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev);
 void radeon_ib_pool_fini(struct radeon_device *rdev);
 int radeon_ib_pool_start(struct radeon_device *rdev);
 int radeon_ib_pool_suspend(struct radeon_device *rdev);
+int radeon_ib_ring_tests(struct radeon_device *rdev);
 /* Ring access between begin & end cannot sleep */
 int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *cp);
 void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index b6eb1d2..1b020ef 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -267,6 +267,36 @@ int radeon_ib_pool_suspend(struct radeon_device *rdev)
 	return radeon_sa_bo_manager_suspend(rdev, &rdev->ib_pool.sa_manager);
 }
 
+int radeon_ib_ring_tests(struct radeon_device *rdev)
+{
+	unsigned i;
+	int r;
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		struct radeon_ring *ring = &rdev->ring[i];
+
+		if (!ring->ready)
+			continue;
+
+		r = radeon_ib_test(rdev, i, ring);
+		if (r) {
+			ring->ready = false;
+
+			if (i == RADEON_RING_TYPE_GFX_INDEX) {
+				/* oh, oh, that's really bad */
+				DRM_ERROR("radeon: failed testing IB on GFX ring (%d).\n", r);
+		                rdev->accel_working = false;
+				return r;
+
+			} else {
+				/* still not good, but we can live with it */
+				DRM_ERROR("radeon: failed testing IB on ring %d (%d).\n", i, r);
+			}
+		}
+	}
+	return 0;
+}
+
 /*
  * Ring.
  */
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index 4cf381b..a464eb5 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -430,12 +430,9 @@ static int rs400_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	if (r) {
-		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-		rdev->accel_working = false;
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
 		return r;
-	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index f709fe8..a286dea 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -882,12 +882,9 @@ static int rs600_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	if (r) {
-		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-		rdev->accel_working = false;
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
 		return r;
-	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index f2c3b9d..3277dde 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -647,12 +647,9 @@ static int rs690_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	if (r) {
-		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-		rdev->accel_working = false;
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
 		return r;
-	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index d8d78fe..7f08ced 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -412,12 +412,10 @@ static int rv515_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	if (r) {
-		dev_err(rdev->dev, "failed testing IB (%d).\n", r);
-		rdev->accel_working = false;
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
 		return r;
-	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index c62ae4b..cacec0e 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -1114,12 +1114,9 @@ static int rv770_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	if (r) {
-		dev_err(rdev->dev, "IB test failed (%d).\n", r);
-		rdev->accel_working = false;
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
 		return r;
-	}
 
 	return 0;
 }
-- 
1.7.5.4


* [PATCH 05/13] drm/radeon: rework gpu lockup detection and processing
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
                   ` (3 preceding siblings ...)
  2012-04-19 22:39 ` [PATCH 04/13] drm/radeon: use central function for IB testing Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-19 22:39 ` [PATCH 06/13] drm/radeon: improve sub allocator Christian König
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

Previously, multiple rings could trigger multiple
GPU resets at the same time.
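
A standalone sketch of the timing logic introduced below (illustrative names, plain unsigned longs instead of jiffies helpers): each ring keeps a last_activity timestamp, and every wait iteration sleeps only until roughly one timeout period after that activity before running the per-ring lockup check:

#include <stdio.h>

#define FENCE_TIMEOUT 500UL   /* stand-in for RADEON_FENCE_JIFFIES_TIMEOUT */

/* how long the next wait_event timeout should be, given the current time
 * and the last time a fence on this ring made progress */
static unsigned long wait_chunk(unsigned long now, unsigned long last_activity)
{
	unsigned long cutoff = now - FENCE_TIMEOUT;

	if ((long)(last_activity - cutoff) > 0) {
		/* normal case: activity is within the window,
		 * wait for the remaining part of it */
		return last_activity - cutoff;
	}
	/* activity is older than the window (or the counter wrapped):
	 * wait the minimum amount, then test for a lockup */
	return 1;
}

int main(void)
{
	printf("%lu\n", wait_chunk(10000, 9800)); /* 300 left in the window */
	printf("%lu\n", wait_chunk(10000, 9000)); /* stale -> minimal wait  */
	return 0;
}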

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon.h       |    3 +-
 drivers/gpu/drm/radeon/radeon_fence.c |  146 +++++++++++++++++----------------
 2 files changed, 75 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8801657..85a3aa9 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -255,8 +255,7 @@ struct radeon_fence_driver {
 	volatile uint32_t		*cpu_addr;
 	atomic_t			seq;
 	uint32_t			last_seq;
-	unsigned long			last_jiffies;
-	unsigned long			last_timeout;
+	unsigned long			last_activity;
 	wait_queue_head_t		queue;
 	struct list_head		created;
 	struct list_head		emitted;
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 36c411f..1a9765a 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -74,6 +74,10 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 	radeon_fence_ring_emit(rdev, fence->ring, fence);
 	trace_radeon_fence_emit(rdev->ddev, fence->seq);
 	fence->emitted = true;
+	/* are we the first fence on a previusly idle ring? */
+	if (list_empty(&rdev->fence_drv[fence->ring].emitted)) {
+		rdev->fence_drv[fence->ring].last_activity = jiffies;
+	}
 	list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
 	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 	return 0;
@@ -85,34 +89,14 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
 	struct list_head *i, *n;
 	uint32_t seq;
 	bool wake = false;
-	unsigned long cjiffies;
 
 	seq = radeon_fence_read(rdev, ring);
-	if (seq != rdev->fence_drv[ring].last_seq) {
-		rdev->fence_drv[ring].last_seq = seq;
-		rdev->fence_drv[ring].last_jiffies = jiffies;
-		rdev->fence_drv[ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-	} else {
-		cjiffies = jiffies;
-		if (time_after(cjiffies, rdev->fence_drv[ring].last_jiffies)) {
-			cjiffies -= rdev->fence_drv[ring].last_jiffies;
-			if (time_after(rdev->fence_drv[ring].last_timeout, cjiffies)) {
-				/* update the timeout */
-				rdev->fence_drv[ring].last_timeout -= cjiffies;
-			} else {
-				/* the 500ms timeout is elapsed we should test
-				 * for GPU lockup
-				 */
-				rdev->fence_drv[ring].last_timeout = 1;
-			}
-		} else {
-			/* wrap around update last jiffies, we will just wait
-			 * a little longer
-			 */
-			rdev->fence_drv[ring].last_jiffies = cjiffies;
-		}
+	if (seq == rdev->fence_drv[ring].last_seq)
 		return false;
-	}
+
+	rdev->fence_drv[ring].last_seq = seq;
+	rdev->fence_drv[ring].last_activity = jiffies;
+
 	n = NULL;
 	list_for_each(i, &rdev->fence_drv[ring].emitted) {
 		fence = list_entry(i, struct radeon_fence, list);
@@ -207,66 +191,84 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 	struct radeon_device *rdev;
 	unsigned long irq_flags, timeout;
 	u32 seq;
-	int r;
+	int i, r;
+	bool signaled;
 
 	if (fence == NULL) {
 		WARN(1, "Querying an invalid fence : %p !\n", fence);
-		return 0;
+		return -EINVAL;
 	}
+
 	rdev = fence->rdev;
-	if (radeon_fence_signaled(fence)) {
-		return 0;
-	}
-	timeout = rdev->fence_drv[fence->ring].last_timeout;
-retry:
-	/* save current sequence used to check for GPU lockup */
-	seq = rdev->fence_drv[fence->ring].last_seq;
-	trace_radeon_fence_wait_begin(rdev->ddev, seq);
-	if (intr) {
+	signaled = radeon_fence_signaled(fence);
+	while (!signaled) {
+		read_lock_irqsave(&rdev->fence_lock, irq_flags);
+		timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
+		if (time_after(rdev->fence_drv[fence->ring].last_activity, timeout)) {
+			/* the normal case, timeout is somewhere before last_activity */
+			timeout = rdev->fence_drv[fence->ring].last_activity - timeout;
+		} else {
+			/* either jiffies wrapped around, or no fence was signaled in the last 500ms
+			 * anyway we will just wait for the minimum amount and then check for a lockup */
+			timeout = 1;
+		}
+		/* save current sequence value used to check for GPU lockups */
+		seq = rdev->fence_drv[fence->ring].last_seq;
+		read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+
+		trace_radeon_fence_wait_begin(rdev->ddev, seq);
 		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
-		r = wait_event_interruptible_timeout(rdev->fence_drv[fence->ring].queue,
-				radeon_fence_signaled(fence), timeout);
+		if (intr) {
+			r = wait_event_interruptible_timeout(
+				rdev->fence_drv[fence->ring].queue,
+				(signaled = radeon_fence_signaled(fence)), timeout);
+		} else {
+			r = wait_event_timeout(
+				rdev->fence_drv[fence->ring].queue,
+				(signaled = radeon_fence_signaled(fence)), timeout);
+		}
 		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
 		if (unlikely(r < 0)) {
 			return r;
 		}
-	} else {
-		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
-		r = wait_event_timeout(rdev->fence_drv[fence->ring].queue,
-			 radeon_fence_signaled(fence), timeout);
-		radeon_irq_kms_sw_irq_put(rdev, fence->ring);
-	}
-	trace_radeon_fence_wait_end(rdev->ddev, seq);
-	if (unlikely(!radeon_fence_signaled(fence))) {
-		/* we were interrupted for some reason and fence isn't
-		 * isn't signaled yet, resume wait
-		 */
-		if (r) {
-			timeout = r;
-			goto retry;
-		}
-		/* don't protect read access to rdev->fence_drv[t].last_seq
-		 * if we experiencing a lockup the value doesn't change
-		 */
-		if (seq == rdev->fence_drv[fence->ring].last_seq &&
-		    radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
-
-			/* good news we believe it's a lockup */
-			printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
-			     fence->seq, seq);
-
-			/* mark the ring as not ready any more */
-			rdev->ring[fence->ring].ready = false;
-			r = radeon_gpu_reset(rdev);
-			if (r)
-				return r;
+		trace_radeon_fence_wait_end(rdev->ddev, seq);
+
+		if (unlikely(!signaled)) {
+			/* we were interrupted for some reason and fence
+			 * isn't signaled yet, resume waiting */
+			if (r) {
+				continue;
+			}
+
+			write_lock_irqsave(&rdev->fence_lock, irq_flags);
+			/* check if sequence value has changed since last_activity */
+			if (seq != rdev->fence_drv[fence->ring].last_seq) {
+				write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+				continue;
+			}
+
+			/* change sequence value on all rings, so nobody else things there is a lockup */
+			for (i = 0; i < RADEON_NUM_RINGS; ++i)
+				rdev->fence_drv[i].last_seq -= 0x10000;
+			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+
+			if (radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
+
+				/* good news we believe it's a lockup */
+				printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
+				     fence->seq, seq);
+
+				/* mark the ring as not ready any more */
+				rdev->ring[fence->ring].ready = false;
+				r = radeon_gpu_reset(rdev);
+				if (r)
+					return r;
+
+				write_lock_irqsave(&rdev->fence_lock, irq_flags);
+				rdev->fence_drv[fence->ring].last_activity = jiffies;
+				write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+			}
 		}
-		timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-		write_lock_irqsave(&rdev->fence_lock, irq_flags);
-		rdev->fence_drv[fence->ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
-		rdev->fence_drv[fence->ring].last_jiffies = jiffies;
-		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		goto retry;
 	}
 	return 0;
 }
-- 
1.7.5.4


* [PATCH 06/13] drm/radeon: improve sub allocator
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
                   ` (4 preceding siblings ...)
  2012-04-19 22:39 ` [PATCH 05/13] drm/radeon: rework gpu lockup detection and processing Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-20  7:24   ` Michel Dänzer
  2012-04-19 22:39 ` [PATCH 07/13] drm/radeon: add sub allocator debugfs file Christian König
                   ` (7 subsequent siblings)
  13 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

Make the sub allocator self-contained with respect
to locking, and fix an overrun bug that happens
with allocations of different alignments.
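
Roughly, the overrun looks like this (a simplified standalone first-fit scan, not the driver code): once alignment padding pushes the scan offset past the start of the next block, the unsigned "room before this block" subtraction underflows, and the allocator would hand out space overlapping that block; the extra "offset < blks[i].offset" guard, matching the check added in the patch, prevents that:

#include <stdio.h>

struct blk { unsigned offset, size; };   /* an existing allocation */

static long find_offset(const struct blk *blks, unsigned n,
			unsigned size, unsigned align, unsigned total)
{
	unsigned offset = 0;

	for (unsigned i = 0; i < n; ++i) {
		/* room before this block? the "offset <" guard keeps the
		 * unsigned subtraction from underflowing */
		if (offset < blks[i].offset && blks[i].offset - offset >= size)
			return offset;

		offset = blks[i].offset + blks[i].size;
		if (offset % align)
			offset += align - offset % align;
	}
	/* room at the end? */
	if (offset < total && total - offset >= size)
		return offset;
	return -1;
}

int main(void)
{
	/* aligning past the first block (100 -> 128) overshoots the start of
	 * the second block at 112; without the guard the scan would return
	 * 128 and overlap the 112..176 block */
	struct blk blks[] = { { 0, 100 }, { 112, 64 } };

	printf("%ld\n", find_offset(blks, 2, 64, 128, 1024));  /* prints 256 */
	return 0;
}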

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon.h    |    1 +
 drivers/gpu/drm/radeon/radeon_sa.c |   19 ++++++++++++-------
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 85a3aa9..1aefbd9 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -381,6 +381,7 @@ struct radeon_bo_list {
  * alignment).
  */
 struct radeon_sa_manager {
+	spinlock_t		lock;
 	struct radeon_bo	*bo;
 	struct list_head	sa_bo;
 	unsigned		size;
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index 4cce47e..4ce5c51 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -37,6 +37,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev,
 {
 	int r;
 
+	spin_lock_init(&sa_manager->lock);
 	sa_manager->bo = NULL;
 	sa_manager->size = size;
 	sa_manager->domain = domain;
@@ -136,30 +137,30 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
 	struct radeon_sa_bo *tmp;
 	struct list_head *head;
 	unsigned offset = 0, wasted = 0;
+	unsigned long flags;
 
 	BUG_ON(align > RADEON_GPU_PAGE_SIZE);
 	BUG_ON(size > sa_manager->size);
+	spin_lock_irqsave(&sa_manager->lock, flags);
 
 	/* no one ? */
-	head = sa_manager->sa_bo.prev;
 	if (list_empty(&sa_manager->sa_bo)) {
+		head = &sa_manager->sa_bo;
 		goto out;
 	}
 
 	/* look for a hole big enough */
-	offset = 0;
 	list_for_each_entry(tmp, &sa_manager->sa_bo, list) {
 		/* room before this object ? */
-		if ((tmp->offset - offset) >= size) {
+		if (offset < tmp->offset && (tmp->offset - offset) >= size) {
 			head = tmp->list.prev;
 			goto out;
 		}
 		offset = tmp->offset + tmp->size;
 		wasted = offset % align;
 		if (wasted) {
-			wasted = align - wasted;
+			offset += align - wasted;
 		}
-		offset += wasted;
 	}
 	/* room at the end ? */
 	head = sa_manager->sa_bo.prev;
@@ -167,11 +168,11 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
 	offset = tmp->offset + tmp->size;
 	wasted = offset % align;
 	if (wasted) {
-		wasted = align - wasted;
+		offset += wasted = align - wasted;
 	}
-	offset += wasted;
 	if ((sa_manager->size - offset) < size) {
 		/* failed to find somethings big enough */
+		spin_unlock_irqrestore(&sa_manager->lock, flags);
 		return -ENOMEM;
 	}
 
@@ -180,10 +181,14 @@ out:
 	sa_bo->offset = offset;
 	sa_bo->size = size;
 	list_add(&sa_bo->list, head);
+	spin_unlock_irqrestore(&sa_manager->lock, flags);
 	return 0;
 }
 
 void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo)
 {
+	unsigned long flags;
+	spin_lock_irqsave(&sa_bo->manager->lock, flags);
 	list_del_init(&sa_bo->list);
+	spin_unlock_irqrestore(&sa_bo->manager->lock, flags);
 }
-- 
1.7.5.4


* [PATCH 07/13] drm/radeon: add sub allocator debugfs file
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
                   ` (5 preceding siblings ...)
  2012-04-19 22:39 ` [PATCH 06/13] drm/radeon: improve sub allocator Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-19 22:39 ` [PATCH 08/13] drm/radeon: add biggest hole tracking and wakequeue to the sa Christian König
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

Dumping the current allocations.

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon_object.h |    5 +++++
 drivers/gpu/drm/radeon/radeon_ring.c   |   22 ++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_sa.c     |   15 +++++++++++++++
 3 files changed, 42 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index f9104be..d9b9333 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -161,5 +161,10 @@ extern int radeon_sa_bo_new(struct radeon_device *rdev,
 			    unsigned size, unsigned align);
 extern void radeon_sa_bo_free(struct radeon_device *rdev,
 			      struct radeon_sa_bo *sa_bo);
+#if defined(CONFIG_DEBUG_FS)
+extern void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
+					 struct seq_file *m);
+#endif
+
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 1b020ef..1d9bce9 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -529,6 +529,23 @@ static int radeon_debugfs_ib_info(struct seq_file *m, void *data)
 static struct drm_info_list radeon_debugfs_ib_list[RADEON_IB_POOL_SIZE];
 static char radeon_debugfs_ib_names[RADEON_IB_POOL_SIZE][32];
 static unsigned radeon_debugfs_ib_idx[RADEON_IB_POOL_SIZE];
+
+static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct radeon_device *rdev = dev->dev_private;
+
+	radeon_sa_bo_dump_debug_info(&rdev->ib_pool.sa_manager, m);
+
+	return 0;
+
+}
+
+static struct drm_info_list radeon_debugfs_sa_list[] = {
+        {"radeon_sa_info", &radeon_debugfs_sa_info, 0, NULL},
+};
+
 #endif
 
 int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring)
@@ -555,6 +572,11 @@ int radeon_debugfs_ib_init(struct radeon_device *rdev)
 {
 #if defined(CONFIG_DEBUG_FS)
 	unsigned i;
+	int r;
+
+	r = radeon_debugfs_add_files(rdev, radeon_debugfs_sa_list, 1);
+	if (r)
+		return r;
 
 	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
 		sprintf(radeon_debugfs_ib_names[i], "radeon_ib_%04u", i);
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index 4ce5c51..013a787 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -192,3 +192,18 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo)
 	list_del_init(&sa_bo->list);
 	spin_unlock_irqrestore(&sa_bo->manager->lock, flags);
 }
+
+#if defined(CONFIG_DEBUG_FS)
+void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
+				  struct seq_file *m)
+{
+	struct radeon_sa_bo *i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sa_manager->lock, flags);
+	list_for_each_entry(i, &sa_manager->sa_bo, list) {
+		seq_printf(m, "offset %08d: size %4d\n", i->offset, i->size);
+	}
+	spin_unlock_irqrestore(&sa_manager->lock, flags);
+}
+#endif
-- 
1.7.5.4


* [PATCH 08/13] drm/radeon: add biggest hole tracking and wakequeue to the sa
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
                   ` (6 preceding siblings ...)
  2012-04-19 22:39 ` [PATCH 07/13] drm/radeon: add sub allocator debugfs file Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-19 22:39 ` [PATCH 09/13] drm/radeon: simplify semaphore handling Christian König
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

With this in place, clients automatically block
until their memory request can be handled.
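
A minimal userspace analogue of that blocking behaviour (a pthread condition variable instead of the kernel wait queue used in the patch, and a single counter instead of the real biggest-hole tracking; all names are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static unsigned biggest_hole;           /* size of the largest free hole */

/* allocate: sleep until the biggest hole can satisfy the request
 * (the kernel version uses wait_event_interruptible_locked_irq()) */
static void sa_new(unsigned size)
{
	pthread_mutex_lock(&lock);
	while (biggest_hole < size)
		pthread_cond_wait(&cond, &lock);
	biggest_hole -= size;
	pthread_mutex_unlock(&lock);
}

/* free: the hole grows, so re-check every waiter */
static void sa_free(unsigned size)
{
	pthread_mutex_lock(&lock);
	biggest_hole += size;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
}

static void *consumer(void *arg)
{
	(void)arg;
	sa_new(64);
	printf("got 64 bytes\n");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, consumer, NULL);
	sa_free(128);               /* another client frees some space */
	pthread_join(t, NULL);
	return 0;
}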

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon.h      |    5 +-
 drivers/gpu/drm/radeon/radeon_ring.c |   18 ++--
 drivers/gpu/drm/radeon/radeon_sa.c   |  192 +++++++++++++++++++++++++---------
 3 files changed, 153 insertions(+), 62 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 1aefbd9..415a496 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -381,17 +381,16 @@ struct radeon_bo_list {
  * alignment).
  */
 struct radeon_sa_manager {
-	spinlock_t		lock;
+	wait_queue_head_t	queue;
 	struct radeon_bo	*bo;
 	struct list_head	sa_bo;
 	unsigned		size;
+	struct list_head	*biggest_hole;
 	uint64_t		gpu_addr;
 	void			*cpu_ptr;
 	uint32_t		domain;
 };
 
-struct radeon_sa_bo;
-
 /* sub-allocation buffer */
 struct radeon_sa_bo {
 	struct list_head		list;
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 1d9bce9..5942769 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -205,10 +205,16 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 
 int radeon_ib_pool_init(struct radeon_device *rdev)
 {
-	struct radeon_sa_manager tmp;
 	int i, r;
 
-	r = radeon_sa_bo_manager_init(rdev, &tmp,
+	radeon_mutex_lock(&rdev->ib_pool.mutex);
+	if (rdev->ib_pool.ready) {
+		return 0;
+	}
+	rdev->ib_pool.ready = true;
+	radeon_mutex_unlock(&rdev->ib_pool.mutex);
+
+	r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager,
 				      RADEON_IB_POOL_SIZE*64*1024,
 				      RADEON_GEM_DOMAIN_GTT);
 	if (r) {
@@ -216,14 +222,6 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
 	}
 
 	radeon_mutex_lock(&rdev->ib_pool.mutex);
-	if (rdev->ib_pool.ready) {
-		radeon_mutex_unlock(&rdev->ib_pool.mutex);
-		radeon_sa_bo_manager_fini(rdev, &tmp);
-		return 0;
-	}
-
-	rdev->ib_pool.sa_manager = tmp;
-	INIT_LIST_HEAD(&rdev->ib_pool.sa_manager.sa_bo);
 	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
 		rdev->ib_pool.ibs[i].fence = NULL;
 		rdev->ib_pool.ibs[i].idx = i;
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index 013a787..72ebb77 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -26,6 +26,7 @@
 /*
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
+ *    Christian König <christian.koenig@amd.com>
  */
 #include "drmP.h"
 #include "drm.h"
@@ -37,9 +38,10 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev,
 {
 	int r;
 
-	spin_lock_init(&sa_manager->lock);
+	init_waitqueue_head(&sa_manager->queue);
 	sa_manager->bo = NULL;
 	sa_manager->size = size;
+	sa_manager->biggest_hole = &sa_manager->sa_bo;
 	sa_manager->domain = domain;
 	INIT_LIST_HEAD(&sa_manager->sa_bo);
 
@@ -58,6 +60,7 @@ void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
 {
 	struct radeon_sa_bo *sa_bo, *tmp;
 
+	wake_up_all(&sa_manager->queue);
 	if (!list_empty(&sa_manager->sa_bo)) {
 		dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
 	}
@@ -129,81 +132,172 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
  *
  * Alignment can't be bigger than page size
  */
+
+static inline unsigned radeon_sa_bo_hole_start(struct radeon_sa_manager *m,
+					       struct list_head *entry)
+{
+	struct radeon_sa_bo *sa_bo;
+
+	if (entry == &m->sa_bo)
+		return 0;
+
+	sa_bo = list_entry(entry, struct radeon_sa_bo, list);
+	return sa_bo->offset + sa_bo->size;
+}
+
+static inline unsigned radeon_sa_bo_hole_end(struct radeon_sa_manager *m,
+					     struct list_head *entry)
+{
+	if (entry->next == &m->sa_bo)
+		return m->size;
+
+	return list_entry(entry->next, struct radeon_sa_bo, list)->offset;
+}
+
+static inline unsigned radeon_sa_bo_hole_size(struct radeon_sa_manager *m,
+					      struct list_head *entry,
+					      unsigned align)
+{
+	unsigned start, end, wasted;
+	start = radeon_sa_bo_hole_start(m, m->biggest_hole);
+	wasted = start % align;
+	if (wasted)
+		start += align - wasted;
+
+	end = radeon_sa_bo_hole_end(m, m->biggest_hole);
+	return start < end ? end - start : 0;
+}
+
 int radeon_sa_bo_new(struct radeon_device *rdev,
 		     struct radeon_sa_manager *sa_manager,
 		     struct radeon_sa_bo *sa_bo,
 		     unsigned size, unsigned align)
 {
-	struct radeon_sa_bo *tmp;
-	struct list_head *head;
-	unsigned offset = 0, wasted = 0;
-	unsigned long flags;
+	struct list_head *head, *curr, *hole;
+	unsigned start, currsize, wasted, holesize = 0;
+	int r;
 
 	BUG_ON(align > RADEON_GPU_PAGE_SIZE);
 	BUG_ON(size > sa_manager->size);
-	spin_lock_irqsave(&sa_manager->lock, flags);
 
-	/* no one ? */
-	if (list_empty(&sa_manager->sa_bo)) {
-		head = &sa_manager->sa_bo;
-		goto out;
+	spin_lock_irq(&sa_manager->queue.lock);
+
+	r = wait_event_interruptible_locked_irq(sa_manager->queue,
+		radeon_sa_bo_hole_size(sa_manager, sa_manager->biggest_hole, align) >= size
+	);
+	if (r) {
+		spin_unlock(&sa_manager->queue.lock);
+		return r;
 	}
 
-	/* look for a hole big enough */
-	list_for_each_entry(tmp, &sa_manager->sa_bo, list) {
-		/* room before this object ? */
-		if (offset < tmp->offset && (tmp->offset - offset) >= size) {
-			head = tmp->list.prev;
-			goto out;
-		}
-		offset = tmp->offset + tmp->size;
-		wasted = offset % align;
+	curr = head = hole = &sa_manager->sa_bo;
+	do {
+		start = radeon_sa_bo_hole_start(sa_manager, curr);
+		currsize = radeon_sa_bo_hole_end(sa_manager, curr) - start;
+
+		wasted = start % align;
 		if (wasted) {
-			offset += align - wasted;
+			wasted = align - wasted;
+			start += wasted;
+		}
+
+		/* room after current big enough ? */
+		if (currsize >= (size + wasted)) {
+			sa_bo->manager = sa_manager;
+			sa_bo->offset = start;
+			sa_bo->size = size;
+			list_add(&sa_bo->list, curr);
+
+			/* did we borrow from the biggest hole ? */
+			if (curr == sa_manager->biggest_hole) {
+
+				/* consider the space left after the newly added sa_bo */
+				curr = curr->next;
+				currsize -= size;
+				if (holesize < currsize) {
+					hole = curr;
+					holesize = currsize;
+				}
+				curr = curr->next;
+
+				while (curr != head) {
+					currsize = radeon_sa_bo_hole_end(sa_manager, curr);
+					currsize -= radeon_sa_bo_hole_start(sa_manager, curr);
+					if (holesize < currsize) {
+						hole = curr;
+						holesize = currsize;
+					}
+					curr = curr->next;
+				}
+				sa_manager->biggest_hole = hole;
+				wake_up_locked(&sa_manager->queue);
+			}
+
+			spin_unlock_irq(&sa_manager->queue.lock);
+			return 0;
+		}
+
+		if (holesize < currsize) {
+			hole = curr;
+			holesize = currsize;
 		}
-	}
-	/* room at the end ? */
-	head = sa_manager->sa_bo.prev;
-	tmp = list_entry(head, struct radeon_sa_bo, list);
-	offset = tmp->offset + tmp->size;
-	wasted = offset % align;
-	if (wasted) {
-		offset += wasted = align - wasted;
-	}
-	if ((sa_manager->size - offset) < size) {
-		/* failed to find somethings big enough */
-		spin_unlock_irqrestore(&sa_manager->lock, flags);
-		return -ENOMEM;
-	}
 
-out:
-	sa_bo->manager = sa_manager;
-	sa_bo->offset = offset;
-	sa_bo->size = size;
-	list_add(&sa_bo->list, head);
-	spin_unlock_irqrestore(&sa_manager->lock, flags);
-	return 0;
+		curr = curr->next;
+	} while (curr != head);
+
+	/* failed to find something big enough */
+	spin_unlock_irq(&sa_manager->queue.lock);
+	return -ENOMEM;
 }
 
 void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo)
 {
+	struct radeon_sa_manager *sa_manager = sa_bo->manager;
 	unsigned long flags;
-	spin_lock_irqsave(&sa_bo->manager->lock, flags);
+
+	spin_lock_irqsave(&sa_manager->queue.lock, flags);
+	if (&sa_bo->list == sa_manager->biggest_hole ||
+	    sa_bo->list.prev == sa_manager->biggest_hole) {
+
+		sa_manager->biggest_hole = sa_bo->list.prev;
+		wake_up_locked(&sa_manager->queue);
+	}
 	list_del_init(&sa_bo->list);
-	spin_unlock_irqrestore(&sa_bo->manager->lock, flags);
+	spin_unlock_irqrestore(&sa_manager->queue.lock, flags);
 }
 
 #if defined(CONFIG_DEBUG_FS)
 void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
 				  struct seq_file *m)
 {
-	struct radeon_sa_bo *i;
+	struct list_head *head, *curr;
+	struct radeon_sa_bo *sa_bo;
+	unsigned start, end;
 	unsigned long flags;
 
-	spin_lock_irqsave(&sa_manager->lock, flags);
-	list_for_each_entry(i, &sa_manager->sa_bo, list) {
-		seq_printf(m, "offset %08d: size %4d\n", i->offset, i->size);
-	}
-	spin_unlock_irqrestore(&sa_manager->lock, flags);
+	spin_lock_irqsave(&sa_manager->queue.lock, flags);
+	curr = head = &sa_manager->sa_bo;
+	do {
+		if (curr != &sa_manager->sa_bo) {
+			sa_bo = list_entry(curr, struct radeon_sa_bo, list);
+			seq_printf(m, "reservation  %p %08d: size %7d\n",
+				   curr, sa_bo->offset, sa_bo->size);
+		}
+
+		start = radeon_sa_bo_hole_start(sa_manager, curr);
+		end = radeon_sa_bo_hole_end(sa_manager, curr);
+		if (start < end) {
+			seq_printf(m, "hole         %p %08d: size %7d\n",
+				   curr, start, end - start);
+		}
+		curr = curr->next;
+	} while (curr != head);
+
+	start = radeon_sa_bo_hole_start(sa_manager, sa_manager->biggest_hole);
+	end = radeon_sa_bo_hole_end(sa_manager, sa_manager->biggest_hole);
+	seq_printf(m, "\nbiggest hole %p %08d: size %7d\n",
+		   sa_manager->biggest_hole, start, end - start);
+
+	spin_unlock_irqrestore(&sa_manager->queue.lock, flags);
 }
 #endif
-- 
1.7.5.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 09/13] drm/radeon: simplify semaphore handling
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
                   ` (7 preceding siblings ...)
  2012-04-19 22:39 ` [PATCH 08/13] drm/radeon: add biggest hole tracking and wakequeue to the sa Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-19 22:39 ` [PATCH 10/13] drm/radeon: return -ENOENT in fence_wait_* Christian König
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

Directly use the suballocator to get small chunks
of memory. It's equally fast and doesn't crash when
we encounter a GPU reset.
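
The simplification hinges on a single waiters counter per semaphore:
each emitted wait increments it, each emitted signal decrements it, and
freeing a semaphore while it is still positive means some ring will
wait forever. A stand-alone sketch of that invariant (illustrative
names, not the driver API):

#include <stdio.h>

struct sem {
	int waiters;	/* emitted waits minus emitted signals */
};

static void emit_wait(struct sem *s)   { ++s->waiters; }
static void emit_signal(struct sem *s) { --s->waiters; }

static void sem_free(struct sem *s)
{
	if (s->waiters > 0)
		fprintf(stderr, "more waiters than signalers, lockup imminent!\n");
}

int main(void)
{
	struct sem s = { 0 };

	emit_wait(&s);	/* one ring is told to wait ...            */
	sem_free(&s);	/* ... but nobody signals it: warning here */
	return 0;
}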

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/evergreen.c        |    1 -
 drivers/gpu/drm/radeon/ni.c               |    1 -
 drivers/gpu/drm/radeon/r600.c             |    1 -
 drivers/gpu/drm/radeon/radeon.h           |   29 +------
 drivers/gpu/drm/radeon/radeon_device.c    |    2 -
 drivers/gpu/drm/radeon/radeon_semaphore.c |  137 +++++------------------------
 drivers/gpu/drm/radeon/rv770.c            |    1 -
 drivers/gpu/drm/radeon/si.c               |    1 -
 8 files changed, 26 insertions(+), 147 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index ca47f52..a76389c 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3431,7 +3431,6 @@ void evergreen_fini(struct radeon_device *rdev)
 	evergreen_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
-	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_agp_fini(rdev);
 	radeon_bo_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 0146428..c0b0956 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1773,7 +1773,6 @@ void cayman_fini(struct radeon_device *rdev)
 	cayman_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
-	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_bo_fini(rdev);
 	radeon_atombios_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index c749e00..b8d9335 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2672,7 +2672,6 @@ void r600_fini(struct radeon_device *rdev)
 	r600_vram_scratch_fini(rdev);
 	radeon_agp_fini(rdev);
 	radeon_gem_fini(rdev);
-	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_bo_fini(rdev);
 	radeon_atombios_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 415a496..222939f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -427,34 +427,12 @@ int radeon_mode_dumb_destroy(struct drm_file *file_priv,
 /*
  * Semaphores.
  */
-struct radeon_ring;
-
-#define	RADEON_SEMAPHORE_BO_SIZE	256
-
-struct radeon_semaphore_driver {
-	rwlock_t			lock;
-	struct list_head		bo;
-};
-
-struct radeon_semaphore_bo;
-
-/* everything here is constant */
 struct radeon_semaphore {
-	struct list_head		list;
-	uint64_t			gpu_addr;
-	uint32_t			*cpu_ptr;
-	struct radeon_semaphore_bo	*bo;
-};
-
-struct radeon_semaphore_bo {
-	struct list_head		list;
-	struct radeon_ib		*ib;
-	struct list_head		free;
-	struct radeon_semaphore		semaphores[RADEON_SEMAPHORE_BO_SIZE/8];
-	unsigned			nused;
+	struct radeon_sa_bo	sa_bo;
+	signed			waiters;
+	uint64_t		gpu_addr;
 };
 
-void radeon_semaphore_driver_fini(struct radeon_device *rdev);
 int radeon_semaphore_create(struct radeon_device *rdev,
 			    struct radeon_semaphore **semaphore);
 void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
@@ -1518,7 +1496,6 @@ struct radeon_device {
 	struct radeon_mman		mman;
 	rwlock_t			fence_lock;
 	struct radeon_fence_driver	fence_drv[RADEON_NUM_RINGS];
-	struct radeon_semaphore_driver	semaphore_drv;
 	struct radeon_ring		ring[RADEON_NUM_RINGS];
 	struct radeon_ib_pool		ib_pool;
 	struct radeon_irq		irq;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index dedb398..8c49990 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -733,11 +733,9 @@ int radeon_device_init(struct radeon_device *rdev,
 	mutex_init(&rdev->pm.mutex);
 	mutex_init(&rdev->vram_mutex);
 	rwlock_init(&rdev->fence_lock);
-	rwlock_init(&rdev->semaphore_drv.lock);
 	INIT_LIST_HEAD(&rdev->gem.objects);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
 	init_waitqueue_head(&rdev->irq.idle_queue);
-	INIT_LIST_HEAD(&rdev->semaphore_drv.bo);
 	/* initialize vm here */
 	rdev->vm_manager.use_bitmap = 1;
 	rdev->vm_manager.max_pfn = 1 << 20;
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index 61dd4e3..4603fab 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -31,108 +31,32 @@
 #include "drm.h"
 #include "radeon.h"
 
-static int radeon_semaphore_add_bo(struct radeon_device *rdev)
+int radeon_semaphore_create(struct radeon_device *rdev,
+			    struct radeon_semaphore **semaphore)
 {
-	struct radeon_semaphore_bo *bo;
-	unsigned long irq_flags;
-	uint64_t gpu_addr;
-	uint32_t *cpu_ptr;
-	int r, i;
-
+	void *cpu_ptr;
+	int r;
 
-	bo = kmalloc(sizeof(struct radeon_semaphore_bo), GFP_KERNEL);
-	if (bo == NULL) {
+	*semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL);
+	if (*semaphore == NULL) {
 		return -ENOMEM;
 	}
-	INIT_LIST_HEAD(&bo->free);
-	INIT_LIST_HEAD(&bo->list);
-	bo->nused = 0;
 
-	r = radeon_ib_get(rdev, 0, &bo->ib, RADEON_SEMAPHORE_BO_SIZE);
+	r = radeon_sa_bo_new(rdev, &rdev->ib_pool.sa_manager, 
+			     &(*semaphore)->sa_bo, 8, 8);
 	if (r) {
-		dev_err(rdev->dev, "failed to get a bo after 5 retry\n");
-		kfree(bo);
+		kfree(*semaphore);
+		*semaphore = NULL;
 		return r;
 	}
-	gpu_addr = rdev->ib_pool.sa_manager.gpu_addr;
-	gpu_addr += bo->ib->sa_bo.offset;
-	cpu_ptr = rdev->ib_pool.sa_manager.cpu_ptr;
-	cpu_ptr += (bo->ib->sa_bo.offset >> 2);
-	for (i = 0; i < (RADEON_SEMAPHORE_BO_SIZE/8); i++) {
-		bo->semaphores[i].gpu_addr = gpu_addr;
-		bo->semaphores[i].cpu_ptr = cpu_ptr;
-		bo->semaphores[i].bo = bo;
-		list_add_tail(&bo->semaphores[i].list, &bo->free);
-		gpu_addr += 8;
-		cpu_ptr += 2;
-	}
-	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
-	list_add_tail(&bo->list, &rdev->semaphore_drv.bo);
-	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
-	return 0;
-}
-
-static void radeon_semaphore_del_bo_locked(struct radeon_device *rdev,
-					   struct radeon_semaphore_bo *bo)
-{
-	radeon_sa_bo_free(rdev, &bo->ib->sa_bo);
-	radeon_fence_unref(&bo->ib->fence);
-	list_del(&bo->list);
-	kfree(bo);
-}
-
-void radeon_semaphore_shrink_locked(struct radeon_device *rdev)
-{
-	struct radeon_semaphore_bo *bo, *n;
-
-	if (list_empty(&rdev->semaphore_drv.bo)) {
-		return;
-	}
-	/* only shrink if first bo has free semaphore */
-	bo = list_first_entry(&rdev->semaphore_drv.bo, struct radeon_semaphore_bo, list);
-	if (list_empty(&bo->free)) {
-		return;
-	}
-	list_for_each_entry_safe_continue(bo, n, &rdev->semaphore_drv.bo, list) {
-		if (bo->nused)
-			continue;
-		radeon_semaphore_del_bo_locked(rdev, bo);
-	}
-}
 
-int radeon_semaphore_create(struct radeon_device *rdev,
-			    struct radeon_semaphore **semaphore)
-{
-	struct radeon_semaphore_bo *bo;
-	unsigned long irq_flags;
-	bool do_retry = true;
-	int r;
-
-retry:
-	*semaphore = NULL;
-	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
-	list_for_each_entry(bo, &rdev->semaphore_drv.bo, list) {
-		if (list_empty(&bo->free))
-			continue;
-		*semaphore = list_first_entry(&bo->free, struct radeon_semaphore, list);
-		(*semaphore)->cpu_ptr[0] = 0;
-		(*semaphore)->cpu_ptr[1] = 0;
-		list_del(&(*semaphore)->list);
-		bo->nused++;
-		break;
-	}
-	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+	(*semaphore)->waiters = 0;	
+	(*semaphore)->gpu_addr = rdev->ib_pool.sa_manager.gpu_addr;
+	(*semaphore)->gpu_addr += (*semaphore)->sa_bo.offset;
 
-	if (*semaphore == NULL) {
-		if (do_retry) {
-			do_retry = false;
-			r = radeon_semaphore_add_bo(rdev);
-			if (r)
-				return r;
-			goto retry;
-		}
-		return -ENOMEM;
-	}
+	cpu_ptr = rdev->ib_pool.sa_manager.cpu_ptr;
+	cpu_ptr += (*semaphore)->sa_bo.offset;
+	*((uint64_t*)cpu_ptr) = 0;
 
 	return 0;
 }
@@ -140,39 +64,24 @@ retry:
 void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
 			          struct radeon_semaphore *semaphore)
 {
+	--semaphore->waiters;
 	radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, false);
 }
 
 void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
 			        struct radeon_semaphore *semaphore)
 {
+	++semaphore->waiters;
 	radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true);
 }
 
 void radeon_semaphore_free(struct radeon_device *rdev,
 			   struct radeon_semaphore *semaphore)
 {
-	unsigned long irq_flags;
-
-	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
-	semaphore->bo->nused--;
-	list_add_tail(&semaphore->list, &semaphore->bo->free);
-	radeon_semaphore_shrink_locked(rdev);
-	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
-}
-
-void radeon_semaphore_driver_fini(struct radeon_device *rdev)
-{
-	struct radeon_semaphore_bo *bo, *n;
-	unsigned long irq_flags;
-
-	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
-	/* we force to free everything */
-	list_for_each_entry_safe(bo, n, &rdev->semaphore_drv.bo, list) {
-		if (!list_empty(&bo->free)) {
-			dev_err(rdev->dev, "still in use semaphore\n");
-		}
-		radeon_semaphore_del_bo_locked(rdev, bo);
+	if (semaphore->waiters > 0) {
+		dev_err(rdev->dev, "Semaphore %p has more waiters than signalers,"
+			" hardware lockup imminent!\n", semaphore);
 	}
-	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+	radeon_sa_bo_free(rdev, &semaphore->sa_bo);
+	kfree(semaphore);
 }
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index cacec0e..c6ee54e 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -1278,7 +1278,6 @@ void rv770_fini(struct radeon_device *rdev)
 	rv770_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
-	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_agp_fini(rdev);
 	radeon_bo_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 14919e1..7c0110d 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -4120,7 +4120,6 @@ void si_fini(struct radeon_device *rdev)
 	si_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
-	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_bo_fini(rdev);
 	radeon_atombios_fini(rdev);
-- 
1.7.5.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 10/13] drm/radeon: return -ENOENT in fence_wait_*
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
                   ` (8 preceding siblings ...)
  2012-04-19 22:39 ` [PATCH 09/13] drm/radeon: simplify semaphore handling Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-20  7:20   ` Michel Dänzer
  2012-04-19 22:39 ` [PATCH 11/13] drm/radeon: rip out the ib pool Christian König
                   ` (3 subsequent siblings)
  13 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon_fence.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 1a9765a..764ab7e 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -286,7 +286,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 	}
 	if (list_empty(&rdev->fence_drv[ring].emitted)) {
 		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return 0;
+		return -ENOENT;
 	}
 	fence = list_entry(rdev->fence_drv[ring].emitted.next,
 			   struct radeon_fence, list);
@@ -310,7 +310,7 @@ int radeon_fence_wait_last(struct radeon_device *rdev, int ring)
 	}
 	if (list_empty(&rdev->fence_drv[ring].emitted)) {
 		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
-		return 0;
+		return -ENOENT;
 	}
 	fence = list_entry(rdev->fence_drv[ring].emitted.prev,
 			   struct radeon_fence, list);
-- 
1.7.5.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 11/13] drm/radeon: rip out the ib pool
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
                   ` (9 preceding siblings ...)
  2012-04-19 22:39 ` [PATCH 10/13] drm/radeon: return -ENOENT in fence_wait_* Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-19 22:39 ` [PATCH 12/13] drm/radeon: fix a bug with the ring syncing code Christian König
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

It isn't necessary any more and the suballocator
seems to perform even better.
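
Without the pool, an IB's backing memory is tied to its fence: freeing
an IB either destroys it right away or, if its fence is emitted but not
yet signaled, hands the IB over to the fence so it is only reclaimed
once the fence signals. A stand-alone sketch of that hand-off
(illustrative types, not the driver structures):

#include <stdbool.h>
#include <stdlib.h>

struct ib;

struct fence {
	bool emitted, signaled;
	struct ib *ib;		/* deferred IB, reclaimed on signal */
};

struct ib {
	struct fence *fence;
	void *backing;		/* stands in for the sub-allocated chunk */
};

static void ib_destroy(struct ib *ib)
{
	free(ib->backing);
	free(ib);
}

static void ib_free(struct ib *ib)
{
	if (ib->fence && ib->fence->emitted && !ib->fence->signaled)
		ib->fence->ib = ib;	/* the fence now owns the IB */
	else
		ib_destroy(ib);		/* nothing in flight, reclaim now */
}

static void fence_signal(struct fence *f)
{
	f->signaled = true;
	if (f->ib)
		ib_destroy(f->ib);	/* hardware is done, reclaim the IB */
}

int main(void)
{
	struct fence f = { .emitted = true };
	struct ib *ib = calloc(1, sizeof(*ib));

	ib->fence = &f;
	ib->backing = malloc(64);
	ib_free(ib);		/* deferred: the fence holds it */
	fence_signal(&f);	/* now actually freed */
	return 0;
}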

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon.h           |   22 +--
 drivers/gpu/drm/radeon/radeon_device.c    |    1 -
 drivers/gpu/drm/radeon/radeon_fence.c     |   44 +++++-
 drivers/gpu/drm/radeon/radeon_gart.c      |   12 +-
 drivers/gpu/drm/radeon/radeon_ring.c      |  240 ++++++++--------------------
 drivers/gpu/drm/radeon/radeon_semaphore.c |    6 +-
 6 files changed, 123 insertions(+), 202 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 222939f..d46d5ac 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -249,6 +249,8 @@ extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw,
 /*
  * Fences.
  */
+struct radeon_ib;
+
 struct radeon_fence_driver {
 	uint32_t			scratch_reg;
 	uint64_t			gpu_addr;
@@ -259,7 +261,6 @@ struct radeon_fence_driver {
 	wait_queue_head_t		queue;
 	struct list_head		created;
 	struct list_head		emitted;
-	struct list_head		signaled;
 	bool				initialized;
 };
 
@@ -274,6 +275,7 @@ struct radeon_fence {
 	/* RB, DMA, etc. */
 	int				ring;
 	struct radeon_semaphore		*semaphore;
+	struct radeon_ib		*ib;
 };
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
@@ -289,6 +291,7 @@ int radeon_fence_wait_last(struct radeon_device *rdev, int ring);
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
 void radeon_fence_unref(struct radeon_fence **fence);
 int radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
+bool radeon_fence_set_associated_ib(struct radeon_fence *fence, struct radeon_ib *ib);
 
 /*
  * Tiling registers
@@ -603,7 +606,6 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
 
 struct radeon_ib {
 	struct radeon_sa_bo	sa_bo;
-	unsigned		idx;
 	uint32_t		length_dw;
 	uint64_t		gpu_addr;
 	uint32_t		*ptr;
@@ -612,18 +614,6 @@ struct radeon_ib {
 	bool			is_const_ib;
 };
 
-/*
- * locking -
- * mutex protects scheduled_ibs, ready, alloc_bm
- */
-struct radeon_ib_pool {
-	struct radeon_mutex		mutex;
-	struct radeon_sa_manager	sa_manager;
-	struct radeon_ib		ibs[RADEON_IB_POOL_SIZE];
-	bool				ready;
-	unsigned			head_id;
-};
-
 struct radeon_ring {
 	struct radeon_bo	*ring_obj;
 	volatile uint32_t	*ring;
@@ -764,7 +754,6 @@ struct si_rlc {
 int radeon_ib_get(struct radeon_device *rdev, int ring,
 		  struct radeon_ib **ib, unsigned size);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
-bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_pool_init(struct radeon_device *rdev);
 void radeon_ib_pool_fini(struct radeon_device *rdev);
@@ -1497,7 +1486,8 @@ struct radeon_device {
 	rwlock_t			fence_lock;
 	struct radeon_fence_driver	fence_drv[RADEON_NUM_RINGS];
 	struct radeon_ring		ring[RADEON_NUM_RINGS];
-	struct radeon_ib_pool		ib_pool;
+	bool				ib_pool_ready;
+	struct radeon_sa_manager	sa_manager;
 	struct radeon_irq		irq;
 	struct radeon_asic		*asic;
 	struct radeon_gem		gem;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 8c49990..9189f8d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -723,7 +723,6 @@ int radeon_device_init(struct radeon_device *rdev,
 	/* mutex initialization are all done here so we
 	 * can recall function without having locking issues */
 	radeon_mutex_init(&rdev->cs_mutex);
-	radeon_mutex_init(&rdev->ib_pool.mutex);
 	for (i = 0; i < RADEON_NUM_RINGS; ++i)
 		mutex_init(&rdev->ring[i].mutex);
 	mutex_init(&rdev->dc_hw_i2c_mutex);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 764ab7e..0e8ac35 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -83,7 +83,8 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
 	return 0;
 }
 
-static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
+static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring,
+				     struct list_head *signaled)
 {
 	struct radeon_fence *fence;
 	struct list_head *i, *n;
@@ -110,7 +111,7 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
 		i = n;
 		do {
 			n = i->prev;
-			list_move_tail(i, &rdev->fence_drv[ring].signaled);
+			list_move_tail(i, signaled);
 			fence = list_entry(i, struct radeon_fence, list);
 			fence->signaled = true;
 			i = n;
@@ -120,6 +121,18 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
 	return wake;
 }
 
+static void radeon_fence_process_signaled(struct radeon_device *rdev, struct list_head *signaled)
+{
+	struct radeon_fence *fence;
+	struct list_head *i, *n;
+
+	list_for_each_safe(i, n, signaled) {
+		fence = list_entry(i, struct radeon_fence, list);
+		list_del_init(&fence->list);
+		radeon_ib_free(rdev, &fence->ib);
+	}
+}
+
 static void radeon_fence_destroy(struct kref *kref)
 {
 	unsigned long irq_flags;
@@ -152,6 +165,7 @@ int radeon_fence_create(struct radeon_device *rdev,
 	(*fence)->seq = 0;
 	(*fence)->ring = ring;
 	(*fence)->semaphore = NULL;
+	(*fence)->ib = NULL;
 	INIT_LIST_HEAD(&(*fence)->list);
 
 	write_lock_irqsave(&rdev->fence_lock, irq_flags);
@@ -164,6 +178,7 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
 {
 	unsigned long irq_flags;
 	bool signaled = false;
+	LIST_HEAD(siglist);
 
 	if (!fence)
 		return true;
@@ -179,10 +194,12 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
 		signaled = true;
 	}
 	if (!signaled) {
-		radeon_fence_poll_locked(fence->rdev, fence->ring);
+		radeon_fence_poll_locked(fence->rdev, fence->ring, &siglist);
 		signaled = fence->signaled;
 	}
 	write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
+	radeon_fence_process_signaled(fence->rdev, &siglist);
+
 	return signaled;
 }
 
@@ -341,10 +358,12 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
 {
 	unsigned long irq_flags;
 	bool wake;
+	LIST_HEAD(signaled);
 
 	write_lock_irqsave(&rdev->fence_lock, irq_flags);
-	wake = radeon_fence_poll_locked(rdev, ring);
+	wake = radeon_fence_poll_locked(rdev, ring, &signaled);
 	write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+	radeon_fence_process_signaled(rdev, &signaled);
 	if (wake) {
 		wake_up_all(&rdev->fence_drv[ring].queue);
 	}
@@ -373,6 +392,22 @@ int radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
 	return not_processed;
 }
 
+bool radeon_fence_set_associated_ib(struct radeon_fence *fence, struct radeon_ib *ib)
+{
+	struct radeon_device *rdev = fence->rdev;
+	unsigned long irq_flags;
+	bool isset = false;
+
+	/* a read lock is sufficient, because this should be called only once */
+	read_lock_irqsave(&rdev->fence_lock, irq_flags);
+	if (fence->emitted && !fence->signaled) {
+		fence->ib = ib;
+		isset = true;
+	}
+	read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+	return isset;
+}
+
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 {
 	unsigned long irq_flags;
@@ -413,7 +448,6 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 	atomic_set(&rdev->fence_drv[ring].seq, 0);
 	INIT_LIST_HEAD(&rdev->fence_drv[ring].created);
 	INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted);
-	INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled);
 	init_waitqueue_head(&rdev->fence_drv[ring].queue);
 	rdev->fence_drv[ring].initialized = false;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index c58a036..0ab3277 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -434,8 +434,8 @@ retry_id:
 	rdev->vm_manager.use_bitmap |= 1 << id;
 	vm->id = id;
 	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
-	return radeon_vm_bo_update_pte(rdev, vm, rdev->ib_pool.sa_manager.bo,
-				       &rdev->ib_pool.sa_manager.bo->tbo.mem);
+	return radeon_vm_bo_update_pte(rdev, vm, rdev->sa_manager.bo,
+				       &rdev->sa_manager.bo->tbo.mem);
 }
 
 /* object have to be reserved */
@@ -633,7 +633,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 	/* map the ib pool buffer at 0 in virtual address space, set
 	 * read only
 	 */
-	r = radeon_vm_bo_add(rdev, vm, rdev->ib_pool.sa_manager.bo, 0,
+	r = radeon_vm_bo_add(rdev, vm, rdev->sa_manager.bo, 0,
 			     RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED);
 	return r;
 }
@@ -650,12 +650,12 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 	radeon_mutex_unlock(&rdev->cs_mutex);
 
 	/* remove all bo */
-	r = radeon_bo_reserve(rdev->ib_pool.sa_manager.bo, false);
+	r = radeon_bo_reserve(rdev->sa_manager.bo, false);
 	if (!r) {
-		bo_va = radeon_bo_va(rdev->ib_pool.sa_manager.bo, vm);
+		bo_va = radeon_bo_va(rdev->sa_manager.bo, vm);
 		list_del_init(&bo_va->bo_list);
 		list_del_init(&bo_va->vm_list);
-		radeon_bo_unreserve(rdev->ib_pool.sa_manager.bo);
+		radeon_bo_unreserve(rdev->sa_manager.bo);
 		kfree(bo_va);
 	}
 	if (!list_empty(&vm->va)) {
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 5942769..77bed81 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -24,6 +24,7 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  *          Jerome Glisse
+ *          Christian König
  */
 #include <linux/seq_file.h>
 #include <linux/slab.h>
@@ -33,8 +34,10 @@
 #include "radeon.h"
 #include "atom.h"
 
-int radeon_debugfs_ib_init(struct radeon_device *rdev);
-int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring);
+/*
+ * IB.
+ */
+int radeon_debugfs_sa_init(struct radeon_device *rdev);
 
 u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
 {
@@ -61,123 +64,62 @@ u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
 	return idx_value;
 }
 
-void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
-{
-#if DRM_DEBUG_CODE
-	if (ring->count_dw <= 0) {
-		DRM_ERROR("radeon: writting more dword to ring than expected !\n");
-	}
-#endif
-	ring->ring[ring->wptr++] = v;
-	ring->wptr &= ring->ptr_mask;
-	ring->count_dw--;
-	ring->ring_free_dw--;
-}
-
-/*
- * IB.
- */
-bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib)
-{
-	bool done = false;
-
-	/* only free ib which have been emited */
-	if (ib->fence && ib->fence->emitted) {
-		if (radeon_fence_signaled(ib->fence)) {
-			radeon_fence_unref(&ib->fence);
-			radeon_sa_bo_free(rdev, &ib->sa_bo);
-			done = true;
-		}
-	}
-	return done;
-}
-
 int radeon_ib_get(struct radeon_device *rdev, int ring,
 		  struct radeon_ib **ib, unsigned size)
 {
-	struct radeon_fence *fence;
-	unsigned cretry = 0;
-	int r = 0, i, idx;
+	int r;
 
-	*ib = NULL;
-	/* align size on 256 bytes */
-	size = ALIGN(size, 256);
+	*ib = kmalloc(sizeof(struct radeon_ib), GFP_KERNEL);
+	if (*ib == NULL) {
+		return -ENOMEM;
+	}
 
-	r = radeon_fence_create(rdev, &fence, ring);
+	r = radeon_sa_bo_new(rdev, &rdev->sa_manager, &(*ib)->sa_bo, size, 256);
 	if (r) {
-		dev_err(rdev->dev, "failed to create fence for new IB\n");
+		dev_err(rdev->dev, "failed to get a new IB (%d)\n", r);
+		kfree(*ib);
+		*ib = NULL;
 		return r;
 	}
 
-	radeon_mutex_lock(&rdev->ib_pool.mutex);
-	idx = rdev->ib_pool.head_id;
-retry:
-	if (cretry > 5) {
-		dev_err(rdev->dev, "failed to get an ib after 5 retry\n");
-		radeon_mutex_unlock(&rdev->ib_pool.mutex);
-		radeon_fence_unref(&fence);
-		return -ENOMEM;
-	}
-	cretry++;
-	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
-		radeon_ib_try_free(rdev, &rdev->ib_pool.ibs[idx]);
-		if (rdev->ib_pool.ibs[idx].fence == NULL) {
-			r = radeon_sa_bo_new(rdev, &rdev->ib_pool.sa_manager,
-					     &rdev->ib_pool.ibs[idx].sa_bo,
-					     size, 256);
-			if (!r) {
-				*ib = &rdev->ib_pool.ibs[idx];
-				(*ib)->ptr = rdev->ib_pool.sa_manager.cpu_ptr;
-				(*ib)->ptr += ((*ib)->sa_bo.offset >> 2);
-				(*ib)->gpu_addr = rdev->ib_pool.sa_manager.gpu_addr;
-				(*ib)->gpu_addr += (*ib)->sa_bo.offset;
-				(*ib)->fence = fence;
-				(*ib)->vm_id = 0;
-				(*ib)->is_const_ib = false;
-				/* ib are most likely to be allocated in a ring fashion
-				 * thus rdev->ib_pool.head_id should be the id of the
-				 * oldest ib
-				 */
-				rdev->ib_pool.head_id = (1 + idx);
-				rdev->ib_pool.head_id &= (RADEON_IB_POOL_SIZE - 1);
-				radeon_mutex_unlock(&rdev->ib_pool.mutex);
-				return 0;
-			}
-		}
-		idx = (idx + 1) & (RADEON_IB_POOL_SIZE - 1);
-	}
-	/* this should be rare event, ie all ib scheduled none signaled yet.
-	 */
-	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
-		if (rdev->ib_pool.ibs[idx].fence && rdev->ib_pool.ibs[idx].fence->emitted) {
-			r = radeon_fence_wait(rdev->ib_pool.ibs[idx].fence, false);
-			if (!r) {
-				goto retry;
-			}
-			/* an error happened */
-			break;
-		}
-		idx = (idx + 1) & (RADEON_IB_POOL_SIZE - 1);
+	r = radeon_fence_create(rdev, &(*ib)->fence, ring);
+	if (r) {
+		dev_err(rdev->dev, "failed to create fence for new IB (%d)\n", r);
+		radeon_sa_bo_free(rdev, &(*ib)->sa_bo);
+		kfree(*ib);
+		*ib = NULL;
+		return r;
 	}
-	radeon_mutex_unlock(&rdev->ib_pool.mutex);
-	radeon_fence_unref(&fence);
-	return r;
+
+	(*ib)->ptr = rdev->sa_manager.cpu_ptr;
+	(*ib)->ptr += ((*ib)->sa_bo.offset >> 2);
+	(*ib)->gpu_addr = rdev->sa_manager.gpu_addr;
+	(*ib)->gpu_addr += (*ib)->sa_bo.offset;
+	(*ib)->vm_id = 0;
+	(*ib)->is_const_ib = false;
+
+	return 0;
 }
 
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
 {
 	struct radeon_ib *tmp = *ib;
+	bool destroy = true;
 
 	*ib = NULL;
 	if (tmp == NULL) {
 		return;
 	}
-	radeon_mutex_lock(&rdev->ib_pool.mutex);
-	if (tmp->fence && !tmp->fence->emitted) {
+
+	if (tmp->fence) {
+		destroy = !radeon_fence_set_associated_ib(tmp->fence, tmp);
+	}
+
+	if (destroy) {
 		radeon_sa_bo_free(rdev, &tmp->sa_bo);
 		radeon_fence_unref(&tmp->fence);
+		kfree(tmp);
 	}
-	radeon_mutex_unlock(&rdev->ib_pool.mutex);
 }
 
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
@@ -187,7 +129,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 
 	if (!ib->length_dw || !ring->ready) {
 		/* TODO: Nothings in the ib we should report. */
-		DRM_ERROR("radeon: couldn't schedule IB(%u).\n", ib->idx);
+		DRM_ERROR("radeon: couldn't schedule IB.\n");
 		return -EINVAL;
 	}
 
@@ -205,64 +147,45 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 
 int radeon_ib_pool_init(struct radeon_device *rdev)
 {
-	int i, r;
+	int r;
 
-	radeon_mutex_lock(&rdev->ib_pool.mutex);
-	if (rdev->ib_pool.ready) {
+	if (rdev->ib_pool_ready) {
 		return 0;
 	}
-	rdev->ib_pool.ready = true;
-	radeon_mutex_unlock(&rdev->ib_pool.mutex);
 
-	r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager,
+	r = radeon_sa_bo_manager_init(rdev, &rdev->sa_manager,
 				      RADEON_IB_POOL_SIZE*64*1024,
 				      RADEON_GEM_DOMAIN_GTT);
 	if (r) {
 		return r;
 	}
 
-	radeon_mutex_lock(&rdev->ib_pool.mutex);
-	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
-		rdev->ib_pool.ibs[i].fence = NULL;
-		rdev->ib_pool.ibs[i].idx = i;
-		rdev->ib_pool.ibs[i].length_dw = 0;
-		INIT_LIST_HEAD(&rdev->ib_pool.ibs[i].sa_bo.list);
+	if (radeon_debugfs_sa_init(rdev)) {
+		DRM_ERROR("Failed to register debugfs file for SA !\n");
 	}
-	rdev->ib_pool.head_id = 0;
-	rdev->ib_pool.ready = true;
+
 	DRM_INFO("radeon: ib pool ready.\n");
+	rdev->ib_pool_ready = true;
 
-	if (radeon_debugfs_ib_init(rdev)) {
-		DRM_ERROR("Failed to register debugfs file for IB !\n");
-	}
-	radeon_mutex_unlock(&rdev->ib_pool.mutex);
 	return 0;
 }
 
 void radeon_ib_pool_fini(struct radeon_device *rdev)
 {
-	unsigned i;
-
-	radeon_mutex_lock(&rdev->ib_pool.mutex);
-	if (rdev->ib_pool.ready) {
-		for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
-			radeon_sa_bo_free(rdev, &rdev->ib_pool.ibs[i].sa_bo);
-			radeon_fence_unref(&rdev->ib_pool.ibs[i].fence);
-		}
-		radeon_sa_bo_manager_fini(rdev, &rdev->ib_pool.sa_manager);
-		rdev->ib_pool.ready = false;
+	if (rdev->ib_pool_ready) {
+		radeon_sa_bo_manager_fini(rdev, &rdev->sa_manager);
+		rdev->ib_pool_ready = false;
 	}
-	radeon_mutex_unlock(&rdev->ib_pool.mutex);
 }
 
 int radeon_ib_pool_start(struct radeon_device *rdev)
 {
-	return radeon_sa_bo_manager_start(rdev, &rdev->ib_pool.sa_manager);
+	return radeon_sa_bo_manager_start(rdev, &rdev->sa_manager);
 }
 
 int radeon_ib_pool_suspend(struct radeon_device *rdev)
 {
-	return radeon_sa_bo_manager_suspend(rdev, &rdev->ib_pool.sa_manager);
+	return radeon_sa_bo_manager_suspend(rdev, &rdev->sa_manager);
 }
 
 int radeon_ib_ring_tests(struct radeon_device *rdev)
@@ -298,6 +221,21 @@ int radeon_ib_ring_tests(struct radeon_device *rdev)
 /*
  * Ring.
  */
+int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring);
+
+void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
+{
+#if DRM_DEBUG_CODE
+	if (ring->count_dw <= 0) {
+		DRM_ERROR("radeon: writing more dwords to ring than expected !\n");
+	}
+#endif
+	ring->ring[ring->wptr++] = v;
+	ring->wptr &= ring->ptr_mask;
+	ring->count_dw--;
+	ring->ring_free_dw--;
+}
+
 int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	/* r1xx-r5xx only has CP ring */
@@ -504,37 +442,13 @@ static struct drm_info_list radeon_debugfs_ring_info_list[] = {
 	{"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index},
 };
 
-static int radeon_debugfs_ib_info(struct seq_file *m, void *data)
-{
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_ib *ib = &rdev->ib_pool.ibs[*((unsigned*)node->info_ent->data)];
-	unsigned i;
-
-	if (ib == NULL) {
-		return 0;
-	}
-	seq_printf(m, "IB %04u\n", ib->idx);
-	seq_printf(m, "IB fence %p\n", ib->fence);
-	seq_printf(m, "IB size %05u dwords\n", ib->length_dw);
-	for (i = 0; i < ib->length_dw; i++) {
-		seq_printf(m, "[%05u]=0x%08X\n", i, ib->ptr[i]);
-	}
-	return 0;
-}
-
-static struct drm_info_list radeon_debugfs_ib_list[RADEON_IB_POOL_SIZE];
-static char radeon_debugfs_ib_names[RADEON_IB_POOL_SIZE][32];
-static unsigned radeon_debugfs_ib_idx[RADEON_IB_POOL_SIZE];
-
 static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct radeon_device *rdev = dev->dev_private;
 
-	radeon_sa_bo_dump_debug_info(&rdev->ib_pool.sa_manager, m);
+	radeon_sa_bo_dump_debug_info(&rdev->sa_manager, m);
 
 	return 0;
 
@@ -566,26 +480,10 @@ int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *rin
 	return 0;
 }
 
-int radeon_debugfs_ib_init(struct radeon_device *rdev)
+int radeon_debugfs_sa_init(struct radeon_device *rdev)
 {
 #if defined(CONFIG_DEBUG_FS)
-	unsigned i;
-	int r;
-
-	r = radeon_debugfs_add_files(rdev, radeon_debugfs_sa_list, 1);
-	if (r)
-		return r;
-
-	for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
-		sprintf(radeon_debugfs_ib_names[i], "radeon_ib_%04u", i);
-		radeon_debugfs_ib_idx[i] = i;
-		radeon_debugfs_ib_list[i].name = radeon_debugfs_ib_names[i];
-		radeon_debugfs_ib_list[i].show = &radeon_debugfs_ib_info;
-		radeon_debugfs_ib_list[i].driver_features = 0;
-		radeon_debugfs_ib_list[i].data = &radeon_debugfs_ib_idx[i];
-	}
-	return radeon_debugfs_add_files(rdev, radeon_debugfs_ib_list,
-					RADEON_IB_POOL_SIZE);
+	return radeon_debugfs_add_files(rdev, radeon_debugfs_sa_list, 1);
 #else
 	return 0;
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index 4603fab..a09cc05 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -42,7 +42,7 @@ int radeon_semaphore_create(struct radeon_device *rdev,
 		return -ENOMEM;
 	}
 
-	r = radeon_sa_bo_new(rdev, &rdev->ib_pool.sa_manager, 
+	r = radeon_sa_bo_new(rdev, &rdev->sa_manager, 
 			     &(*semaphore)->sa_bo, 8, 8);
 	if (r) {
 		kfree(*semaphore);
@@ -51,10 +51,10 @@ int radeon_semaphore_create(struct radeon_device *rdev,
 	}
 
 	(*semaphore)->waiters = 0;	
-	(*semaphore)->gpu_addr = rdev->ib_pool.sa_manager.gpu_addr;
+	(*semaphore)->gpu_addr = rdev->sa_manager.gpu_addr;
 	(*semaphore)->gpu_addr += (*semaphore)->sa_bo.offset;
 
-	cpu_ptr = rdev->ib_pool.sa_manager.cpu_ptr;
+	cpu_ptr = rdev->sa_manager.cpu_ptr;
 	cpu_ptr += (*semaphore)->sa_bo.offset;
 	*((uint64_t*)cpu_ptr) = 0;
 
-- 
1.7.5.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 12/13] drm/radeon: fix a bug with the ring syncing code
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
                   ` (10 preceding siblings ...)
  2012-04-19 22:39 ` [PATCH 11/13] drm/radeon: rip out the ib pool Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-24 14:04   ` Dave Airlie
  2012-04-19 22:39 ` [PATCH 13/13] drm/radeon: rework recursive gpu reset handling Christian König
  2012-04-19 23:47 ` Reworking of GPU reset logic Jerome Glisse
  13 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

Rings need to be locked in a fixed order, otherwise
the ring subsystem can deadlock.
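
The fix follows the usual lock-ordering rule: take every needed per-ring
lock in ascending ring index, emit all signal/wait packets, then unlock,
so two concurrent callers can never end up holding the locks crosswise.
A stand-alone sketch of that pattern with plain mutexes (illustrative,
not the driver's ring API):

#include <pthread.h>
#include <stdbool.h>

#define NUM_RINGS 3

static pthread_mutex_t ring_lock[NUM_RINGS] = {
	PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER,
};

static void sync_rings(const bool needed[NUM_RINGS])
{
	int i;

	for (i = 0; i < NUM_RINGS; ++i)		/* lock pass: ascending index */
		if (needed[i])
			pthread_mutex_lock(&ring_lock[i]);

	/* ... emit all semaphore signal/wait packets here ... */

	for (i = 0; i < NUM_RINGS; ++i)		/* unlock pass */
		if (needed[i])
			pthread_mutex_unlock(&ring_lock[i]);
}

int main(void)
{
	const bool needed[NUM_RINGS] = { true, false, true };

	sync_rings(needed);
	return 0;
}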

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon.h           |    4 ++
 drivers/gpu/drm/radeon/radeon_cs.c        |   33 ++++++------------
 drivers/gpu/drm/radeon/radeon_semaphore.c |   53 +++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_ttm.c       |   46 +++++++++++--------------
 4 files changed, 88 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index d46d5ac..5912cab 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -442,6 +442,10 @@ void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
 				  struct radeon_semaphore *semaphore);
 void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
 				struct radeon_semaphore *semaphore);
+int radeon_semaphore_sync_rings(struct radeon_device *rdev,
+				struct radeon_semaphore *semaphore,
+				bool sync_to[RADEON_NUM_RINGS],
+				int dst_ring);
 void radeon_semaphore_free(struct radeon_device *rdev,
 			   struct radeon_semaphore *semaphore);
 
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index e7b0b5d..de40d33 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -118,6 +118,7 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
 static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
 {
 	bool sync_to_ring[RADEON_NUM_RINGS] = { };
+	bool need_sync = false;
 	int i, r;
 
 	for (i = 0; i < p->nrelocs; i++) {
@@ -128,34 +129,22 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
 			struct radeon_fence *fence = p->relocs[i].robj->tbo.sync_obj;
 			if (!radeon_fence_signaled(fence)) {
 				sync_to_ring[fence->ring] = true;
+				need_sync = true;
 			}
 		}
 	}
 
-	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-		/* no need to sync to our own or unused rings */
-		if (i == p->ring || !sync_to_ring[i] || !p->rdev->ring[i].ready)
-			continue;
-
-		if (!p->ib->fence->semaphore) {
-			r = radeon_semaphore_create(p->rdev, &p->ib->fence->semaphore);
-			if (r)
-				return r;
-		}
-
-		r = radeon_ring_lock(p->rdev, &p->rdev->ring[i], 3);
-		if (r)
-			return r;
-		radeon_semaphore_emit_signal(p->rdev, i, p->ib->fence->semaphore);
-		radeon_ring_unlock_commit(p->rdev, &p->rdev->ring[i]);
+	if (!need_sync) {
+		return 0;
+	}
 
-		r = radeon_ring_lock(p->rdev, &p->rdev->ring[p->ring], 3);
-		if (r)
-			return r;
-		radeon_semaphore_emit_wait(p->rdev, p->ring, p->ib->fence->semaphore);
-		radeon_ring_unlock_commit(p->rdev, &p->rdev->ring[p->ring]);
+	r = radeon_semaphore_create(p->rdev, &p->ib->fence->semaphore);
+	if (r) {
+		return r;
 	}
-	return 0;
+
+	return radeon_semaphore_sync_rings(p->rdev, p->ib->fence->semaphore,
+					   sync_to_ring, p->ring);
 }
 
 int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index a09cc05..ff1ce90 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -75,6 +75,59 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
 	radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true);
 }
 
+int radeon_semaphore_sync_rings(struct radeon_device *rdev,
+				struct radeon_semaphore *semaphore,
+				bool sync_to[RADEON_NUM_RINGS],
+				int dst_ring)
+{
+	int i, r;
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		unsigned num_ops = i == dst_ring ? RADEON_NUM_RINGS : 1;
+
+		/* don't lock unused rings */
+		if (!sync_to[i] && i != dst_ring)
+			continue;
+
+		/* prevent GPU deadlocks */
+		if (!rdev->ring[i].ready) {
+			dev_err(rdev->dev, "Trying to sync to a disabled ring!\n");
+			r = -EINVAL;
+			goto error;
+		}
+
+		r = radeon_ring_lock(rdev, &rdev->ring[i], num_ops * 4);
+		if (r)
+			goto error;
+	}
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		/* no need to sync to our own or unused rings */
+		if (!sync_to[i] || i == dst_ring)
+			continue;
+
+		radeon_semaphore_emit_signal(rdev, i, semaphore);
+		radeon_semaphore_emit_wait(rdev, dst_ring, semaphore);
+	}
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+
+		/* don't unlock unused rings */
+		if (!sync_to[i] && i != dst_ring)
+			continue;
+
+		radeon_ring_unlock_commit(rdev, &rdev->ring[i]);
+	}
+
+	return 0;
+
+error:
+	/* unlock all locks taken so far */
+	for (--i; i >= 0; --i)
+		radeon_ring_unlock_undo(rdev, &rdev->ring[i]);
+	return r;
+}
+
 void radeon_semaphore_free(struct radeon_device *rdev,
 			   struct radeon_semaphore *semaphore)
 {
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index f493c64..5e3d54d 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -222,8 +222,8 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 {
 	struct radeon_device *rdev;
 	uint64_t old_start, new_start;
-	struct radeon_fence *fence;
-	int r, i;
+	struct radeon_fence *fence, *old_fence;
+	int r;
 
 	rdev = radeon_get_rdev(bo->bdev);
 	r = radeon_fence_create(rdev, &fence, radeon_copy_ring_index(rdev));
@@ -242,6 +242,7 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 		break;
 	default:
 		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
+		radeon_fence_unref(&fence);
 		return -EINVAL;
 	}
 	switch (new_mem->mem_type) {
@@ -253,42 +254,35 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 		break;
 	default:
 		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
+		radeon_fence_unref(&fence);
 		return -EINVAL;
 	}
 	if (!rdev->ring[radeon_copy_ring_index(rdev)].ready) {
 		DRM_ERROR("Trying to move memory with ring turned off.\n");
+		radeon_fence_unref(&fence);
 		return -EINVAL;
 	}
 
 	BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
 
 	/* sync other rings */
-	if (rdev->family >= CHIP_R600) {
-		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-			/* no need to sync to our own or unused rings */
-			if (i == radeon_copy_ring_index(rdev) || !rdev->ring[i].ready)
-				continue;
-
-			if (!fence->semaphore) {
-				r = radeon_semaphore_create(rdev, &fence->semaphore);
-				/* FIXME: handle semaphore error */
-				if (r)
-					continue;
-			}
+	old_fence = bo->sync_obj;
+	if (old_fence && old_fence->ring != fence->ring
+	    && !radeon_fence_signaled(old_fence)) {
+		bool sync_to_ring[RADEON_NUM_RINGS] = { };
+		sync_to_ring[old_fence->ring] = true;
+
+		r = radeon_semaphore_create(rdev, &fence->semaphore);
+		if (r) {
+			radeon_fence_unref(&fence);
+			return r;
+		}
 
-			r = radeon_ring_lock(rdev, &rdev->ring[i], 3);
-			/* FIXME: handle ring lock error */
-			if (r)
-				continue;
-			radeon_semaphore_emit_signal(rdev, i, fence->semaphore);
-			radeon_ring_unlock_commit(rdev, &rdev->ring[i]);
-
-			r = radeon_ring_lock(rdev, &rdev->ring[radeon_copy_ring_index(rdev)], 3);
-			/* FIXME: handle ring lock error */
-			if (r)
-				continue;
-			radeon_semaphore_emit_wait(rdev, radeon_copy_ring_index(rdev), fence->semaphore);
-			radeon_ring_unlock_commit(rdev, &rdev->ring[radeon_copy_ring_index(rdev)]);
+		r = radeon_semaphore_sync_rings(rdev, fence->semaphore,
+						sync_to_ring, fence->ring);
+		if (r) {
+			radeon_fence_unref(&fence);
+			return r;
 		}
 	}
 
-- 
1.7.5.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* [PATCH 13/13] drm/radeon: rework recursive gpu reset handling
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
                   ` (11 preceding siblings ...)
  2012-04-19 22:39 ` [PATCH 12/13] drm/radeon: fix a bug with the ring syncing code Christian König
@ 2012-04-19 22:39 ` Christian König
  2012-04-20  6:57   ` Dave Airlie
  2012-04-19 23:47 ` Reworking of GPU reset logic Jerome Glisse
  13 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2012-04-19 22:39 UTC (permalink / raw)
  To: dri-devel; +Cc: Christian König

Instead of all this back and forth with a recursive
mutex (which also fixes only half of the problem),
move the actual GPU reset out of the fence code,
return -EDEADLK and then reset the GPU in the
calling ioctl function.
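
The result is a clean split: the low-level fence wait only reports the
lockup, and the ioctl entry points become the single place that reacts
to it. A stand-alone sketch of that split (illustrative functions, not
the driver's):

#include <errno.h>
#include <stdio.h>

/* lowest layer: detect the lockup, but do not try to fix it here */
static int wait_for_fence(int gpu_locked_up)
{
	return gpu_locked_up ? -EDEADLK : 0;
}

/* ioctl layer: the one place that performs the reset */
static int handle_lockup(int r)
{
	if (r == -EDEADLK) {
		printf("resetting the GPU from the ioctl path\n");
		r = 0;		/* stands in for a successful radeon_gpu_reset() */
	}
	return r;
}

int main(void)
{
	return handle_lockup(wait_for_fence(1));
}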

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/radeon.h        |   44 +-------------------------------
 drivers/gpu/drm/radeon/radeon_cs.c     |   21 +++++++++++---
 drivers/gpu/drm/radeon/radeon_device.c |    7 +----
 drivers/gpu/drm/radeon/radeon_fence.c  |   10 ++-----
 drivers/gpu/drm/radeon/radeon_gart.c   |   16 ++++++------
 drivers/gpu/drm/radeon/radeon_gem.c    |   14 ++++++++++
 6 files changed, 43 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5912cab..aadc334 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -155,48 +155,6 @@ static inline int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len)
 #endif
 bool radeon_get_bios(struct radeon_device *rdev);
 
-
-/*
- * Mutex which allows recursive locking from the same process.
- */
-struct radeon_mutex {
-	struct mutex		mutex;
-	struct task_struct	*owner;
-	int			level;
-};
-
-static inline void radeon_mutex_init(struct radeon_mutex *mutex)
-{
-	mutex_init(&mutex->mutex);
-	mutex->owner = NULL;
-	mutex->level = 0;
-}
-
-static inline void radeon_mutex_lock(struct radeon_mutex *mutex)
-{
-	if (mutex_trylock(&mutex->mutex)) {
-		/* The mutex was unlocked before, so it's ours now */
-		mutex->owner = current;
-	} else if (mutex->owner != current) {
-		/* Another process locked the mutex, take it */
-		mutex_lock(&mutex->mutex);
-		mutex->owner = current;
-	}
-	/* Otherwise the mutex was already locked by this process */
-
-	mutex->level++;
-}
-
-static inline void radeon_mutex_unlock(struct radeon_mutex *mutex)
-{
-	if (--mutex->level > 0)
-		return;
-
-	mutex->owner = NULL;
-	mutex_unlock(&mutex->mutex);
-}
-
-
 /*
  * Dummy page
  */
@@ -1497,7 +1455,7 @@ struct radeon_device {
 	struct radeon_gem		gem;
 	struct radeon_pm		pm;
 	uint32_t			bios_scratch[RADEON_BIOS_NUM_SCRATCH];
-	struct radeon_mutex		cs_mutex;
+	struct mutex			cs_mutex;
 	struct radeon_wb		wb;
 	struct radeon_dummy_page	dummy_page;
 	bool				shutdown;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index de40d33..2486b21 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -496,15 +496,23 @@ out:
 	return r;
 }
 
+static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
+{
+	if (r == -EDEADLK) {
+		r = radeon_gpu_reset(rdev);
+	}
+	return r;
+}
+
 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_cs_parser parser;
 	int r;
 
-	radeon_mutex_lock(&rdev->cs_mutex);
+	mutex_lock(&rdev->cs_mutex);
 	if (!rdev->accel_working) {
-		radeon_mutex_unlock(&rdev->cs_mutex);
+		mutex_unlock(&rdev->cs_mutex);
 		return -EBUSY;
 	}
 	/* initialize parser */
@@ -517,7 +525,8 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	if (r) {
 		DRM_ERROR("Failed to initialize parser !\n");
 		radeon_cs_parser_fini(&parser, r);
-		radeon_mutex_unlock(&rdev->cs_mutex);
+		r = radeon_cs_handle_lockup(rdev, r);
+		mutex_unlock(&rdev->cs_mutex);
 		return r;
 	}
 	r = radeon_cs_parser_relocs(&parser);
@@ -525,7 +534,8 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		if (r != -ERESTARTSYS)
 			DRM_ERROR("Failed to parse relocation %d!\n", r);
 		radeon_cs_parser_fini(&parser, r);
-		radeon_mutex_unlock(&rdev->cs_mutex);
+		r = radeon_cs_handle_lockup(rdev, r);
+		mutex_unlock(&rdev->cs_mutex);
 		return r;
 	}
 	r = radeon_cs_ib_chunk(rdev, &parser);
@@ -538,7 +548,8 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	}
 out:
 	radeon_cs_parser_fini(&parser, r);
-	radeon_mutex_unlock(&rdev->cs_mutex);
+	r = radeon_cs_handle_lockup(rdev, r);
+	mutex_unlock(&rdev->cs_mutex);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 9189f8d..04f3f40 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -722,7 +722,7 @@ int radeon_device_init(struct radeon_device *rdev,
 
 	/* mutex initialization are all done here so we
 	 * can recall function without having locking issues */
-	radeon_mutex_init(&rdev->cs_mutex);
+	mutex_init(&rdev->cs_mutex);
 	for (i = 0; i < RADEON_NUM_RINGS; ++i)
 		mutex_init(&rdev->ring[i].mutex);
 	mutex_init(&rdev->dc_hw_i2c_mutex);
@@ -983,9 +983,6 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 	int r;
 	int resched;
 
-	/* Prevent CS ioctl from interfering */
-	radeon_mutex_lock(&rdev->cs_mutex);
-
 	radeon_save_bios_scratch_regs(rdev);
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
@@ -1000,8 +997,6 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 		ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
 	}
 
-	radeon_mutex_unlock(&rdev->cs_mutex);
-
 	if (r) {
 		/* bad news, how to tell it to userspace ? */
 		dev_info(rdev->dev, "GPU reset failed\n");
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 0e8ac35..ea7aee3 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -267,6 +267,8 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 			/* change sequence value on all rings, so nobody else things there is a lockup */
 			for (i = 0; i < RADEON_NUM_RINGS; ++i)
 				rdev->fence_drv[i].last_seq -= 0x10000;
+
+			rdev->fence_drv[fence->ring].last_activity = jiffies;
 			write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 
 			if (radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
@@ -277,13 +279,7 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 
 				/* mark the ring as not ready any more */
 				rdev->ring[fence->ring].ready = false;
-				r = radeon_gpu_reset(rdev);
-				if (r)
-					return r;
-
-				write_lock_irqsave(&rdev->fence_lock, irq_flags);
-				rdev->fence_drv[fence->ring].last_activity = jiffies;
-				write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+				return -EDEADLK;
 			}
 		}
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 0ab3277..450ae77 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -356,13 +356,13 @@ int radeon_vm_manager_suspend(struct radeon_device *rdev)
 {
 	struct radeon_vm *vm, *tmp;
 
-	radeon_mutex_lock(&rdev->cs_mutex);
+	mutex_lock(&rdev->cs_mutex);
 	/* unbind all active vm */
 	list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
 		radeon_vm_unbind_locked(rdev, vm);
 	}
 	rdev->vm_manager.funcs->fini(rdev);
-	radeon_mutex_unlock(&rdev->cs_mutex);
+	mutex_unlock(&rdev->cs_mutex);
 	return radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
 }
 
@@ -480,9 +480,9 @@ int radeon_vm_bo_add(struct radeon_device *rdev,
 	if (last_pfn > vm->last_pfn) {
 		/* grow va space 32M by 32M */
 		unsigned align = ((32 << 20) >> 12) - 1;
-		radeon_mutex_lock(&rdev->cs_mutex);
+		mutex_lock(&rdev->cs_mutex);
 		radeon_vm_unbind_locked(rdev, vm);
-		radeon_mutex_unlock(&rdev->cs_mutex);
+		mutex_unlock(&rdev->cs_mutex);
 		vm->last_pfn = (last_pfn + align) & ~align;
 	}
 	head = &vm->va;
@@ -598,9 +598,9 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 		return 0;
 
 	mutex_lock(&vm->mutex);
-	radeon_mutex_lock(&rdev->cs_mutex);
+	mutex_lock(&rdev->cs_mutex);
 	radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
-	radeon_mutex_unlock(&rdev->cs_mutex);
+	mutex_unlock(&rdev->cs_mutex);
 	list_del(&bo_va->vm_list);
 	mutex_unlock(&vm->mutex);
 	list_del(&bo_va->bo_list);
@@ -645,9 +645,9 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 
 	mutex_lock(&vm->mutex);
 
-	radeon_mutex_lock(&rdev->cs_mutex);
+	mutex_lock(&rdev->cs_mutex);
 	radeon_vm_unbind_locked(rdev, vm);
-	radeon_mutex_unlock(&rdev->cs_mutex);
+	mutex_unlock(&rdev->cs_mutex);
 
 	/* remove all bo */
 	r = radeon_bo_reserve(rdev->sa_manager.bo, false);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index c7008b5..f65377f 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -154,6 +154,15 @@ void radeon_gem_object_close(struct drm_gem_object *obj,
 	radeon_bo_unreserve(rbo);
 }
 
+static int radeon_gem_handle_lockup(struct radeon_device *rdev, int r)
+{
+	if (r == -EDEADLK) {
+		mutex_lock(&rdev->cs_mutex);
+		r = radeon_gpu_reset(rdev);
+		mutex_unlock(&rdev->cs_mutex);
+	}
+	return r;
+}
 
 /*
  * GEM ioctls.
@@ -210,12 +219,14 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
 					args->initial_domain, false,
 					false, &gobj);
 	if (r) {
+		r = radeon_gem_handle_lockup(rdev, r);
 		return r;
 	}
 	r = drm_gem_handle_create(filp, gobj, &handle);
 	/* drop reference from allocate - handle holds it now */
 	drm_gem_object_unreference_unlocked(gobj);
 	if (r) {
+		r = radeon_gem_handle_lockup(rdev, r);
 		return r;
 	}
 	args->handle = handle;
@@ -245,6 +256,7 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain);
 
 	drm_gem_object_unreference_unlocked(gobj);
+	r = radeon_gem_handle_lockup(robj->rdev, r);
 	return r;
 }
 
@@ -301,6 +313,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 	drm_gem_object_unreference_unlocked(gobj);
+	r = radeon_gem_handle_lockup(robj->rdev, r);
 	return r;
 }
 
@@ -322,6 +335,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 	if (robj->rdev->asic->ioctl_wait_idle)
 		robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj);
 	drm_gem_object_unreference_unlocked(gobj);
+	r = radeon_gem_handle_lockup(robj->rdev, r);
 	return r;
 }
 
-- 
1.7.5.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 36+ messages in thread

* Re: Reworking of GPU reset logic
  2012-04-19 22:39 Reworking of GPU reset logic Christian König
                   ` (12 preceding siblings ...)
  2012-04-19 22:39 ` [PATCH 13/13] drm/radeon: rework recursive gpu reset handling Christian König
@ 2012-04-19 23:47 ` Jerome Glisse
  2012-04-21  9:42   ` Christian König
  13 siblings, 1 reply; 36+ messages in thread
From: Jerome Glisse @ 2012-04-19 23:47 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

2012/4/19 Christian König <deathsimple@vodafone.de>:
> This includes mostly fixes for multi ring lockups and GPU resets, but it should general improve the behavior of the kernel mode driver in case something goes badly wrong.
>
> On the other hand it completely rewrites the IB pool and semaphore handling, so I think there are still a couple of problems in it.
>
> The first four patches were already send to the list, but the current set depends on them so I resend them again.
>
> Cheers,
> Christian.

I did a quick review, and it looks mostly good, but as it's sensitive code
I would like to spend some time on it, probably next week. Note that I had
some work in this area too; I mostly want to drop all the debugfs files
related to this and add some new, more useful ones (basically something
that allows you to read all the data needed to replay a locked-up IB).
I was also looking into Dave's reset thread, and your solution of moving
the reset into the ioctl return path sounds good too, but I need to
convince myself that it covers all possible cases.

Cheers,
Jerome

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 13/13] drm/radeon: rework recursive gpu reset handling
  2012-04-19 22:39 ` [PATCH 13/13] drm/radeon: rework recursive gpu reset handling Christian König
@ 2012-04-20  6:57   ` Dave Airlie
  2012-04-20  7:50     ` Daniel Vetter
  0 siblings, 1 reply; 36+ messages in thread
From: Dave Airlie @ 2012-04-20  6:57 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

2012/4/19 Christian König <deathsimple@vodafone.de>:
> Instead of all this humpy pumpy with recursive
> mutex (which also fixes only halve of the problem)
> move the actual gpu reset out of the fence code,
> return -EDEADLK and then reset the gpu in the
> calling ioctl function.

I'm trying to figure out if this has any disadvantages over doing what
I proposed before and just kicking a thread to reset the gpu.

It seems like this should also avoid the locking problems. I'd like to
make sure we don't return -EDEADLK to userspace by accident anywhere,
since I don't think it's prepared for it and that would be an ABI change.

Dave.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/13] drm/radeon: return -ENOENT in fence_wait_*
  2012-04-19 22:39 ` [PATCH 10/13] drm/radeon: return -ENOENT in fence_wait_* Christian König
@ 2012-04-20  7:20   ` Michel Dänzer
  2012-04-20  8:49     ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Michel Dänzer @ 2012-04-20  7:20 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Fre, 2012-04-20 at 00:39 +0200, Christian König wrote: 
> Signed-off-by: Christian König <deathsimple@vodafone.de>
> ---
>  drivers/gpu/drm/radeon/radeon_fence.c |    4 ++--
>  1 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
> index 1a9765a..764ab7e 100644
> --- a/drivers/gpu/drm/radeon/radeon_fence.c
> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
> @@ -286,7 +286,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
>  	}
>  	if (list_empty(&rdev->fence_drv[ring].emitted)) {
>  		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
> -		return 0;
> +		return -ENOENT;
>  	}
>  	fence = list_entry(rdev->fence_drv[ring].emitted.next,
>  			   struct radeon_fence, list);
> @@ -310,7 +310,7 @@ int radeon_fence_wait_last(struct radeon_device *rdev, int ring)
>  	}
>  	if (list_empty(&rdev->fence_drv[ring].emitted)) {
>  		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
> -		return 0;
> +		return -ENOENT;
>  	}
>  	fence = list_entry(rdev->fence_drv[ring].emitted.prev,
>  			   struct radeon_fence, list);

It seems weird to declare a fence wait as failed when there are no
outstanding fences in the first place. If there are callers which
require outstanding fences, they should probably handle that themselves.


-- 
Earthling Michel Dänzer           |                   http://www.amd.com
Libre software enthusiast         |          Debian, X and DRI developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 06/13] drm/radeon: improve sub allocator
  2012-04-19 22:39 ` [PATCH 06/13] drm/radeon: improve sub allocator Christian König
@ 2012-04-20  7:24   ` Michel Dänzer
  2012-04-20  9:11     ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Michel Dänzer @ 2012-04-20  7:24 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Fre, 2012-04-20 at 00:39 +0200, Christian König wrote: 
> Make the suballocator self containing to locking
> and fix a overrun bug which happens with
> allocations of different alignments.

Sounds like this should be split up into two changes. :)


-- 
Earthling Michel Dänzer           |                   http://www.amd.com
Libre software enthusiast         |          Debian, X and DRI developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 13/13] drm/radeon: rework recursive gpu reset handling
  2012-04-20  6:57   ` Dave Airlie
@ 2012-04-20  7:50     ` Daniel Vetter
  2012-04-20  9:38       ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Daniel Vetter @ 2012-04-20  7:50 UTC (permalink / raw)
  To: Dave Airlie; +Cc: Christian König, dri-devel

On Fri, Apr 20, 2012 at 07:57:09AM +0100, Dave Airlie wrote:
> 2012/4/19 Christian König <deathsimple@vodafone.de>:
> > Instead of all this humpy pumpy with recursive
> > mutex (which also fixes only halve of the problem)
> > move the actual gpu reset out of the fence code,
> > return -EDEADLK and then reset the gpu in the
> > calling ioctl function.
> 
> I'm trying to figure out if this has any disadvantages over doing what
> I proposed before and just kicking a thread to reset the gpu.
> 
> It seems like this should also avoid the locking problems, I'd like to
> make sure we don't return -EDEADLK to userspace by accident anywhere,
> since I don't think it prepared for it and it would be an ABI change.

FYI, the trick i915 uses to solve the reset problem is to bail out with
-EAGAIN and rely on drmIoctl restarting the ioctl. This way we use the
same codepaths we use to bail out when getting a signal, and thanks to X
these are rather well tested. The hangcheck code also fires off a work item to
do all the reset magic. In all the ioctls that might wait for the GPU we
have a fancy piece of code which checks whether a GPU reset is pending,
and if so waits for that to complete. It also checks whether the reset
succeeded and if not bails out with -EIO.
-Daniel
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
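
To make the flow Daniel describes easier to follow, here is a minimal,
purely hypothetical sketch of such a wait helper. All names, structures
and fields are invented for illustration; this is not the actual i915 code.

#include <linux/wait.h>
#include <linux/atomic.h>
#include <linux/errno.h>

/* Illustrative device state only; not the real i915 structures. */
struct example_device {
	atomic_t reset_pending;		/* set by hangcheck, cleared by the reset worker */
	wait_queue_head_t reset_queue;	/* woken when the reset work item finishes */
	bool wedged;			/* true if the reset failed */
};

/*
 * Called at the start of any ioctl that might wait for the GPU: if a
 * reset is pending, wait for the reset worker to finish and report -EIO
 * if recovery failed.  Waits interrupted by the hang itself would bail
 * out with -EAGAIN so that drmIoctl() in userspace restarts the ioctl.
 */
static int example_wait_for_pending_reset(struct example_device *dev)
{
	if (!atomic_read(&dev->reset_pending))
		return 0;

	if (wait_event_interruptible(dev->reset_queue,
				     !atomic_read(&dev->reset_pending)))
		return -ERESTARTSYS;

	return dev->wedged ? -EIO : 0;
}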

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/13] drm/radeon: return -ENOENT in fence_wait_*
  2012-04-20  7:20   ` Michel Dänzer
@ 2012-04-20  8:49     ` Christian König
  2012-04-20  9:15       ` Michel Dänzer
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2012-04-20  8:49 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: dri-devel

On 20.04.2012 09:20, Michel Dänzer wrote:
> On Fre, 2012-04-20 at 00:39 +0200, Christian König wrote:
>> Signed-off-by: Christian König<deathsimple@vodafone.de>
>> ---
>>   drivers/gpu/drm/radeon/radeon_fence.c |    4 ++--
>>   1 files changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
>> index 1a9765a..764ab7e 100644
>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>> @@ -286,7 +286,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
>>   	}
>>   	if (list_empty(&rdev->fence_drv[ring].emitted)) {
>>   		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
>> -		return 0;
>> +		return -ENOENT;
>>   	}
>>   	fence = list_entry(rdev->fence_drv[ring].emitted.next,
>>   			   struct radeon_fence, list);
>> @@ -310,7 +310,7 @@ int radeon_fence_wait_last(struct radeon_device *rdev, int ring)
>>   	}
>>   	if (list_empty(&rdev->fence_drv[ring].emitted)) {
>>   		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
>> -		return 0;
>> +		return -ENOENT;
>>   	}
>>   	fence = list_entry(rdev->fence_drv[ring].emitted.prev,
>>   			   struct radeon_fence, list);
> It seems weird to declare a fence wait as failed when there are no
> outstanding fences in the first place. If there are callers which
> require outstanding fences, they should probably handle that themselves.

Why does that sound so weird? OK, maybe for radeon_fence_wait_last that's 
questionable, but for radeon_fence_wait_next it's quite clear to me that 
we should signal the caller that there is no fence to wait for.

The problem I wanted to fix with that is the usage of 
radeon_fence_wait_next in radeon_ring_alloc (for example):
> int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring 
> *ring, unsigned ndw)
> {
>         int r;
>
>         /* Align requested size with padding so unlock_commit can
>          * pad safely */
>         ndw = (ndw + ring->align_mask) & ~ring->align_mask;
>         while (ndw > (ring->ring_free_dw - 1)) {
>                 radeon_ring_free_size(rdev, ring);
>                 if (ndw < ring->ring_free_dw) {
>                         break;
>                 }
>                 r = radeon_fence_wait_next(rdev, 
> radeon_ring_index(rdev, ring));
>                 if (r)
>                         return r;
>         }
>         ring->count_dw = ndw;
>         ring->wptr_old = ring->wptr;
>         return 0;
> }
If the ring is full but actually has no more fences in it (which in my 
case was caused by my own stupidity and shouldn't happen otherwise), this 
loop will just busy-wait with a critical mutex held for something that 
never happens.

It shouldn't actually make any difference for real-world applications, 
but it prevents the users of those functions from doing something stupid.

Christian.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 06/13] drm/radeon: improve sub allocator
  2012-04-20  7:24   ` Michel Dänzer
@ 2012-04-20  9:11     ` Christian König
  0 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-20  9:11 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: dri-devel

On 20.04.2012 09:24, Michel Dänzer wrote:
> On Fre, 2012-04-20 at 00:39 +0200, Christian König wrote:
>> Make the suballocator self containing to locking
>> and fix a overrun bug which happens with
>> allocations of different alignments.
> Sounds like this should be split up into two changes. :)

Yeah, you are probably right.

But wait a moment: thinking about it some more, the overrun bug is 
actually quite a critical bug in mainline, because it quickly leads to 
memory corruption if two users of the SA have different alignment 
restrictions. And hell, there are currently at least the IB pool (256 
bytes) and the VM code (4096 bytes alignment) using it, so this could be 
responsible for some of the VM corruptions we are seeing!!!

Going to write a patch immediately,
Christian.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/13] drm/radeon: return -ENOENT in fence_wait_*
  2012-04-20  8:49     ` Christian König
@ 2012-04-20  9:15       ` Michel Dänzer
  2012-04-20 10:24         ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Michel Dänzer @ 2012-04-20  9:15 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Fre, 2012-04-20 at 10:49 +0200, Christian König wrote: 
> On 20.04.2012 09:20, Michel Dänzer wrote:
> > On Fre, 2012-04-20 at 00:39 +0200, Christian König wrote:
> >> Signed-off-by: Christian König<deathsimple@vodafone.de>
> >> ---
> >>   drivers/gpu/drm/radeon/radeon_fence.c |    4 ++--
> >>   1 files changed, 2 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
> >> index 1a9765a..764ab7e 100644
> >> --- a/drivers/gpu/drm/radeon/radeon_fence.c
> >> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
> >> @@ -286,7 +286,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
> >>   	}
> >>   	if (list_empty(&rdev->fence_drv[ring].emitted)) {
> >>   		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
> >> -		return 0;
> >> +		return -ENOENT;
> >>   	}
> >>   	fence = list_entry(rdev->fence_drv[ring].emitted.next,
> >>   			   struct radeon_fence, list);
> >> @@ -310,7 +310,7 @@ int radeon_fence_wait_last(struct radeon_device *rdev, int ring)
> >>   	}
> >>   	if (list_empty(&rdev->fence_drv[ring].emitted)) {
> >>   		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
> >> -		return 0;
> >> +		return -ENOENT;
> >>   	}
> >>   	fence = list_entry(rdev->fence_drv[ring].emitted.prev,
> >>   			   struct radeon_fence, list);
> > It seems weird to declare a fence wait as failed when there are no
> > outstanding fences in the first place. If there are callers which
> > require outstanding fences, they should probably handle that themselves.
> 
> Why that sounds so weird? Ok, maybe for radeon_fence_wait_last that's 
> questionable,

Indeed. It happens not to break radeon_suspend_kms because it doesn't
check the return value, but otherwise it would fail spuriously.


> but for radeon_fence_wait_next it's quite clear to me that 
> we should signal the caller that there is no fence to wait for.
> 
> The problem I wanted to fix with that is the usage of 
> radeon_fence_wait_next in radeon_ring_alloc (for example):
> > int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring 
> > *ring, unsigned ndw)
> > {
> >         int r;
> >
> >         /* Align requested size with padding so unlock_commit can
> >          * pad safely */
> >         ndw = (ndw + ring->align_mask) & ~ring->align_mask;
> >         while (ndw > (ring->ring_free_dw - 1)) {
> >                 radeon_ring_free_size(rdev, ring);
> >                 if (ndw < ring->ring_free_dw) {
> >                         break;
> >                 }
> >                 r = radeon_fence_wait_next(rdev, 
> > radeon_ring_index(rdev, ring));
> >                 if (r)
> >                         return r;
> >         }
> >         ring->count_dw = ndw;
> >         ring->wptr_old = ring->wptr;
> >         return 0;
> > }
> If the ring is full, but actually has no more fences in it (which in my 
> case was caused by my stupidity and actually shouldn't happen otherwise) 
> this loop will just busy wait with a critical mutex locked for something 
> that never happens.

My suggestion was to explicitly check for that in radeon_ring_alloc. But
I guess right now it doesn't really matter, as it's the only caller. :)


-- 
Earthling Michel Dänzer           |                   http://www.amd.com
Libre software enthusiast         |          Debian, X and DRI developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 13/13] drm/radeon: rework recursive gpu reset handling
  2012-04-20  7:50     ` Daniel Vetter
@ 2012-04-20  9:38       ` Christian König
  0 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-20  9:38 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: dri-devel

On 20.04.2012 09:50, Daniel Vetter wrote:
> On Fri, Apr 20, 2012 at 07:57:09AM +0100, Dave Airlie wrote:
>> 2012/4/19 Christian König<deathsimple@vodafone.de>:
>>> Instead of all this humpy pumpy with recursive
>>> mutex (which also fixes only halve of the problem)
>>> move the actual gpu reset out of the fence code,
>>> return -EDEADLK and then reset the gpu in the
>>> calling ioctl function.
>> I'm trying to figure out if this has any disadvantages over doing what
>> I proposed before and just kicking a thread to reset the gpu.
>>
>> It seems like this should also avoid the locking problems, I'd like to
>> make sure we don't return -EDEADLK to userspace by accident anywhere,
>> since I don't think it prepared for it and it would be an ABI change.
> Fyi, the trick i915 uses to solve the reset problem is to bail out with
> -EAGAIN and rely on drmIOCtl restarting the ioctl. This way we use the
> same codepaths we use to bail out when getting a signal, and thanks to X
> these are rather well-tested. The hangcheck code also fires of a work item to
> do all the reset magic. In all the ioctls that might wait for the gpu we
> have a fancy piece of code which checks whether a gpu reset is pending,
> and if so waits for that to complete. It also checks whether the reset
> succeeded and if not bails out with -EIO.
> -Daniel
Well, I considered using an asynchronous work item as well, but didn't 
know how to properly prevent multiple GPU resets at the same time, signal 
the result back to the ioctls, etc. It just seemed to be more complicated 
without any real benefit (except maybe that you don't have to check every 
ioctl result separately, but there are not so many).

Also, I didn't know how to tell userspace to retry the current operation, 
but if it's already prepared for -EAGAIN then that sounds like the proper 
solution here.

And regarding returning -EDEADLK to userspace: I think I handle every 
ioctl that could cause the lockup detection to run, but checking that 
again won't hurt.

Christian.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/13] drm/radeon: return -ENOENT in fence_wait_*
  2012-04-20  9:15       ` Michel Dänzer
@ 2012-04-20 10:24         ` Christian König
  2012-04-20 11:30           ` Michel Dänzer
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2012-04-20 10:24 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: dri-devel

On 20.04.2012 11:15, Michel Dänzer wrote:
> On Fre, 2012-04-20 at 10:49 +0200, Christian König wrote:
>> On 20.04.2012 09:20, Michel Dänzer wrote:
>>> On Fre, 2012-04-20 at 00:39 +0200, Christian König wrote:
>>>> Signed-off-by: Christian König<deathsimple@vodafone.de>
>>>> ---
>>>>    drivers/gpu/drm/radeon/radeon_fence.c |    4 ++--
>>>>    1 files changed, 2 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
>>>> index 1a9765a..764ab7e 100644
>>>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
>>>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
>>>> @@ -286,7 +286,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
>>>>    	}
>>>>    	if (list_empty(&rdev->fence_drv[ring].emitted)) {
>>>>    		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
>>>> -		return 0;
>>>> +		return -ENOENT;
>>>>    	}
>>>>    	fence = list_entry(rdev->fence_drv[ring].emitted.next,
>>>>    			   struct radeon_fence, list);
>>>> @@ -310,7 +310,7 @@ int radeon_fence_wait_last(struct radeon_device *rdev, int ring)
>>>>    	}
>>>>    	if (list_empty(&rdev->fence_drv[ring].emitted)) {
>>>>    		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
>>>> -		return 0;
>>>> +		return -ENOENT;
>>>>    	}
>>>>    	fence = list_entry(rdev->fence_drv[ring].emitted.prev,
>>>>    			   struct radeon_fence, list);
>>> It seems weird to declare a fence wait as failed when there are no
>>> outstanding fences in the first place. If there are callers which
>>> require outstanding fences, they should probably handle that themselves..
>> Why that sounds so weird? Ok, maybe for radeon_fence_wait_last that's
>> questionable,
> Indeed. It happens not to break radeon_suspend_kms because it doesn't
> check the return value, but otherwise it would fail spuriously.
>
>
>> but for radeon_fence_wait_next it's quite clear to me that
>> we should signal the caller that there is no fence to wait for.
>>
>> The problem I wanted to fix with that is the usage of
>> radeon_fence_wait_next in radeon_ring_alloc (for example):
>>> int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring
>>> *ring, unsigned ndw)
>>> {
>>>          int r;
>>>
>>>          /* Align requested size with padding so unlock_commit can
>>>           * pad safely */
>>>          ndw = (ndw + ring->align_mask)&  ~ring->align_mask;
>>>          while (ndw>  (ring->ring_free_dw - 1)) {
>>>                  radeon_ring_free_size(rdev, ring);
>>>                  if (ndw<  ring->ring_free_dw) {
>>>                          break;
>>>                  }
>>>                  r = radeon_fence_wait_next(rdev,
>>> radeon_ring_index(rdev, ring));
>>>                  if (r)
>>>                          return r;
>>>          }
>>>          ring->count_dw = ndw;
>>>          ring->wptr_old = ring->wptr;
>>>          return 0;
>>> }
>> If the ring is full, but actually has no more fences in it (which in my
>> case was caused by my stupidity and actually shouldn't happen otherwise)
>> this loop will just busy wait with a critical mutex locked for something
>> that never happens.
> My suggestion was to explicitly check for that in radeon_ring_alloc. But
> I guess right now it doesn't really matter, as it's the only caller. :)
Yeah, but when we check that explicitly we need to call into the fence 
code twice, without locking in between, so the result of the first call 
could change before the second call happens, etc. ... well, that's just crap.

So what do you think of this: just add the -ENOENT to fence_wait_next 
and rename fence_wait_last to fence_wait_empty instead?

I'm just trying to program defensively here, making it as robust as 
possible against both technical and human errors.

Christian.





_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 10/13] drm/radeon: return -ENOENT in fence_wait_*
  2012-04-20 10:24         ` Christian König
@ 2012-04-20 11:30           ` Michel Dänzer
  0 siblings, 0 replies; 36+ messages in thread
From: Michel Dänzer @ 2012-04-20 11:30 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Fre, 2012-04-20 at 12:24 +0200, Christian König wrote: 
> On 20.04.2012 11:15, Michel Dänzer wrote:
> > On Fre, 2012-04-20 at 10:49 +0200, Christian König wrote:
> >> On 20.04.2012 09:20, Michel Dänzer wrote:
> >>> On Fre, 2012-04-20 at 00:39 +0200, Christian König wrote:
> >>>> Signed-off-by: Christian König<deathsimple@vodafone.de>
> >>>> ---
> >>>>    drivers/gpu/drm/radeon/radeon_fence.c |    4 ++--
> >>>>    1 files changed, 2 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
> >>>> index 1a9765a..764ab7e 100644
> >>>> --- a/drivers/gpu/drm/radeon/radeon_fence.c
> >>>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c
> >>>> @@ -286,7 +286,7 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
> >>>>    	}
> >>>>    	if (list_empty(&rdev->fence_drv[ring].emitted)) {
> >>>>    		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
> >>>> -		return 0;
> >>>> +		return -ENOENT;
> >>>>    	}
> >>>>    	fence = list_entry(rdev->fence_drv[ring].emitted.next,
> >>>>    			   struct radeon_fence, list);
> >>>> @@ -310,7 +310,7 @@ int radeon_fence_wait_last(struct radeon_device *rdev, int ring)
> >>>>    	}
> >>>>    	if (list_empty(&rdev->fence_drv[ring].emitted)) {
> >>>>    		write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
> >>>> -		return 0;
> >>>> +		return -ENOENT;
> >>>>    	}
> >>>>    	fence = list_entry(rdev->fence_drv[ring].emitted.prev,
> >>>>    			   struct radeon_fence, list);
> >>> It seems weird to declare a fence wait as failed when there are no
> >>> outstanding fences in the first place. If there are callers which
> >>> require outstanding fences, they should probably handle that themselves..
> >> Why that sounds so weird? Ok, maybe for radeon_fence_wait_last that's
> >> questionable,
> > Indeed. It happens not to break radeon_suspend_kms because it doesn't
> > check the return value, but otherwise it would fail spuriously.
> >
> >
> >> but for radeon_fence_wait_next it's quite clear to me that
> >> we should signal the caller that there is no fence to wait for.
> >>
> >> The problem I wanted to fix with that is the usage of
> >> radeon_fence_wait_next in radeon_ring_alloc (for example):
> >>> int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring
> >>> *ring, unsigned ndw)
> >>> {
> >>>          int r;
> >>>
> >>>          /* Align requested size with padding so unlock_commit can
> >>>           * pad safely */
> >>>          ndw = (ndw + ring->align_mask)&  ~ring->align_mask;
> >>>          while (ndw>  (ring->ring_free_dw - 1)) {
> >>>                  radeon_ring_free_size(rdev, ring);
> >>>                  if (ndw<  ring->ring_free_dw) {
> >>>                          break;
> >>>                  }
> >>>                  r = radeon_fence_wait_next(rdev,
> >>> radeon_ring_index(rdev, ring));
> >>>                  if (r)
> >>>                          return r;
> >>>          }
> >>>          ring->count_dw = ndw;
> >>>          ring->wptr_old = ring->wptr;
> >>>          return 0;
> >>> }
> >> If the ring is full, but actually has no more fences in it (which in my
> >> case was caused by my stupidity and actually shouldn't happen otherwise)
> >> this loop will just busy wait with a critical mutex locked for something
> >> that never happens.
> > My suggestion was to explicitly check for that in radeon_ring_alloc. But
> > I guess right now it doesn't really matter, as it's the only caller. :)
> Yeah, but when we check that explicitly we need to call into the fence 
> code twice, without locking in between, so the result of the first call 
> could change before the second call happens etc... well that's just crap.
> 
> So what do you think of this: Just add the -ENOENT to fence_wait_next 
> and rename fence_wait_last to fence_wait_empty instead?

Sounds good.


-- 
Earthling Michel Dänzer           |                   http://www.amd.com
Libre software enthusiast         |          Debian, X and DRI developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: Reworking of GPU reset logic
  2012-04-19 23:47 ` Reworking of GPU reset logic Jerome Glisse
@ 2012-04-21  9:42   ` Christian König
  2012-04-21 14:14     ` Jerome Glisse
  2012-04-23  7:40     ` Michel Dänzer
  0 siblings, 2 replies; 36+ messages in thread
From: Christian König @ 2012-04-21  9:42 UTC (permalink / raw)
  To: Jerome Glisse; +Cc: dri-devel

On 20.04.2012 01:47, Jerome Glisse wrote:
> 2012/4/19 Christian König<deathsimple@vodafone.de>:
>> This includes mostly fixes for multi ring lockups and GPU resets, but it should general improve the behavior of the kernel mode driver in case something goes badly wrong.
>>
>> On the other hand it completely rewrites the IB pool and semaphore handling, so I think there are still a couple of problems in it.
>>
>> The first four patches were already send to the list, but the current set depends on them so I resend them again.
>>
>> Cheers,
>> Christian.
> I did a quick review, it looks mostly good, but as it's sensitive code
> i would like to spend sometime on
> it. Probably next week. Note that i had some work on this area too, i
> mostly want to drop all the debugfs
> related to this and add some new more usefull (basicly something that
> allow you to read all the data
> needed to replay a locking up ib). I also was looking into Dave reset
> thread and your solution of moving
> reset in ioctl return path sounds good too but i need to convince my
> self that it encompass all possible
> case.
>
> Cheers,
> Jerome
>
After sleeping a night over it I have already reworked the patch for 
improving the SA performance, so please wait at least for v2 before 
taking a look at it :)

Regarding the debugging of lockups I had the following on my "in mind 
todo" list:
1. Rework the chip-specific lockup detection code a bit more and 
probably clean it up a bit.
2. Make the timeout a module parameter, because compute tasks sometimes 
block a ring for more than 10 seconds.
3. Keep track of the actual RPTR offset a fence is emitted to.
4. Keep track of all the BOs an IB is touching.
5. Now if a lockup happens, start with the last successfully signaled 
fence and dump the ring content after that RPTR offset up to the first 
not yet signaled fence.
6. Then if this fence references an IB, dump its content and the BOs 
it is touching.
7. Dump everything on the ring after that fence until you reach the RPTR 
of the next fence or the WPTR of the ring.
8. If there is a next fence, repeat the whole thing from number 6.

If I'm not completely wrong that should give you practically all the 
information available, and we should probably put that behind another 
module option, because we are going to spam syslog pretty badly here. Feel 
free to add/modify the ideas on this list.

Christian.
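
Purely as an illustration of the dump walk outlined above, here is a
rough, hypothetical sketch; every helper, type and field is invented and
nothing here is code from the patchset.

/*
 * Walk the ring from the RPTR offset of the last signaled fence up to
 * the ring's WPTR, dumping ring content, the IB referenced by each
 * unsignaled fence and the BOs that IB touches.
 */
static void example_dump_after_lockup(struct example_ring *ring)
{
	struct example_fence *fence = example_last_signaled_fence(ring);
	u32 start = fence ? fence->rptr_offset : ring->rptr;

	/* NULL as the second argument means "start from the beginning" */
	while ((fence = example_next_unsignaled_fence(ring, fence))) {
		/* ring content between the previous point and this fence */
		example_dump_ring_range(ring, start, fence->rptr_offset);

		/* if the fence belongs to an IB, dump it and its BO list */
		if (fence->ib) {
			example_dump_ib(fence->ib);
			example_dump_bo_list(fence->ib->bos);
		}
		start = fence->rptr_offset;
	}

	/* finally everything up to the current write pointer */
	example_dump_ring_range(ring, start, ring->wptr);
}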

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: Reworking of GPU reset logic
  2012-04-21  9:42   ` Christian König
@ 2012-04-21 14:14     ` Jerome Glisse
  2012-04-25 13:01       ` Christian König
  2012-04-23  7:40     ` Michel Dänzer
  1 sibling, 1 reply; 36+ messages in thread
From: Jerome Glisse @ 2012-04-21 14:14 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

2012/4/21 Christian König <deathsimple@vodafone.de>:
> On 20.04.2012 01:47, Jerome Glisse wrote:
>>
>> 2012/4/19 Christian König<deathsimple@vodafone.de>:
>>>
>>> This includes mostly fixes for multi ring lockups and GPU resets, but it
>>> should general improve the behavior of the kernel mode driver in case
>>> something goes badly wrong.
>>>
>>> On the other hand it completely rewrites the IB pool and semaphore
>>> handling, so I think there are still a couple of problems in it.
>>>
>>> The first four patches were already send to the list, but the current set
>>> depends on them so I resend them again.
>>>
>>> Cheers,
>>> Christian.
>>
>> I did a quick review, it looks mostly good, but as it's sensitive code
>> i would like to spend sometime on
>> it. Probably next week. Note that i had some work on this area too, i
>> mostly want to drop all the debugfs
>> related to this and add some new more usefull (basicly something that
>> allow you to read all the data
>> needed to replay a locking up ib). I also was looking into Dave reset
>> thread and your solution of moving
>> reset in ioctl return path sounds good too but i need to convince my
>> self that it encompass all possible
>> case.
>>
>> Cheers,
>> Jerome
>>
> After sleeping a night over it I already reworked the patch for improving
> the SA performance, so please wait at least for v2 before taking a look at
> it :)
>
> Regarding the debugging of lockups I had the following on my "in mind todo"
> list:
> 1. Rework the chip specific lockup detection code a bit more and probably
> clean it up a bit.
> 2. Make the timeout a module parameter, cause compute task sometimes block a
> ring for more than 10 seconds.
> 3. Keep track of the actually RPTR offset a fence is emitted to
> 3. Keep track of all the BOs a IB is touching.
> 4. Now if a lockup happens start with the last successfully signaled fence
> and dump the ring content after that RPTR offset till the first not signaled
> fence.
> 5. Then if this fence references to an IB dump it's content and the BOs it
> is touching.
> 6. Dump everything on the ring after that fence until you reach the RPTR of
> the next fence or the WPTR of the ring.
> 7. If there is a next fence repeat the whole thing at number 5.
>
> If I'm not completely wrong that should give you practically every
> information available, and we probably should put that behind another module
> option, cause we are going to spam syslog pretty much here. Feel free to
> add/modify the ideas on this list.
>
> Christian.

What I have is similar. I am assuming only IBs trigger lockups; before each
IB I emit the IB's offset in the SA and its size to a scratch register. For
each IB I keep the BO list. On lockup I allocate a big chunk of memory and
copy the whole IB and all the BOs referenced by it (I am using my bof format
as I already have userspace tools).

Remove all the debugfs files. Just add a new one that gives you the first
faulty IB. On read of this file the kernel frees the memory. The kernel
should also free the memory after a while, or better, the lockup copy should
only be made if some radeon kernel option is enabled.

Cheers,
Jerome

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: Reworking of GPU reset logic
  2012-04-21  9:42   ` Christian König
  2012-04-21 14:14     ` Jerome Glisse
@ 2012-04-23  7:40     ` Michel Dänzer
  2012-04-25 12:50       ` Christian König
  1 sibling, 1 reply; 36+ messages in thread
From: Michel Dänzer @ 2012-04-23  7:40 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Sam, 2012-04-21 at 11:42 +0200, Christian König wrote: 
> 
> Regarding the debugging of lockups I had the following on my "in mind 
> todo" list:
> 1. Rework the chip specific lockup detection code a bit more and 
> probably clean it up a bit.
> 2. Make the timeout a module parameter, cause compute task sometimes 
> block a ring for more than 10 seconds.

A better solution for that would be to improve the detection of the GPU
making progress, also for graphics operations. We should try to reduce
the timeout rather than making it even larger.


-- 
Earthling Michel Dänzer           |                   http://www.amd.com
Libre software enthusiast         |          Debian, X and DRI developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 12/13] drm/radeon: fix a bug with the ring syncing code
  2012-04-19 22:39 ` [PATCH 12/13] drm/radeon: fix a bug with the ring syncing code Christian König
@ 2012-04-24 14:04   ` Dave Airlie
  2012-04-24 14:23     ` Christian König
  0 siblings, 1 reply; 36+ messages in thread
From: Dave Airlie @ 2012-04-24 14:04 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

2012/4/19 Christian König <deathsimple@vodafone.de>:
> Rings need to lock in order, otherwise
> the ring subsystem can deadlock.

Not sure if it's this commit or not, but I was profiling on an r700 and
saw it create a semaphore for ring syncing.

Surely the r700 would have no need?

Dave.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 12/13] drm/radeon: fix a bug with the ring syncing code
  2012-04-24 14:04   ` Dave Airlie
@ 2012-04-24 14:23     ` Christian König
  0 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-24 14:23 UTC (permalink / raw)
  To: Dave Airlie; +Cc: dri-devel

On 24.04.2012 16:04, Dave Airlie wrote:
> 2012/4/19 Christian König<deathsimple@vodafone.de>:
>> Rings need to lock in order, otherwise
>> the ring subsystem can deadlock.
> No sure if its the commit or not but I was profiling on an r700 and
> saw it create a semaphore for ring syncing.
>
> surely the r700 would have no need?
>
In theory the first hardware with more than a gfx ring was some r6xx 
(but of course we haven't released any code/documentation for that yet), 
so it's OK that the code generally checks for inter-ring synchronization 
needs on r7xx cards.

But with current mainline it should just boil down to a not-taken 
"if" branch, which it currently doesn't (*crap*). Just tested with my 
RV710 and I can confirm that it unnecessarily allocates a semaphore. 
Another bug on my todo list for this patchset; going to send out a v2 
soon, but going to test it thoroughly first.

Thanks for the comment,
Christian.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: Reworking of GPU reset logic
  2012-04-23  7:40     ` Michel Dänzer
@ 2012-04-25 12:50       ` Christian König
  0 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-25 12:50 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: dri-devel

On 23.04.2012 09:40, Michel Dänzer wrote:
> On Sam, 2012-04-21 at 11:42 +0200, Christian König wrote:
>> Regarding the debugging of lockups I had the following on my "in mind
>> todo" list:
>> 1. Rework the chip specific lockup detection code a bit more and
>> probably clean it up a bit.
>> 2. Make the timeout a module parameter, cause compute task sometimes
>> block a ring for more than 10 seconds.
> A better solution for that would be to improve the detection of the GPU
> making progress, also for graphics operations. We should try to reduce
> the timeout rather than making it even larger.

Well, let's call it a more complete solution.

Making the parameter configurable doesn't necessarily mean we are going 
to increase it. I usually set it to zero now, since that disables lockup 
detection entirely and lets me dig into the reason for something getting 
stuck.

Christian.
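
As a tiny illustration of what such a knob could look like (the parameter
name and default are made up, not an existing radeon option):

#include <linux/module.h>

/* Hypothetical example: lockup timeout in ms, 0 disables detection. */
static int example_lockup_timeout = 10000;
module_param_named(lockup_timeout, example_lockup_timeout, int, 0444);
MODULE_PARM_DESC(lockup_timeout,
		 "GPU lockup timeout in ms (default 10000, 0 = disable detection)");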
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: Reworking of GPU reset logic
  2012-04-21 14:14     ` Jerome Glisse
@ 2012-04-25 13:01       ` Christian König
  2012-04-25 13:30         ` Dave Airlie
  0 siblings, 1 reply; 36+ messages in thread
From: Christian König @ 2012-04-25 13:01 UTC (permalink / raw)
  To: Jerome Glisse; +Cc: dri-devel

On 21.04.2012 16:14, Jerome Glisse wrote:
> 2012/4/21 Christian König<deathsimple@vodafone.de>:
>> On 20.04.2012 01:47, Jerome Glisse wrote:
>>> 2012/4/19 Christian König<deathsimple@vodafone.de>:
>>>> This includes mostly fixes for multi ring lockups and GPU resets, but it
>>>> should general improve the behavior of the kernel mode driver in case
>>>> something goes badly wrong.
>>>>
>>>> On the other hand it completely rewrites the IB pool and semaphore
>>>> handling, so I think there are still a couple of problems in it.
>>>>
>>>> The first four patches were already send to the list, but the current set
>>>> depends on them so I resend them again.
>>>>
>>>> Cheers,
>>>> Christian.
>>> I did a quick review, it looks mostly good, but as it's sensitive code
>>> i would like to spend sometime on
>>> it. Probably next week. Note that i had some work on this area too, i
>>> mostly want to drop all the debugfs
>>> related to this and add some new more usefull (basicly something that
>>> allow you to read all the data
>>> needed to replay a locking up ib). I also was looking into Dave reset
>>> thread and your solution of moving
>>> reset in ioctl return path sounds good too but i need to convince my
>>> self that it encompass all possible
>>> case.
>>>
>>> Cheers,
>>> Jerome
>>>
>> After sleeping a night over it I already reworked the patch for improving
>> the SA performance, so please wait at least for v2 before taking a look at
>> it :)
>>
>> Regarding the debugging of lockups I had the following on my "in mind todo"
>> list:
>> 1. Rework the chip specific lockup detection code a bit more and probably
>> clean it up a bit.
>> 2. Make the timeout a module parameter, cause compute task sometimes block a
>> ring for more than 10 seconds.
>> 3. Keep track of the actually RPTR offset a fence is emitted to
>> 3. Keep track of all the BOs a IB is touching.
>> 4. Now if a lockup happens start with the last successfully signaled fence
>> and dump the ring content after that RPTR offset till the first not signaled
>> fence.
>> 5. Then if this fence references to an IB dump it's content and the BOs it
>> is touching.
>> 6. Dump everything on the ring after that fence until you reach the RPTR of
>> the next fence or the WPTR of the ring.
>> 7. If there is a next fence repeat the whole thing at number 5.
>>
>> If I'm not completely wrong that should give you practically every
>> information available, and we probably should put that behind another module
>> option, cause we are going to spam syslog pretty much here. Feel free to
>> add/modify the ideas on this list.
>>
>> Christian.
> What i have is similar, i am assuming only ib trigger lockup, before each ib
> emit to scratch reg ib offset in sa and ib size. For each ib keep bo list. On
> lockup allocate big memory to copy the whole ib and all the bo referenced
> by the ib (i am using my bof format as i already have userspace tools).
>
> Remove all the debugfs file. Just add a new one that gave you the first faulty
> ib. On read of this file kernel free the memory. Kernel should also free the
> memory after a while or better would be to enable the lockup copy only if
> some kernel radeon option is enabled.

I just resent my current patchset to the mailing list; it's not as 
complete as your solution, but it seems to be a step in the right 
direction. So please take a look at it.

Being able to generate something like a "GPU crash dump" on lockup 
sounds very valuable to me, but I'm not sure if debugfs files are 
the right direction to go. Maybe something more like a module parameter 
containing a directory, and if it is set we dump all the information 
available (including BO content) in binary form (instead of the current 
human-readable form of the debugfs files).

Anyway, the just-sent patchset solves the problem I'm currently looking 
into, and I'm running a bit out of time (again), so I don't know if I 
can complete that solution....

Cheers,
Christian.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: Reworking of GPU reset logic
  2012-04-25 13:01       ` Christian König
@ 2012-04-25 13:30         ` Dave Airlie
  2012-04-25 13:46           ` Alex Deucher
  0 siblings, 1 reply; 36+ messages in thread
From: Dave Airlie @ 2012-04-25 13:30 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

2012/4/25 Christian König <deathsimple@vodafone.de>:
> On 21.04.2012 16:14, Jerome Glisse wrote:
>>
>> 2012/4/21 Christian König<deathsimple@vodafone.de>:
>>>
>>> On 20.04.2012 01:47, Jerome Glisse wrote:
>>>>
>>>> 2012/4/19 Christian König<deathsimple@vodafone.de>:
>>>>>
>>>>> This includes mostly fixes for multi ring lockups and GPU resets, but
>>>>> it
>>>>> should general improve the behavior of the kernel mode driver in case
>>>>> something goes badly wrong.
>>>>>
>>>>> On the other hand it completely rewrites the IB pool and semaphore
>>>>> handling, so I think there are still a couple of problems in it.
>>>>>
>>>>> The first four patches were already send to the list, but the current
>>>>> set
>>>>> depends on them so I resend them again.
>>>>>
>>>>> Cheers,
>>>>> Christian.
>>>>
>>>> I did a quick review, it looks mostly good, but as it's sensitive code
>>>> i would like to spend sometime on
>>>> it. Probably next week. Note that i had some work on this area too, i
>>>> mostly want to drop all the debugfs
>>>> related to this and add some new more usefull (basicly something that
>>>> allow you to read all the data
>>>> needed to replay a locking up ib). I also was looking into Dave reset
>>>> thread and your solution of moving
>>>> reset in ioctl return path sounds good too but i need to convince my
>>>> self that it encompass all possible
>>>> case.
>>>>
>>>> Cheers,
>>>> Jerome
>>>>
>>> After sleeping a night over it I already reworked the patch for improving
>>> the SA performance, so please wait at least for v2 before taking a look
>>> at
>>> it :)
>>>
>>> Regarding the debugging of lockups I had the following on my "in mind
>>> todo"
>>> list:
>>> 1. Rework the chip specific lockup detection code a bit more and probably
>>> clean it up a bit.
>>> 2. Make the timeout a module parameter, cause compute task sometimes
>>> block a
>>> ring for more than 10 seconds.
>>> 3. Keep track of the actually RPTR offset a fence is emitted to
>>> 3. Keep track of all the BOs a IB is touching.
>>> 4. Now if a lockup happens start with the last successfully signaled
>>> fence
>>> and dump the ring content after that RPTR offset till the first not
>>> signaled
>>> fence.
>>> 5. Then if this fence references to an IB dump it's content and the BOs
>>> it
>>> is touching.
>>> 6. Dump everything on the ring after that fence until you reach the RPTR
>>> of
>>> the next fence or the WPTR of the ring.
>>> 7. If there is a next fence repeat the whole thing at number 5.
>>>
>>> If I'm not completely wrong that should give you practically every
>>> information available, and we probably should put that behind another
>>> module
>>> option, cause we are going to spam syslog pretty much here. Feel free to
>>> add/modify the ideas on this list.
>>>
>>> Christian.
>>
>> What i have is similar, i am assuming only ib trigger lockup, before each
>> ib
>> emit to scratch reg ib offset in sa and ib size. For each ib keep bo list.
>> On
>> lockup allocate big memory to copy the whole ib and all the bo referenced
>> by the ib (i am using my bof format as i already have userspace tools).
>>
>> Remove all the debugfs file. Just add a new one that gave you the first
>> faulty
>> ib. On read of this file kernel free the memory. Kernel should also free
>> the
>> memory after a while or better would be to enable the lockup copy only if
>> some kernel radeon option is enabled.
>
>
> Just resent my current patchset to the mailing list, it's not as complete as
> your solution, but seems to be a step into the right direction. So please
> take a look at them.
>
> Being able to generate something like a "GPU crash dump" on lockup sounds
> like something very valuable to me, but I'm not sure if debugfs files are
> the right direction to go. Maybe something more like a module parameter
> containing a directory, and if set we dump all informations (including bo
> content) available in binary form (instead of the current human readable
> form of the debugfs files).

Do what the intel driver does: create a versioned binary debugfs file with
all the error state in it for a lockup, store only one of these at a time,
and run a userspace tool to dump it out into something you can upload, or
just cat the file and upload it.

You don't want the kernel writing to directories on disk under any circumstances.

Dave.
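
A purely illustrative sketch of the single-slot, versioned debugfs file
Dave suggests might look like the following; the structures and helpers
are invented for the example and are neither the i915 nor a proposed
radeon implementation.

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/module.h>

/* Hypothetical error-state capture; the driver keeps at most one of
 * these, filled in by the lockup handler (capture and freeing of the
 * buffer are omitted here). */
struct example_error_state {
	u32 version;	/* bumped whenever the dump layout changes */
	size_t size;
	void *data;	/* ring/IB/BO contents captured at lockup time */
};

static ssize_t example_error_state_read(struct file *f, char __user *buf,
					size_t count, loff_t *pos)
{
	struct example_error_state *err = f->private_data;

	if (!err)
		return 0;
	return simple_read_from_buffer(buf, count, pos, err->data, err->size);
}

static const struct file_operations example_error_state_fops = {
	.owner = THIS_MODULE,
	.open  = simple_open,	/* copies inode->i_private into f->private_data */
	.read  = example_error_state_read,
};

/* Registered once at init time, e.g.:
 *   debugfs_create_file("example_error_state", 0444, root, err,
 *                       &example_error_state_fops);
 */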

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: Reworking of GPU reset logic
  2012-04-25 13:30         ` Dave Airlie
@ 2012-04-25 13:46           ` Alex Deucher
  2012-04-25 14:26             ` Jerome Glisse
  0 siblings, 1 reply; 36+ messages in thread
From: Alex Deucher @ 2012-04-25 13:46 UTC (permalink / raw)
  To: Dave Airlie; +Cc: Christian König, dri-devel

2012/4/25 Dave Airlie <airlied@gmail.com>:
> 2012/4/25 Christian König <deathsimple@vodafone.de>:
>> On 21.04.2012 16:14, Jerome Glisse wrote:
>>>
>>> 2012/4/21 Christian König<deathsimple@vodafone.de>:
>>>>
>>>> On 20.04.2012 01:47, Jerome Glisse wrote:
>>>>>
>>>>> 2012/4/19 Christian König<deathsimple@vodafone.de>:
>>>>>>
>>>>>> This includes mostly fixes for multi ring lockups and GPU resets, but
>>>>>> it
>>>>>> should general improve the behavior of the kernel mode driver in case
>>>>>> something goes badly wrong.
>>>>>>
>>>>>> On the other hand it completely rewrites the IB pool and semaphore
>>>>>> handling, so I think there are still a couple of problems in it.
>>>>>>
>>>>>> The first four patches were already send to the list, but the current
>>>>>> set
>>>>>> depends on them so I resend them again.
>>>>>>
>>>>>> Cheers,
>>>>>> Christian.
>>>>>
>>>>> I did a quick review, it looks mostly good, but as it's sensitive code
>>>>> i would like to spend sometime on
>>>>> it. Probably next week. Note that i had some work on this area too, i
>>>>> mostly want to drop all the debugfs
>>>>> related to this and add some new more usefull (basicly something that
>>>>> allow you to read all the data
>>>>> needed to replay a locking up ib). I also was looking into Dave reset
>>>>> thread and your solution of moving
>>>>> reset in ioctl return path sounds good too but i need to convince my
>>>>> self that it encompass all possible
>>>>> case.
>>>>>
>>>>> Cheers,
>>>>> Jerome
>>>>>
>>>> After sleeping a night over it I already reworked the patch for improving
>>>> the SA performance, so please wait at least for v2 before taking a look
>>>> at
>>>> it :)
>>>>
>>>> Regarding the debugging of lockups I had the following on my "in mind
>>>> todo"
>>>> list:
>>>> 1. Rework the chip specific lockup detection code a bit more and probably
>>>> clean it up a bit.
>>>> 2. Make the timeout a module parameter, cause compute task sometimes
>>>> block a
>>>> ring for more than 10 seconds.
>>>> 3. Keep track of the actually RPTR offset a fence is emitted to
>>>> 3. Keep track of all the BOs a IB is touching.
>>>> 4. Now if a lockup happens start with the last successfully signaled
>>>> fence
>>>> and dump the ring content after that RPTR offset till the first not
>>>> signaled
>>>> fence.
>>>> 5. Then if this fence references to an IB dump it's content and the BOs
>>>> it
>>>> is touching.
>>>> 6. Dump everything on the ring after that fence until you reach the RPTR
>>>> of
>>>> the next fence or the WPTR of the ring.
>>>> 7. If there is a next fence repeat the whole thing at number 5.
>>>>
>>>> If I'm not completely wrong that should give you practically every
>>>> information available, and we probably should put that behind another
>>>> module
>>>> option, cause we are going to spam syslog pretty much here. Feel free to
>>>> add/modify the ideas on this list.
>>>>
>>>> Christian.
>>>
>>> What i have is similar, i am assuming only ib trigger lockup, before each
>>> ib
>>> emit to scratch reg ib offset in sa and ib size. For each ib keep bo list.
>>> On
>>> lockup allocate big memory to copy the whole ib and all the bo referenced
>>> by the ib (i am using my bof format as i already have userspace tools).
>>>
>>> Remove all the debugfs file. Just add a new one that gave you the first
>>> faulty
>>> ib. On read of this file kernel free the memory. Kernel should also free
>>> the
>>> memory after a while or better would be to enable the lockup copy only if
>>> some kernel radeon option is enabled.
>>
>>
>> Just resent my current patchset to the mailing list, it's not as complete as
>> your solution, but seems to be a step into the right direction. So please
>> take a look at them.
>>
>> Being able to generate something like a "GPU crash dump" on lockup sounds
>> like something very valuable to me, but I'm not sure if debugfs files are
>> the right direction to go. Maybe something more like a module parameter
>> containing a directory, and if set we dump all informations (including bo
>> content) available in binary form (instead of the current human readable
>> form of the debugfs files).
>
> Do what intel driver does, create a versioned binary debugfs file with
> all the error state in it for a lockup,
> store only one of these at a time, run a userspace tool to dump it out
> into something you can
> upload or just cat the file and upload it.
>
> You don't want the kernel writing to dirs on disk under any circumstances
>

We have an internal binary format for dumping command streams and
associated buffers; we should probably use that so that we can better
take advantage of existing internal tools.

Alex

> Dave.
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: Reworking of GPU reset logic
  2012-04-25 13:46           ` Alex Deucher
@ 2012-04-25 14:26             ` Jerome Glisse
  0 siblings, 0 replies; 36+ messages in thread
From: Jerome Glisse @ 2012-04-25 14:26 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Christian König, dri-devel

On Wed, Apr 25, 2012 at 9:46 AM, Alex Deucher <alexdeucher@gmail.com> wrote:
> 2012/4/25 Dave Airlie <airlied@gmail.com>:
>> 2012/4/25 Christian König <deathsimple@vodafone.de>:
>>> On 21.04.2012 16:14, Jerome Glisse wrote:
>>>>
>>>> 2012/4/21 Christian König<deathsimple@vodafone.de>:
>>>>>
>>>>> On 20.04.2012 01:47, Jerome Glisse wrote:
>>>>>>
>>>>>> 2012/4/19 Christian König<deathsimple@vodafone.de>:
>>>>>>>
>>>>>>> This includes mostly fixes for multi ring lockups and GPU resets, but
>>>>>>> it
>>>>>>> should general improve the behavior of the kernel mode driver in case
>>>>>>> something goes badly wrong.
>>>>>>>
>>>>>>> On the other hand it completely rewrites the IB pool and semaphore
>>>>>>> handling, so I think there are still a couple of problems in it.
>>>>>>>
>>>>>>> The first four patches were already send to the list, but the current
>>>>>>> set
>>>>>>> depends on them so I resend them again.
>>>>>>>
>>>>>>> Cheers,
>>>>>>> Christian.
>>>>>>
>>>>>> I did a quick review and it looks mostly good, but as it's sensitive
>>>>>> code I would like to spend some time on it, probably next week. Note
>>>>>> that I had some work in this area too; I mostly want to drop all the
>>>>>> debugfs files related to this and add some new, more useful ones
>>>>>> (basically something that allows you to read all the data needed to
>>>>>> replay a locking-up IB). I was also looking into Dave's reset thread,
>>>>>> and your solution of moving the reset into the ioctl return path
>>>>>> sounds good too, but I need to convince myself that it encompasses
>>>>>> all possible cases.
>>>>>>
>>>>>> Cheers,
>>>>>> Jerome
>>>>>>
>>>>> After sleeping on it, I already reworked the patch for improving the
>>>>> SA performance, so please wait at least for v2 before taking a look
>>>>> at it :)
>>>>>
>>>>> Regarding the debugging of lockups, I had the following on my "in mind
>>>>> todo" list:
>>>>> 1. Rework the chip-specific lockup detection code a bit more and
>>>>> probably clean it up a bit.
>>>>> 2. Make the timeout a module parameter, because compute tasks sometimes
>>>>> block a ring for more than 10 seconds.
>>>>> 3. Keep track of the actual RPTR offset a fence is emitted to.
>>>>> 4. Keep track of all the BOs an IB is touching.
>>>>> 5. Now if a lockup happens, start with the last successfully signaled
>>>>> fence and dump the ring content after that RPTR offset up to the first
>>>>> unsignaled fence.
>>>>> 6. Then, if this fence references an IB, dump its content and the BOs
>>>>> it is touching.
>>>>> 7. Dump everything on the ring after that fence until you reach the
>>>>> RPTR of the next fence or the WPTR of the ring.
>>>>> 8. If there is a next fence, repeat the whole thing at number 6.
>>>>>
>>>>> If I'm not completely wrong, that should give you practically every
>>>>> piece of information available, and we probably should put that behind
>>>>> another module option, because we are going to spam syslog quite a bit
>>>>> here. Feel free to add/modify the ideas on this list.
>>>>>
>>>>> Christian.
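
For reference, the module options mentioned above (a configurable lockup
timeout as in point 2, and a switch to gate the verbose dumping) could be
wired up roughly as below; the parameter names and defaults are hypothetical
and are not existing radeon options:

#include <linux/module.h>
#include <linux/moduleparam.h>

/*
 * Illustration only: the parameter names and defaults below are made up,
 * they are not existing radeon module parameters.
 */
static int lockup_timeout_ms = 10000;	/* per-ring lockup timeout */
module_param(lockup_timeout_ms, int, 0644);
MODULE_PARM_DESC(lockup_timeout_ms, "GPU lockup timeout in ms");

static bool lockup_dump;		/* opt in to the verbose dump on lockup */
module_param(lockup_dump, bool, 0444);
MODULE_PARM_DESC(lockup_dump,
		 "Dump ring/IB/BO state to syslog on GPU lockup (default off)");
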
>>>>
>>>> What I have is similar. I am assuming only IBs trigger lockups; before
>>>> each IB, emit to a scratch reg the IB offset in the SA and the IB size.
>>>> For each IB, keep the BO list. On lockup, allocate a big chunk of memory
>>>> and copy the whole IB and all the BOs referenced by the IB (I am using
>>>> my bof format as I already have userspace tools).
>>>>
>>>> Remove all the debugfs files. Just add a new one that gives you the
>>>> first faulty IB. On read of this file the kernel frees the memory. The
>>>> kernel should also free the memory after a while or, better, only enable
>>>> the lockup copy if some kernel radeon option is set.
>>>
>>>
>>> Just resent my current patchset to the mailing list. It's not as complete
>>> as your solution, but it seems to be a step in the right direction, so
>>> please take a look at the patches.
>>>
>>> Being able to generate something like a "GPU crash dump" on lockup sounds
>>> like something very valuable to me, but I'm not sure if debugfs files are
>>> the right direction to go. Maybe something more like a module parameter
>>> containing a directory, and if set we dump all the information (including
>>> bo content) available in binary form (instead of the current human-readable
>>> form of the debugfs files).
>>
>> Do what the intel driver does: create a versioned binary debugfs file
>> with all the error state in it for a lockup, and store only one of these
>> at a time. Run a userspace tool to dump it out into something you can
>> upload, or just cat the file and upload it.
>>
>> You don't want the kernel writing to directories on disk under any
>> circumstances.
>>
>
> We have an internal binary format for dumping command streams and
> associated buffers; we should probably use that so that we can better
> take advantage of existing internal tools.
>
> Alex
>

I really would like to drop all the debugfs files related to ib/ring with
this patchset. Note that I also have a binary format for replaying command
streams, the bof format. It has all the information needed to replay on the
open driver, and the tools are there (my joujou repo on fdo).

Cheers,
Jerome

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: Reworking of GPU reset logic
@ 2012-04-25 12:46 Christian König
  0 siblings, 0 replies; 36+ messages in thread
From: Christian König @ 2012-04-25 12:46 UTC (permalink / raw)
  To: dri-devel

Second round of patchset.

Thanks for all the comments and/or bug reports; a lot of patches are now v2/v3 and should get another look. Every regression known so far should be fixed now.
In addition to the patches that were already included in the last set, there are 8 new ones, which are also reset, lockup and debugging related.

As always, comments and bug reports are very welcome,
Christian.

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2012-04-25 14:26 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-04-19 22:39 Reworking of GPU reset logic Christian König
2012-04-19 22:39 ` [PATCH 01/13] drm/radeon: make radeon_gpu_is_lockup a per ring function Christian König
2012-04-19 22:39 ` [PATCH 02/13] drm/radeon: replace gpu_lockup with ring->ready flag Christian König
2012-04-19 22:39 ` [PATCH 03/13] drm/radeon: register ring debugfs handlers on init Christian König
2012-04-19 22:39 ` [PATCH 04/13] drm/radeon: use central function for IB testing Christian König
2012-04-19 22:39 ` [PATCH 05/13] drm/radeon: rework gpu lockup detection and processing Christian König
2012-04-19 22:39 ` [PATCH 06/13] drm/radeon: improve sub allocator Christian König
2012-04-20  7:24   ` Michel Dänzer
2012-04-20  9:11     ` Christian König
2012-04-19 22:39 ` [PATCH 07/13] drm/radeon: add sub allocator debugfs file Christian König
2012-04-19 22:39 ` [PATCH 08/13] drm/radeon: add biggest hole tracking and wakequeue to the sa Christian König
2012-04-19 22:39 ` [PATCH 09/13] drm/radeon: simplify semaphore handling Christian König
2012-04-19 22:39 ` [PATCH 10/13] drm/radeon: return -ENOENT in fence_wait_* Christian König
2012-04-20  7:20   ` Michel Dänzer
2012-04-20  8:49     ` Christian König
2012-04-20  9:15       ` Michel Dänzer
2012-04-20 10:24         ` Christian König
2012-04-20 11:30           ` Michel Dänzer
2012-04-19 22:39 ` [PATCH 11/13] drm/radeon: rip out the ib pool Christian König
2012-04-19 22:39 ` [PATCH 12/13] drm/radeon: fix a bug with the ring syncing code Christian König
2012-04-24 14:04   ` Dave Airlie
2012-04-24 14:23     ` Christian König
2012-04-19 22:39 ` [PATCH 13/13] drm/radeon: rework recursive gpu reset handling Christian König
2012-04-20  6:57   ` Dave Airlie
2012-04-20  7:50     ` Daniel Vetter
2012-04-20  9:38       ` Christian König
2012-04-19 23:47 ` Reworking of GPU reset logic Jerome Glisse
2012-04-21  9:42   ` Christian König
2012-04-21 14:14     ` Jerome Glisse
2012-04-25 13:01       ` Christian König
2012-04-25 13:30         ` Dave Airlie
2012-04-25 13:46           ` Alex Deucher
2012-04-25 14:26             ` Jerome Glisse
2012-04-23  7:40     ` Michel Dänzer
2012-04-25 12:50       ` Christian König
2012-04-25 12:46 Christian König
