All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-02  8:34 ` Thomas Hellström
  0 siblings, 0 replies; 21+ messages in thread
From: Thomas Hellström @ 2023-03-02  8:34 UTC (permalink / raw)
  To: dri-devel; +Cc: Thomas Hellström, intel-gfx, Christian Koenig, intel-xe

Add a suballocator test to get some test coverage for the new drm
suballocator, and perform some basic timing (elapsed time).

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/Kconfig                   |   1 +
 drivers/gpu/drm/tests/Makefile            |   3 +-
 drivers/gpu/drm/tests/drm_suballoc_test.c | 356 ++++++++++++++++++++++
 3 files changed, 359 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tests/drm_suballoc_test.c

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 8fbe57407c60..dced53723721 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -78,6 +78,7 @@ config DRM_KUNIT_TEST
 	select DRM_LIB_RANDOM
 	select DRM_KMS_HELPER
 	select DRM_BUDDY
+	select DRM_SUBALLOC_HELPER
 	select DRM_EXPORT_FOR_TESTS if m
 	select DRM_KUNIT_TEST_HELPERS
 	default KUNIT_ALL_TESTS
diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
index bca726a8f483..c664944a48ab 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
 	drm_modes_test.o \
 	drm_plane_helper_test.o \
 	drm_probe_helper_test.o \
-	drm_rect_test.o
+	drm_rect_test.o \
+	drm_suballoc_test.o
 
 CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
diff --git a/drivers/gpu/drm/tests/drm_suballoc_test.c b/drivers/gpu/drm/tests/drm_suballoc_test.c
new file mode 100644
index 000000000000..e7303a5505a0
--- /dev/null
+++ b/drivers/gpu/drm/tests/drm_suballoc_test.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Test case for the drm_suballoc suballocator manager
+ * Copyright 2023 Intel Corporation.
+ */
+
+#include <kunit/test.h>
+
+#include <linux/dma-fence.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <drm/drm_suballoc.h>
+
+#define SA_ITERATIONS 10000
+#define SA_SIZE SZ_1M
+#define SA_DEFAULT_ALIGN SZ_4K
+
+static bool intr = true;
+static bool from_reclaim;
+static bool pre_throttle;
+static unsigned int num_rings = 4;
+static unsigned int iterations = SA_ITERATIONS;
+
+static atomic64_t free_space;
+
+static atomic_t other_id;
+
+struct suballoc_fence;
+
+/**
+ * struct suballoc_ring - fake gpu engine.
+ * @list: List of fences to signal.
+ * @signal_time: Accumulated fence signal execution time.
+ * @lock: Protects the suballoc ring members. hardirq safe.
+ * @hrtimer: Fake execution time timer.
+ * @active: The currently active fence for which we have pending work or a
+ *          timer running.
+ * @seqno: Fence submission seqno.
+ * @idx: Index for calculation of fake execution time.
+ * @work: Work struct used solely to move the timer start to a different
+ *        processor than that used for submission.
+ */
+struct suballoc_ring {
+	ktime_t signal_time;
+	struct list_head list;
+	/* Protect the ring processing. */
+	spinlock_t lock;
+	struct hrtimer hrtimer;
+	struct suballoc_fence *active;
+	atomic64_t seqno;
+	u32 idx;
+	struct work_struct work;
+};
+
+/**
+ * struct suballoc_fence - Hrtimer-driven fence.
+ * @fence: The base class fence struct.
+ * @link: Link for the ring's fence list.
+ * @size: The size of the suballocator range associated with this fence.
+ * @id: Cpu id likely used by the submission thread for suballoc allocation.
+ */
+struct suballoc_fence {
+	struct dma_fence fence;
+	struct list_head link;
+	size_t size;
+	unsigned int id;
+};
+
+/* A varying but repeatable fake execution time */
+static ktime_t ring_next_delay(struct suballoc_ring *ring)
+{
+	return ns_to_ktime((u64)(++ring->idx % 8) * 200 * NSEC_PER_USEC);
+}
+
+/*
+ * Launch from a work item to decrease the likelihood of the timer expiry
+ * callback getting called from the allocating cpu.
+ * We want to trigger cache-line bouncing between allocating and signalling
+ * cpus.
+ */
+static void ring_launch_timer_work(struct work_struct *work)
+{
+	struct suballoc_ring *ring =
+		container_of(work, typeof(*ring), work);
+
+	spin_lock_irq(&ring->lock);
+	if (ring->active)
+		hrtimer_start_range_ns(&ring->hrtimer, ring_next_delay(ring),
+				       100ULL * NSEC_PER_USEC,
+				       HRTIMER_MODE_REL_PINNED);
+
+	spin_unlock_irq(&ring->lock);
+}
+
+/*
+ * Signal an active fence and pull the next off the list if any and make it
+ * active.
+ */
+static enum hrtimer_restart ring_hrtimer_expired(struct hrtimer *hrtimer)
+{
+	struct suballoc_ring *ring =
+		container_of(hrtimer, typeof(*ring), hrtimer);
+	struct suballoc_fence *sfence;
+	ktime_t now, then;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ring->lock, irqflags);
+	sfence = ring->active;
+
+	if (sfence) {
+		struct dma_fence *fence = &sfence->fence;
+
+		if (sfence->id != get_cpu())
+			atomic_inc(&other_id);
+		put_cpu();
+
+		then = ktime_get();
+		dma_fence_signal(fence);
+		now = ktime_get();
+		dma_fence_put(fence);
+		ring->signal_time = ktime_add(ring->signal_time,
+					      ktime_sub(now, then));
+		ring->active = NULL;
+		atomic64_add(sfence->size, &free_space);
+	}
+
+	sfence = list_first_entry_or_null(&ring->list, typeof(*sfence), link);
+	if (sfence) {
+		list_del_init(&sfence->link);
+		ring->active = sfence;
+		spin_unlock_irqrestore(&ring->lock, irqflags);
+		hrtimer_forward_now(&ring->hrtimer, ring_next_delay(ring));
+		return HRTIMER_RESTART;
+	}
+	spin_unlock_irqrestore(&ring->lock, irqflags);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Queue a fence on a ring and if it's the first fence, make it active.
+ */
+static void ring_add_fence(struct suballoc_ring *ring,
+			   struct suballoc_fence *sfence)
+{
+	spin_lock_irq(&ring->lock);
+	if (!ring->active) {
+		ring->active = sfence;
+		queue_work(system_unbound_wq, &ring->work);
+	} else {
+		list_add_tail(&sfence->link, &ring->list);
+	}
+	spin_unlock_irq(&ring->lock);
+}
+
+static void ring_init(struct suballoc_ring *ring)
+{
+	memset(ring, 0, sizeof(*ring));
+	INIT_LIST_HEAD(&ring->list);
+	spin_lock_init(&ring->lock);
+	hrtimer_init(&ring->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	ring->hrtimer.function = ring_hrtimer_expired;
+	INIT_WORK(&ring->work, ring_launch_timer_work);
+}
+
+static bool ring_idle(struct suballoc_ring *ring)
+{
+	bool tmp;
+
+	spin_lock_irq(&ring->lock);
+	tmp = !ring->active;
+	spin_unlock_irq(&ring->lock);
+
+	return tmp;
+}
+
+static const char *dma_fence_get_suballoc_name(struct dma_fence *fence)
+{
+	return "suballoc";
+}
+
+static const struct dma_fence_ops dma_fence_suballoc_ops = {
+	.get_driver_name = dma_fence_get_suballoc_name,
+	.get_timeline_name = dma_fence_get_suballoc_name,
+};
+
+static DEFINE_SPINLOCK(sa_fence_lock);
+static ktime_t alloctime, freetime;
+
+static void drm_test_suballoc(struct kunit *test)
+{
+	struct suballoc_ring *rings;
+	struct drm_suballoc_manager sa_manager;
+	struct drm_suballoc *sa;
+	struct suballoc_fence *sfence;
+	struct dma_fence *fence;
+	ktime_t then, now, signaltime;
+	int i, ring, iter_tot = 0;
+	size_t size;
+	unsigned int align;
+	unsigned long long soffset;
+	gfp_t gfp;
+
+	rings = kvmalloc_array(num_rings, sizeof(*rings), GFP_KERNEL);
+	if (!rings) {
+		KUNIT_FAIL(test, "Failed allocating %u rings.\n");
+		return;
+	}
+
+	for (i = 0; i < num_rings; ++i)
+		ring_init(rings + i);
+
+	atomic64_set(&free_space, SA_SIZE);
+	drm_suballoc_manager_init(&sa_manager, SA_SIZE, SA_DEFAULT_ALIGN);
+
+	if (from_reclaim)
+		gfp = GFP_NOWAIT | __GFP_NOWARN;
+	else
+		gfp = GFP_KERNEL;
+
+	for (i = 0; i < iterations; ++i) {
+		ring = i % num_rings;
+		size = (ring + 1) * SZ_4K;
+		align = 1 << (ring % const_ilog2(SA_DEFAULT_ALIGN));
+
+		if (pre_throttle)
+			while (atomic64_read(&free_space) < SA_SIZE / 2)
+				cpu_relax();
+
+		if (from_reclaim)
+			fs_reclaim_acquire(GFP_KERNEL);
+
+		then = ktime_get();
+		sa = drm_suballoc_new(&sa_manager, size, gfp, intr, align);
+		now = ktime_get();
+		if (from_reclaim)
+			fs_reclaim_release(GFP_KERNEL);
+
+		alloctime = ktime_add(alloctime, ktime_sub(now, then));
+
+		iter_tot++;
+		if (IS_ERR(sa)) {
+			if (from_reclaim) {
+				iter_tot--;
+				continue;
+			}
+
+			KUNIT_FAIL(test, "drm_suballoc_new() returned %pe\n",
+				   sa);
+			break;
+		}
+
+		atomic64_sub(size, &free_space);
+		soffset = drm_suballoc_soffset(sa);
+		if (!IS_ALIGNED(soffset, align)) {
+			drm_suballoc_free(sa, NULL);
+			KUNIT_FAIL(test, "Incorrect alignment: offset %llu align %u rem %llu\n",
+				   soffset, align, soffset & (align - 1));
+			break;
+		}
+
+		if (drm_suballoc_eoffset(sa) > SA_SIZE) {
+			drm_suballoc_free(sa, NULL);
+			KUNIT_FAIL(test, "Allocation beyond end.\n");
+			break;
+		}
+
+		if (drm_suballoc_size(sa) < size ||
+		    drm_suballoc_size(sa) >= size + align) {
+			drm_suballoc_free(sa, NULL);
+			KUNIT_FAIL(test, "Incorrect size.\n");
+			break;
+		}
+
+		sfence = kmalloc(sizeof(*sfence), GFP_KERNEL);
+		if (unlikely(!sfence)) {
+			drm_suballoc_free(sa, NULL);
+			KUNIT_FAIL(test, "Fence allocation failed.\n");
+			break;
+		}
+		fence = &sfence->fence;
+		dma_fence_init(fence, &dma_fence_suballoc_ops, &sa_fence_lock,
+			       ring + 1,
+			       atomic64_inc_return(&rings[ring].seqno));
+		sfence->size = size;
+		sfence->id = get_cpu();
+		put_cpu();
+
+		ring_add_fence(rings + ring, sfence);
+
+		then = ktime_get();
+		drm_suballoc_free(sa, fence);
+		now = ktime_get();
+		freetime = ktime_add(freetime, ktime_sub(now, then));
+	}
+
+	signaltime = ktime_set(0, 0);
+	for (i = 0; i < num_rings; ++i) {
+		struct suballoc_ring *sring = &rings[i];
+
+		flush_work(&sring->work);
+		while (!ring_idle(sring))
+			schedule();
+		signaltime = ktime_add(signaltime, sring->signal_time);
+	}
+
+	kvfree(rings);
+
+	kunit_info(test, "signals on different processor: %d of %d\n",
+		   atomic_read(&other_id), iter_tot);
+	drm_suballoc_manager_fini(&sa_manager);
+	kunit_info(test, "Alloc time was %llu ns.\n", (unsigned long long)
+		   ktime_to_ns(alloctime) / iter_tot);
+	kunit_info(test, "Free time was %llu ns.\n", (unsigned long long)
+		   ktime_to_ns(freetime) / iter_tot);
+	kunit_info(test, "Signal time was %llu ns.\n", (unsigned long long)
+		   ktime_to_ns(signaltime) / iter_tot);
+
+	if (atomic64_read(&free_space) != SA_SIZE) {
+		kunit_warn(test, "Test sanity check failed.\n");
+		kunit_warn(test, "Space left at exit is %lld of %d\n",
+			   (long long)atomic64_read(&free_space), SA_SIZE);
+	}
+}
+
+module_param(intr, bool, 0400);
+MODULE_PARM_DESC(intr, "Whether to wait interruptible for space.");
+module_param(from_reclaim, bool, 0400);
+MODULE_PARM_DESC(from_reclaim, "Whether to suballocate from reclaim context.");
+module_param(pre_throttle, bool, 0400);
+MODULE_PARM_DESC(pre_throttle, "Whether to have the test throttle for space "
+		 "before allocations.");
+module_param(num_rings, uint, 0400);
+MODULE_PARM_DESC(num_rings, "Number of rings signalling fences in order.\n");
+module_param(iterations, uint, 0400);
+MODULE_PARM_DESC(iterations, "Number of allocations to perform.\n");
+
+static struct kunit_case drm_suballoc_tests[] = {
+	KUNIT_CASE(drm_test_suballoc),
+	{}
+};
+
+static struct kunit_suite drm_suballoc_test_suite = {
+	.name = "drm_suballoc",
+	.test_cases = drm_suballoc_tests,
+};
+
+kunit_test_suite(drm_suballoc_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("DRM suballocator Kunit test");
+MODULE_LICENSE("Dual MIT/GPL");
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-02  8:34 ` Thomas Hellström
  0 siblings, 0 replies; 21+ messages in thread
From: Thomas Hellström @ 2023-03-02  8:34 UTC (permalink / raw)
  To: dri-devel; +Cc: Thomas Hellström, intel-gfx, Christian Koenig, intel-xe

Add a suballocator test to get some test coverage for the new drm
suballocator, and perform some basic timing (elapsed time).

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/Kconfig                   |   1 +
 drivers/gpu/drm/tests/Makefile            |   3 +-
 drivers/gpu/drm/tests/drm_suballoc_test.c | 356 ++++++++++++++++++++++
 3 files changed, 359 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tests/drm_suballoc_test.c

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 8fbe57407c60..dced53723721 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -78,6 +78,7 @@ config DRM_KUNIT_TEST
 	select DRM_LIB_RANDOM
 	select DRM_KMS_HELPER
 	select DRM_BUDDY
+	select DRM_SUBALLOC_HELPER
 	select DRM_EXPORT_FOR_TESTS if m
 	select DRM_KUNIT_TEST_HELPERS
 	default KUNIT_ALL_TESTS
diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
index bca726a8f483..c664944a48ab 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
 	drm_modes_test.o \
 	drm_plane_helper_test.o \
 	drm_probe_helper_test.o \
-	drm_rect_test.o
+	drm_rect_test.o \
+	drm_suballoc_test.o
 
 CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
diff --git a/drivers/gpu/drm/tests/drm_suballoc_test.c b/drivers/gpu/drm/tests/drm_suballoc_test.c
new file mode 100644
index 000000000000..e7303a5505a0
--- /dev/null
+++ b/drivers/gpu/drm/tests/drm_suballoc_test.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Test case for the drm_suballoc suballocator manager
+ * Copyright 2023 Intel Corporation.
+ */
+
+#include <kunit/test.h>
+
+#include <linux/dma-fence.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <drm/drm_suballoc.h>
+
+#define SA_ITERATIONS 10000
+#define SA_SIZE SZ_1M
+#define SA_DEFAULT_ALIGN SZ_4K
+
+static bool intr = true;
+static bool from_reclaim;
+static bool pre_throttle;
+static unsigned int num_rings = 4;
+static unsigned int iterations = SA_ITERATIONS;
+
+static atomic64_t free_space;
+
+static atomic_t other_id;
+
+struct suballoc_fence;
+
+/**
+ * struct suballoc_ring - fake gpu engine.
+ * @list: List of fences to signal.
+ * @signal_time: Accumulated fence signal execution time.
+ * @lock: Protects the suballoc ring members. hardirq safe.
+ * @hrtimer: Fake execution time timer.
+ * @active: The currently active fence for which we have pending work or a
+ *          timer running.
+ * @seqno: Fence submission seqno.
+ * @idx: Index for calculation of fake execution time.
+ * @work: Work struct used solely to move the timer start to a different
+ *        processor than that used for submission.
+ */
+struct suballoc_ring {
+	ktime_t signal_time;
+	struct list_head list;
+	/* Protect the ring processing. */
+	spinlock_t lock;
+	struct hrtimer hrtimer;
+	struct suballoc_fence *active;
+	atomic64_t seqno;
+	u32 idx;
+	struct work_struct work;
+};
+
+/**
+ * struct suballoc_fence - Hrtimer-driven fence.
+ * @fence: The base class fence struct.
+ * @link: Link for the ring's fence list.
+ * @size: The size of the suballocator range associated with this fence.
+ * @id: Cpu id likely used by the submission thread for suballoc allocation.
+ */
+struct suballoc_fence {
+	struct dma_fence fence;
+	struct list_head link;
+	size_t size;
+	unsigned int id;
+};
+
+/* A varying but repeatable fake execution time */
+static ktime_t ring_next_delay(struct suballoc_ring *ring)
+{
+	return ns_to_ktime((u64)(++ring->idx % 8) * 200 * NSEC_PER_USEC);
+}
+
+/*
+ * Launch from a work item to decrease the likelihood of the timer expiry
+ * callback getting called from the allocating cpu.
+ * We want to trigger cache-line bouncing between allocating and signalling
+ * cpus.
+ */
+static void ring_launch_timer_work(struct work_struct *work)
+{
+	struct suballoc_ring *ring =
+		container_of(work, typeof(*ring), work);
+
+	spin_lock_irq(&ring->lock);
+	if (ring->active)
+		hrtimer_start_range_ns(&ring->hrtimer, ring_next_delay(ring),
+				       100ULL * NSEC_PER_USEC,
+				       HRTIMER_MODE_REL_PINNED);
+
+	spin_unlock_irq(&ring->lock);
+}
+
+/*
+ * Signal an active fence and pull the next off the list if any and make it
+ * active.
+ */
+static enum hrtimer_restart ring_hrtimer_expired(struct hrtimer *hrtimer)
+{
+	struct suballoc_ring *ring =
+		container_of(hrtimer, typeof(*ring), hrtimer);
+	struct suballoc_fence *sfence;
+	ktime_t now, then;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ring->lock, irqflags);
+	sfence = ring->active;
+
+	if (sfence) {
+		struct dma_fence *fence = &sfence->fence;
+
+		if (sfence->id != get_cpu())
+			atomic_inc(&other_id);
+		put_cpu();
+
+		then = ktime_get();
+		dma_fence_signal(fence);
+		now = ktime_get();
+		dma_fence_put(fence);
+		ring->signal_time = ktime_add(ring->signal_time,
+					      ktime_sub(now, then));
+		ring->active = NULL;
+		atomic64_add(sfence->size, &free_space);
+	}
+
+	sfence = list_first_entry_or_null(&ring->list, typeof(*sfence), link);
+	if (sfence) {
+		list_del_init(&sfence->link);
+		ring->active = sfence;
+		spin_unlock_irqrestore(&ring->lock, irqflags);
+		hrtimer_forward_now(&ring->hrtimer, ring_next_delay(ring));
+		return HRTIMER_RESTART;
+	}
+	spin_unlock_irqrestore(&ring->lock, irqflags);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Queue a fence on a ring and if it's the first fence, make it active.
+ */
+static void ring_add_fence(struct suballoc_ring *ring,
+			   struct suballoc_fence *sfence)
+{
+	spin_lock_irq(&ring->lock);
+	if (!ring->active) {
+		ring->active = sfence;
+		queue_work(system_unbound_wq, &ring->work);
+	} else {
+		list_add_tail(&sfence->link, &ring->list);
+	}
+	spin_unlock_irq(&ring->lock);
+}
+
+static void ring_init(struct suballoc_ring *ring)
+{
+	memset(ring, 0, sizeof(*ring));
+	INIT_LIST_HEAD(&ring->list);
+	spin_lock_init(&ring->lock);
+	hrtimer_init(&ring->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	ring->hrtimer.function = ring_hrtimer_expired;
+	INIT_WORK(&ring->work, ring_launch_timer_work);
+}
+
+static bool ring_idle(struct suballoc_ring *ring)
+{
+	bool tmp;
+
+	spin_lock_irq(&ring->lock);
+	tmp = !ring->active;
+	spin_unlock_irq(&ring->lock);
+
+	return tmp;
+}
+
+static const char *dma_fence_get_suballoc_name(struct dma_fence *fence)
+{
+	return "suballoc";
+}
+
+static const struct dma_fence_ops dma_fence_suballoc_ops = {
+	.get_driver_name = dma_fence_get_suballoc_name,
+	.get_timeline_name = dma_fence_get_suballoc_name,
+};
+
+static DEFINE_SPINLOCK(sa_fence_lock);
+static ktime_t alloctime, freetime;
+
+static void drm_test_suballoc(struct kunit *test)
+{
+	struct suballoc_ring *rings;
+	struct drm_suballoc_manager sa_manager;
+	struct drm_suballoc *sa;
+	struct suballoc_fence *sfence;
+	struct dma_fence *fence;
+	ktime_t then, now, signaltime;
+	int i, ring, iter_tot = 0;
+	size_t size;
+	unsigned int align;
+	unsigned long long soffset;
+	gfp_t gfp;
+
+	rings = kvmalloc_array(num_rings, sizeof(*rings), GFP_KERNEL);
+	if (!rings) {
+		KUNIT_FAIL(test, "Failed allocating %u rings.\n");
+		return;
+	}
+
+	for (i = 0; i < num_rings; ++i)
+		ring_init(rings + i);
+
+	atomic64_set(&free_space, SA_SIZE);
+	drm_suballoc_manager_init(&sa_manager, SA_SIZE, SA_DEFAULT_ALIGN);
+
+	if (from_reclaim)
+		gfp = GFP_NOWAIT | __GFP_NOWARN;
+	else
+		gfp = GFP_KERNEL;
+
+	for (i = 0; i < iterations; ++i) {
+		ring = i % num_rings;
+		size = (ring + 1) * SZ_4K;
+		align = 1 << (ring % const_ilog2(SA_DEFAULT_ALIGN));
+
+		if (pre_throttle)
+			while (atomic64_read(&free_space) < SA_SIZE / 2)
+				cpu_relax();
+
+		if (from_reclaim)
+			fs_reclaim_acquire(GFP_KERNEL);
+
+		then = ktime_get();
+		sa = drm_suballoc_new(&sa_manager, size, gfp, intr, align);
+		now = ktime_get();
+		if (from_reclaim)
+			fs_reclaim_release(GFP_KERNEL);
+
+		alloctime = ktime_add(alloctime, ktime_sub(now, then));
+
+		iter_tot++;
+		if (IS_ERR(sa)) {
+			if (from_reclaim) {
+				iter_tot--;
+				continue;
+			}
+
+			KUNIT_FAIL(test, "drm_suballoc_new() returned %pe\n",
+				   sa);
+			break;
+		}
+
+		atomic64_sub(size, &free_space);
+		soffset = drm_suballoc_soffset(sa);
+		if (!IS_ALIGNED(soffset, align)) {
+			drm_suballoc_free(sa, NULL);
+			KUNIT_FAIL(test, "Incorrect alignment: offset %llu align %u rem %llu\n",
+				   soffset, align, soffset & (align - 1));
+			break;
+		}
+
+		if (drm_suballoc_eoffset(sa) > SA_SIZE) {
+			drm_suballoc_free(sa, NULL);
+			KUNIT_FAIL(test, "Allocation beyond end.\n");
+			break;
+		}
+
+		if (drm_suballoc_size(sa) < size ||
+		    drm_suballoc_size(sa) >= size + align) {
+			drm_suballoc_free(sa, NULL);
+			KUNIT_FAIL(test, "Incorrect size.\n");
+			break;
+		}
+
+		sfence = kmalloc(sizeof(*sfence), GFP_KERNEL);
+		if (unlikely(!sfence)) {
+			drm_suballoc_free(sa, NULL);
+			KUNIT_FAIL(test, "Fence allocation failed.\n");
+			break;
+		}
+		fence = &sfence->fence;
+		dma_fence_init(fence, &dma_fence_suballoc_ops, &sa_fence_lock,
+			       ring + 1,
+			       atomic64_inc_return(&rings[ring].seqno));
+		sfence->size = size;
+		sfence->id = get_cpu();
+		put_cpu();
+
+		ring_add_fence(rings + ring, sfence);
+
+		then = ktime_get();
+		drm_suballoc_free(sa, fence);
+		now = ktime_get();
+		freetime = ktime_add(freetime, ktime_sub(now, then));
+	}
+
+	signaltime = ktime_set(0, 0);
+	for (i = 0; i < num_rings; ++i) {
+		struct suballoc_ring *sring = &rings[i];
+
+		flush_work(&sring->work);
+		while (!ring_idle(sring))
+			schedule();
+		signaltime = ktime_add(signaltime, sring->signal_time);
+	}
+
+	kvfree(rings);
+
+	kunit_info(test, "signals on different processor: %d of %d\n",
+		   atomic_read(&other_id), iter_tot);
+	drm_suballoc_manager_fini(&sa_manager);
+	kunit_info(test, "Alloc time was %llu ns.\n", (unsigned long long)
+		   ktime_to_ns(alloctime) / iter_tot);
+	kunit_info(test, "Free time was %llu ns.\n", (unsigned long long)
+		   ktime_to_ns(freetime) / iter_tot);
+	kunit_info(test, "Signal time was %llu ns.\n", (unsigned long long)
+		   ktime_to_ns(signaltime) / iter_tot);
+
+	if (atomic64_read(&free_space) != SA_SIZE) {
+		kunit_warn(test, "Test sanity check failed.\n");
+		kunit_warn(test, "Space left at exit is %lld of %d\n",
+			   (long long)atomic64_read(&free_space), SA_SIZE);
+	}
+}
+
+module_param(intr, bool, 0400);
+MODULE_PARM_DESC(intr, "Whether to wait interruptible for space.");
+module_param(from_reclaim, bool, 0400);
+MODULE_PARM_DESC(from_reclaim, "Whether to suballocate from reclaim context.");
+module_param(pre_throttle, bool, 0400);
+MODULE_PARM_DESC(pre_throttle, "Whether to have the test throttle for space "
+		 "before allocations.");
+module_param(num_rings, uint, 0400);
+MODULE_PARM_DESC(num_rings, "Number of rings signalling fences in order.\n");
+module_param(iterations, uint, 0400);
+MODULE_PARM_DESC(iterations, "Number of allocations to perform.\n");
+
+static struct kunit_case drm_suballoc_tests[] = {
+	KUNIT_CASE(drm_test_suballoc),
+	{}
+};
+
+static struct kunit_suite drm_suballoc_test_suite = {
+	.name = "drm_suballoc",
+	.test_cases = drm_suballoc_tests,
+};
+
+kunit_test_suite(drm_suballoc_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("DRM suballocator Kunit test");
+MODULE_LICENSE("Dual MIT/GPL");
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Intel-xe] [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-02  8:34 ` Thomas Hellström
  0 siblings, 0 replies; 21+ messages in thread
From: Thomas Hellström @ 2023-03-02  8:34 UTC (permalink / raw)
  To: dri-devel; +Cc: intel-gfx, Christian Koenig, intel-xe

Add a suballocator test to get some test coverage for the new drm
suballocator, and perform some basic timing (elapsed time).

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/Kconfig                   |   1 +
 drivers/gpu/drm/tests/Makefile            |   3 +-
 drivers/gpu/drm/tests/drm_suballoc_test.c | 356 ++++++++++++++++++++++
 3 files changed, 359 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tests/drm_suballoc_test.c

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 8fbe57407c60..dced53723721 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -78,6 +78,7 @@ config DRM_KUNIT_TEST
 	select DRM_LIB_RANDOM
 	select DRM_KMS_HELPER
 	select DRM_BUDDY
+	select DRM_SUBALLOC_HELPER
 	select DRM_EXPORT_FOR_TESTS if m
 	select DRM_KUNIT_TEST_HELPERS
 	default KUNIT_ALL_TESTS
diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
index bca726a8f483..c664944a48ab 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
 	drm_modes_test.o \
 	drm_plane_helper_test.o \
 	drm_probe_helper_test.o \
-	drm_rect_test.o
+	drm_rect_test.o \
+	drm_suballoc_test.o
 
 CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
diff --git a/drivers/gpu/drm/tests/drm_suballoc_test.c b/drivers/gpu/drm/tests/drm_suballoc_test.c
new file mode 100644
index 000000000000..e7303a5505a0
--- /dev/null
+++ b/drivers/gpu/drm/tests/drm_suballoc_test.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Test case for the drm_suballoc suballocator manager
+ * Copyright 2023 Intel Corporation.
+ */
+
+#include <kunit/test.h>
+
+#include <linux/dma-fence.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <drm/drm_suballoc.h>
+
+#define SA_ITERATIONS 10000
+#define SA_SIZE SZ_1M
+#define SA_DEFAULT_ALIGN SZ_4K
+
+static bool intr = true;
+static bool from_reclaim;
+static bool pre_throttle;
+static unsigned int num_rings = 4;
+static unsigned int iterations = SA_ITERATIONS;
+
+static atomic64_t free_space;
+
+static atomic_t other_id;
+
+struct suballoc_fence;
+
+/**
+ * struct suballoc_ring - fake gpu engine.
+ * @list: List of fences to signal.
+ * @signal_time: Accumulated fence signal execution time.
+ * @lock: Protects the suballoc ring members. hardirq safe.
+ * @hrtimer: Fake execution time timer.
+ * @active: The currently active fence for which we have pending work or a
+ *          timer running.
+ * @seqno: Fence submission seqno.
+ * @idx: Index for calculation of fake execution time.
+ * @work: Work struct used solely to move the timer start to a different
+ *        processor than that used for submission.
+ */
+struct suballoc_ring {
+	ktime_t signal_time;
+	struct list_head list;
+	/* Protect the ring processing. */
+	spinlock_t lock;
+	struct hrtimer hrtimer;
+	struct suballoc_fence *active;
+	atomic64_t seqno;
+	u32 idx;
+	struct work_struct work;
+};
+
+/**
+ * struct suballoc_fence - Hrtimer-driven fence.
+ * @fence: The base class fence struct.
+ * @link: Link for the ring's fence list.
+ * @size: The size of the suballocator range associated with this fence.
+ * @id: Cpu id likely used by the submission thread for suballoc allocation.
+ */
+struct suballoc_fence {
+	struct dma_fence fence;
+	struct list_head link;
+	size_t size;
+	unsigned int id;
+};
+
+/* A varying but repeatable fake execution time */
+static ktime_t ring_next_delay(struct suballoc_ring *ring)
+{
+	return ns_to_ktime((u64)(++ring->idx % 8) * 200 * NSEC_PER_USEC);
+}
+
+/*
+ * Launch from a work item to decrease the likelihood of the timer expiry
+ * callback getting called from the allocating cpu.
+ * We want to trigger cache-line bouncing between allocating and signalling
+ * cpus.
+ */
+static void ring_launch_timer_work(struct work_struct *work)
+{
+	struct suballoc_ring *ring =
+		container_of(work, typeof(*ring), work);
+
+	spin_lock_irq(&ring->lock);
+	if (ring->active)
+		hrtimer_start_range_ns(&ring->hrtimer, ring_next_delay(ring),
+				       100ULL * NSEC_PER_USEC,
+				       HRTIMER_MODE_REL_PINNED);
+
+	spin_unlock_irq(&ring->lock);
+}
+
+/*
+ * Signal an active fence and pull the next off the list if any and make it
+ * active.
+ */
+static enum hrtimer_restart ring_hrtimer_expired(struct hrtimer *hrtimer)
+{
+	struct suballoc_ring *ring =
+		container_of(hrtimer, typeof(*ring), hrtimer);
+	struct suballoc_fence *sfence;
+	ktime_t now, then;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ring->lock, irqflags);
+	sfence = ring->active;
+
+	if (sfence) {
+		struct dma_fence *fence = &sfence->fence;
+
+		if (sfence->id != get_cpu())
+			atomic_inc(&other_id);
+		put_cpu();
+
+		then = ktime_get();
+		dma_fence_signal(fence);
+		now = ktime_get();
+		dma_fence_put(fence);
+		ring->signal_time = ktime_add(ring->signal_time,
+					      ktime_sub(now, then));
+		ring->active = NULL;
+		atomic64_add(sfence->size, &free_space);
+	}
+
+	sfence = list_first_entry_or_null(&ring->list, typeof(*sfence), link);
+	if (sfence) {
+		list_del_init(&sfence->link);
+		ring->active = sfence;
+		spin_unlock_irqrestore(&ring->lock, irqflags);
+		hrtimer_forward_now(&ring->hrtimer, ring_next_delay(ring));
+		return HRTIMER_RESTART;
+	}
+	spin_unlock_irqrestore(&ring->lock, irqflags);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Queue a fence on a ring and if it's the first fence, make it active.
+ */
+static void ring_add_fence(struct suballoc_ring *ring,
+			   struct suballoc_fence *sfence)
+{
+	spin_lock_irq(&ring->lock);
+	if (!ring->active) {
+		ring->active = sfence;
+		queue_work(system_unbound_wq, &ring->work);
+	} else {
+		list_add_tail(&sfence->link, &ring->list);
+	}
+	spin_unlock_irq(&ring->lock);
+}
+
+static void ring_init(struct suballoc_ring *ring)
+{
+	memset(ring, 0, sizeof(*ring));
+	INIT_LIST_HEAD(&ring->list);
+	spin_lock_init(&ring->lock);
+	hrtimer_init(&ring->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	ring->hrtimer.function = ring_hrtimer_expired;
+	INIT_WORK(&ring->work, ring_launch_timer_work);
+}
+
+static bool ring_idle(struct suballoc_ring *ring)
+{
+	bool tmp;
+
+	spin_lock_irq(&ring->lock);
+	tmp = !ring->active;
+	spin_unlock_irq(&ring->lock);
+
+	return tmp;
+}
+
+static const char *dma_fence_get_suballoc_name(struct dma_fence *fence)
+{
+	return "suballoc";
+}
+
+static const struct dma_fence_ops dma_fence_suballoc_ops = {
+	.get_driver_name = dma_fence_get_suballoc_name,
+	.get_timeline_name = dma_fence_get_suballoc_name,
+};
+
+static DEFINE_SPINLOCK(sa_fence_lock);
+static ktime_t alloctime, freetime;
+
+static void drm_test_suballoc(struct kunit *test)
+{
+	struct suballoc_ring *rings;
+	struct drm_suballoc_manager sa_manager;
+	struct drm_suballoc *sa;
+	struct suballoc_fence *sfence;
+	struct dma_fence *fence;
+	ktime_t then, now, signaltime;
+	int i, ring, iter_tot = 0;
+	size_t size;
+	unsigned int align;
+	unsigned long long soffset;
+	gfp_t gfp;
+
+	rings = kvmalloc_array(num_rings, sizeof(*rings), GFP_KERNEL);
+	if (!rings) {
+		KUNIT_FAIL(test, "Failed allocating %u rings.\n");
+		return;
+	}
+
+	for (i = 0; i < num_rings; ++i)
+		ring_init(rings + i);
+
+	atomic64_set(&free_space, SA_SIZE);
+	drm_suballoc_manager_init(&sa_manager, SA_SIZE, SA_DEFAULT_ALIGN);
+
+	if (from_reclaim)
+		gfp = GFP_NOWAIT | __GFP_NOWARN;
+	else
+		gfp = GFP_KERNEL;
+
+	for (i = 0; i < iterations; ++i) {
+		ring = i % num_rings;
+		size = (ring + 1) * SZ_4K;
+		align = 1 << (ring % const_ilog2(SA_DEFAULT_ALIGN));
+
+		if (pre_throttle)
+			while (atomic64_read(&free_space) < SA_SIZE / 2)
+				cpu_relax();
+
+		if (from_reclaim)
+			fs_reclaim_acquire(GFP_KERNEL);
+
+		then = ktime_get();
+		sa = drm_suballoc_new(&sa_manager, size, gfp, intr, align);
+		now = ktime_get();
+		if (from_reclaim)
+			fs_reclaim_release(GFP_KERNEL);
+
+		alloctime = ktime_add(alloctime, ktime_sub(now, then));
+
+		iter_tot++;
+		if (IS_ERR(sa)) {
+			if (from_reclaim) {
+				iter_tot--;
+				continue;
+			}
+
+			KUNIT_FAIL(test, "drm_suballoc_new() returned %pe\n",
+				   sa);
+			break;
+		}
+
+		atomic64_sub(size, &free_space);
+		soffset = drm_suballoc_soffset(sa);
+		if (!IS_ALIGNED(soffset, align)) {
+			drm_suballoc_free(sa, NULL);
+			KUNIT_FAIL(test, "Incorrect alignment: offset %llu align %u rem %llu\n",
+				   soffset, align, soffset & (align - 1));
+			break;
+		}
+
+		if (drm_suballoc_eoffset(sa) > SA_SIZE) {
+			drm_suballoc_free(sa, NULL);
+			KUNIT_FAIL(test, "Allocation beyond end.\n");
+			break;
+		}
+
+		if (drm_suballoc_size(sa) < size ||
+		    drm_suballoc_size(sa) >= size + align) {
+			drm_suballoc_free(sa, NULL);
+			KUNIT_FAIL(test, "Incorrect size.\n");
+			break;
+		}
+
+		sfence = kmalloc(sizeof(*sfence), GFP_KERNEL);
+		if (unlikely(!sfence)) {
+			drm_suballoc_free(sa, NULL);
+			KUNIT_FAIL(test, "Fence allocation failed.\n");
+			break;
+		}
+		fence = &sfence->fence;
+		dma_fence_init(fence, &dma_fence_suballoc_ops, &sa_fence_lock,
+			       ring + 1,
+			       atomic64_inc_return(&rings[ring].seqno));
+		sfence->size = size;
+		sfence->id = get_cpu();
+		put_cpu();
+
+		ring_add_fence(rings + ring, sfence);
+
+		then = ktime_get();
+		drm_suballoc_free(sa, fence);
+		now = ktime_get();
+		freetime = ktime_add(freetime, ktime_sub(now, then));
+	}
+
+	signaltime = ktime_set(0, 0);
+	for (i = 0; i < num_rings; ++i) {
+		struct suballoc_ring *sring = &rings[i];
+
+		flush_work(&sring->work);
+		while (!ring_idle(sring))
+			schedule();
+		signaltime = ktime_add(signaltime, sring->signal_time);
+	}
+
+	kvfree(rings);
+
+	kunit_info(test, "signals on different processor: %d of %d\n",
+		   atomic_read(&other_id), iter_tot);
+	drm_suballoc_manager_fini(&sa_manager);
+	kunit_info(test, "Alloc time was %llu ns.\n", (unsigned long long)
+		   ktime_to_ns(alloctime) / iter_tot);
+	kunit_info(test, "Free time was %llu ns.\n", (unsigned long long)
+		   ktime_to_ns(freetime) / iter_tot);
+	kunit_info(test, "Signal time was %llu ns.\n", (unsigned long long)
+		   ktime_to_ns(signaltime) / iter_tot);
+
+	if (atomic64_read(&free_space) != SA_SIZE) {
+		kunit_warn(test, "Test sanity check failed.\n");
+		kunit_warn(test, "Space left at exit is %lld of %d\n",
+			   (long long)atomic64_read(&free_space), SA_SIZE);
+	}
+}
+
+module_param(intr, bool, 0400);
+MODULE_PARM_DESC(intr, "Whether to wait interruptible for space.");
+module_param(from_reclaim, bool, 0400);
+MODULE_PARM_DESC(from_reclaim, "Whether to suballocate from reclaim context.");
+module_param(pre_throttle, bool, 0400);
+MODULE_PARM_DESC(pre_throttle, "Whether to have the test throttle for space "
+		 "before allocations.");
+module_param(num_rings, uint, 0400);
+MODULE_PARM_DESC(num_rings, "Number of rings signalling fences in order.\n");
+module_param(iterations, uint, 0400);
+MODULE_PARM_DESC(iterations, "Number of allocations to perform.\n");
+
+static struct kunit_case drm_suballoc_tests[] = {
+	KUNIT_CASE(drm_test_suballoc),
+	{}
+};
+
+static struct kunit_suite drm_suballoc_test_suite = {
+	.name = "drm_suballoc",
+	.test_cases = drm_suballoc_tests,
+};
+
+kunit_test_suite(drm_suballoc_test_suite);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("DRM suballocator Kunit test");
+MODULE_LICENSE("Dual MIT/GPL");
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [Intel-xe] ✗ CI.Patch_applied: failure for drm/tests: Suballocator test (rev2)
  2023-03-02  8:34 ` [Intel-gfx] " Thomas Hellström
  (?)
  (?)
@ 2023-03-02  8:40 ` Patchwork
  -1 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2023-03-02  8:40 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: intel-xe

== Series Details ==

Series: drm/tests: Suballocator test (rev2)
URL   : https://patchwork.freedesktop.org/series/114409/
State : failure

== Summary ==

error: patch failed: drivers/gpu/drm/Kconfig:78
error: drivers/gpu/drm/Kconfig: patch does not apply
error: patch failed: drivers/gpu/drm/tests/Makefile:17
error: drivers/gpu/drm/tests/Makefile: patch does not apply



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/tests: Suballocator test (rev2)
  2023-03-02  8:34 ` [Intel-gfx] " Thomas Hellström
                   ` (2 preceding siblings ...)
  (?)
@ 2023-03-02  9:09 ` Patchwork
  -1 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2023-03-02  9:09 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: intel-gfx

== Series Details ==

Series: drm/tests: Suballocator test (rev2)
URL   : https://patchwork.freedesktop.org/series/114410/
State : warning

== Summary ==

Error: dim checkpatch failed
6ee63db5189b drm/tests: Suballocator test
Traceback (most recent call last):
  File "scripts/spdxcheck.py", line 6, in <module>
    from ply import lex, yacc
ModuleNotFoundError: No module named 'ply'
-:40: WARNING:FILE_PATH_CHANGES: added, moved or deleted file(s), does MAINTAINERS need updating?
#40: 
new file mode 100644

total: 0 errors, 1 warnings, 0 checks, 371 lines checked



^ permalink raw reply	[flat|nested] 21+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/tests: Suballocator test (rev2)
  2023-03-02  8:34 ` [Intel-gfx] " Thomas Hellström
                   ` (3 preceding siblings ...)
  (?)
@ 2023-03-02  9:20 ` Patchwork
  -1 siblings, 0 replies; 21+ messages in thread
From: Patchwork @ 2023-03-02  9:20 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 6996 bytes --]

== Series Details ==

Series: drm/tests: Suballocator test (rev2)
URL   : https://patchwork.freedesktop.org/series/114410/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_12800 -> Patchwork_114410v2
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_114410v2 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_114410v2, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/index.html

Participating hosts (38 -> 3)
------------------------------

  ERROR: It appears as if the changes made in Patchwork_114410v2 prevented too many machines from booting.

  Additional (1): bat-atsm-1 
  Missing    (36): fi-rkl-11600 bat-adls-5 bat-dg1-5 bat-adlp-6 fi-apl-guc fi-snb-2520m bat-rpls-1 fi-blb-e6850 bat-rpls-2 fi-skl-6600u fi-bsw-n3050 bat-dg2-8 bat-adlm-1 bat-dg2-9 fi-ilk-650 fi-hsw-4770 bat-adln-1 fi-ivb-3770 bat-jsl-3 bat-rplp-1 fi-elk-e7500 bat-dg2-11 fi-bsw-nick fi-kbl-7567u bat-dg1-7 bat-kbl-2 bat-adlp-9 fi-skl-guc fi-cfl-8700k fi-glk-j4005 bat-jsl-1 fi-tgl-1115g4 fi-cfl-guc fi-kbl-guc fi-kbl-x1275 fi-cfl-8109u 

Known issues
------------

  Here are the changes found in Patchwork_114410v2 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@fbdev@eof:
    - bat-atsm-1:         NOTRUN -> [SKIP][1] ([i915#2582]) +4 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@fbdev@eof.html

  * igt@gem_mmap@basic:
    - bat-atsm-1:         NOTRUN -> [SKIP][2] ([i915#4083])
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@gem_mmap@basic.html

  * igt@gem_tiled_fence_blits@basic:
    - bat-atsm-1:         NOTRUN -> [SKIP][3] ([i915#4077]) +2 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@gem_tiled_fence_blits@basic.html

  * igt@gem_tiled_pread_basic:
    - bat-atsm-1:         NOTRUN -> [SKIP][4] ([i915#4079]) +1 similar issue
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@gem_tiled_pread_basic.html

  * igt@i915_pm_rps@basic-api:
    - bat-atsm-1:         NOTRUN -> [SKIP][5] ([i915#6621])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@i915_pm_rps@basic-api.html

  * igt@i915_suspend@basic-s3-without-i915:
    - bat-atsm-1:         NOTRUN -> [SKIP][6] ([i915#6645])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@i915_suspend@basic-s3-without-i915.html

  * igt@kms_addfb_basic@size-max:
    - bat-atsm-1:         NOTRUN -> [SKIP][7] ([i915#6077]) +36 similar issues
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@kms_addfb_basic@size-max.html

  * igt@kms_cursor_legacy@basic-flip-after-cursor-atomic:
    - bat-atsm-1:         NOTRUN -> [SKIP][8] ([i915#6078]) +19 similar issues
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@kms_cursor_legacy@basic-flip-after-cursor-atomic.html

  * igt@kms_flip@basic-plain-flip:
    - bat-atsm-1:         NOTRUN -> [SKIP][9] ([i915#6166]) +3 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@kms_flip@basic-plain-flip.html

  * igt@kms_force_connector_basic@prune-stale-modes:
    - bat-atsm-1:         NOTRUN -> [SKIP][10] ([i915#6093]) +3 similar issues
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@kms_force_connector_basic@prune-stale-modes.html

  * igt@kms_pipe_crc_basic@hang-read-crc:
    - bat-atsm-1:         NOTRUN -> [SKIP][11] ([i915#1836]) +6 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@kms_pipe_crc_basic@hang-read-crc.html

  * igt@kms_prop_blob@basic:
    - bat-atsm-1:         NOTRUN -> [SKIP][12] ([i915#7357])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@kms_prop_blob@basic.html

  * igt@kms_psr@sprite_plane_onoff:
    - bat-atsm-1:         NOTRUN -> [SKIP][13] ([i915#1072]) +3 similar issues
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@kms_psr@sprite_plane_onoff.html

  * igt@kms_setmode@basic-clone-single-crtc:
    - bat-atsm-1:         NOTRUN -> [SKIP][14] ([i915#6094])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@kms_setmode@basic-clone-single-crtc.html

  * igt@prime_vgem@basic-fence-flip:
    - bat-atsm-1:         NOTRUN -> [SKIP][15] ([fdo#109295] / [i915#6078])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@prime_vgem@basic-fence-flip.html

  * igt@prime_vgem@basic-fence-mmap:
    - bat-atsm-1:         NOTRUN -> [SKIP][16] ([fdo#109295] / [i915#4077]) +1 similar issue
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@prime_vgem@basic-fence-mmap.html

  * igt@prime_vgem@basic-write:
    - bat-atsm-1:         NOTRUN -> [SKIP][17] ([fdo#109295]) +3 similar issues
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/bat-atsm-1/igt@prime_vgem@basic-write.html

  
  [fdo#109295]: https://bugs.freedesktop.org/show_bug.cgi?id=109295
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#1836]: https://gitlab.freedesktop.org/drm/intel/issues/1836
  [i915#2582]: https://gitlab.freedesktop.org/drm/intel/issues/2582
  [i915#4077]: https://gitlab.freedesktop.org/drm/intel/issues/4077
  [i915#4079]: https://gitlab.freedesktop.org/drm/intel/issues/4079
  [i915#4083]: https://gitlab.freedesktop.org/drm/intel/issues/4083
  [i915#6077]: https://gitlab.freedesktop.org/drm/intel/issues/6077
  [i915#6078]: https://gitlab.freedesktop.org/drm/intel/issues/6078
  [i915#6093]: https://gitlab.freedesktop.org/drm/intel/issues/6093
  [i915#6094]: https://gitlab.freedesktop.org/drm/intel/issues/6094
  [i915#6166]: https://gitlab.freedesktop.org/drm/intel/issues/6166
  [i915#6621]: https://gitlab.freedesktop.org/drm/intel/issues/6621
  [i915#6645]: https://gitlab.freedesktop.org/drm/intel/issues/6645
  [i915#7357]: https://gitlab.freedesktop.org/drm/intel/issues/7357


Build changes
-------------

  * Linux: CI_DRM_12800 -> Patchwork_114410v2

  CI-20190529: 20190529
  CI_DRM_12800: 648a70b879daba67a6e7c69f191e846c4e043854 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_7178: ffe3f6670b91ab975f90799ab3fd0941b6eae019 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_114410v2: 648a70b879daba67a6e7c69f191e846c4e043854 @ git://anongit.freedesktop.org/gfx-ci/linux


### Linux commits

6659e8c16062 drm/tests: Suballocator test

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_114410v2/index.html

[-- Attachment #2: Type: text/html, Size: 8324 bytes --]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RESEND] drm/tests: Suballocator test
  2023-03-02  8:34 ` [Intel-gfx] " Thomas Hellström
  (?)
@ 2023-03-02 13:19   ` Christian König
  -1 siblings, 0 replies; 21+ messages in thread
From: Christian König @ 2023-03-02 13:19 UTC (permalink / raw)
  To: Thomas Hellström, dri-devel; +Cc: intel-gfx, intel-xe

Am 02.03.23 um 09:34 schrieb Thomas Hellström:
> Add a suballocator test to get some test coverage for the new drm
> suballocator, and perform some basic timing (elapsed time).
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>

Nice, I haven't had time to go over it in all detail but it looks pretty 
sophisticated.

Feel free to add an Acked-by: Christian König <christian.koenig@amd.com>.

Regards,
Christian.

> ---
>   drivers/gpu/drm/Kconfig                   |   1 +
>   drivers/gpu/drm/tests/Makefile            |   3 +-
>   drivers/gpu/drm/tests/drm_suballoc_test.c | 356 ++++++++++++++++++++++
>   3 files changed, 359 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/gpu/drm/tests/drm_suballoc_test.c
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 8fbe57407c60..dced53723721 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -78,6 +78,7 @@ config DRM_KUNIT_TEST
>   	select DRM_LIB_RANDOM
>   	select DRM_KMS_HELPER
>   	select DRM_BUDDY
> +	select DRM_SUBALLOC_HELPER
>   	select DRM_EXPORT_FOR_TESTS if m
>   	select DRM_KUNIT_TEST_HELPERS
>   	default KUNIT_ALL_TESTS
> diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
> index bca726a8f483..c664944a48ab 100644
> --- a/drivers/gpu/drm/tests/Makefile
> +++ b/drivers/gpu/drm/tests/Makefile
> @@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
>   	drm_modes_test.o \
>   	drm_plane_helper_test.o \
>   	drm_probe_helper_test.o \
> -	drm_rect_test.o
> +	drm_rect_test.o \
> +	drm_suballoc_test.o
>   
>   CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
> diff --git a/drivers/gpu/drm/tests/drm_suballoc_test.c b/drivers/gpu/drm/tests/drm_suballoc_test.c
> new file mode 100644
> index 000000000000..e7303a5505a0
> --- /dev/null
> +++ b/drivers/gpu/drm/tests/drm_suballoc_test.c
> @@ -0,0 +1,356 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/*
> + * Test case for the drm_suballoc suballocator manager
> + * Copyright 2023 Intel Corporation.
> + */
> +
> +#include <kunit/test.h>
> +
> +#include <linux/dma-fence.h>
> +#include <linux/ktime.h>
> +#include <linux/hrtimer.h>
> +#include <linux/sizes.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +#include <linux/delay.h>
> +#include <drm/drm_suballoc.h>
> +
> +#define SA_ITERATIONS 10000
> +#define SA_SIZE SZ_1M
> +#define SA_DEFAULT_ALIGN SZ_4K
> +
> +static bool intr = true;
> +static bool from_reclaim;
> +static bool pre_throttle;
> +static unsigned int num_rings = 4;
> +static unsigned int iterations = SA_ITERATIONS;
> +
> +static atomic64_t free_space;
> +
> +static atomic_t other_id;
> +
> +struct suballoc_fence;
> +
> +/**
> + * struct suballoc_ring - fake gpu engine.
> + * @list: List of fences to signal.
> + * @signal_time: Accumulated fence signal execution time.
> + * @lock: Protects the suballoc ring members. hardirq safe.
> + * @hrtimer: Fake execution time timer.
> + * @active: The currently active fence for which we have pending work or a
> + *          timer running.
> + * @seqno: Fence submission seqno.
> + * @idx: Index for calculation of fake execution time.
> + * @work: Work struct used solely to move the timer start to a different
> + *        processor than that used for submission.
> + */
> +struct suballoc_ring {
> +	ktime_t signal_time;
> +	struct list_head list;
> +	/* Protect the ring processing. */
> +	spinlock_t lock;
> +	struct hrtimer hrtimer;
> +	struct suballoc_fence *active;
> +	atomic64_t seqno;
> +	u32 idx;
> +	struct work_struct work;
> +};
> +
> +/**
> + * struct suballoc_fence - Hrtimer-driven fence.
> + * @fence: The base class fence struct.
> + * @link: Link for the ring's fence list.
> + * @size: The size of the suballocator range associated with this fence.
> + * @id: Cpu id likely used by the submission thread for suballoc allocation.
> + */
> +struct suballoc_fence {
> +	struct dma_fence fence;
> +	struct list_head link;
> +	size_t size;
> +	unsigned int id;
> +};
> +
> +/* A varying but repeatable fake execution time */
> +static ktime_t ring_next_delay(struct suballoc_ring *ring)
> +{
> +	return ns_to_ktime((u64)(++ring->idx % 8) * 200 * NSEC_PER_USEC);
> +}
> +
> +/*
> + * Launch from a work item to decrease the likelihood of the timer expiry
> + * callback getting called from the allocating cpu.
> + * We want to trigger cache-line bouncing between allocating and signalling
> + * cpus.
> + */
> +static void ring_launch_timer_work(struct work_struct *work)
> +{
> +	struct suballoc_ring *ring =
> +		container_of(work, typeof(*ring), work);
> +
> +	spin_lock_irq(&ring->lock);
> +	if (ring->active)
> +		hrtimer_start_range_ns(&ring->hrtimer, ring_next_delay(ring),
> +				       100ULL * NSEC_PER_USEC,
> +				       HRTIMER_MODE_REL_PINNED);
> +
> +	spin_unlock_irq(&ring->lock);
> +}
> +
> +/*
> + * Signal an active fence and pull the next off the list if any and make it
> + * active.
> + */
> +static enum hrtimer_restart ring_hrtimer_expired(struct hrtimer *hrtimer)
> +{
> +	struct suballoc_ring *ring =
> +		container_of(hrtimer, typeof(*ring), hrtimer);
> +	struct suballoc_fence *sfence;
> +	ktime_t now, then;
> +	unsigned long irqflags;
> +
> +	spin_lock_irqsave(&ring->lock, irqflags);
> +	sfence = ring->active;
> +
> +	if (sfence) {
> +		struct dma_fence *fence = &sfence->fence;
> +
> +		if (sfence->id != get_cpu())
> +			atomic_inc(&other_id);
> +		put_cpu();
> +
> +		then = ktime_get();
> +		dma_fence_signal(fence);
> +		now = ktime_get();
> +		dma_fence_put(fence);
> +		ring->signal_time = ktime_add(ring->signal_time,
> +					      ktime_sub(now, then));
> +		ring->active = NULL;
> +		atomic64_add(sfence->size, &free_space);
> +	}
> +
> +	sfence = list_first_entry_or_null(&ring->list, typeof(*sfence), link);
> +	if (sfence) {
> +		list_del_init(&sfence->link);
> +		ring->active = sfence;
> +		spin_unlock_irqrestore(&ring->lock, irqflags);
> +		hrtimer_forward_now(&ring->hrtimer, ring_next_delay(ring));
> +		return HRTIMER_RESTART;
> +	}
> +	spin_unlock_irqrestore(&ring->lock, irqflags);
> +
> +	return HRTIMER_NORESTART;
> +}
> +
> +/*
> + * Queue a fence on a ring and if it's the first fence, make it active.
> + */
> +static void ring_add_fence(struct suballoc_ring *ring,
> +			   struct suballoc_fence *sfence)
> +{
> +	spin_lock_irq(&ring->lock);
> +	if (!ring->active) {
> +		ring->active = sfence;
> +		queue_work(system_unbound_wq, &ring->work);
> +	} else {
> +		list_add_tail(&sfence->link, &ring->list);
> +	}
> +	spin_unlock_irq(&ring->lock);
> +}
> +
> +static void ring_init(struct suballoc_ring *ring)
> +{
> +	memset(ring, 0, sizeof(*ring));
> +	INIT_LIST_HEAD(&ring->list);
> +	spin_lock_init(&ring->lock);
> +	hrtimer_init(&ring->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	ring->hrtimer.function = ring_hrtimer_expired;
> +	INIT_WORK(&ring->work, ring_launch_timer_work);
> +}
> +
> +static bool ring_idle(struct suballoc_ring *ring)
> +{
> +	bool tmp;
> +
> +	spin_lock_irq(&ring->lock);
> +	tmp = !ring->active;
> +	spin_unlock_irq(&ring->lock);
> +
> +	return tmp;
> +}
> +
> +static const char *dma_fence_get_suballoc_name(struct dma_fence *fence)
> +{
> +	return "suballoc";
> +}
> +
> +static const struct dma_fence_ops dma_fence_suballoc_ops = {
> +	.get_driver_name = dma_fence_get_suballoc_name,
> +	.get_timeline_name = dma_fence_get_suballoc_name,
> +};
> +
> +static DEFINE_SPINLOCK(sa_fence_lock);
> +static ktime_t alloctime, freetime;
> +
> +static void drm_test_suballoc(struct kunit *test)
> +{
> +	struct suballoc_ring *rings;
> +	struct drm_suballoc_manager sa_manager;
> +	struct drm_suballoc *sa;
> +	struct suballoc_fence *sfence;
> +	struct dma_fence *fence;
> +	ktime_t then, now, signaltime;
> +	int i, ring, iter_tot = 0;
> +	size_t size;
> +	unsigned int align;
> +	unsigned long long soffset;
> +	gfp_t gfp;
> +
> +	rings = kvmalloc_array(num_rings, sizeof(*rings), GFP_KERNEL);
> +	if (!rings) {
> +		KUNIT_FAIL(test, "Failed allocating %u rings.\n");
> +		return;
> +	}
> +
> +	for (i = 0; i < num_rings; ++i)
> +		ring_init(rings + i);
> +
> +	atomic64_set(&free_space, SA_SIZE);
> +	drm_suballoc_manager_init(&sa_manager, SA_SIZE, SA_DEFAULT_ALIGN);
> +
> +	if (from_reclaim)
> +		gfp = GFP_NOWAIT | __GFP_NOWARN;
> +	else
> +		gfp = GFP_KERNEL;
> +
> +	for (i = 0; i < iterations; ++i) {
> +		ring = i % num_rings;
> +		size = (ring + 1) * SZ_4K;
> +		align = 1 << (ring % const_ilog2(SA_DEFAULT_ALIGN));
> +
> +		if (pre_throttle)
> +			while (atomic64_read(&free_space) < SA_SIZE / 2)
> +				cpu_relax();
> +
> +		if (from_reclaim)
> +			fs_reclaim_acquire(GFP_KERNEL);
> +
> +		then = ktime_get();
> +		sa = drm_suballoc_new(&sa_manager, size, gfp, intr, align);
> +		now = ktime_get();
> +		if (from_reclaim)
> +			fs_reclaim_release(GFP_KERNEL);
> +
> +		alloctime = ktime_add(alloctime, ktime_sub(now, then));
> +
> +		iter_tot++;
> +		if (IS_ERR(sa)) {
> +			if (from_reclaim) {
> +				iter_tot--;
> +				continue;
> +			}
> +
> +			KUNIT_FAIL(test, "drm_suballoc_new() returned %pe\n",
> +				   sa);
> +			break;
> +		}
> +
> +		atomic64_sub(size, &free_space);
> +		soffset = drm_suballoc_soffset(sa);
> +		if (!IS_ALIGNED(soffset, align)) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Incorrect alignment: offset %llu align %u rem %llu\n",
> +				   soffset, align, soffset & (align - 1));
> +			break;
> +		}
> +
> +		if (drm_suballoc_eoffset(sa) > SA_SIZE) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Allocation beyond end.\n");
> +			break;
> +		}
> +
> +		if (drm_suballoc_size(sa) < size ||
> +		    drm_suballoc_size(sa) >= size + align) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Incorrect size.\n");
> +			break;
> +		}
> +
> +		sfence = kmalloc(sizeof(*sfence), GFP_KERNEL);
> +		if (unlikely(!sfence)) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Fence allocation failed.\n");
> +			break;
> +		}
> +		fence = &sfence->fence;
> +		dma_fence_init(fence, &dma_fence_suballoc_ops, &sa_fence_lock,
> +			       ring + 1,
> +			       atomic64_inc_return(&rings[ring].seqno));
> +		sfence->size = size;
> +		sfence->id = get_cpu();
> +		put_cpu();
> +
> +		ring_add_fence(rings + ring, sfence);
> +
> +		then = ktime_get();
> +		drm_suballoc_free(sa, fence);
> +		now = ktime_get();
> +		freetime = ktime_add(freetime, ktime_sub(now, then));
> +	}
> +
> +	signaltime = ktime_set(0, 0);
> +	for (i = 0; i < num_rings; ++i) {
> +		struct suballoc_ring *sring = &rings[i];
> +
> +		flush_work(&sring->work);
> +		while (!ring_idle(sring))
> +			schedule();
> +		signaltime = ktime_add(signaltime, sring->signal_time);
> +	}
> +
> +	kvfree(rings);
> +
> +	kunit_info(test, "signals on different processor: %d of %d\n",
> +		   atomic_read(&other_id), iter_tot);
> +	drm_suballoc_manager_fini(&sa_manager);
> +	kunit_info(test, "Alloc time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(alloctime) / iter_tot);
> +	kunit_info(test, "Free time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(freetime) / iter_tot);
> +	kunit_info(test, "Signal time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(signaltime) / iter_tot);
> +
> +	if (atomic64_read(&free_space) != SA_SIZE) {
> +		kunit_warn(test, "Test sanity check failed.\n");
> +		kunit_warn(test, "Space left at exit is %lld of %d\n",
> +			   (long long)atomic64_read(&free_space), SA_SIZE);
> +	}
> +}
> +
> +module_param(intr, bool, 0400);
> +MODULE_PARM_DESC(intr, "Whether to wait interruptible for space.");
> +module_param(from_reclaim, bool, 0400);
> +MODULE_PARM_DESC(from_reclaim, "Whether to suballocate from reclaim context.");
> +module_param(pre_throttle, bool, 0400);
> +MODULE_PARM_DESC(pre_throttle, "Whether to have the test throttle for space "
> +		 "before allocations.");
> +module_param(num_rings, uint, 0400);
> +MODULE_PARM_DESC(num_rings, "Number of rings signalling fences in order.\n");
> +module_param(iterations, uint, 0400);
> +MODULE_PARM_DESC(iterations, "Number of allocations to perform.\n");
> +
> +static struct kunit_case drm_suballoc_tests[] = {
> +	KUNIT_CASE(drm_test_suballoc),
> +	{}
> +};
> +
> +static struct kunit_suite drm_suballoc_test_suite = {
> +	.name = "drm_suballoc",
> +	.test_cases = drm_suballoc_tests,
> +};
> +
> +kunit_test_suite(drm_suballoc_test_suite);
> +
> +MODULE_AUTHOR("Intel Corporation");
> +MODULE_DESCRIPTION("DRM suballocator Kunit test");
> +MODULE_LICENSE("Dual MIT/GPL");


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-02 13:19   ` Christian König
  0 siblings, 0 replies; 21+ messages in thread
From: Christian König @ 2023-03-02 13:19 UTC (permalink / raw)
  To: Thomas Hellström, dri-devel; +Cc: intel-gfx, intel-xe

Am 02.03.23 um 09:34 schrieb Thomas Hellström:
> Add a suballocator test to get some test coverage for the new drm
> suballocator, and perform some basic timing (elapsed time).
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>

Nice, I haven't had time to go over it in all detail but it looks pretty 
sophisticated.

Feel free to add an Acked-by: Christian König <christian.koenig@amd.com>.

Regards,
Christian.

> ---
>   drivers/gpu/drm/Kconfig                   |   1 +
>   drivers/gpu/drm/tests/Makefile            |   3 +-
>   drivers/gpu/drm/tests/drm_suballoc_test.c | 356 ++++++++++++++++++++++
>   3 files changed, 359 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/gpu/drm/tests/drm_suballoc_test.c
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 8fbe57407c60..dced53723721 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -78,6 +78,7 @@ config DRM_KUNIT_TEST
>   	select DRM_LIB_RANDOM
>   	select DRM_KMS_HELPER
>   	select DRM_BUDDY
> +	select DRM_SUBALLOC_HELPER
>   	select DRM_EXPORT_FOR_TESTS if m
>   	select DRM_KUNIT_TEST_HELPERS
>   	default KUNIT_ALL_TESTS
> diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
> index bca726a8f483..c664944a48ab 100644
> --- a/drivers/gpu/drm/tests/Makefile
> +++ b/drivers/gpu/drm/tests/Makefile
> @@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
>   	drm_modes_test.o \
>   	drm_plane_helper_test.o \
>   	drm_probe_helper_test.o \
> -	drm_rect_test.o
> +	drm_rect_test.o \
> +	drm_suballoc_test.o
>   
>   CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
> diff --git a/drivers/gpu/drm/tests/drm_suballoc_test.c b/drivers/gpu/drm/tests/drm_suballoc_test.c
> new file mode 100644
> index 000000000000..e7303a5505a0
> --- /dev/null
> +++ b/drivers/gpu/drm/tests/drm_suballoc_test.c
> @@ -0,0 +1,356 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/*
> + * Test case for the drm_suballoc suballocator manager
> + * Copyright 2023 Intel Corporation.
> + */
> +
> +#include <kunit/test.h>
> +
> +#include <linux/dma-fence.h>
> +#include <linux/ktime.h>
> +#include <linux/hrtimer.h>
> +#include <linux/sizes.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +#include <linux/delay.h>
> +#include <drm/drm_suballoc.h>
> +
> +#define SA_ITERATIONS 10000
> +#define SA_SIZE SZ_1M
> +#define SA_DEFAULT_ALIGN SZ_4K
> +
> +static bool intr = true;
> +static bool from_reclaim;
> +static bool pre_throttle;
> +static unsigned int num_rings = 4;
> +static unsigned int iterations = SA_ITERATIONS;
> +
> +static atomic64_t free_space;
> +
> +static atomic_t other_id;
> +
> +struct suballoc_fence;
> +
> +/**
> + * struct suballoc_ring - fake gpu engine.
> + * @list: List of fences to signal.
> + * @signal_time: Accumulated fence signal execution time.
> + * @lock: Protects the suballoc ring members. hardirq safe.
> + * @hrtimer: Fake execution time timer.
> + * @active: The currently active fence for which we have pending work or a
> + *          timer running.
> + * @seqno: Fence submission seqno.
> + * @idx: Index for calculation of fake execution time.
> + * @work: Work struct used solely to move the timer start to a different
> + *        processor than that used for submission.
> + */
> +struct suballoc_ring {
> +	ktime_t signal_time;
> +	struct list_head list;
> +	/* Protect the ring processing. */
> +	spinlock_t lock;
> +	struct hrtimer hrtimer;
> +	struct suballoc_fence *active;
> +	atomic64_t seqno;
> +	u32 idx;
> +	struct work_struct work;
> +};
> +
> +/**
> + * struct suballoc_fence - Hrtimer-driven fence.
> + * @fence: The base class fence struct.
> + * @link: Link for the ring's fence list.
> + * @size: The size of the suballocator range associated with this fence.
> + * @id: Cpu id likely used by the submission thread for suballoc allocation.
> + */
> +struct suballoc_fence {
> +	struct dma_fence fence;
> +	struct list_head link;
> +	size_t size;
> +	unsigned int id;
> +};
> +
> +/* A varying but repeatable fake execution time */
> +static ktime_t ring_next_delay(struct suballoc_ring *ring)
> +{
> +	return ns_to_ktime((u64)(++ring->idx % 8) * 200 * NSEC_PER_USEC);
> +}
> +
> +/*
> + * Launch from a work item to decrease the likelihood of the timer expiry
> + * callback getting called from the allocating cpu.
> + * We want to trigger cache-line bouncing between allocating and signalling
> + * cpus.
> + */
> +static void ring_launch_timer_work(struct work_struct *work)
> +{
> +	struct suballoc_ring *ring =
> +		container_of(work, typeof(*ring), work);
> +
> +	spin_lock_irq(&ring->lock);
> +	if (ring->active)
> +		hrtimer_start_range_ns(&ring->hrtimer, ring_next_delay(ring),
> +				       100ULL * NSEC_PER_USEC,
> +				       HRTIMER_MODE_REL_PINNED);
> +
> +	spin_unlock_irq(&ring->lock);
> +}
> +
> +/*
> + * Signal an active fence and pull the next off the list if any and make it
> + * active.
> + */
> +static enum hrtimer_restart ring_hrtimer_expired(struct hrtimer *hrtimer)
> +{
> +	struct suballoc_ring *ring =
> +		container_of(hrtimer, typeof(*ring), hrtimer);
> +	struct suballoc_fence *sfence;
> +	ktime_t now, then;
> +	unsigned long irqflags;
> +
> +	spin_lock_irqsave(&ring->lock, irqflags);
> +	sfence = ring->active;
> +
> +	if (sfence) {
> +		struct dma_fence *fence = &sfence->fence;
> +
> +		if (sfence->id != get_cpu())
> +			atomic_inc(&other_id);
> +		put_cpu();
> +
> +		then = ktime_get();
> +		dma_fence_signal(fence);
> +		now = ktime_get();
> +		dma_fence_put(fence);
> +		ring->signal_time = ktime_add(ring->signal_time,
> +					      ktime_sub(now, then));
> +		ring->active = NULL;
> +		atomic64_add(sfence->size, &free_space);
> +	}
> +
> +	sfence = list_first_entry_or_null(&ring->list, typeof(*sfence), link);
> +	if (sfence) {
> +		list_del_init(&sfence->link);
> +		ring->active = sfence;
> +		spin_unlock_irqrestore(&ring->lock, irqflags);
> +		hrtimer_forward_now(&ring->hrtimer, ring_next_delay(ring));
> +		return HRTIMER_RESTART;
> +	}
> +	spin_unlock_irqrestore(&ring->lock, irqflags);
> +
> +	return HRTIMER_NORESTART;
> +}
> +
> +/*
> + * Queue a fence on a ring and if it's the first fence, make it active.
> + */
> +static void ring_add_fence(struct suballoc_ring *ring,
> +			   struct suballoc_fence *sfence)
> +{
> +	spin_lock_irq(&ring->lock);
> +	if (!ring->active) {
> +		ring->active = sfence;
> +		queue_work(system_unbound_wq, &ring->work);
> +	} else {
> +		list_add_tail(&sfence->link, &ring->list);
> +	}
> +	spin_unlock_irq(&ring->lock);
> +}
> +
> +static void ring_init(struct suballoc_ring *ring)
> +{
> +	memset(ring, 0, sizeof(*ring));
> +	INIT_LIST_HEAD(&ring->list);
> +	spin_lock_init(&ring->lock);
> +	hrtimer_init(&ring->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	ring->hrtimer.function = ring_hrtimer_expired;
> +	INIT_WORK(&ring->work, ring_launch_timer_work);
> +}
> +
> +static bool ring_idle(struct suballoc_ring *ring)
> +{
> +	bool tmp;
> +
> +	spin_lock_irq(&ring->lock);
> +	tmp = !ring->active;
> +	spin_unlock_irq(&ring->lock);
> +
> +	return tmp;
> +}
> +
> +static const char *dma_fence_get_suballoc_name(struct dma_fence *fence)
> +{
> +	return "suballoc";
> +}
> +
> +static const struct dma_fence_ops dma_fence_suballoc_ops = {
> +	.get_driver_name = dma_fence_get_suballoc_name,
> +	.get_timeline_name = dma_fence_get_suballoc_name,
> +};
> +
> +static DEFINE_SPINLOCK(sa_fence_lock);
> +static ktime_t alloctime, freetime;
> +
> +static void drm_test_suballoc(struct kunit *test)
> +{
> +	struct suballoc_ring *rings;
> +	struct drm_suballoc_manager sa_manager;
> +	struct drm_suballoc *sa;
> +	struct suballoc_fence *sfence;
> +	struct dma_fence *fence;
> +	ktime_t then, now, signaltime;
> +	int i, ring, iter_tot = 0;
> +	size_t size;
> +	unsigned int align;
> +	unsigned long long soffset;
> +	gfp_t gfp;
> +
> +	rings = kvmalloc_array(num_rings, sizeof(*rings), GFP_KERNEL);
> +	if (!rings) {
> +		KUNIT_FAIL(test, "Failed allocating %u rings.\n");
> +		return;
> +	}
> +
> +	for (i = 0; i < num_rings; ++i)
> +		ring_init(rings + i);
> +
> +	atomic64_set(&free_space, SA_SIZE);
> +	drm_suballoc_manager_init(&sa_manager, SA_SIZE, SA_DEFAULT_ALIGN);
> +
> +	if (from_reclaim)
> +		gfp = GFP_NOWAIT | __GFP_NOWARN;
> +	else
> +		gfp = GFP_KERNEL;
> +
> +	for (i = 0; i < iterations; ++i) {
> +		ring = i % num_rings;
> +		size = (ring + 1) * SZ_4K;
> +		align = 1 << (ring % const_ilog2(SA_DEFAULT_ALIGN));
> +
> +		if (pre_throttle)
> +			while (atomic64_read(&free_space) < SA_SIZE / 2)
> +				cpu_relax();
> +
> +		if (from_reclaim)
> +			fs_reclaim_acquire(GFP_KERNEL);
> +
> +		then = ktime_get();
> +		sa = drm_suballoc_new(&sa_manager, size, gfp, intr, align);
> +		now = ktime_get();
> +		if (from_reclaim)
> +			fs_reclaim_release(GFP_KERNEL);
> +
> +		alloctime = ktime_add(alloctime, ktime_sub(now, then));
> +
> +		iter_tot++;
> +		if (IS_ERR(sa)) {
> +			if (from_reclaim) {
> +				iter_tot--;
> +				continue;
> +			}
> +
> +			KUNIT_FAIL(test, "drm_suballoc_new() returned %pe\n",
> +				   sa);
> +			break;
> +		}
> +
> +		atomic64_sub(size, &free_space);
> +		soffset = drm_suballoc_soffset(sa);
> +		if (!IS_ALIGNED(soffset, align)) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Incorrect alignment: offset %llu align %u rem %llu\n",
> +				   soffset, align, soffset & (align - 1));
> +			break;
> +		}
> +
> +		if (drm_suballoc_eoffset(sa) > SA_SIZE) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Allocation beyond end.\n");
> +			break;
> +		}
> +
> +		if (drm_suballoc_size(sa) < size ||
> +		    drm_suballoc_size(sa) >= size + align) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Incorrect size.\n");
> +			break;
> +		}
> +
> +		sfence = kmalloc(sizeof(*sfence), GFP_KERNEL);
> +		if (unlikely(!sfence)) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Fence allocation failed.\n");
> +			break;
> +		}
> +		fence = &sfence->fence;
> +		dma_fence_init(fence, &dma_fence_suballoc_ops, &sa_fence_lock,
> +			       ring + 1,
> +			       atomic64_inc_return(&rings[ring].seqno));
> +		sfence->size = size;
> +		sfence->id = get_cpu();
> +		put_cpu();
> +
> +		ring_add_fence(rings + ring, sfence);
> +
> +		then = ktime_get();
> +		drm_suballoc_free(sa, fence);
> +		now = ktime_get();
> +		freetime = ktime_add(freetime, ktime_sub(now, then));
> +	}
> +
> +	signaltime = ktime_set(0, 0);
> +	for (i = 0; i < num_rings; ++i) {
> +		struct suballoc_ring *sring = &rings[i];
> +
> +		flush_work(&sring->work);
> +		while (!ring_idle(sring))
> +			schedule();
> +		signaltime = ktime_add(signaltime, sring->signal_time);
> +	}
> +
> +	kvfree(rings);
> +
> +	kunit_info(test, "signals on different processor: %d of %d\n",
> +		   atomic_read(&other_id), iter_tot);
> +	drm_suballoc_manager_fini(&sa_manager);
> +	kunit_info(test, "Alloc time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(alloctime) / iter_tot);
> +	kunit_info(test, "Free time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(freetime) / iter_tot);
> +	kunit_info(test, "Signal time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(signaltime) / iter_tot);
> +
> +	if (atomic64_read(&free_space) != SA_SIZE) {
> +		kunit_warn(test, "Test sanity check failed.\n");
> +		kunit_warn(test, "Space left at exit is %lld of %d\n",
> +			   (long long)atomic64_read(&free_space), SA_SIZE);
> +	}
> +}
> +
> +module_param(intr, bool, 0400);
> +MODULE_PARM_DESC(intr, "Whether to wait interruptible for space.");
> +module_param(from_reclaim, bool, 0400);
> +MODULE_PARM_DESC(from_reclaim, "Whether to suballocate from reclaim context.");
> +module_param(pre_throttle, bool, 0400);
> +MODULE_PARM_DESC(pre_throttle, "Whether to have the test throttle for space "
> +		 "before allocations.");
> +module_param(num_rings, uint, 0400);
> +MODULE_PARM_DESC(num_rings, "Number of rings signalling fences in order.\n");
> +module_param(iterations, uint, 0400);
> +MODULE_PARM_DESC(iterations, "Number of allocations to perform.\n");
> +
> +static struct kunit_case drm_suballoc_tests[] = {
> +	KUNIT_CASE(drm_test_suballoc),
> +	{}
> +};
> +
> +static struct kunit_suite drm_suballoc_test_suite = {
> +	.name = "drm_suballoc",
> +	.test_cases = drm_suballoc_tests,
> +};
> +
> +kunit_test_suite(drm_suballoc_test_suite);
> +
> +MODULE_AUTHOR("Intel Corporation");
> +MODULE_DESCRIPTION("DRM suballocator Kunit test");
> +MODULE_LICENSE("Dual MIT/GPL");


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-xe] [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-02 13:19   ` Christian König
  0 siblings, 0 replies; 21+ messages in thread
From: Christian König @ 2023-03-02 13:19 UTC (permalink / raw)
  To: Thomas Hellström, dri-devel; +Cc: intel-gfx, intel-xe

Am 02.03.23 um 09:34 schrieb Thomas Hellström:
> Add a suballocator test to get some test coverage for the new drm
> suballocator, and perform some basic timing (elapsed time).
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>

Nice, I haven't had time to go over it in all detail but it looks pretty 
sophisticated.

Feel free to add an Acked-by: Christian König <christian.koenig@amd.com>.

Regards,
Christian.

> ---
>   drivers/gpu/drm/Kconfig                   |   1 +
>   drivers/gpu/drm/tests/Makefile            |   3 +-
>   drivers/gpu/drm/tests/drm_suballoc_test.c | 356 ++++++++++++++++++++++
>   3 files changed, 359 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/gpu/drm/tests/drm_suballoc_test.c
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 8fbe57407c60..dced53723721 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -78,6 +78,7 @@ config DRM_KUNIT_TEST
>   	select DRM_LIB_RANDOM
>   	select DRM_KMS_HELPER
>   	select DRM_BUDDY
> +	select DRM_SUBALLOC_HELPER
>   	select DRM_EXPORT_FOR_TESTS if m
>   	select DRM_KUNIT_TEST_HELPERS
>   	default KUNIT_ALL_TESTS
> diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
> index bca726a8f483..c664944a48ab 100644
> --- a/drivers/gpu/drm/tests/Makefile
> +++ b/drivers/gpu/drm/tests/Makefile
> @@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
>   	drm_modes_test.o \
>   	drm_plane_helper_test.o \
>   	drm_probe_helper_test.o \
> -	drm_rect_test.o
> +	drm_rect_test.o \
> +	drm_suballoc_test.o
>   
>   CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
> diff --git a/drivers/gpu/drm/tests/drm_suballoc_test.c b/drivers/gpu/drm/tests/drm_suballoc_test.c
> new file mode 100644
> index 000000000000..e7303a5505a0
> --- /dev/null
> +++ b/drivers/gpu/drm/tests/drm_suballoc_test.c
> @@ -0,0 +1,356 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/*
> + * Test case for the drm_suballoc suballocator manager
> + * Copyright 2023 Intel Corporation.
> + */
> +
> +#include <kunit/test.h>
> +
> +#include <linux/dma-fence.h>
> +#include <linux/ktime.h>
> +#include <linux/hrtimer.h>
> +#include <linux/sizes.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +#include <linux/delay.h>
> +#include <drm/drm_suballoc.h>
> +
> +#define SA_ITERATIONS 10000
> +#define SA_SIZE SZ_1M
> +#define SA_DEFAULT_ALIGN SZ_4K
> +
> +static bool intr = true;
> +static bool from_reclaim;
> +static bool pre_throttle;
> +static unsigned int num_rings = 4;
> +static unsigned int iterations = SA_ITERATIONS;
> +
> +static atomic64_t free_space;
> +
> +static atomic_t other_id;
> +
> +struct suballoc_fence;
> +
> +/**
> + * struct suballoc_ring - fake gpu engine.
> + * @list: List of fences to signal.
> + * @signal_time: Accumulated fence signal execution time.
> + * @lock: Protects the suballoc ring members. hardirq safe.
> + * @hrtimer: Fake execution time timer.
> + * @active: The currently active fence for which we have pending work or a
> + *          timer running.
> + * @seqno: Fence submission seqno.
> + * @idx: Index for calculation of fake execution time.
> + * @work: Work struct used solely to move the timer start to a different
> + *        processor than that used for submission.
> + */
> +struct suballoc_ring {
> +	ktime_t signal_time;
> +	struct list_head list;
> +	/* Protect the ring processing. */
> +	spinlock_t lock;
> +	struct hrtimer hrtimer;
> +	struct suballoc_fence *active;
> +	atomic64_t seqno;
> +	u32 idx;
> +	struct work_struct work;
> +};
> +
> +/**
> + * struct suballoc_fence - Hrtimer-driven fence.
> + * @fence: The base class fence struct.
> + * @link: Link for the ring's fence list.
> + * @size: The size of the suballocator range associated with this fence.
> + * @id: Cpu id likely used by the submission thread for suballoc allocation.
> + */
> +struct suballoc_fence {
> +	struct dma_fence fence;
> +	struct list_head link;
> +	size_t size;
> +	unsigned int id;
> +};
> +
> +/* A varying but repeatable fake execution time */
> +static ktime_t ring_next_delay(struct suballoc_ring *ring)
> +{
> +	return ns_to_ktime((u64)(++ring->idx % 8) * 200 * NSEC_PER_USEC);
> +}
> +
> +/*
> + * Launch from a work item to decrease the likelihood of the timer expiry
> + * callback getting called from the allocating cpu.
> + * We want to trigger cache-line bouncing between allocating and signalling
> + * cpus.
> + */
> +static void ring_launch_timer_work(struct work_struct *work)
> +{
> +	struct suballoc_ring *ring =
> +		container_of(work, typeof(*ring), work);
> +
> +	spin_lock_irq(&ring->lock);
> +	if (ring->active)
> +		hrtimer_start_range_ns(&ring->hrtimer, ring_next_delay(ring),
> +				       100ULL * NSEC_PER_USEC,
> +				       HRTIMER_MODE_REL_PINNED);
> +
> +	spin_unlock_irq(&ring->lock);
> +}
> +
> +/*
> + * Signal an active fence and pull the next off the list if any and make it
> + * active.
> + */
> +static enum hrtimer_restart ring_hrtimer_expired(struct hrtimer *hrtimer)
> +{
> +	struct suballoc_ring *ring =
> +		container_of(hrtimer, typeof(*ring), hrtimer);
> +	struct suballoc_fence *sfence;
> +	ktime_t now, then;
> +	unsigned long irqflags;
> +
> +	spin_lock_irqsave(&ring->lock, irqflags);
> +	sfence = ring->active;
> +
> +	if (sfence) {
> +		struct dma_fence *fence = &sfence->fence;
> +
> +		if (sfence->id != get_cpu())
> +			atomic_inc(&other_id);
> +		put_cpu();
> +
> +		then = ktime_get();
> +		dma_fence_signal(fence);
> +		now = ktime_get();
> +		dma_fence_put(fence);
> +		ring->signal_time = ktime_add(ring->signal_time,
> +					      ktime_sub(now, then));
> +		ring->active = NULL;
> +		atomic64_add(sfence->size, &free_space);
> +	}
> +
> +	sfence = list_first_entry_or_null(&ring->list, typeof(*sfence), link);
> +	if (sfence) {
> +		list_del_init(&sfence->link);
> +		ring->active = sfence;
> +		spin_unlock_irqrestore(&ring->lock, irqflags);
> +		hrtimer_forward_now(&ring->hrtimer, ring_next_delay(ring));
> +		return HRTIMER_RESTART;
> +	}
> +	spin_unlock_irqrestore(&ring->lock, irqflags);
> +
> +	return HRTIMER_NORESTART;
> +}
> +
> +/*
> + * Queue a fence on a ring and if it's the first fence, make it active.
> + */
> +static void ring_add_fence(struct suballoc_ring *ring,
> +			   struct suballoc_fence *sfence)
> +{
> +	spin_lock_irq(&ring->lock);
> +	if (!ring->active) {
> +		ring->active = sfence;
> +		queue_work(system_unbound_wq, &ring->work);
> +	} else {
> +		list_add_tail(&sfence->link, &ring->list);
> +	}
> +	spin_unlock_irq(&ring->lock);
> +}
> +
> +static void ring_init(struct suballoc_ring *ring)
> +{
> +	memset(ring, 0, sizeof(*ring));
> +	INIT_LIST_HEAD(&ring->list);
> +	spin_lock_init(&ring->lock);
> +	hrtimer_init(&ring->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	ring->hrtimer.function = ring_hrtimer_expired;
> +	INIT_WORK(&ring->work, ring_launch_timer_work);
> +}
> +
> +static bool ring_idle(struct suballoc_ring *ring)
> +{
> +	bool tmp;
> +
> +	spin_lock_irq(&ring->lock);
> +	tmp = !ring->active;
> +	spin_unlock_irq(&ring->lock);
> +
> +	return tmp;
> +}
> +
> +static const char *dma_fence_get_suballoc_name(struct dma_fence *fence)
> +{
> +	return "suballoc";
> +}
> +
> +static const struct dma_fence_ops dma_fence_suballoc_ops = {
> +	.get_driver_name = dma_fence_get_suballoc_name,
> +	.get_timeline_name = dma_fence_get_suballoc_name,
> +};
> +
> +static DEFINE_SPINLOCK(sa_fence_lock);
> +static ktime_t alloctime, freetime;
> +
> +static void drm_test_suballoc(struct kunit *test)
> +{
> +	struct suballoc_ring *rings;
> +	struct drm_suballoc_manager sa_manager;
> +	struct drm_suballoc *sa;
> +	struct suballoc_fence *sfence;
> +	struct dma_fence *fence;
> +	ktime_t then, now, signaltime;
> +	int i, ring, iter_tot = 0;
> +	size_t size;
> +	unsigned int align;
> +	unsigned long long soffset;
> +	gfp_t gfp;
> +
> +	rings = kvmalloc_array(num_rings, sizeof(*rings), GFP_KERNEL);
> +	if (!rings) {
> +		KUNIT_FAIL(test, "Failed allocating %u rings.\n");
> +		return;
> +	}
> +
> +	for (i = 0; i < num_rings; ++i)
> +		ring_init(rings + i);
> +
> +	atomic64_set(&free_space, SA_SIZE);
> +	drm_suballoc_manager_init(&sa_manager, SA_SIZE, SA_DEFAULT_ALIGN);
> +
> +	if (from_reclaim)
> +		gfp = GFP_NOWAIT | __GFP_NOWARN;
> +	else
> +		gfp = GFP_KERNEL;
> +
> +	for (i = 0; i < iterations; ++i) {
> +		ring = i % num_rings;
> +		size = (ring + 1) * SZ_4K;
> +		align = 1 << (ring % const_ilog2(SA_DEFAULT_ALIGN));
> +
> +		if (pre_throttle)
> +			while (atomic64_read(&free_space) < SA_SIZE / 2)
> +				cpu_relax();
> +
> +		if (from_reclaim)
> +			fs_reclaim_acquire(GFP_KERNEL);
> +
> +		then = ktime_get();
> +		sa = drm_suballoc_new(&sa_manager, size, gfp, intr, align);
> +		now = ktime_get();
> +		if (from_reclaim)
> +			fs_reclaim_release(GFP_KERNEL);
> +
> +		alloctime = ktime_add(alloctime, ktime_sub(now, then));
> +
> +		iter_tot++;
> +		if (IS_ERR(sa)) {
> +			if (from_reclaim) {
> +				iter_tot--;
> +				continue;
> +			}
> +
> +			KUNIT_FAIL(test, "drm_suballoc_new() returned %pe\n",
> +				   sa);
> +			break;
> +		}
> +
> +		atomic64_sub(size, &free_space);
> +		soffset = drm_suballoc_soffset(sa);
> +		if (!IS_ALIGNED(soffset, align)) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Incorrect alignment: offset %llu align %u rem %llu\n",
> +				   soffset, align, soffset & (align - 1));
> +			break;
> +		}
> +
> +		if (drm_suballoc_eoffset(sa) > SA_SIZE) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Allocation beyond end.\n");
> +			break;
> +		}
> +
> +		if (drm_suballoc_size(sa) < size ||
> +		    drm_suballoc_size(sa) >= size + align) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Incorrect size.\n");
> +			break;
> +		}
> +
> +		sfence = kmalloc(sizeof(*sfence), GFP_KERNEL);
> +		if (unlikely(!sfence)) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Fence allocation failed.\n");
> +			break;
> +		}
> +		fence = &sfence->fence;
> +		dma_fence_init(fence, &dma_fence_suballoc_ops, &sa_fence_lock,
> +			       ring + 1,
> +			       atomic64_inc_return(&rings[ring].seqno));
> +		sfence->size = size;
> +		sfence->id = get_cpu();
> +		put_cpu();
> +
> +		ring_add_fence(rings + ring, sfence);
> +
> +		then = ktime_get();
> +		drm_suballoc_free(sa, fence);
> +		now = ktime_get();
> +		freetime = ktime_add(freetime, ktime_sub(now, then));
> +	}
> +
> +	signaltime = ktime_set(0, 0);
> +	for (i = 0; i < num_rings; ++i) {
> +		struct suballoc_ring *sring = &rings[i];
> +
> +		flush_work(&sring->work);
> +		while (!ring_idle(sring))
> +			schedule();
> +		signaltime = ktime_add(signaltime, sring->signal_time);
> +	}
> +
> +	kvfree(rings);
> +
> +	kunit_info(test, "signals on different processor: %d of %d\n",
> +		   atomic_read(&other_id), iter_tot);
> +	drm_suballoc_manager_fini(&sa_manager);
> +	kunit_info(test, "Alloc time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(alloctime) / iter_tot);
> +	kunit_info(test, "Free time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(freetime) / iter_tot);
> +	kunit_info(test, "Signal time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(signaltime) / iter_tot);
> +
> +	if (atomic64_read(&free_space) != SA_SIZE) {
> +		kunit_warn(test, "Test sanity check failed.\n");
> +		kunit_warn(test, "Space left at exit is %lld of %d\n",
> +			   (long long)atomic64_read(&free_space), SA_SIZE);
> +	}
> +}
> +
> +module_param(intr, bool, 0400);
> +MODULE_PARM_DESC(intr, "Whether to wait interruptible for space.");
> +module_param(from_reclaim, bool, 0400);
> +MODULE_PARM_DESC(from_reclaim, "Whether to suballocate from reclaim context.");
> +module_param(pre_throttle, bool, 0400);
> +MODULE_PARM_DESC(pre_throttle, "Whether to have the test throttle for space "
> +		 "before allocations.");
> +module_param(num_rings, uint, 0400);
> +MODULE_PARM_DESC(num_rings, "Number of rings signalling fences in order.\n");
> +module_param(iterations, uint, 0400);
> +MODULE_PARM_DESC(iterations, "Number of allocations to perform.\n");
> +
> +static struct kunit_case drm_suballoc_tests[] = {
> +	KUNIT_CASE(drm_test_suballoc),
> +	{}
> +};
> +
> +static struct kunit_suite drm_suballoc_test_suite = {
> +	.name = "drm_suballoc",
> +	.test_cases = drm_suballoc_tests,
> +};
> +
> +kunit_test_suite(drm_suballoc_test_suite);
> +
> +MODULE_AUTHOR("Intel Corporation");
> +MODULE_DESCRIPTION("DRM suballocator Kunit test");
> +MODULE_LICENSE("Dual MIT/GPL");


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
  2023-03-02  8:34 ` [Intel-gfx] " Thomas Hellström
  (?)
@ 2023-03-02 16:39   ` kernel test robot
  -1 siblings, 0 replies; 21+ messages in thread
From: kernel test robot @ 2023-03-02 16:39 UTC (permalink / raw)
  To: Thomas Hellström, dri-devel
  Cc: Thomas Hellström, intel-gfx, intel-xe, Christian Koenig,
	oe-kbuild-all

Hi Thomas,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-tip/drm-tip]

url:    https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-tests-Suballocator-test/20230302-163704
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
patch link:    https://lore.kernel.org/r/20230302083422.76608-1-thomas.hellstrom%40linux.intel.com
patch subject: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
config: mips-allyesconfig (https://download.01.org/0day-ci/archive/20230303/202303030056.CNeZGRQR-lkp@intel.com/config)
compiler: mips-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/e970911bccf3145b76cd755e2d78c0c0f7f22ca1
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Thomas-Hellstr-m/drm-tests-Suballocator-test/20230302-163704
        git checkout e970911bccf3145b76cd755e2d78c0c0f7f22ca1
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303030056.CNeZGRQR-lkp@intel.com/

All errors (new ones prefixed by >>):

   arch/mips/kernel/head.o: in function `kernel_entry':
   (.ref.text+0xac): relocation truncated to fit: R_MIPS_26 against `start_kernel'
   init/main.o: in function `set_reset_devices':
   main.c:(.init.text+0x20): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x30): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `debug_kernel':
   main.c:(.init.text+0xa4): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0xb4): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `quiet_kernel':
   main.c:(.init.text+0x128): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x138): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `warn_bootconfig':
   main.c:(.init.text+0x1ac): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x1bc): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `init_setup':
   main.c:(.init.text+0x234): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x254): additional relocation overflows omitted from the output
   mips-linux-ld: drivers/gpu/drm/tests/drm_suballoc_test.o: in function `drm_test_suballoc':
>> drm_suballoc_test.c:(.text.drm_test_suballoc+0xcbc): undefined reference to `__udivdi3'
>> mips-linux-ld: drm_suballoc_test.c:(.text.drm_test_suballoc+0xd20): undefined reference to `__udivdi3'
   mips-linux-ld: drm_suballoc_test.c:(.text.drm_test_suballoc+0xd84): undefined reference to `__udivdi3'
   mips-linux-ld: drm_suballoc_test.c:(.text.drm_test_suballoc+0xde8): undefined reference to `__udivdi3'
   mips-linux-ld: drm_suballoc_test.c:(.text.drm_test_suballoc+0xe40): undefined reference to `__udivdi3'
   mips-linux-ld: drivers/gpu/drm/tests/drm_suballoc_test.o:drm_suballoc_test.c:(.text.drm_test_suballoc+0xfb4): more undefined references to `__udivdi3' follow

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-xe] [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-02 16:39   ` kernel test robot
  0 siblings, 0 replies; 21+ messages in thread
From: kernel test robot @ 2023-03-02 16:39 UTC (permalink / raw)
  To: Thomas Hellström, dri-devel
  Cc: intel-gfx, intel-xe, Christian Koenig, oe-kbuild-all

Hi Thomas,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-tip/drm-tip]

url:    https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-tests-Suballocator-test/20230302-163704
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
patch link:    https://lore.kernel.org/r/20230302083422.76608-1-thomas.hellstrom%40linux.intel.com
patch subject: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
config: mips-allyesconfig (https://download.01.org/0day-ci/archive/20230303/202303030056.CNeZGRQR-lkp@intel.com/config)
compiler: mips-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/e970911bccf3145b76cd755e2d78c0c0f7f22ca1
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Thomas-Hellstr-m/drm-tests-Suballocator-test/20230302-163704
        git checkout e970911bccf3145b76cd755e2d78c0c0f7f22ca1
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303030056.CNeZGRQR-lkp@intel.com/

All errors (new ones prefixed by >>):

   arch/mips/kernel/head.o: in function `kernel_entry':
   (.ref.text+0xac): relocation truncated to fit: R_MIPS_26 against `start_kernel'
   init/main.o: in function `set_reset_devices':
   main.c:(.init.text+0x20): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x30): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `debug_kernel':
   main.c:(.init.text+0xa4): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0xb4): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `quiet_kernel':
   main.c:(.init.text+0x128): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x138): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `warn_bootconfig':
   main.c:(.init.text+0x1ac): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x1bc): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `init_setup':
   main.c:(.init.text+0x234): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x254): additional relocation overflows omitted from the output
   mips-linux-ld: drivers/gpu/drm/tests/drm_suballoc_test.o: in function `drm_test_suballoc':
>> drm_suballoc_test.c:(.text.drm_test_suballoc+0xcbc): undefined reference to `__udivdi3'
>> mips-linux-ld: drm_suballoc_test.c:(.text.drm_test_suballoc+0xd20): undefined reference to `__udivdi3'
   mips-linux-ld: drm_suballoc_test.c:(.text.drm_test_suballoc+0xd84): undefined reference to `__udivdi3'
   mips-linux-ld: drm_suballoc_test.c:(.text.drm_test_suballoc+0xde8): undefined reference to `__udivdi3'
   mips-linux-ld: drm_suballoc_test.c:(.text.drm_test_suballoc+0xe40): undefined reference to `__udivdi3'
   mips-linux-ld: drivers/gpu/drm/tests/drm_suballoc_test.o:drm_suballoc_test.c:(.text.drm_test_suballoc+0xfb4): more undefined references to `__udivdi3' follow

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-02 16:39   ` kernel test robot
  0 siblings, 0 replies; 21+ messages in thread
From: kernel test robot @ 2023-03-02 16:39 UTC (permalink / raw)
  To: Thomas Hellström, dri-devel
  Cc: oe-kbuild-all, Thomas Hellström, intel-gfx,
	Christian Koenig, intel-xe

Hi Thomas,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-tip/drm-tip]

url:    https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-tests-Suballocator-test/20230302-163704
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
patch link:    https://lore.kernel.org/r/20230302083422.76608-1-thomas.hellstrom%40linux.intel.com
patch subject: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
config: mips-allyesconfig (https://download.01.org/0day-ci/archive/20230303/202303030056.CNeZGRQR-lkp@intel.com/config)
compiler: mips-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/e970911bccf3145b76cd755e2d78c0c0f7f22ca1
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Thomas-Hellstr-m/drm-tests-Suballocator-test/20230302-163704
        git checkout e970911bccf3145b76cd755e2d78c0c0f7f22ca1
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303030056.CNeZGRQR-lkp@intel.com/

All errors (new ones prefixed by >>):

   arch/mips/kernel/head.o: in function `kernel_entry':
   (.ref.text+0xac): relocation truncated to fit: R_MIPS_26 against `start_kernel'
   init/main.o: in function `set_reset_devices':
   main.c:(.init.text+0x20): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x30): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `debug_kernel':
   main.c:(.init.text+0xa4): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0xb4): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `quiet_kernel':
   main.c:(.init.text+0x128): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x138): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `warn_bootconfig':
   main.c:(.init.text+0x1ac): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x1bc): relocation truncated to fit: R_MIPS_26 against `__sanitizer_cov_trace_pc'
   init/main.o: in function `init_setup':
   main.c:(.init.text+0x234): relocation truncated to fit: R_MIPS_26 against `_mcount'
   main.c:(.init.text+0x254): additional relocation overflows omitted from the output
   mips-linux-ld: drivers/gpu/drm/tests/drm_suballoc_test.o: in function `drm_test_suballoc':
>> drm_suballoc_test.c:(.text.drm_test_suballoc+0xcbc): undefined reference to `__udivdi3'
>> mips-linux-ld: drm_suballoc_test.c:(.text.drm_test_suballoc+0xd20): undefined reference to `__udivdi3'
   mips-linux-ld: drm_suballoc_test.c:(.text.drm_test_suballoc+0xd84): undefined reference to `__udivdi3'
   mips-linux-ld: drm_suballoc_test.c:(.text.drm_test_suballoc+0xde8): undefined reference to `__udivdi3'
   mips-linux-ld: drm_suballoc_test.c:(.text.drm_test_suballoc+0xe40): undefined reference to `__udivdi3'
   mips-linux-ld: drivers/gpu/drm/tests/drm_suballoc_test.o:drm_suballoc_test.c:(.text.drm_test_suballoc+0xfb4): more undefined references to `__udivdi3' follow

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
  2023-03-02  8:34 ` [Intel-gfx] " Thomas Hellström
  (?)
@ 2023-03-02 17:13   ` kernel test robot
  -1 siblings, 0 replies; 21+ messages in thread
From: kernel test robot @ 2023-03-02 17:13 UTC (permalink / raw)
  To: Thomas Hellström, dri-devel
  Cc: Thomas Hellström, intel-gfx, intel-xe, Christian Koenig,
	oe-kbuild-all

Hi Thomas,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-tip/drm-tip]

url:    https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-tests-Suballocator-test/20230302-163704
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
patch link:    https://lore.kernel.org/r/20230302083422.76608-1-thomas.hellstrom%40linux.intel.com
patch subject: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
config: arm-allmodconfig (https://download.01.org/0day-ci/archive/20230303/202303030052.ybIikXrN-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/e970911bccf3145b76cd755e2d78c0c0f7f22ca1
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Thomas-Hellstr-m/drm-tests-Suballocator-test/20230302-163704
        git checkout e970911bccf3145b76cd755e2d78c0c0f7f22ca1
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arm olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303030052.ybIikXrN-lkp@intel.com/

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "__aeabi_uldivmod" [drivers/gpu/drm/tests/drm_suballoc_test.ko] undefined!

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-xe] [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-02 17:13   ` kernel test robot
  0 siblings, 0 replies; 21+ messages in thread
From: kernel test robot @ 2023-03-02 17:13 UTC (permalink / raw)
  To: Thomas Hellström, dri-devel
  Cc: intel-gfx, intel-xe, Christian Koenig, oe-kbuild-all

Hi Thomas,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-tip/drm-tip]

url:    https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-tests-Suballocator-test/20230302-163704
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
patch link:    https://lore.kernel.org/r/20230302083422.76608-1-thomas.hellstrom%40linux.intel.com
patch subject: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
config: arm-allmodconfig (https://download.01.org/0day-ci/archive/20230303/202303030052.ybIikXrN-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/e970911bccf3145b76cd755e2d78c0c0f7f22ca1
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Thomas-Hellstr-m/drm-tests-Suballocator-test/20230302-163704
        git checkout e970911bccf3145b76cd755e2d78c0c0f7f22ca1
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arm olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303030052.ybIikXrN-lkp@intel.com/

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "__aeabi_uldivmod" [drivers/gpu/drm/tests/drm_suballoc_test.ko] undefined!

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-02 17:13   ` kernel test robot
  0 siblings, 0 replies; 21+ messages in thread
From: kernel test robot @ 2023-03-02 17:13 UTC (permalink / raw)
  To: Thomas Hellström, dri-devel
  Cc: oe-kbuild-all, Thomas Hellström, intel-gfx,
	Christian Koenig, intel-xe

Hi Thomas,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-tip/drm-tip]

url:    https://github.com/intel-lab-lkp/linux/commits/Thomas-Hellstr-m/drm-tests-Suballocator-test/20230302-163704
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
patch link:    https://lore.kernel.org/r/20230302083422.76608-1-thomas.hellstrom%40linux.intel.com
patch subject: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
config: arm-allmodconfig (https://download.01.org/0day-ci/archive/20230303/202303030052.ybIikXrN-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/e970911bccf3145b76cd755e2d78c0c0f7f22ca1
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Thomas-Hellstr-m/drm-tests-Suballocator-test/20230302-163704
        git checkout e970911bccf3145b76cd755e2d78c0c0f7f22ca1
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arm olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arm SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303030052.ybIikXrN-lkp@intel.com/

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "__aeabi_uldivmod" [drivers/gpu/drm/tests/drm_suballoc_test.ko] undefined!

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RESEND] drm/tests: Suballocator test
  2023-03-02  8:34 ` [Intel-gfx] " Thomas Hellström
  (?)
@ 2023-03-26  9:42   ` Michał Winiarski
  -1 siblings, 0 replies; 21+ messages in thread
From: Michał Winiarski @ 2023-03-26  9:42 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: intel-gfx, Christian Koenig, dri-devel, intel-xe

On Thu, Mar 02, 2023 at 09:34:22AM +0100, Thomas Hellström wrote:
> Add a suballocator test to get some test coverage for the new drm
> suballocator, and perform some basic timing (elapsed time).
> 
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
>  drivers/gpu/drm/Kconfig                   |   1 +
>  drivers/gpu/drm/tests/Makefile            |   3 +-
>  drivers/gpu/drm/tests/drm_suballoc_test.c | 356 ++++++++++++++++++++++
>  3 files changed, 359 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/tests/drm_suballoc_test.c
> 
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 8fbe57407c60..dced53723721 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -78,6 +78,7 @@ config DRM_KUNIT_TEST
>  	select DRM_LIB_RANDOM
>  	select DRM_KMS_HELPER
>  	select DRM_BUDDY
> +	select DRM_SUBALLOC_HELPER
>  	select DRM_EXPORT_FOR_TESTS if m
>  	select DRM_KUNIT_TEST_HELPERS
>  	default KUNIT_ALL_TESTS
> diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
> index bca726a8f483..c664944a48ab 100644
> --- a/drivers/gpu/drm/tests/Makefile
> +++ b/drivers/gpu/drm/tests/Makefile
> @@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
>  	drm_modes_test.o \
>  	drm_plane_helper_test.o \
>  	drm_probe_helper_test.o \
> -	drm_rect_test.o
> +	drm_rect_test.o \
> +	drm_suballoc_test.o
>  
>  CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
> diff --git a/drivers/gpu/drm/tests/drm_suballoc_test.c b/drivers/gpu/drm/tests/drm_suballoc_test.c
> new file mode 100644
> index 000000000000..e7303a5505a0
> --- /dev/null
> +++ b/drivers/gpu/drm/tests/drm_suballoc_test.c
> @@ -0,0 +1,356 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/*
> + * Test case for the drm_suballoc suballocator manager
> + * Copyright 2023 Intel Corporation.
> + */
> +
> +#include <kunit/test.h>
> +
> +#include <linux/dma-fence.h>
> +#include <linux/ktime.h>
> +#include <linux/hrtimer.h>
> +#include <linux/sizes.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +#include <linux/delay.h>
> +#include <drm/drm_suballoc.h>
> +
> +#define SA_ITERATIONS 10000
> +#define SA_SIZE SZ_1M
> +#define SA_DEFAULT_ALIGN SZ_4K
> +
> +static bool intr = true;
> +static bool from_reclaim;
> +static bool pre_throttle;
> +static unsigned int num_rings = 4;
> +static unsigned int iterations = SA_ITERATIONS;
> +
> +static atomic64_t free_space;
> +
> +static atomic_t other_id;
> +
> +struct suballoc_fence;
> +
> +/**
> + * struct suballoc_ring - fake gpu engine.
> + * @list: List of fences to signal.
> + * @signal_time: Accumulated fence signal execution time.
> + * @lock: Protects the suballoc ring members. hardirq safe.
> + * @hrtimer: Fake execution time timer.
> + * @active: The currently active fence for which we have pending work or a
> + *          timer running.
> + * @seqno: Fence submission seqno.
> + * @idx: Index for calculation of fake execution time.
> + * @work: Work struct used solely to move the timer start to a different
> + *        processor than that used for submission.
> + */
> +struct suballoc_ring {
> +	ktime_t signal_time;
> +	struct list_head list;
> +	/* Protect the ring processing. */
> +	spinlock_t lock;
> +	struct hrtimer hrtimer;
> +	struct suballoc_fence *active;
> +	atomic64_t seqno;
> +	u32 idx;
> +	struct work_struct work;
> +};
> +
> +/**
> + * struct suballoc_fence - Hrtimer-driven fence.
> + * @fence: The base class fence struct.
> + * @link: Link for the ring's fence list.
> + * @size: The size of the suballocator range associated with this fence.
> + * @id: Cpu id likely used by the submission thread for suballoc allocation.
> + */
> +struct suballoc_fence {
> +	struct dma_fence fence;
> +	struct list_head link;
> +	size_t size;
> +	unsigned int id;
> +};
> +
> +/* A varying but repeatable fake execution time */
> +static ktime_t ring_next_delay(struct suballoc_ring *ring)
> +{
> +	return ns_to_ktime((u64)(++ring->idx % 8) * 200 * NSEC_PER_USEC);
> +}

Is there any way we can avoid using time (and large number of
iterations) here, while keeping the coverage?
drm_suballoc has the longest runtime out of all tests in DRM (taking ~60%
of the whole DRM kunit execution, drm_mm being the second and taking
~35%; without those two suites, DRM tests execute in milliseconds rather
than tens of seconds).
Building test cases in a way that operate on time basis makes it tricky
to optimize the runtime.
If we extract various parameters from modparams to separate test cases,
it's going to get even worse.

> +
> +/*
> + * Launch from a work item to decrease the likelihood of the timer expiry
> + * callback getting called from the allocating cpu.
> + * We want to trigger cache-line bouncing between allocating and signalling
> + * cpus.
> + */
> +static void ring_launch_timer_work(struct work_struct *work)
> +{
> +	struct suballoc_ring *ring =
> +		container_of(work, typeof(*ring), work);
> +
> +	spin_lock_irq(&ring->lock);
> +	if (ring->active)
> +		hrtimer_start_range_ns(&ring->hrtimer, ring_next_delay(ring),
> +				       100ULL * NSEC_PER_USEC,
> +				       HRTIMER_MODE_REL_PINNED);
> +
> +	spin_unlock_irq(&ring->lock);
> +}
> +
> +/*
> + * Signal an active fence and pull the next off the list if any and make it
> + * active.
> + */
> +static enum hrtimer_restart ring_hrtimer_expired(struct hrtimer *hrtimer)
> +{
> +	struct suballoc_ring *ring =
> +		container_of(hrtimer, typeof(*ring), hrtimer);
> +	struct suballoc_fence *sfence;
> +	ktime_t now, then;
> +	unsigned long irqflags;
> +
> +	spin_lock_irqsave(&ring->lock, irqflags);
> +	sfence = ring->active;
> +
> +	if (sfence) {
> +		struct dma_fence *fence = &sfence->fence;
> +
> +		if (sfence->id != get_cpu())
> +			atomic_inc(&other_id);
> +		put_cpu();
> +
> +		then = ktime_get();
> +		dma_fence_signal(fence);
> +		now = ktime_get();
> +		dma_fence_put(fence);
> +		ring->signal_time = ktime_add(ring->signal_time,
> +					      ktime_sub(now, then));
> +		ring->active = NULL;
> +		atomic64_add(sfence->size, &free_space);
> +	}
> +
> +	sfence = list_first_entry_or_null(&ring->list, typeof(*sfence), link);
> +	if (sfence) {
> +		list_del_init(&sfence->link);
> +		ring->active = sfence;
> +		spin_unlock_irqrestore(&ring->lock, irqflags);
> +		hrtimer_forward_now(&ring->hrtimer, ring_next_delay(ring));
> +		return HRTIMER_RESTART;
> +	}
> +	spin_unlock_irqrestore(&ring->lock, irqflags);
> +
> +	return HRTIMER_NORESTART;
> +}
> +
> +/*
> + * Queue a fence on a ring and if it's the first fence, make it active.
> + */
> +static void ring_add_fence(struct suballoc_ring *ring,
> +			   struct suballoc_fence *sfence)
> +{
> +	spin_lock_irq(&ring->lock);
> +	if (!ring->active) {
> +		ring->active = sfence;
> +		queue_work(system_unbound_wq, &ring->work);
> +	} else {
> +		list_add_tail(&sfence->link, &ring->list);
> +	}
> +	spin_unlock_irq(&ring->lock);
> +}
> +
> +static void ring_init(struct suballoc_ring *ring)
> +{
> +	memset(ring, 0, sizeof(*ring));
> +	INIT_LIST_HEAD(&ring->list);
> +	spin_lock_init(&ring->lock);
> +	hrtimer_init(&ring->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	ring->hrtimer.function = ring_hrtimer_expired;
> +	INIT_WORK(&ring->work, ring_launch_timer_work);
> +}
> +
> +static bool ring_idle(struct suballoc_ring *ring)
> +{
> +	bool tmp;
> +
> +	spin_lock_irq(&ring->lock);
> +	tmp = !ring->active;
> +	spin_unlock_irq(&ring->lock);
> +
> +	return tmp;
> +}
> +
> +static const char *dma_fence_get_suballoc_name(struct dma_fence *fence)
> +{
> +	return "suballoc";
> +}
> +
> +static const struct dma_fence_ops dma_fence_suballoc_ops = {
> +	.get_driver_name = dma_fence_get_suballoc_name,
> +	.get_timeline_name = dma_fence_get_suballoc_name,
> +};
> +
> +static DEFINE_SPINLOCK(sa_fence_lock);
> +static ktime_t alloctime, freetime;
> +
> +static void drm_test_suballoc(struct kunit *test)
> +{
> +	struct suballoc_ring *rings;
> +	struct drm_suballoc_manager sa_manager;
> +	struct drm_suballoc *sa;
> +	struct suballoc_fence *sfence;
> +	struct dma_fence *fence;
> +	ktime_t then, now, signaltime;
> +	int i, ring, iter_tot = 0;
> +	size_t size;
> +	unsigned int align;
> +	unsigned long long soffset;
> +	gfp_t gfp;
> +
> +	rings = kvmalloc_array(num_rings, sizeof(*rings), GFP_KERNEL);
> +	if (!rings) {
> +		KUNIT_FAIL(test, "Failed allocating %u rings.\n");
> +		return;
> +	}

KUNIT_ASSERT_NOT_NULL?
Though we might want to implement a test-resource managed variant
(kunit_kvmalloc_array) to not have to worry about lifecycle and freeing
the resources.

> +
> +	for (i = 0; i < num_rings; ++i)
> +		ring_init(rings + i);

With resource managed - rings could be allocated and initialized at
.init(). We would then call the flush and wait at .exit(), and as a
result, we would be able to use asserts in test body without worrying
about leaking.

> +
> +	atomic64_set(&free_space, SA_SIZE);
> +	drm_suballoc_manager_init(&sa_manager, SA_SIZE, SA_DEFAULT_ALIGN);

This could also be moved to .init()

> +
> +	if (from_reclaim)
> +		gfp = GFP_NOWAIT | __GFP_NOWARN;
> +	else
> +		gfp = GFP_KERNEL;
> +
> +	for (i = 0; i < iterations; ++i) {
> +		ring = i % num_rings;
> +		size = (ring + 1) * SZ_4K;
> +		align = 1 << (ring % const_ilog2(SA_DEFAULT_ALIGN));
> +
> +		if (pre_throttle)
> +			while (atomic64_read(&free_space) < SA_SIZE / 2)
> +				cpu_relax();
> +
> +		if (from_reclaim)
> +			fs_reclaim_acquire(GFP_KERNEL);
> +
> +		then = ktime_get();
> +		sa = drm_suballoc_new(&sa_manager, size, gfp, intr, align);
> +		now = ktime_get();
> +		if (from_reclaim)
> +			fs_reclaim_release(GFP_KERNEL);
> +
> +		alloctime = ktime_add(alloctime, ktime_sub(now, then));
> +
> +		iter_tot++;
> +		if (IS_ERR(sa)) {

KUNIT_ASSERT_NOT_ERR_OR_NULL?

> +			if (from_reclaim) {

drm_suballoc_new can fail for other reasons than -ENOMEM under memory
pressure, while with from_reclaim we're treating all errors as a
success, is that intentional?

> +				iter_tot--;
> +				continue;
> +			}
> +
> +			KUNIT_FAIL(test, "drm_suballoc_new() returned %pe\n",
> +				   sa);
> +			break;
> +		}
> +
> +		atomic64_sub(size, &free_space);
> +		soffset = drm_suballoc_soffset(sa);
> +		if (!IS_ALIGNED(soffset, align)) {
> +			drm_suballoc_free(sa, NULL);

Do we need to worry about calling free here? We shouldn't leak as long
as we wait upon all fences, as drm_suballoc_manager_fini will do the
clean up for us.

KUNIT_EXPECT_TRUE_MSG(..., IS_ALIGNED(soffset, align), ...)?

> +			KUNIT_FAIL(test, "Incorrect alignment: offset %llu align %u rem %llu\n",
> +				   soffset, align, soffset & (align - 1));
> +			break;
> +		}
> +
> +		if (drm_suballoc_eoffset(sa) > SA_SIZE) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Allocation beyond end.\n");
> +			break;
> +		}

KUNIT_EXPECT_LE_MSG?

> +
> +		if (drm_suballoc_size(sa) < size ||
> +		    drm_suballoc_size(sa) >= size + align) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Incorrect size.\n");
> +			break;
> +		}

KUNIT_EXPECT_GE and KUNIT_EXPECT_LT?

> +
> +		sfence = kmalloc(sizeof(*sfence), GFP_KERNEL);
> +		if (unlikely(!sfence)) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Fence allocation failed.\n");
> +			break;
> +		}

It looks like sfence is never released. kunit_kmalloc?
KUNIT_ASSERT_NOT_NULL / KUNIT_ASSERT_NOT_ERR_OR_NULL?

> +		fence = &sfence->fence;
> +		dma_fence_init(fence, &dma_fence_suballoc_ops, &sa_fence_lock,
> +			       ring + 1,
> +			       atomic64_inc_return(&rings[ring].seqno));
> +		sfence->size = size;
> +		sfence->id = get_cpu();
> +		put_cpu();
> +
> +		ring_add_fence(rings + ring, sfence);
> +
> +		then = ktime_get();
> +		drm_suballoc_free(sa, fence);
> +		now = ktime_get();
> +		freetime = ktime_add(freetime, ktime_sub(now, then));
> +	}
> +
> +	signaltime = ktime_set(0, 0);
> +	for (i = 0; i < num_rings; ++i) {
> +		struct suballoc_ring *sring = &rings[i];
> +
> +		flush_work(&sring->work);
> +		while (!ring_idle(sring))
> +			schedule();
> +		signaltime = ktime_add(signaltime, sring->signal_time);
> +	}

This (and drm_suballoc_manager_fini) could be moved to .exit()

> +
> +	kvfree(rings);
> +
> +	kunit_info(test, "signals on different processor: %d of %d\n",
> +		   atomic_read(&other_id), iter_tot);
> +	drm_suballoc_manager_fini(&sa_manager);
> +	kunit_info(test, "Alloc time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(alloctime) / iter_tot);
> +	kunit_info(test, "Free time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(freetime) / iter_tot);
> +	kunit_info(test, "Signal time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(signaltime) / iter_tot);

Do we need those timings?
If we do expect certain values (probably with some epsilon range), we
should handle it as a separate test.

> +
> +	if (atomic64_read(&free_space) != SA_SIZE) {
> +		kunit_warn(test, "Test sanity check failed.\n");
> +		kunit_warn(test, "Space left at exit is %lld of %d\n",
> +			   (long long)atomic64_read(&free_space), SA_SIZE);
> +	}

If this is an error - let's add it as an "expect".
Otherwise it's not printed if the test PASSes (unless we're running with
raw output).

> +}
> +
> +module_param(intr, bool, 0400);
> +MODULE_PARM_DESC(intr, "Whether to wait interruptible for space.");

This should be a separate test case (or param to a test case), not a
modparam.

> +module_param(from_reclaim, bool, 0400);
> +MODULE_PARM_DESC(from_reclaim, "Whether to suballocate from reclaim context.");

Same here.

> +module_param(pre_throttle, bool, 0400);
> +MODULE_PARM_DESC(pre_throttle, "Whether to have the test throttle for space "
> +		 "before allocations.");

And here.

> +module_param(num_rings, uint, 0400);
> +MODULE_PARM_DESC(num_rings, "Number of rings signalling fences in order.\n");
> +module_param(iterations, uint, 0400);
> +MODULE_PARM_DESC(iterations, "Number of allocations to perform.\n");

Do we expect any difference in coverage for different number of rings /
iterations? What's the relation here? Would it be possible to extract
specific values (for which we expect different behavior) to separate
testcases?

-Michał

> +
> +static struct kunit_case drm_suballoc_tests[] = {
> +	KUNIT_CASE(drm_test_suballoc),
> +	{}
> +};
> +
> +static struct kunit_suite drm_suballoc_test_suite = {
> +	.name = "drm_suballoc",
> +	.test_cases = drm_suballoc_tests,
> +};
> +
> +kunit_test_suite(drm_suballoc_test_suite);
> +
> +MODULE_AUTHOR("Intel Corporation");
> +MODULE_DESCRIPTION("DRM suballocator Kunit test");
> +MODULE_LICENSE("Dual MIT/GPL");
> -- 
> 2.34.1
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-26  9:42   ` Michał Winiarski
  0 siblings, 0 replies; 21+ messages in thread
From: Michał Winiarski @ 2023-03-26  9:42 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: intel-gfx, Christian Koenig, dri-devel, intel-xe

On Thu, Mar 02, 2023 at 09:34:22AM +0100, Thomas Hellström wrote:
> Add a suballocator test to get some test coverage for the new drm
> suballocator, and perform some basic timing (elapsed time).
> 
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
>  drivers/gpu/drm/Kconfig                   |   1 +
>  drivers/gpu/drm/tests/Makefile            |   3 +-
>  drivers/gpu/drm/tests/drm_suballoc_test.c | 356 ++++++++++++++++++++++
>  3 files changed, 359 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/tests/drm_suballoc_test.c
> 
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 8fbe57407c60..dced53723721 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -78,6 +78,7 @@ config DRM_KUNIT_TEST
>  	select DRM_LIB_RANDOM
>  	select DRM_KMS_HELPER
>  	select DRM_BUDDY
> +	select DRM_SUBALLOC_HELPER
>  	select DRM_EXPORT_FOR_TESTS if m
>  	select DRM_KUNIT_TEST_HELPERS
>  	default KUNIT_ALL_TESTS
> diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
> index bca726a8f483..c664944a48ab 100644
> --- a/drivers/gpu/drm/tests/Makefile
> +++ b/drivers/gpu/drm/tests/Makefile
> @@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
>  	drm_modes_test.o \
>  	drm_plane_helper_test.o \
>  	drm_probe_helper_test.o \
> -	drm_rect_test.o
> +	drm_rect_test.o \
> +	drm_suballoc_test.o
>  
>  CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
> diff --git a/drivers/gpu/drm/tests/drm_suballoc_test.c b/drivers/gpu/drm/tests/drm_suballoc_test.c
> new file mode 100644
> index 000000000000..e7303a5505a0
> --- /dev/null
> +++ b/drivers/gpu/drm/tests/drm_suballoc_test.c
> @@ -0,0 +1,356 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/*
> + * Test case for the drm_suballoc suballocator manager
> + * Copyright 2023 Intel Corporation.
> + */
> +
> +#include <kunit/test.h>
> +
> +#include <linux/dma-fence.h>
> +#include <linux/ktime.h>
> +#include <linux/hrtimer.h>
> +#include <linux/sizes.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +#include <linux/delay.h>
> +#include <drm/drm_suballoc.h>
> +
> +#define SA_ITERATIONS 10000
> +#define SA_SIZE SZ_1M
> +#define SA_DEFAULT_ALIGN SZ_4K
> +
> +static bool intr = true;
> +static bool from_reclaim;
> +static bool pre_throttle;
> +static unsigned int num_rings = 4;
> +static unsigned int iterations = SA_ITERATIONS;
> +
> +static atomic64_t free_space;
> +
> +static atomic_t other_id;
> +
> +struct suballoc_fence;
> +
> +/**
> + * struct suballoc_ring - fake gpu engine.
> + * @list: List of fences to signal.
> + * @signal_time: Accumulated fence signal execution time.
> + * @lock: Protects the suballoc ring members. hardirq safe.
> + * @hrtimer: Fake execution time timer.
> + * @active: The currently active fence for which we have pending work or a
> + *          timer running.
> + * @seqno: Fence submission seqno.
> + * @idx: Index for calculation of fake execution time.
> + * @work: Work struct used solely to move the timer start to a different
> + *        processor than that used for submission.
> + */
> +struct suballoc_ring {
> +	ktime_t signal_time;
> +	struct list_head list;
> +	/* Protect the ring processing. */
> +	spinlock_t lock;
> +	struct hrtimer hrtimer;
> +	struct suballoc_fence *active;
> +	atomic64_t seqno;
> +	u32 idx;
> +	struct work_struct work;
> +};
> +
> +/**
> + * struct suballoc_fence - Hrtimer-driven fence.
> + * @fence: The base class fence struct.
> + * @link: Link for the ring's fence list.
> + * @size: The size of the suballocator range associated with this fence.
> + * @id: Cpu id likely used by the submission thread for suballoc allocation.
> + */
> +struct suballoc_fence {
> +	struct dma_fence fence;
> +	struct list_head link;
> +	size_t size;
> +	unsigned int id;
> +};
> +
> +/* A varying but repeatable fake execution time */
> +static ktime_t ring_next_delay(struct suballoc_ring *ring)
> +{
> +	return ns_to_ktime((u64)(++ring->idx % 8) * 200 * NSEC_PER_USEC);
> +}

Is there any way we can avoid using time (and large number of
iterations) here, while keeping the coverage?
drm_suballoc has the longest runtime out of all tests in DRM (taking ~60%
of the whole DRM kunit execution, drm_mm being the second and taking
~35%; without those two suites, DRM tests execute in milliseconds rather
than tens of seconds).
Building test cases in a way that operate on time basis makes it tricky
to optimize the runtime.
If we extract various parameters from modparams to separate test cases,
it's going to get even worse.

> +
> +/*
> + * Launch from a work item to decrease the likelihood of the timer expiry
> + * callback getting called from the allocating cpu.
> + * We want to trigger cache-line bouncing between allocating and signalling
> + * cpus.
> + */
> +static void ring_launch_timer_work(struct work_struct *work)
> +{
> +	struct suballoc_ring *ring =
> +		container_of(work, typeof(*ring), work);
> +
> +	spin_lock_irq(&ring->lock);
> +	if (ring->active)
> +		hrtimer_start_range_ns(&ring->hrtimer, ring_next_delay(ring),
> +				       100ULL * NSEC_PER_USEC,
> +				       HRTIMER_MODE_REL_PINNED);
> +
> +	spin_unlock_irq(&ring->lock);
> +}
> +
> +/*
> + * Signal an active fence and pull the next off the list if any and make it
> + * active.
> + */
> +static enum hrtimer_restart ring_hrtimer_expired(struct hrtimer *hrtimer)
> +{
> +	struct suballoc_ring *ring =
> +		container_of(hrtimer, typeof(*ring), hrtimer);
> +	struct suballoc_fence *sfence;
> +	ktime_t now, then;
> +	unsigned long irqflags;
> +
> +	spin_lock_irqsave(&ring->lock, irqflags);
> +	sfence = ring->active;
> +
> +	if (sfence) {
> +		struct dma_fence *fence = &sfence->fence;
> +
> +		if (sfence->id != get_cpu())
> +			atomic_inc(&other_id);
> +		put_cpu();
> +
> +		then = ktime_get();
> +		dma_fence_signal(fence);
> +		now = ktime_get();
> +		dma_fence_put(fence);
> +		ring->signal_time = ktime_add(ring->signal_time,
> +					      ktime_sub(now, then));
> +		ring->active = NULL;
> +		atomic64_add(sfence->size, &free_space);
> +	}
> +
> +	sfence = list_first_entry_or_null(&ring->list, typeof(*sfence), link);
> +	if (sfence) {
> +		list_del_init(&sfence->link);
> +		ring->active = sfence;
> +		spin_unlock_irqrestore(&ring->lock, irqflags);
> +		hrtimer_forward_now(&ring->hrtimer, ring_next_delay(ring));
> +		return HRTIMER_RESTART;
> +	}
> +	spin_unlock_irqrestore(&ring->lock, irqflags);
> +
> +	return HRTIMER_NORESTART;
> +}
> +
> +/*
> + * Queue a fence on a ring and if it's the first fence, make it active.
> + */
> +static void ring_add_fence(struct suballoc_ring *ring,
> +			   struct suballoc_fence *sfence)
> +{
> +	spin_lock_irq(&ring->lock);
> +	if (!ring->active) {
> +		ring->active = sfence;
> +		queue_work(system_unbound_wq, &ring->work);
> +	} else {
> +		list_add_tail(&sfence->link, &ring->list);
> +	}
> +	spin_unlock_irq(&ring->lock);
> +}
> +
> +static void ring_init(struct suballoc_ring *ring)
> +{
> +	memset(ring, 0, sizeof(*ring));
> +	INIT_LIST_HEAD(&ring->list);
> +	spin_lock_init(&ring->lock);
> +	hrtimer_init(&ring->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	ring->hrtimer.function = ring_hrtimer_expired;
> +	INIT_WORK(&ring->work, ring_launch_timer_work);
> +}
> +
> +static bool ring_idle(struct suballoc_ring *ring)
> +{
> +	bool tmp;
> +
> +	spin_lock_irq(&ring->lock);
> +	tmp = !ring->active;
> +	spin_unlock_irq(&ring->lock);
> +
> +	return tmp;
> +}
> +
> +static const char *dma_fence_get_suballoc_name(struct dma_fence *fence)
> +{
> +	return "suballoc";
> +}
> +
> +static const struct dma_fence_ops dma_fence_suballoc_ops = {
> +	.get_driver_name = dma_fence_get_suballoc_name,
> +	.get_timeline_name = dma_fence_get_suballoc_name,
> +};
> +
> +static DEFINE_SPINLOCK(sa_fence_lock);
> +static ktime_t alloctime, freetime;
> +
> +static void drm_test_suballoc(struct kunit *test)
> +{
> +	struct suballoc_ring *rings;
> +	struct drm_suballoc_manager sa_manager;
> +	struct drm_suballoc *sa;
> +	struct suballoc_fence *sfence;
> +	struct dma_fence *fence;
> +	ktime_t then, now, signaltime;
> +	int i, ring, iter_tot = 0;
> +	size_t size;
> +	unsigned int align;
> +	unsigned long long soffset;
> +	gfp_t gfp;
> +
> +	rings = kvmalloc_array(num_rings, sizeof(*rings), GFP_KERNEL);
> +	if (!rings) {
> +		KUNIT_FAIL(test, "Failed allocating %u rings.\n");
> +		return;
> +	}

KUNIT_ASSERT_NOT_NULL?
Though we might want to implement a test-resource managed variant
(kunit_kvmalloc_array) to not have to worry about lifecycle and freeing
the resources.

> +
> +	for (i = 0; i < num_rings; ++i)
> +		ring_init(rings + i);

With resource managed - rings could be allocated and initialized at
.init(). We would then call the flush and wait at .exit(), and as a
result, we would be able to use asserts in test body without worrying
about leaking.

> +
> +	atomic64_set(&free_space, SA_SIZE);
> +	drm_suballoc_manager_init(&sa_manager, SA_SIZE, SA_DEFAULT_ALIGN);

This could also be moved to .init()

> +
> +	if (from_reclaim)
> +		gfp = GFP_NOWAIT | __GFP_NOWARN;
> +	else
> +		gfp = GFP_KERNEL;
> +
> +	for (i = 0; i < iterations; ++i) {
> +		ring = i % num_rings;
> +		size = (ring + 1) * SZ_4K;
> +		align = 1 << (ring % const_ilog2(SA_DEFAULT_ALIGN));
> +
> +		if (pre_throttle)
> +			while (atomic64_read(&free_space) < SA_SIZE / 2)
> +				cpu_relax();
> +
> +		if (from_reclaim)
> +			fs_reclaim_acquire(GFP_KERNEL);
> +
> +		then = ktime_get();
> +		sa = drm_suballoc_new(&sa_manager, size, gfp, intr, align);
> +		now = ktime_get();
> +		if (from_reclaim)
> +			fs_reclaim_release(GFP_KERNEL);
> +
> +		alloctime = ktime_add(alloctime, ktime_sub(now, then));
> +
> +		iter_tot++;
> +		if (IS_ERR(sa)) {

KUNIT_ASSERT_NOT_ERR_OR_NULL?

> +			if (from_reclaim) {

drm_suballoc_new can fail for other reasons than -ENOMEM under memory
pressure, while with from_reclaim we're treating all errors as a
success, is that intentional?

> +				iter_tot--;
> +				continue;
> +			}
> +
> +			KUNIT_FAIL(test, "drm_suballoc_new() returned %pe\n",
> +				   sa);
> +			break;
> +		}
> +
> +		atomic64_sub(size, &free_space);
> +		soffset = drm_suballoc_soffset(sa);
> +		if (!IS_ALIGNED(soffset, align)) {
> +			drm_suballoc_free(sa, NULL);

Do we need to worry about calling free here? We shouldn't leak as long
as we wait upon all fences, as drm_suballoc_manager_fini will do the
clean up for us.

KUNIT_EXPECT_TRUE_MSG(..., IS_ALIGNED(soffset, align), ...)?

> +			KUNIT_FAIL(test, "Incorrect alignment: offset %llu align %u rem %llu\n",
> +				   soffset, align, soffset & (align - 1));
> +			break;
> +		}
> +
> +		if (drm_suballoc_eoffset(sa) > SA_SIZE) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Allocation beyond end.\n");
> +			break;
> +		}

KUNIT_EXPECT_LE_MSG?

> +
> +		if (drm_suballoc_size(sa) < size ||
> +		    drm_suballoc_size(sa) >= size + align) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Incorrect size.\n");
> +			break;
> +		}

KUNIT_EXPECT_GE and KUNIT_EXPECT_LT?

> +
> +		sfence = kmalloc(sizeof(*sfence), GFP_KERNEL);
> +		if (unlikely(!sfence)) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Fence allocation failed.\n");
> +			break;
> +		}

It looks like sfence is never released. kunit_kmalloc?
KUNIT_ASSERT_NOT_NULL / KUNIT_ASSERT_NOT_ERR_OR_NULL?

> +		fence = &sfence->fence;
> +		dma_fence_init(fence, &dma_fence_suballoc_ops, &sa_fence_lock,
> +			       ring + 1,
> +			       atomic64_inc_return(&rings[ring].seqno));
> +		sfence->size = size;
> +		sfence->id = get_cpu();
> +		put_cpu();
> +
> +		ring_add_fence(rings + ring, sfence);
> +
> +		then = ktime_get();
> +		drm_suballoc_free(sa, fence);
> +		now = ktime_get();
> +		freetime = ktime_add(freetime, ktime_sub(now, then));
> +	}
> +
> +	signaltime = ktime_set(0, 0);
> +	for (i = 0; i < num_rings; ++i) {
> +		struct suballoc_ring *sring = &rings[i];
> +
> +		flush_work(&sring->work);
> +		while (!ring_idle(sring))
> +			schedule();
> +		signaltime = ktime_add(signaltime, sring->signal_time);
> +	}

This (and drm_suballoc_manager_fini) could be moved to .exit()

> +
> +	kvfree(rings);
> +
> +	kunit_info(test, "signals on different processor: %d of %d\n",
> +		   atomic_read(&other_id), iter_tot);
> +	drm_suballoc_manager_fini(&sa_manager);
> +	kunit_info(test, "Alloc time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(alloctime) / iter_tot);
> +	kunit_info(test, "Free time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(freetime) / iter_tot);
> +	kunit_info(test, "Signal time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(signaltime) / iter_tot);

Do we need those timings?
If we do expect certain values (probably with some epsilon range), we
should handle it as a separate test.

> +
> +	if (atomic64_read(&free_space) != SA_SIZE) {
> +		kunit_warn(test, "Test sanity check failed.\n");
> +		kunit_warn(test, "Space left at exit is %lld of %d\n",
> +			   (long long)atomic64_read(&free_space), SA_SIZE);
> +	}

If this is an error - let's add it as an "expect".
Otherwise it's not printed if the test PASSes (unless we're running with
raw output).

> +}
> +
> +module_param(intr, bool, 0400);
> +MODULE_PARM_DESC(intr, "Whether to wait interruptible for space.");

This should be a separate test case (or param to a test case), not a
modparam.

> +module_param(from_reclaim, bool, 0400);
> +MODULE_PARM_DESC(from_reclaim, "Whether to suballocate from reclaim context.");

Same here.

> +module_param(pre_throttle, bool, 0400);
> +MODULE_PARM_DESC(pre_throttle, "Whether to have the test throttle for space "
> +		 "before allocations.");

And here.

> +module_param(num_rings, uint, 0400);
> +MODULE_PARM_DESC(num_rings, "Number of rings signalling fences in order.\n");
> +module_param(iterations, uint, 0400);
> +MODULE_PARM_DESC(iterations, "Number of allocations to perform.\n");

Do we expect any difference in coverage for different number of rings /
iterations? What's the relation here? Would it be possible to extract
specific values (for which we expect different behavior) to separate
testcases?

-Michał

> +
> +static struct kunit_case drm_suballoc_tests[] = {
> +	KUNIT_CASE(drm_test_suballoc),
> +	{}
> +};
> +
> +static struct kunit_suite drm_suballoc_test_suite = {
> +	.name = "drm_suballoc",
> +	.test_cases = drm_suballoc_tests,
> +};
> +
> +kunit_test_suite(drm_suballoc_test_suite);
> +
> +MODULE_AUTHOR("Intel Corporation");
> +MODULE_DESCRIPTION("DRM suballocator Kunit test");
> +MODULE_LICENSE("Dual MIT/GPL");
> -- 
> 2.34.1
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-xe] [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-26  9:42   ` Michał Winiarski
  0 siblings, 0 replies; 21+ messages in thread
From: Michał Winiarski @ 2023-03-26  9:42 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: intel-gfx, Christian Koenig, dri-devel, intel-xe

On Thu, Mar 02, 2023 at 09:34:22AM +0100, Thomas Hellström wrote:
> Add a suballocator test to get some test coverage for the new drm
> suballocator, and perform some basic timing (elapsed time).
> 
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
>  drivers/gpu/drm/Kconfig                   |   1 +
>  drivers/gpu/drm/tests/Makefile            |   3 +-
>  drivers/gpu/drm/tests/drm_suballoc_test.c | 356 ++++++++++++++++++++++
>  3 files changed, 359 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/tests/drm_suballoc_test.c
> 
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 8fbe57407c60..dced53723721 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -78,6 +78,7 @@ config DRM_KUNIT_TEST
>  	select DRM_LIB_RANDOM
>  	select DRM_KMS_HELPER
>  	select DRM_BUDDY
> +	select DRM_SUBALLOC_HELPER
>  	select DRM_EXPORT_FOR_TESTS if m
>  	select DRM_KUNIT_TEST_HELPERS
>  	default KUNIT_ALL_TESTS
> diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
> index bca726a8f483..c664944a48ab 100644
> --- a/drivers/gpu/drm/tests/Makefile
> +++ b/drivers/gpu/drm/tests/Makefile
> @@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
>  	drm_modes_test.o \
>  	drm_plane_helper_test.o \
>  	drm_probe_helper_test.o \
> -	drm_rect_test.o
> +	drm_rect_test.o \
> +	drm_suballoc_test.o
>  
>  CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
> diff --git a/drivers/gpu/drm/tests/drm_suballoc_test.c b/drivers/gpu/drm/tests/drm_suballoc_test.c
> new file mode 100644
> index 000000000000..e7303a5505a0
> --- /dev/null
> +++ b/drivers/gpu/drm/tests/drm_suballoc_test.c
> @@ -0,0 +1,356 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/*
> + * Test case for the drm_suballoc suballocator manager
> + * Copyright 2023 Intel Corporation.
> + */
> +
> +#include <kunit/test.h>
> +
> +#include <linux/dma-fence.h>
> +#include <linux/ktime.h>
> +#include <linux/hrtimer.h>
> +#include <linux/sizes.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +#include <linux/delay.h>
> +#include <drm/drm_suballoc.h>
> +
> +#define SA_ITERATIONS 10000
> +#define SA_SIZE SZ_1M
> +#define SA_DEFAULT_ALIGN SZ_4K
> +
> +static bool intr = true;
> +static bool from_reclaim;
> +static bool pre_throttle;
> +static unsigned int num_rings = 4;
> +static unsigned int iterations = SA_ITERATIONS;
> +
> +static atomic64_t free_space;
> +
> +static atomic_t other_id;
> +
> +struct suballoc_fence;
> +
> +/**
> + * struct suballoc_ring - fake gpu engine.
> + * @list: List of fences to signal.
> + * @signal_time: Accumulated fence signal execution time.
> + * @lock: Protects the suballoc ring members. hardirq safe.
> + * @hrtimer: Fake execution time timer.
> + * @active: The currently active fence for which we have pending work or a
> + *          timer running.
> + * @seqno: Fence submissin seqno.
> + * @idx: Index for calculation of fake execution time.
> + * @work: Work struct used solely to move the timer start to a different
> + *        processor than that used for submission.
> + */
> +struct suballoc_ring {
> +	ktime_t signal_time;
> +	struct list_head list;
> +	/* Protect the ring processing. */
> +	spinlock_t lock;
> +	struct hrtimer hrtimer;
> +	struct suballoc_fence *active;
> +	atomic64_t seqno;
> +	u32 idx;
> +	struct work_struct work;
> +};
> +
> +/**
> + * struct suballoc_fence - Hrtimer-driven fence.
> + * @fence: The base class fence struct.
> + * @link: Link for the ring's fence list.
> + * @size: The size of the suballocator range associated with this fence.
> + * @id: Cpu id likely used by the submission thread for suballoc allocation.
> + */
> +struct suballoc_fence {
> +	struct dma_fence fence;
> +	struct list_head link;
> +	size_t size;
> +	unsigned int id;
> +};
> +
> +/* A varying but repeatable fake execution time */
> +static ktime_t ring_next_delay(struct suballoc_ring *ring)
> +{
> +	return ns_to_ktime((u64)(++ring->idx % 8) * 200 * NSEC_PER_USEC);
> +}

Is there any way we can avoid using time (and large number of
iterations) here, while keeping the coverage?
drm_suballoc has the longest runtime out of all tests in DRM (taking ~60%
of the whole DRM kunit execution, drm_mm being the second and taking
~35%, without those two suites DRM tests execute in milliseconds rather
than tens of seconds),
Building test cases in a way that operates on a time basis makes it tricky
to optimize the runtime.
If we extract various parameters from modparams to separate test cases,
it's going to get even worse.

> +
> +/*
> + * Launch from a work item to decrease the likelyhood of the timer expiry
> + * callback getting called from the allocating cpu.
> + * We want to trigger cache-line bouncing between allocating and signalling
> + * cpus.
> + */
> +static void ring_launch_timer_work(struct work_struct *work)
> +{
> +	struct suballoc_ring *ring =
> +		container_of(work, typeof(*ring), work);
> +
> +	spin_lock_irq(&ring->lock);
> +	if (ring->active)
> +		hrtimer_start_range_ns(&ring->hrtimer, ring_next_delay(ring),
> +				       100ULL * NSEC_PER_USEC,
> +				       HRTIMER_MODE_REL_PINNED);
> +
> +	spin_unlock_irq(&ring->lock);
> +}
> +
> +/*
> + * Signal an active fence and pull the next off the list if any and make it
> + * active.
> + */
> +static enum hrtimer_restart ring_hrtimer_expired(struct hrtimer *hrtimer)
> +{
> +	struct suballoc_ring *ring =
> +		container_of(hrtimer, typeof(*ring), hrtimer);
> +	struct suballoc_fence *sfence;
> +	ktime_t now, then;
> +	unsigned long irqflags;
> +
> +	spin_lock_irqsave(&ring->lock, irqflags);
> +	sfence = ring->active;
> +
> +	if (sfence) {
> +		struct dma_fence *fence = &sfence->fence;
> +
> +		if (sfence->id != get_cpu())
> +			atomic_inc(&other_id);
> +		put_cpu();
> +
> +		then = ktime_get();
> +		dma_fence_signal(fence);
> +		now = ktime_get();
> +		dma_fence_put(fence);
> +		ring->signal_time = ktime_add(ring->signal_time,
> +					      ktime_sub(now, then));
> +		ring->active = NULL;
> +		atomic64_add(sfence->size, &free_space);
> +	}
> +
> +	sfence = list_first_entry_or_null(&ring->list, typeof(*sfence), link);
> +	if (sfence) {
> +		list_del_init(&sfence->link);
> +		ring->active = sfence;
> +		spin_unlock_irqrestore(&ring->lock, irqflags);
> +		hrtimer_forward_now(&ring->hrtimer, ring_next_delay(ring));
> +		return HRTIMER_RESTART;
> +	}
> +	spin_unlock_irqrestore(&ring->lock, irqflags);
> +
> +	return HRTIMER_NORESTART;
> +}
> +
> +/*
> + * Queue a fence on a ring and if it's the first fence, make it active.
> + */
> +static void ring_add_fence(struct suballoc_ring *ring,
> +			   struct suballoc_fence *sfence)
> +{
> +	spin_lock_irq(&ring->lock);
> +	if (!ring->active) {
> +		ring->active = sfence;
> +		queue_work(system_unbound_wq, &ring->work);
> +	} else {
> +		list_add_tail(&sfence->link, &ring->list);
> +	}
> +	spin_unlock_irq(&ring->lock);
> +}
> +
> +static void ring_init(struct suballoc_ring *ring)
> +{
> +	memset(ring, 0, sizeof(*ring));
> +	INIT_LIST_HEAD(&ring->list);
> +	spin_lock_init(&ring->lock);
> +	hrtimer_init(&ring->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	ring->hrtimer.function = ring_hrtimer_expired;
> +	INIT_WORK(&ring->work, ring_launch_timer_work);
> +}
> +
> +static bool ring_idle(struct suballoc_ring *ring)
> +{
> +	bool tmp;
> +
> +	spin_lock_irq(&ring->lock);
> +	tmp = !ring->active;
> +	spin_unlock_irq(&ring->lock);
> +
> +	return tmp;
> +}
> +
> +static const char *dma_fence_get_suballoc_name(struct dma_fence *fence)
> +{
> +	return "suballoc";
> +}
> +
> +static const struct dma_fence_ops dma_fence_suballoc_ops = {
> +	.get_driver_name = dma_fence_get_suballoc_name,
> +	.get_timeline_name = dma_fence_get_suballoc_name,
> +};
> +
> +static DEFINE_SPINLOCK(sa_fence_lock);
> +static ktime_t alloctime, freetime;
> +
> +static void drm_test_suballoc(struct kunit *test)
> +{
> +	struct suballoc_ring *rings;
> +	struct drm_suballoc_manager sa_manager;
> +	struct drm_suballoc *sa;
> +	struct suballoc_fence *sfence;
> +	struct dma_fence *fence;
> +	ktime_t then, now, signaltime;
> +	int i, ring, iter_tot = 0;
> +	size_t size;
> +	unsigned int align;
> +	unsigned long long soffset;
> +	gfp_t gfp;
> +
> +	rings = kvmalloc_array(num_rings, sizeof(*rings), GFP_KERNEL);
> +	if (!rings) {
> +		KUNIT_FAIL(test, "Failed allocating %u rings.\n");
> +		return;
> +	}

KUNIT_ASSERT_NOT_NULL?
Though we might want to implement a test-resource managed variant
(kunit_kvmalloc_array) to not have to worry about lifecycle and freeing
the resources.

> +
> +	for (i = 0; i < num_rings; ++i)
> +		ring_init(rings + i);

With resource managed - rings could be allocated and initialized at
.init(). We would then call the flush and wait at .exit(), and as a
result, we would be able to use asserts in test body without worrying
about leaking.

> +
> +	atomic64_set(&free_space, SA_SIZE);
> +	drm_suballoc_manager_init(&sa_manager, SA_SIZE, SA_DEFAULT_ALIGN);

This could also be moved to .init()

> +
> +	if (from_reclaim)
> +		gfp = GFP_NOWAIT | __GFP_NOWARN;
> +	else
> +		gfp = GFP_KERNEL;
> +
> +	for (i = 0; i < iterations; ++i) {
> +		ring = i % num_rings;
> +		size = (ring + 1) * SZ_4K;
> +		align = 1 << (ring % const_ilog2(SA_DEFAULT_ALIGN));
> +
> +		if (pre_throttle)
> +			while (atomic64_read(&free_space) < SA_SIZE / 2)
> +				cpu_relax();
> +
> +		if (from_reclaim)
> +			fs_reclaim_acquire(GFP_KERNEL);
> +
> +		then = ktime_get();
> +		sa = drm_suballoc_new(&sa_manager, size, gfp, intr, align);
> +		now = ktime_get();
> +		if (from_reclaim)
> +			fs_reclaim_release(GFP_KERNEL);
> +
> +		alloctime = ktime_add(alloctime, ktime_sub(now, then));
> +
> +		iter_tot++;
> +		if (IS_ERR(sa)) {

KUNIT_ASSERT_NOT_ERR_OR_NULL?

> +			if (from_reclaim) {

drm_suballoc_new can fail for reasons other than -ENOMEM under memory
pressure, yet with from_reclaim we're treating all errors as a
success — is that intentional?

> +				iter_tot--;
> +				continue;
> +			}
> +
> +			KUNIT_FAIL(test, "drm_suballoc_new() returned %pe\n",
> +				   sa);
> +			break;
> +		}
> +
> +		atomic64_sub(size, &free_space);
> +		soffset = drm_suballoc_soffset(sa);
> +		if (!IS_ALIGNED(soffset, align)) {
> +			drm_suballoc_free(sa, NULL);

Do we need to worry about calling free here? We shouldn't leak as long
as we wait upon all fences, as drm_suballoc_manager_fini will do the
clean up for us.

KUNIT_EXPECT_TRUE_MSG(..., IS_ALIGNED(soffset, align), ...)?

> +			KUNIT_FAIL(test, "Incorrect alignment: offset %llu align %u rem %llu\n",
> +				   soffset, align, soffset & (align - 1));
> +			break;
> +		}
> +
> +		if (drm_suballoc_eoffset(sa) > SA_SIZE) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Allocation beyond end.\n");
> +			break;
> +		}

KUNIT_EXPECT_LE_MSG?

> +
> +		if (drm_suballoc_size(sa) < size ||
> +		    drm_suballoc_size(sa) >= size + align) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Incorrect size.\n");
> +			break;
> +		}

KUNIT_EXPECT_GE and KUNIT_EXPECT_LT?

> +
> +		sfence = kmalloc(sizeof(*sfence), GFP_KERNEL);
> +		if (unlikely(!sfence)) {
> +			drm_suballoc_free(sa, NULL);
> +			KUNIT_FAIL(test, "Fence allocation failed.\n");
> +			break;
> +		}

It looks like sfence is never released. kunit_kmalloc?
KUNIT_ASSERT_NOT_NULL / KUNIT_ASSERT_NOT_ERR_OR_NULL?

> +		fence = &sfence->fence;
> +		dma_fence_init(fence, &dma_fence_suballoc_ops, &sa_fence_lock,
> +			       ring + 1,
> +			       atomic64_inc_return(&rings[ring].seqno));
> +		sfence->size = size;
> +		sfence->id = get_cpu();
> +		put_cpu();
> +
> +		ring_add_fence(rings + ring, sfence);
> +
> +		then = ktime_get();
> +		drm_suballoc_free(sa, fence);
> +		now = ktime_get();
> +		freetime = ktime_add(freetime, ktime_sub(now, then));
> +	}
> +
> +	signaltime = ktime_set(0, 0);
> +	for (i = 0; i < num_rings; ++i) {
> +		struct suballoc_ring *sring = &rings[i];
> +
> +		flush_work(&sring->work);
> +		while (!ring_idle(sring))
> +			schedule();
> +		signaltime = ktime_add(signaltime, sring->signal_time);
> +	}

This (and drm_suballoc_manager_fini) could be moved to .exit()

> +
> +	kvfree(rings);
> +
> +	kunit_info(test, "signals on different processor: %d of %d\n",
> +		   atomic_read(&other_id), iter_tot);
> +	drm_suballoc_manager_fini(&sa_manager);
> +	kunit_info(test, "Alloc time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(alloctime) / iter_tot);
> +	kunit_info(test, "Free time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(freetime) / iter_tot);
> +	kunit_info(test, "Signal time was %llu ns.\n", (unsigned long long)
> +		   ktime_to_ns(signaltime) / iter_tot);

Do we need those timings?
If we do expect certain values (probably with some epsilon range), we
should handle it as a separate test.

> +
> +	if (atomic64_read(&free_space) != SA_SIZE) {
> +		kunit_warn(test, "Test sanity check failed.\n");
> +		kunit_warn(test, "Space left at exit is %lld of %d\n",
> +			   (long long)atomic64_read(&free_space), SA_SIZE);
> +	}

If this is an error - let's add it as an "expect".
Otherwise it's not printed if the test PASSes (unless we're running with
raw output).

> +}
> +
> +module_param(intr, bool, 0400);
> +MODULE_PARM_DESC(intr, "Whether to wait interruptible for space.");

This should be a separate test case (or param to a test case), not a
modparam.

> +module_param(from_reclaim, bool, 0400);
> +MODULE_PARM_DESC(from_reclaim, "Whether to suballocate from reclaim context.");

Same here.

> +module_param(pre_throttle, bool, 0400);
> +MODULE_PARM_DESC(pre_throttle, "Whether to have the test throttle for space "
> +		 "before allocations.");

And here.

> +module_param(num_rings, uint, 0400);
> +MODULE_PARM_DESC(num_rings, "Number of rings signalling fences in order.\n");
> +module_param(iterations, uint, 0400);
> +MODULE_PARM_DESC(iterations, "Number of allocations to perform.\n");

Do we expect any difference in coverage for different number of rings /
iterations? What's the relation here? Would it be possible to extract
specific values (for which we expect different behavior) to separate
testcases?

-Michał

> +
> +static struct kunit_case drm_suballoc_tests[] = {
> +	KUNIT_CASE(drm_test_suballoc),
> +	{}
> +};
> +
> +static struct kunit_suite drm_suballoc_test_suite = {
> +	.name = "drm_suballoc",
> +	.test_cases = drm_suballoc_tests,
> +};
> +
> +kunit_test_suite(drm_suballoc_test_suite);
> +
> +MODULE_AUTHOR("Intel Corporation");
> +MODULE_DESCRIPTION("DRM suballocator Kunit test");
> +MODULE_LICENSE("Dual MIT/GPL");
> -- 
> 2.34.1
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RESEND] drm/tests: Suballocator test
  2023-03-26  9:42   ` [Intel-gfx] " Michał Winiarski
  (?)
@ 2023-03-28  8:22     ` Thomas Hellström
  -1 siblings, 0 replies; 21+ messages in thread
From: Thomas Hellström @ 2023-03-28  8:22 UTC (permalink / raw)
  To: Michał Winiarski; +Cc: intel-gfx, Christian Koenig, dri-devel, intel-xe

Hi Michal,

Thanks for the review, see comments inline,

On 3/26/23 11:42, Michał Winiarski wrote:
> On Thu, Mar 02, 2023 at 09:34:22AM +0100, Thomas Hellström wrote:
>> Add a suballocator test to get some test coverage for the new drm
>> suballocator, and perform some basic timing (elapsed time).
>>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> ---
>>   drivers/gpu/drm/Kconfig                   |   1 +
>>   drivers/gpu/drm/tests/Makefile            |   3 +-
>>   drivers/gpu/drm/tests/drm_suballoc_test.c | 356 ++++++++++++++++++++++
>>   3 files changed, 359 insertions(+), 1 deletion(-)
>>   create mode 100644 drivers/gpu/drm/tests/drm_suballoc_test.c
>>
>> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
>> index 8fbe57407c60..dced53723721 100644
>> --- a/drivers/gpu/drm/Kconfig
>> +++ b/drivers/gpu/drm/Kconfig
>> @@ -78,6 +78,7 @@ config DRM_KUNIT_TEST
>>   	select DRM_LIB_RANDOM
>>   	select DRM_KMS_HELPER
>>   	select DRM_BUDDY
>> +	select DRM_SUBALLOC_HELPER
>>   	select DRM_EXPORT_FOR_TESTS if m
>>   	select DRM_KUNIT_TEST_HELPERS
>>   	default KUNIT_ALL_TESTS
>> diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
>> index bca726a8f483..c664944a48ab 100644
>> --- a/drivers/gpu/drm/tests/Makefile
>> +++ b/drivers/gpu/drm/tests/Makefile
>> @@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
>>   	drm_modes_test.o \
>>   	drm_plane_helper_test.o \
>>   	drm_probe_helper_test.o \
>> -	drm_rect_test.o
>> +	drm_rect_test.o \
>> +	drm_suballoc_test.o
>>   
>>   CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
>> diff --git a/drivers/gpu/drm/tests/drm_suballoc_test.c b/drivers/gpu/drm/tests/drm_suballoc_test.c
>> new file mode 100644
>> index 000000000000..e7303a5505a0
>> --- /dev/null
>> +++ b/drivers/gpu/drm/tests/drm_suballoc_test.c
>> @@ -0,0 +1,356 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR MIT
>> +/*
>> + * Test case for the drm_suballoc suballocator manager
>> + * Copyright 2023 Intel Corporation.
>> + */
>> +
>> +#include <kunit/test.h>
>> +
>> +#include <linux/dma-fence.h>
>> +#include <linux/ktime.h>
>> +#include <linux/hrtimer.h>
>> +#include <linux/sizes.h>
>> +#include <linux/slab.h>
>> +#include <linux/spinlock.h>
>> +#include <linux/delay.h>
>> +#include <drm/drm_suballoc.h>
>> +
>> +#define SA_ITERATIONS 10000
>> +#define SA_SIZE SZ_1M
>> +#define SA_DEFAULT_ALIGN SZ_4K
>> +
>> +static bool intr = true;
>> +static bool from_reclaim;
>> +static bool pre_throttle;
>> +static unsigned int num_rings = 4;
>> +static unsigned int iterations = SA_ITERATIONS;
>> +
>> +static atomic64_t free_space;
>> +
>> +static atomic_t other_id;
>> +
>> +struct suballoc_fence;
>> +
>> +/**
>> + * struct suballoc_ring - fake gpu engine.
>> + * @list: List of fences to signal.
>> + * @signal_time: Accumulated fence signal execution time.
>> + * @lock: Protects the suballoc ring members. hardirq safe.
>> + * @hrtimer: Fake execution time timer.
>> + * @active: The currently active fence for which we have pending work or a
>> + *          timer running.
>> + * @seqno: Fence submissin seqno.
>> + * @idx: Index for calculation of fake execution time.
>> + * @work: Work struct used solely to move the timer start to a different
>> + *        processor than that used for submission.
>> + */
>> +struct suballoc_ring {
>> +	ktime_t signal_time;
>> +	struct list_head list;
>> +	/* Protect the ring processing. */
>> +	spinlock_t lock;
>> +	struct hrtimer hrtimer;
>> +	struct suballoc_fence *active;
>> +	atomic64_t seqno;
>> +	u32 idx;
>> +	struct work_struct work;
>> +};
>> +
>> +/**
>> + * struct suballoc_fence - Hrtimer-driven fence.
>> + * @fence: The base class fence struct.
>> + * @link: Link for the ring's fence list.
>> + * @size: The size of the suballocator range associated with this fence.
>> + * @id: Cpu id likely used by the submission thread for suballoc allocation.
>> + */
>> +struct suballoc_fence {
>> +	struct dma_fence fence;
>> +	struct list_head link;
>> +	size_t size;
>> +	unsigned int id;
>> +};
>> +
>> +/* A varying but repeatable fake execution time */
>> +static ktime_t ring_next_delay(struct suballoc_ring *ring)
>> +{
>> +	return ns_to_ktime((u64)(++ring->idx % 8) * 200 * NSEC_PER_USEC);
>> +}
> Is there any way we can avoid using time (and large number of
> iterations) here, while keeping the coverage?
> drm_suballoc have longest runtime out of all tests in DRM (taking ~60%
> of the whole DRM kunit execution, drm_mm being the second and taking
> ~35%, without those two suites DRM tests execute in milliseconds rather
> than tens of seconds),
> Building test cases in a way that operate on time basis makes it tricky
> to optimize the runtime.
> If we extract various parameters from modparams to separate test cases,
> it's going to get even worse.

This is intended to mimic the behaviour of different rings / engines 
using the same suballocator but with different typical batch-buffer 
execution time, causing suballocator fragmentation. TBH I haven't 
thought much about test execution time here so I can take a look at 
improving that. Also, if the time-based approach becomes an issue, what's 
important for maintaining coverage is the order in which the fences are 
signaled, and that we are able to drain the suballocator completely. However, I'm 
a bit afraid that trying to achieve that in other ways may complicate 
the test even more.



>
>> +
>> +/*
>> + * Launch from a work item to decrease the likelyhood of the timer expiry
>> + * callback getting called from the allocating cpu.
>> + * We want to trigger cache-line bouncing between allocating and signalling
>> + * cpus.
>> + */
>> +static void ring_launch_timer_work(struct work_struct *work)
>> +{
>> +	struct suballoc_ring *ring =
>> +		container_of(work, typeof(*ring), work);
>> +
>> +	spin_lock_irq(&ring->lock);
>> +	if (ring->active)
>> +		hrtimer_start_range_ns(&ring->hrtimer, ring_next_delay(ring),
>> +				       100ULL * NSEC_PER_USEC,
>> +				       HRTIMER_MODE_REL_PINNED);
>> +
>> +	spin_unlock_irq(&ring->lock);
>> +}
>> +
>> +/*
>> + * Signal an active fence and pull the next off the list if any and make it
>> + * active.
>> + */
>> +static enum hrtimer_restart ring_hrtimer_expired(struct hrtimer *hrtimer)
>> +{
>> +	struct suballoc_ring *ring =
>> +		container_of(hrtimer, typeof(*ring), hrtimer);
>> +	struct suballoc_fence *sfence;
>> +	ktime_t now, then;
>> +	unsigned long irqflags;
>> +
>> +	spin_lock_irqsave(&ring->lock, irqflags);
>> +	sfence = ring->active;
>> +
>> +	if (sfence) {
>> +		struct dma_fence *fence = &sfence->fence;
>> +
>> +		if (sfence->id != get_cpu())
>> +			atomic_inc(&other_id);
>> +		put_cpu();
>> +
>> +		then = ktime_get();
>> +		dma_fence_signal(fence);
>> +		now = ktime_get();
>> +		dma_fence_put(fence);
>> +		ring->signal_time = ktime_add(ring->signal_time,
>> +					      ktime_sub(now, then));
>> +		ring->active = NULL;
>> +		atomic64_add(sfence->size, &free_space);
>> +	}
>> +
>> +	sfence = list_first_entry_or_null(&ring->list, typeof(*sfence), link);
>> +	if (sfence) {
>> +		list_del_init(&sfence->link);
>> +		ring->active = sfence;
>> +		spin_unlock_irqrestore(&ring->lock, irqflags);
>> +		hrtimer_forward_now(&ring->hrtimer, ring_next_delay(ring));
>> +		return HRTIMER_RESTART;
>> +	}
>> +	spin_unlock_irqrestore(&ring->lock, irqflags);
>> +
>> +	return HRTIMER_NORESTART;
>> +}
>> +
>> +/*
>> + * Queue a fence on a ring and if it's the first fence, make it active.
>> + */
>> +static void ring_add_fence(struct suballoc_ring *ring,
>> +			   struct suballoc_fence *sfence)
>> +{
>> +	spin_lock_irq(&ring->lock);
>> +	if (!ring->active) {
>> +		ring->active = sfence;
>> +		queue_work(system_unbound_wq, &ring->work);
>> +	} else {
>> +		list_add_tail(&sfence->link, &ring->list);
>> +	}
>> +	spin_unlock_irq(&ring->lock);
>> +}
>> +
>> +static void ring_init(struct suballoc_ring *ring)
>> +{
>> +	memset(ring, 0, sizeof(*ring));
>> +	INIT_LIST_HEAD(&ring->list);
>> +	spin_lock_init(&ring->lock);
>> +	hrtimer_init(&ring->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>> +	ring->hrtimer.function = ring_hrtimer_expired;
>> +	INIT_WORK(&ring->work, ring_launch_timer_work);
>> +}
>> +
>> +static bool ring_idle(struct suballoc_ring *ring)
>> +{
>> +	bool tmp;
>> +
>> +	spin_lock_irq(&ring->lock);
>> +	tmp = !ring->active;
>> +	spin_unlock_irq(&ring->lock);
>> +
>> +	return tmp;
>> +}
>> +
>> +static const char *dma_fence_get_suballoc_name(struct dma_fence *fence)
>> +{
>> +	return "suballoc";
>> +}
>> +
>> +static const struct dma_fence_ops dma_fence_suballoc_ops = {
>> +	.get_driver_name = dma_fence_get_suballoc_name,
>> +	.get_timeline_name = dma_fence_get_suballoc_name,
>> +};
>> +
>> +static DEFINE_SPINLOCK(sa_fence_lock);
>> +static ktime_t alloctime, freetime;
>> +
>> +static void drm_test_suballoc(struct kunit *test)
>> +{
>> +	struct suballoc_ring *rings;
>> +	struct drm_suballoc_manager sa_manager;
>> +	struct drm_suballoc *sa;
>> +	struct suballoc_fence *sfence;
>> +	struct dma_fence *fence;
>> +	ktime_t then, now, signaltime;
>> +	int i, ring, iter_tot = 0;
>> +	size_t size;
>> +	unsigned int align;
>> +	unsigned long long soffset;
>> +	gfp_t gfp;
>> +
>> +	rings = kvmalloc_array(num_rings, sizeof(*rings), GFP_KERNEL);
>> +	if (!rings) {
>> +		KUNIT_FAIL(test, "Failed allocating %u rings.\n");
>> +		return;
>> +	}
> KUNIT_ASSERT_NOT_NULL?
> Though we might want to implement a test-resource managed variant
> (kunit_kvmalloc_array) to not have to worry about lifecycle and freeing
> the resources.

Will fix this and the other similar issues you pointed out.

>
>> +
>> +	for (i = 0; i < num_rings; ++i)
>> +		ring_init(rings + i);
> With resource managed - rings could be allocated and initialized at
> .init(). We would then call the flush and wait at .exit(), and as a
> result, we would be able to use asserts in test body without worrying
> about leaking.
>
>> +
>> +	atomic64_set(&free_space, SA_SIZE);
>> +	drm_suballoc_manager_init(&sa_manager, SA_SIZE, SA_DEFAULT_ALIGN);
> This could also be moved to .init()
>
>> +
>> +	if (from_reclaim)
>> +		gfp = GFP_NOWAIT | __GFP_NOWARN;
>> +	else
>> +		gfp = GFP_KERNEL;
>> +
>> +	for (i = 0; i < iterations; ++i) {
>> +		ring = i % num_rings;
>> +		size = (ring + 1) * SZ_4K;
>> +		align = 1 << (ring % const_ilog2(SA_DEFAULT_ALIGN));
>> +
>> +		if (pre_throttle)
>> +			while (atomic64_read(&free_space) < SA_SIZE / 2)
>> +				cpu_relax();
>> +
>> +		if (from_reclaim)
>> +			fs_reclaim_acquire(GFP_KERNEL);
>> +
>> +		then = ktime_get();
>> +		sa = drm_suballoc_new(&sa_manager, size, gfp, intr, align);
>> +		now = ktime_get();
>> +		if (from_reclaim)
>> +			fs_reclaim_release(GFP_KERNEL);
>> +
>> +		alloctime = ktime_add(alloctime, ktime_sub(now, then));
>> +
>> +		iter_tot++;
>> +		if (IS_ERR(sa)) {
> KUNIT_ASSERT_NOT_ERR_OR_NULL?
>
>> +			if (from_reclaim) {
> drm_suballoc_new can fail for other reasons than -ENOMEM under memory
> pressure, while with from_reclaim we're treating all errors as a
> success, is that intentional?

No it's not. Good catch. Will fix.

Thanks, Thomas



>
>> +				iter_tot--;
>> +				continue;
>> +			}
>> +
>> +			KUNIT_FAIL(test, "drm_suballoc_new() returned %pe\n",
>> +				   sa);
>> +			break;
>> +		}
>> +
>> +		atomic64_sub(size, &free_space);
>> +		soffset = drm_suballoc_soffset(sa);
>> +		if (!IS_ALIGNED(soffset, align)) {
>> +			drm_suballoc_free(sa, NULL);
> Do we need to worry about calling free here? We shouldn't leak as long
> as we wait upon all fences, as drm_suballoc_manager_fini will do the
> clean up for us.
>
> KUNIT_EXPECT_TRUE_MSG(..., IS_ALIGNED(soffset, align), ...)?
>
>> +			KUNIT_FAIL(test, "Incorrect alignment: offset %llu align %u rem %llu\n",
>> +				   soffset, align, soffset & (align - 1));
>> +			break;
>> +		}
>> +
>> +		if (drm_suballoc_eoffset(sa) > SA_SIZE) {
>> +			drm_suballoc_free(sa, NULL);
>> +			KUNIT_FAIL(test, "Allocation beyond end.\n");
>> +			break;
>> +		}
> KUNIT_EXPECT_LE_MSG?
>
>> +
>> +		if (drm_suballoc_size(sa) < size ||
>> +		    drm_suballoc_size(sa) >= size + align) {
>> +			drm_suballoc_free(sa, NULL);
>> +			KUNIT_FAIL(test, "Incorrect size.\n");
>> +			break;
>> +		}
> KUNIT_EXPECT_GE and KUNIT_EXPECT_LT?
>
>> +
>> +		sfence = kmalloc(sizeof(*sfence), GFP_KERNEL);
>> +		if (unlikely(!sfence)) {
>> +			drm_suballoc_free(sa, NULL);
>> +			KUNIT_FAIL(test, "Fence allocation failed.\n");
>> +			break;
>> +		}
> It looks like sfence is never released. kunit_kmalloc?
> KUNIT_ASSERT_NOT_NULL / KUNIT_ASSERT_NOT_ERR_OR_NULL?
>
>> +		fence = &sfence->fence;
>> +		dma_fence_init(fence, &dma_fence_suballoc_ops, &sa_fence_lock,
>> +			       ring + 1,
>> +			       atomic64_inc_return(&rings[ring].seqno));
>> +		sfence->size = size;
>> +		sfence->id = get_cpu();
>> +		put_cpu();
>> +
>> +		ring_add_fence(rings + ring, sfence);
>> +
>> +		then = ktime_get();
>> +		drm_suballoc_free(sa, fence);
>> +		now = ktime_get();
>> +		freetime = ktime_add(freetime, ktime_sub(now, then));
>> +	}
>> +
>> +	signaltime = ktime_set(0, 0);
>> +	for (i = 0; i < num_rings; ++i) {
>> +		struct suballoc_ring *sring = &rings[i];
>> +
>> +		flush_work(&sring->work);
>> +		while (!ring_idle(sring))
>> +			schedule();
>> +		signaltime = ktime_add(signaltime, sring->signal_time);
>> +	}
> This (and drm_suballoc_manager_fini) could be moved to .exit()
>
>> +
>> +	kvfree(rings);
>> +
>> +	kunit_info(test, "signals on different processor: %d of %d\n",
>> +		   atomic_read(&other_id), iter_tot);
>> +	drm_suballoc_manager_fini(&sa_manager);
>> +	kunit_info(test, "Alloc time was %llu ns.\n", (unsigned long long)
>> +		   ktime_to_ns(alloctime) / iter_tot);
>> +	kunit_info(test, "Free time was %llu ns.\n", (unsigned long long)
>> +		   ktime_to_ns(freetime) / iter_tot);
>> +	kunit_info(test, "Signal time was %llu ns.\n", (unsigned long long)
>> +		   ktime_to_ns(signaltime) / iter_tot);
> Do we need those timings?
> If we do expect certain values (probably with some epsilon range), we
> should handle it as a separate test.
>
>> +
>> +	if (atomic64_read(&free_space) != SA_SIZE) {
>> +		kunit_warn(test, "Test sanity check failed.\n");
>> +		kunit_warn(test, "Space left at exit is %lld of %d\n",
>> +			   (long long)atomic64_read(&free_space), SA_SIZE);
>> +	}
> If this is an error - let's add it as an "expect".
> Otherwise it's not printed if the test PASSes (unless we're running with
> raw output).
>
>> +}
>> +
>> +module_param(intr, bool, 0400);
>> +MODULE_PARM_DESC(intr, "Whether to wait interruptible for space.");
> This should be a separate test case (or param to a test case), not a
> modparam.
>
>> +module_param(from_reclaim, bool, 0400);
>> +MODULE_PARM_DESC(from_reclaim, "Whether to suballocate from reclaim context.");
> Same here.
>
>> +module_param(pre_throttle, bool, 0400);
>> +MODULE_PARM_DESC(pre_throttle, "Whether to have the test throttle for space "
>> +		 "before allocations.");
> And here.
>
>> +module_param(num_rings, uint, 0400);
>> +MODULE_PARM_DESC(num_rings, "Number of rings signalling fences in order.\n");
>> +module_param(iterations, uint, 0400);
>> +MODULE_PARM_DESC(iterations, "Number of allocations to perform.\n");
> Do we expect any difference in coverage for different number of rings /
> iterations? What's the relation here? Would it be possible to extract
> specific values (for which we expect different behavior) to separate
> testcases?
>
> -Michał
>
>> +
>> +static struct kunit_case drm_suballoc_tests[] = {
>> +	KUNIT_CASE(drm_test_suballoc),
>> +	{}
>> +};
>> +
>> +static struct kunit_suite drm_suballoc_test_suite = {
>> +	.name = "drm_suballoc",
>> +	.test_cases = drm_suballoc_tests,
>> +};
>> +
>> +kunit_test_suite(drm_suballoc_test_suite);
>> +
>> +MODULE_AUTHOR("Intel Corporation");
>> +MODULE_DESCRIPTION("DRM suballocator Kunit test");
>> +MODULE_LICENSE("Dual MIT/GPL");
>> -- 
>> 2.34.1
>>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-gfx] [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-28  8:22     ` Thomas Hellström
  0 siblings, 0 replies; 21+ messages in thread
From: Thomas Hellström @ 2023-03-28  8:22 UTC (permalink / raw)
  To: Michał Winiarski; +Cc: intel-gfx, Christian Koenig, dri-devel, intel-xe

Hi Michal,

Thanks for the review, see comments inline,

On 3/26/23 11:42, Michał Winiarski wrote:
> On Thu, Mar 02, 2023 at 09:34:22AM +0100, Thomas Hellström wrote:
>> Add a suballocator test to get some test coverage for the new drm
>> suballocator, and perform some basic timing (elapsed time).
>>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> ---
>>   drivers/gpu/drm/Kconfig                   |   1 +
>>   drivers/gpu/drm/tests/Makefile            |   3 +-
>>   drivers/gpu/drm/tests/drm_suballoc_test.c | 356 ++++++++++++++++++++++
>>   3 files changed, 359 insertions(+), 1 deletion(-)
>>   create mode 100644 drivers/gpu/drm/tests/drm_suballoc_test.c
>>
>> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
>> index 8fbe57407c60..dced53723721 100644
>> --- a/drivers/gpu/drm/Kconfig
>> +++ b/drivers/gpu/drm/Kconfig
>> @@ -78,6 +78,7 @@ config DRM_KUNIT_TEST
>>   	select DRM_LIB_RANDOM
>>   	select DRM_KMS_HELPER
>>   	select DRM_BUDDY
>> +	select DRM_SUBALLOC_HELPER
>>   	select DRM_EXPORT_FOR_TESTS if m
>>   	select DRM_KUNIT_TEST_HELPERS
>>   	default KUNIT_ALL_TESTS
>> diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
>> index bca726a8f483..c664944a48ab 100644
>> --- a/drivers/gpu/drm/tests/Makefile
>> +++ b/drivers/gpu/drm/tests/Makefile
>> @@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
>>   	drm_modes_test.o \
>>   	drm_plane_helper_test.o \
>>   	drm_probe_helper_test.o \
>> -	drm_rect_test.o
>> +	drm_rect_test.o \
>> +	drm_suballoc_test.o
>>   
>>   CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
>> diff --git a/drivers/gpu/drm/tests/drm_suballoc_test.c b/drivers/gpu/drm/tests/drm_suballoc_test.c
>> new file mode 100644
>> index 000000000000..e7303a5505a0
>> --- /dev/null
>> +++ b/drivers/gpu/drm/tests/drm_suballoc_test.c
>> @@ -0,0 +1,356 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR MIT
>> +/*
>> + * Test case for the drm_suballoc suballocator manager
>> + * Copyright 2023 Intel Corporation.
>> + */
>> +
>> +#include <kunit/test.h>
>> +
>> +#include <linux/dma-fence.h>
>> +#include <linux/ktime.h>
>> +#include <linux/hrtimer.h>
>> +#include <linux/sizes.h>
>> +#include <linux/slab.h>
>> +#include <linux/spinlock.h>
>> +#include <linux/delay.h>
>> +#include <drm/drm_suballoc.h>
>> +
>> +#define SA_ITERATIONS 10000
>> +#define SA_SIZE SZ_1M
>> +#define SA_DEFAULT_ALIGN SZ_4K
>> +
>> +static bool intr = true;
>> +static bool from_reclaim;
>> +static bool pre_throttle;
>> +static unsigned int num_rings = 4;
>> +static unsigned int iterations = SA_ITERATIONS;
>> +
>> +static atomic64_t free_space;
>> +
>> +static atomic_t other_id;
>> +
>> +struct suballoc_fence;
>> +
>> +/**
>> + * struct suballoc_ring - fake gpu engine.
>> + * @list: List of fences to signal.
>> + * @signal_time: Accumulated fence signal execution time.
>> + * @lock: Protects the suballoc ring members. hardirq safe.
>> + * @hrtimer: Fake execution time timer.
>> + * @active: The currently active fence for which we have pending work or a
>> + *          timer running.
>> + * @seqno: Fence submission seqno.
>> + * @idx: Index for calculation of fake execution time.
>> + * @work: Work struct used solely to move the timer start to a different
>> + *        processor than that used for submission.
>> + */
>> +struct suballoc_ring {
>> +	ktime_t signal_time;
>> +	struct list_head list;
>> +	/* Protect the ring processing. */
>> +	spinlock_t lock;
>> +	struct hrtimer hrtimer;
>> +	struct suballoc_fence *active;
>> +	atomic64_t seqno;
>> +	u32 idx;
>> +	struct work_struct work;
>> +};
>> +
>> +/**
>> + * struct suballoc_fence - Hrtimer-driven fence.
>> + * @fence: The base class fence struct.
>> + * @link: Link for the ring's fence list.
>> + * @size: The size of the suballocator range associated with this fence.
>> + * @id: Cpu id likely used by the submission thread for suballoc allocation.
>> + */
>> +struct suballoc_fence {
>> +	struct dma_fence fence;
>> +	struct list_head link;
>> +	size_t size;
>> +	unsigned int id;
>> +};
>> +
>> +/* A varying but repeatable fake execution time */
>> +static ktime_t ring_next_delay(struct suballoc_ring *ring)
>> +{
>> +	return ns_to_ktime((u64)(++ring->idx % 8) * 200 * NSEC_PER_USEC);
>> +}
> Is there any way we can avoid using time (and large number of
> iterations) here, while keeping the coverage?
> drm_suballoc have longest runtime out of all tests in DRM (taking ~60%
> of the whole DRM kunit execution, drm_mm being the second and taking
> ~35%, without those two suites DRM tests execute in milliseconds rather
> than tens of seconds),
> Building test cases in a way that operate on time basis makes it tricky
> to optimize the runtime.
> If we extract various parameters from modparams to separate test cases,
> it's going to get even worse.

This is intended to mimic the behaviour of different rings / engines 
using the same suballocator but with different typical batch-buffer 
execution time, causing suballocator fragmentation. TBH I haven't 
thought much about test execution time here so I can take a look at 
improving that. Also if time-based becomes an issue what's important to 
maintain coverage is the order in which the fences are signaled, and 
also that we are able to drain the suballocator completely. However, I'm 
a bit afraid that trying to achieve that in other ways may complicate 
the test even more.



>
>> +
>> +/*
>> + * Launch from a work item to decrease the likelihood of the timer expiry
>> + * callback getting called from the allocating cpu.
>> + * We want to trigger cache-line bouncing between allocating and signalling
>> + * cpus.
>> + */
>> +static void ring_launch_timer_work(struct work_struct *work)
>> +{
>> +	struct suballoc_ring *ring =
>> +		container_of(work, typeof(*ring), work);
>> +
>> +	spin_lock_irq(&ring->lock);
>> +	if (ring->active)
>> +		hrtimer_start_range_ns(&ring->hrtimer, ring_next_delay(ring),
>> +				       100ULL * NSEC_PER_USEC,
>> +				       HRTIMER_MODE_REL_PINNED);
>> +
>> +	spin_unlock_irq(&ring->lock);
>> +}
>> +
>> +/*
>> + * Signal an active fence and pull the next off the list if any and make it
>> + * active.
>> + */
>> +static enum hrtimer_restart ring_hrtimer_expired(struct hrtimer *hrtimer)
>> +{
>> +	struct suballoc_ring *ring =
>> +		container_of(hrtimer, typeof(*ring), hrtimer);
>> +	struct suballoc_fence *sfence;
>> +	ktime_t now, then;
>> +	unsigned long irqflags;
>> +
>> +	spin_lock_irqsave(&ring->lock, irqflags);
>> +	sfence = ring->active;
>> +
>> +	if (sfence) {
>> +		struct dma_fence *fence = &sfence->fence;
>> +
>> +		if (sfence->id != get_cpu())
>> +			atomic_inc(&other_id);
>> +		put_cpu();
>> +
>> +		then = ktime_get();
>> +		dma_fence_signal(fence);
>> +		now = ktime_get();
>> +		dma_fence_put(fence);
>> +		ring->signal_time = ktime_add(ring->signal_time,
>> +					      ktime_sub(now, then));
>> +		ring->active = NULL;
>> +		atomic64_add(sfence->size, &free_space);
>> +	}
>> +
>> +	sfence = list_first_entry_or_null(&ring->list, typeof(*sfence), link);
>> +	if (sfence) {
>> +		list_del_init(&sfence->link);
>> +		ring->active = sfence;
>> +		spin_unlock_irqrestore(&ring->lock, irqflags);
>> +		hrtimer_forward_now(&ring->hrtimer, ring_next_delay(ring));
>> +		return HRTIMER_RESTART;
>> +	}
>> +	spin_unlock_irqrestore(&ring->lock, irqflags);
>> +
>> +	return HRTIMER_NORESTART;
>> +}
>> +
>> +/*
>> + * Queue a fence on a ring and if it's the first fence, make it active.
>> + */
>> +static void ring_add_fence(struct suballoc_ring *ring,
>> +			   struct suballoc_fence *sfence)
>> +{
>> +	spin_lock_irq(&ring->lock);
>> +	if (!ring->active) {
>> +		ring->active = sfence;
>> +		queue_work(system_unbound_wq, &ring->work);
>> +	} else {
>> +		list_add_tail(&sfence->link, &ring->list);
>> +	}
>> +	spin_unlock_irq(&ring->lock);
>> +}
>> +
>> +static void ring_init(struct suballoc_ring *ring)
>> +{
>> +	memset(ring, 0, sizeof(*ring));
>> +	INIT_LIST_HEAD(&ring->list);
>> +	spin_lock_init(&ring->lock);
>> +	hrtimer_init(&ring->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>> +	ring->hrtimer.function = ring_hrtimer_expired;
>> +	INIT_WORK(&ring->work, ring_launch_timer_work);
>> +}
>> +
>> +static bool ring_idle(struct suballoc_ring *ring)
>> +{
>> +	bool tmp;
>> +
>> +	spin_lock_irq(&ring->lock);
>> +	tmp = !ring->active;
>> +	spin_unlock_irq(&ring->lock);
>> +
>> +	return tmp;
>> +}
>> +
>> +static const char *dma_fence_get_suballoc_name(struct dma_fence *fence)
>> +{
>> +	return "suballoc";
>> +}
>> +
>> +static const struct dma_fence_ops dma_fence_suballoc_ops = {
>> +	.get_driver_name = dma_fence_get_suballoc_name,
>> +	.get_timeline_name = dma_fence_get_suballoc_name,
>> +};
>> +
>> +static DEFINE_SPINLOCK(sa_fence_lock);
>> +static ktime_t alloctime, freetime;
>> +
>> +static void drm_test_suballoc(struct kunit *test)
>> +{
>> +	struct suballoc_ring *rings;
>> +	struct drm_suballoc_manager sa_manager;
>> +	struct drm_suballoc *sa;
>> +	struct suballoc_fence *sfence;
>> +	struct dma_fence *fence;
>> +	ktime_t then, now, signaltime;
>> +	int i, ring, iter_tot = 0;
>> +	size_t size;
>> +	unsigned int align;
>> +	unsigned long long soffset;
>> +	gfp_t gfp;
>> +
>> +	rings = kvmalloc_array(num_rings, sizeof(*rings), GFP_KERNEL);
>> +	if (!rings) {
>> +		KUNIT_FAIL(test, "Failed allocating %u rings.\n");
>> +		return;
>> +	}
> KUNIT_ASSERT_NOT_NULL?
> Though we might want to implement a test-resource managed variant
> (kunit_kvmalloc_array) to not have to worry about lifecycle and freeing
> the resources.

Will fix this and the other similar issues you pointed out.

>
>> +
>> +	for (i = 0; i < num_rings; ++i)
>> +		ring_init(rings + i);
> With resource managed - rings could be allocated and initialized at
> .init(). We would then call the flush and wait at .exit(), and as a
> result, we would be able to use asserts in test body without worrying
> about leaking.
>
>> +
>> +	atomic64_set(&free_space, SA_SIZE);
>> +	drm_suballoc_manager_init(&sa_manager, SA_SIZE, SA_DEFAULT_ALIGN);
> This could also be moved to .init()
>
>> +
>> +	if (from_reclaim)
>> +		gfp = GFP_NOWAIT | __GFP_NOWARN;
>> +	else
>> +		gfp = GFP_KERNEL;
>> +
>> +	for (i = 0; i < iterations; ++i) {
>> +		ring = i % num_rings;
>> +		size = (ring + 1) * SZ_4K;
>> +		align = 1 << (ring % const_ilog2(SA_DEFAULT_ALIGN));
>> +
>> +		if (pre_throttle)
>> +			while (atomic64_read(&free_space) < SA_SIZE / 2)
>> +				cpu_relax();
>> +
>> +		if (from_reclaim)
>> +			fs_reclaim_acquire(GFP_KERNEL);
>> +
>> +		then = ktime_get();
>> +		sa = drm_suballoc_new(&sa_manager, size, gfp, intr, align);
>> +		now = ktime_get();
>> +		if (from_reclaim)
>> +			fs_reclaim_release(GFP_KERNEL);
>> +
>> +		alloctime = ktime_add(alloctime, ktime_sub(now, then));
>> +
>> +		iter_tot++;
>> +		if (IS_ERR(sa)) {
> KUNIT_ASSERT_NOT_ERR_OR_NULL?
>
>> +			if (from_reclaim) {
> drm_suballoc_new can fail for other reasons than -ENOMEM under memory
> pressure, while with from_reclaim we're treating all errors as a
> success, is that intentional?

No it's not. Good catch. Will fix.

Thanks, Thomas



>
>> +				iter_tot--;
>> +				continue;
>> +			}
>> +
>> +			KUNIT_FAIL(test, "drm_suballoc_new() returned %pe\n",
>> +				   sa);
>> +			break;
>> +		}
>> +
>> +		atomic64_sub(size, &free_space);
>> +		soffset = drm_suballoc_soffset(sa);
>> +		if (!IS_ALIGNED(soffset, align)) {
>> +			drm_suballoc_free(sa, NULL);
> Do we need to worry about calling free here? We shouldn't leak as long
> as we wait upon all fences, as drm_suballoc_manager_fini will do the
> clean up for us.
>
> KUNIT_EXPECT_TRUE_MSG(..., IS_ALIGNED(soffset, align), ...)?
>
>> +			KUNIT_FAIL(test, "Incorrect alignment: offset %llu align %u rem %llu\n",
>> +				   soffset, align, soffset & (align - 1));
>> +			break;
>> +		}
>> +
>> +		if (drm_suballoc_eoffset(sa) > SA_SIZE) {
>> +			drm_suballoc_free(sa, NULL);
>> +			KUNIT_FAIL(test, "Allocation beyond end.\n");
>> +			break;
>> +		}
> KUNIT_EXPECT_LE_MSG?
>
>> +
>> +		if (drm_suballoc_size(sa) < size ||
>> +		    drm_suballoc_size(sa) >= size + align) {
>> +			drm_suballoc_free(sa, NULL);
>> +			KUNIT_FAIL(test, "Incorrect size.\n");
>> +			break;
>> +		}
> KUNIT_EXPECT_GE and KUNIT_EXPECT_LT?
>
>> +
>> +		sfence = kmalloc(sizeof(*sfence), GFP_KERNEL);
>> +		if (unlikely(!sfence)) {
>> +			drm_suballoc_free(sa, NULL);
>> +			KUNIT_FAIL(test, "Fence allocation failed.\n");
>> +			break;
>> +		}
> It looks like sfence is never released. kunit_kmalloc?
> KUNIT_ASSERT_NOT_NULL / KUNIT_ASSERT_NOT_ERR_OR_NULL?
>
>> +		fence = &sfence->fence;
>> +		dma_fence_init(fence, &dma_fence_suballoc_ops, &sa_fence_lock,
>> +			       ring + 1,
>> +			       atomic64_inc_return(&rings[ring].seqno));
>> +		sfence->size = size;
>> +		sfence->id = get_cpu();
>> +		put_cpu();
>> +
>> +		ring_add_fence(rings + ring, sfence);
>> +
>> +		then = ktime_get();
>> +		drm_suballoc_free(sa, fence);
>> +		now = ktime_get();
>> +		freetime = ktime_add(freetime, ktime_sub(now, then));
>> +	}
>> +
>> +	signaltime = ktime_set(0, 0);
>> +	for (i = 0; i < num_rings; ++i) {
>> +		struct suballoc_ring *sring = &rings[i];
>> +
>> +		flush_work(&sring->work);
>> +		while (!ring_idle(sring))
>> +			schedule();
>> +		signaltime = ktime_add(signaltime, sring->signal_time);
>> +	}
> This (and drm_suballoc_manager_fini) could be moved to .exit()
>
>> +
>> +	kvfree(rings);
>> +
>> +	kunit_info(test, "signals on different processor: %d of %d\n",
>> +		   atomic_read(&other_id), iter_tot);
>> +	drm_suballoc_manager_fini(&sa_manager);
>> +	kunit_info(test, "Alloc time was %llu ns.\n", (unsigned long long)
>> +		   ktime_to_ns(alloctime) / iter_tot);
>> +	kunit_info(test, "Free time was %llu ns.\n", (unsigned long long)
>> +		   ktime_to_ns(freetime) / iter_tot);
>> +	kunit_info(test, "Signal time was %llu ns.\n", (unsigned long long)
>> +		   ktime_to_ns(signaltime) / iter_tot);
> Do we need those timings?
> If we do expect certain values (probably with some epsilon range), we
> should handle it as a separate test.
>
>> +
>> +	if (atomic64_read(&free_space) != SA_SIZE) {
>> +		kunit_warn(test, "Test sanity check failed.\n");
>> +		kunit_warn(test, "Space left at exit is %lld of %d\n",
>> +			   (long long)atomic64_read(&free_space), SA_SIZE);
>> +	}
> If this is an error - let's add it as an "expect".
> Otherwise it's not printed if the test PASSes (unless we're running with
> raw output).
>
>> +}
>> +
>> +module_param(intr, bool, 0400);
>> +MODULE_PARM_DESC(intr, "Whether to wait interruptible for space.");
> This should be a separate test case (or param to a test case), not a
> modparam.
>
>> +module_param(from_reclaim, bool, 0400);
>> +MODULE_PARM_DESC(from_reclaim, "Whether to suballocate from reclaim context.");
> Same here.
>
>> +module_param(pre_throttle, bool, 0400);
>> +MODULE_PARM_DESC(pre_throttle, "Whether to have the test throttle for space "
>> +		 "before allocations.");
> And here.
>
>> +module_param(num_rings, uint, 0400);
>> +MODULE_PARM_DESC(num_rings, "Number of rings signalling fences in order.\n");
>> +module_param(iterations, uint, 0400);
>> +MODULE_PARM_DESC(iterations, "Number of allocations to perform.\n");
> Do we expect any difference in coverage for different number of rings /
> iterations? What's the relation here? Would it be possible to extract
> specific values (for which we expect different behavior) to separate
> testcases?
>
> -Michał
>
>> +
>> +static struct kunit_case drm_suballoc_tests[] = {
>> +	KUNIT_CASE(drm_test_suballoc),
>> +	{}
>> +};
>> +
>> +static struct kunit_suite drm_suballoc_test_suite = {
>> +	.name = "drm_suballoc",
>> +	.test_cases = drm_suballoc_tests,
>> +};
>> +
>> +kunit_test_suite(drm_suballoc_test_suite);
>> +
>> +MODULE_AUTHOR("Intel Corporation");
>> +MODULE_DESCRIPTION("DRM suballocator Kunit test");
>> +MODULE_LICENSE("Dual MIT/GPL");
>> -- 
>> 2.34.1
>>

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [Intel-xe] [PATCH RESEND] drm/tests: Suballocator test
@ 2023-03-28  8:22     ` Thomas Hellström
  0 siblings, 0 replies; 21+ messages in thread
From: Thomas Hellström @ 2023-03-28  8:22 UTC (permalink / raw)
  To: Michał Winiarski; +Cc: intel-gfx, Christian Koenig, dri-devel, intel-xe

Hi Michal,

Thanks for the review, see comments inline,

On 3/26/23 11:42, Michał Winiarski wrote:
> On Thu, Mar 02, 2023 at 09:34:22AM +0100, Thomas Hellström wrote:
>> Add a suballocator test to get some test coverage for the new drm
>> suballocator, and perform some basic timing (elapsed time).
>>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> ---
>>   drivers/gpu/drm/Kconfig                   |   1 +
>>   drivers/gpu/drm/tests/Makefile            |   3 +-
>>   drivers/gpu/drm/tests/drm_suballoc_test.c | 356 ++++++++++++++++++++++
>>   3 files changed, 359 insertions(+), 1 deletion(-)
>>   create mode 100644 drivers/gpu/drm/tests/drm_suballoc_test.c
>>
>> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
>> index 8fbe57407c60..dced53723721 100644
>> --- a/drivers/gpu/drm/Kconfig
>> +++ b/drivers/gpu/drm/Kconfig
>> @@ -78,6 +78,7 @@ config DRM_KUNIT_TEST
>>   	select DRM_LIB_RANDOM
>>   	select DRM_KMS_HELPER
>>   	select DRM_BUDDY
>> +	select DRM_SUBALLOC_HELPER
>>   	select DRM_EXPORT_FOR_TESTS if m
>>   	select DRM_KUNIT_TEST_HELPERS
>>   	default KUNIT_ALL_TESTS
>> diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
>> index bca726a8f483..c664944a48ab 100644
>> --- a/drivers/gpu/drm/tests/Makefile
>> +++ b/drivers/gpu/drm/tests/Makefile
>> @@ -17,6 +17,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
>>   	drm_modes_test.o \
>>   	drm_plane_helper_test.o \
>>   	drm_probe_helper_test.o \
>> -	drm_rect_test.o
>> +	drm_rect_test.o \
>> +	drm_suballoc_test.o
>>   
>>   CFLAGS_drm_mm_test.o := $(DISABLE_STRUCTLEAK_PLUGIN)
>> diff --git a/drivers/gpu/drm/tests/drm_suballoc_test.c b/drivers/gpu/drm/tests/drm_suballoc_test.c
>> new file mode 100644
>> index 000000000000..e7303a5505a0
>> --- /dev/null
>> +++ b/drivers/gpu/drm/tests/drm_suballoc_test.c
>> @@ -0,0 +1,356 @@
>> +// SPDX-License-Identifier: GPL-2.0 OR MIT
>> +/*
>> + * Test case for the drm_suballoc suballocator manager
>> + * Copyright 2023 Intel Corporation.
>> + */
>> +
>> +#include <kunit/test.h>
>> +
>> +#include <linux/dma-fence.h>
>> +#include <linux/ktime.h>
>> +#include <linux/hrtimer.h>
>> +#include <linux/sizes.h>
>> +#include <linux/slab.h>
>> +#include <linux/spinlock.h>
>> +#include <linux/delay.h>
>> +#include <drm/drm_suballoc.h>
>> +
>> +#define SA_ITERATIONS 10000
>> +#define SA_SIZE SZ_1M
>> +#define SA_DEFAULT_ALIGN SZ_4K
>> +
>> +static bool intr = true;
>> +static bool from_reclaim;
>> +static bool pre_throttle;
>> +static unsigned int num_rings = 4;
>> +static unsigned int iterations = SA_ITERATIONS;
>> +
>> +static atomic64_t free_space;
>> +
>> +static atomic_t other_id;
>> +
>> +struct suballoc_fence;
>> +
>> +/**
>> + * struct suballoc_ring - fake gpu engine.
>> + * @list: List of fences to signal.
>> + * @signal_time: Accumulated fence signal execution time.
>> + * @lock: Protects the suballoc ring members. hardirq safe.
>> + * @hrtimer: Fake execution time timer.
>> + * @active: The currently active fence for which we have pending work or a
>> + *          timer running.
>> + * @seqno: Fence submission seqno.
>> + * @idx: Index for calculation of fake execution time.
>> + * @work: Work struct used solely to move the timer start to a different
>> + *        processor than that used for submission.
>> + */
>> +struct suballoc_ring {
>> +	ktime_t signal_time;
>> +	struct list_head list;
>> +	/* Protect the ring processing. */
>> +	spinlock_t lock;
>> +	struct hrtimer hrtimer;
>> +	struct suballoc_fence *active;
>> +	atomic64_t seqno;
>> +	u32 idx;
>> +	struct work_struct work;
>> +};
>> +
>> +/**
>> + * struct suballoc_fence - Hrtimer-driven fence.
>> + * @fence: The base class fence struct.
>> + * @link: Link for the ring's fence list.
>> + * @size: The size of the suballocator range associated with this fence.
>> + * @id: Cpu id likely used by the submission thread for suballoc allocation.
>> + */
>> +struct suballoc_fence {
>> +	struct dma_fence fence;
>> +	struct list_head link;
>> +	size_t size;
>> +	unsigned int id;
>> +};
>> +
>> +/* A varying but repeatable fake execution time */
>> +static ktime_t ring_next_delay(struct suballoc_ring *ring)
>> +{
>> +	return ns_to_ktime((u64)(++ring->idx % 8) * 200 * NSEC_PER_USEC);
>> +}
> Is there any way we can avoid using time (and large number of
> iterations) here, while keeping the coverage?
> drm_suballoc have longest runtime out of all tests in DRM (taking ~60%
> of the whole DRM kunit execution, drm_mm being the second and taking
> ~35%, without those two suites DRM tests execute in milliseconds rather
> than tens of seconds),
> Building test cases in a way that operate on time basis makes it tricky
> to optimize the runtime.
> If we extract various parameters from modparams to separate test cases,
> it's going to get even worse.

This is intended to mimic the behaviour of different rings / engines 
using the same suballocator but with different typical batch-buffer 
execution time, causing suballocator fragmentation. TBH I haven't 
thought much about test execution time here so I can take a look at 
improving that. Also if time-based becomes an issue what's important to 
maintain coverage is the order in which the fences are signaled, and 
also that we are able to drain the suballocator completely. However, I'm 
a bit afraid that trying to achieve that in other ways may complicate 
the test even more.



>
>> +
>> +/*
>> + * Launch from a work item to decrease the likelihood of the timer expiry
>> + * callback getting called from the allocating cpu.
>> + * We want to trigger cache-line bouncing between allocating and signalling
>> + * cpus.
>> + */
>> +static void ring_launch_timer_work(struct work_struct *work)
>> +{
>> +	struct suballoc_ring *ring =
>> +		container_of(work, typeof(*ring), work);
>> +
>> +	spin_lock_irq(&ring->lock);
>> +	if (ring->active)
>> +		hrtimer_start_range_ns(&ring->hrtimer, ring_next_delay(ring),
>> +				       100ULL * NSEC_PER_USEC,
>> +				       HRTIMER_MODE_REL_PINNED);
>> +
>> +	spin_unlock_irq(&ring->lock);
>> +}
>> +
>> +/*
>> + * Signal an active fence and pull the next off the list if any and make it
>> + * active.
>> + */
>> +static enum hrtimer_restart ring_hrtimer_expired(struct hrtimer *hrtimer)
>> +{
>> +	struct suballoc_ring *ring =
>> +		container_of(hrtimer, typeof(*ring), hrtimer);
>> +	struct suballoc_fence *sfence;
>> +	ktime_t now, then;
>> +	unsigned long irqflags;
>> +
>> +	spin_lock_irqsave(&ring->lock, irqflags);
>> +	sfence = ring->active;
>> +
>> +	if (sfence) {
>> +		struct dma_fence *fence = &sfence->fence;
>> +
>> +		if (sfence->id != get_cpu())
>> +			atomic_inc(&other_id);
>> +		put_cpu();
>> +
>> +		then = ktime_get();
>> +		dma_fence_signal(fence);
>> +		now = ktime_get();
>> +		dma_fence_put(fence);
>> +		ring->signal_time = ktime_add(ring->signal_time,
>> +					      ktime_sub(now, then));
>> +		ring->active = NULL;
>> +		atomic64_add(sfence->size, &free_space);
>> +	}
>> +
>> +	sfence = list_first_entry_or_null(&ring->list, typeof(*sfence), link);
>> +	if (sfence) {
>> +		list_del_init(&sfence->link);
>> +		ring->active = sfence;
>> +		spin_unlock_irqrestore(&ring->lock, irqflags);
>> +		hrtimer_forward_now(&ring->hrtimer, ring_next_delay(ring));
>> +		return HRTIMER_RESTART;
>> +	}
>> +	spin_unlock_irqrestore(&ring->lock, irqflags);
>> +
>> +	return HRTIMER_NORESTART;
>> +}
>> +
>> +/*
>> + * Queue a fence on a ring and if it's the first fence, make it active.
>> + */
>> +static void ring_add_fence(struct suballoc_ring *ring,
>> +			   struct suballoc_fence *sfence)
>> +{
>> +	spin_lock_irq(&ring->lock);
>> +	if (!ring->active) {
>> +		ring->active = sfence;
>> +		queue_work(system_unbound_wq, &ring->work);
>> +	} else {
>> +		list_add_tail(&sfence->link, &ring->list);
>> +	}
>> +	spin_unlock_irq(&ring->lock);
>> +}
>> +
>> +static void ring_init(struct suballoc_ring *ring)
>> +{
>> +	memset(ring, 0, sizeof(*ring));
>> +	INIT_LIST_HEAD(&ring->list);
>> +	spin_lock_init(&ring->lock);
>> +	hrtimer_init(&ring->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>> +	ring->hrtimer.function = ring_hrtimer_expired;
>> +	INIT_WORK(&ring->work, ring_launch_timer_work);
>> +}
>> +
>> +static bool ring_idle(struct suballoc_ring *ring)
>> +{
>> +	bool tmp;
>> +
>> +	spin_lock_irq(&ring->lock);
>> +	tmp = !ring->active;
>> +	spin_unlock_irq(&ring->lock);
>> +
>> +	return tmp;
>> +}
>> +
>> +static const char *dma_fence_get_suballoc_name(struct dma_fence *fence)
>> +{
>> +	return "suballoc";
>> +}
>> +
>> +static const struct dma_fence_ops dma_fence_suballoc_ops = {
>> +	.get_driver_name = dma_fence_get_suballoc_name,
>> +	.get_timeline_name = dma_fence_get_suballoc_name,
>> +};
>> +
>> +static DEFINE_SPINLOCK(sa_fence_lock);
>> +static ktime_t alloctime, freetime;
>> +
>> +static void drm_test_suballoc(struct kunit *test)
>> +{
>> +	struct suballoc_ring *rings;
>> +	struct drm_suballoc_manager sa_manager;
>> +	struct drm_suballoc *sa;
>> +	struct suballoc_fence *sfence;
>> +	struct dma_fence *fence;
>> +	ktime_t then, now, signaltime;
>> +	int i, ring, iter_tot = 0;
>> +	size_t size;
>> +	unsigned int align;
>> +	unsigned long long soffset;
>> +	gfp_t gfp;
>> +
>> +	rings = kvmalloc_array(num_rings, sizeof(*rings), GFP_KERNEL);
>> +	if (!rings) {
>> +		KUNIT_FAIL(test, "Failed allocating %u rings.\n");
>> +		return;
>> +	}
> KUNIT_ASSERT_NOT_NULL?
> Though we might want to implement a test-resource managed variant
> (kunit_kvmalloc_array) to not have to worry about lifecycle and freeing
> the resources.

Will fix this and the other similar issues you pointed out.

>
>> +
>> +	for (i = 0; i < num_rings; ++i)
>> +		ring_init(rings + i);
> With resource managed - rings could be allocated and initialized at
> .init(). We would then call the flush and wait at .exit(), and as a
> result, we would be able to use asserts in test body without worrying
> about leaking.
>
>> +
>> +	atomic64_set(&free_space, SA_SIZE);
>> +	drm_suballoc_manager_init(&sa_manager, SA_SIZE, SA_DEFAULT_ALIGN);
> This could also be moved to .init()
>
>> +
>> +	if (from_reclaim)
>> +		gfp = GFP_NOWAIT | __GFP_NOWARN;
>> +	else
>> +		gfp = GFP_KERNEL;
>> +
>> +	for (i = 0; i < iterations; ++i) {
>> +		ring = i % num_rings;
>> +		size = (ring + 1) * SZ_4K;
>> +		align = 1 << (ring % const_ilog2(SA_DEFAULT_ALIGN));
>> +
>> +		if (pre_throttle)
>> +			while (atomic64_read(&free_space) < SA_SIZE / 2)
>> +				cpu_relax();
>> +
>> +		if (from_reclaim)
>> +			fs_reclaim_acquire(GFP_KERNEL);
>> +
>> +		then = ktime_get();
>> +		sa = drm_suballoc_new(&sa_manager, size, gfp, intr, align);
>> +		now = ktime_get();
>> +		if (from_reclaim)
>> +			fs_reclaim_release(GFP_KERNEL);
>> +
>> +		alloctime = ktime_add(alloctime, ktime_sub(now, then));
>> +
>> +		iter_tot++;
>> +		if (IS_ERR(sa)) {
> KUNIT_ASSERT_NOT_ERR_OR_NULL?
>
>> +			if (from_reclaim) {
> drm_suballoc_new can fail for other reasons than -ENOMEM under memory
> pressure, while with from_reclaim we're treating all errors as a
> success, is that intentional?

No it's not. Good catch. Will fix.

Thanks, Thomas



>
>> +				iter_tot--;
>> +				continue;
>> +			}
>> +
>> +			KUNIT_FAIL(test, "drm_suballoc_new() returned %pe\n",
>> +				   sa);
>> +			break;
>> +		}
>> +
>> +		atomic64_sub(size, &free_space);
>> +		soffset = drm_suballoc_soffset(sa);
>> +		if (!IS_ALIGNED(soffset, align)) {
>> +			drm_suballoc_free(sa, NULL);
> Do we need to worry about calling free here? We shouldn't leak as long
> as we wait upon all fences, as drm_suballoc_manager_fini will do the
> clean up for us.
>
> KUNIT_EXPECT_TRUE_MSG(..., IS_ALIGNED(soffset, align), ...)?
>
>> +			KUNIT_FAIL(test, "Incorrect alignment: offset %llu align %u rem %llu\n",
>> +				   soffset, align, soffset & (align - 1));
>> +			break;
>> +		}
>> +
>> +		if (drm_suballoc_eoffset(sa) > SA_SIZE) {
>> +			drm_suballoc_free(sa, NULL);
>> +			KUNIT_FAIL(test, "Allocation beyond end.\n");
>> +			break;
>> +		}
> KUNIT_EXPECT_LE_MSG?
>
>> +
>> +		if (drm_suballoc_size(sa) < size ||
>> +		    drm_suballoc_size(sa) >= size + align) {
>> +			drm_suballoc_free(sa, NULL);
>> +			KUNIT_FAIL(test, "Incorrect size.\n");
>> +			break;
>> +		}
> KUNIT_EXPECT_GE and KUNIT_EXPECT_LT?
>
>> +
>> +		sfence = kmalloc(sizeof(*sfence), GFP_KERNEL);
>> +		if (unlikely(!sfence)) {
>> +			drm_suballoc_free(sa, NULL);
>> +			KUNIT_FAIL(test, "Fence allocation failed.\n");
>> +			break;
>> +		}
> It looks like sfence is never released. kunit_kmalloc?
> KUNIT_ASSERT_NOT_NULL / KUNIT_ASSERT_NOT_ERR_OR_NULL?
>
>> +		fence = &sfence->fence;
>> +		dma_fence_init(fence, &dma_fence_suballoc_ops, &sa_fence_lock,
>> +			       ring + 1,
>> +			       atomic64_inc_return(&rings[ring].seqno));
>> +		sfence->size = size;
>> +		sfence->id = get_cpu();
>> +		put_cpu();
>> +
>> +		ring_add_fence(rings + ring, sfence);
>> +
>> +		then = ktime_get();
>> +		drm_suballoc_free(sa, fence);
>> +		now = ktime_get();
>> +		freetime = ktime_add(freetime, ktime_sub(now, then));
>> +	}
>> +
>> +	signaltime = ktime_set(0, 0);
>> +	for (i = 0; i < num_rings; ++i) {
>> +		struct suballoc_ring *sring = &rings[i];
>> +
>> +		flush_work(&sring->work);
>> +		while (!ring_idle(sring))
>> +			schedule();
>> +		signaltime = ktime_add(signaltime, sring->signal_time);
>> +	}
> This (and drm_suballoc_manager_fini) could be moved to .exit()
>
>> +
>> +	kvfree(rings);
>> +
>> +	kunit_info(test, "signals on different processor: %d of %d\n",
>> +		   atomic_read(&other_id), iter_tot);
>> +	drm_suballoc_manager_fini(&sa_manager);
>> +	kunit_info(test, "Alloc time was %llu ns.\n", (unsigned long long)
>> +		   ktime_to_ns(alloctime) / iter_tot);
>> +	kunit_info(test, "Free time was %llu ns.\n", (unsigned long long)
>> +		   ktime_to_ns(freetime) / iter_tot);
>> +	kunit_info(test, "Signal time was %llu ns.\n", (unsigned long long)
>> +		   ktime_to_ns(signaltime) / iter_tot);
> Do we need those timings?
> If we do expect certain values (probably with some epsilon range), we
> should handle it as a separate test.
>
>> +
>> +	if (atomic64_read(&free_space) != SA_SIZE) {
>> +		kunit_warn(test, "Test sanity check failed.\n");
>> +		kunit_warn(test, "Space left at exit is %lld of %d\n",
>> +			   (long long)atomic64_read(&free_space), SA_SIZE);
>> +	}
> If this is an error - let's add it as an "expect".
> Otherwise it's not printed if the test PASSes (unless we're running with
> raw output).
>
>> +}
>> +
>> +module_param(intr, bool, 0400);
>> +MODULE_PARM_DESC(intr, "Whether to wait interruptible for space.");
> This should be a separate test case (or param to a test case), not a
> modparam.
>
>> +module_param(from_reclaim, bool, 0400);
>> +MODULE_PARM_DESC(from_reclaim, "Whether to suballocate from reclaim context.");
> Same here.
>
>> +module_param(pre_throttle, bool, 0400);
>> +MODULE_PARM_DESC(pre_throttle, "Whether to have the test throttle for space "
>> +		 "before allocations.");
> And here.
>
>> +module_param(num_rings, uint, 0400);
>> +MODULE_PARM_DESC(num_rings, "Number of rings signalling fences in order.\n");
>> +module_param(iterations, uint, 0400);
>> +MODULE_PARM_DESC(iterations, "Number of allocations to perform.\n");
> Do we expect any difference in coverage for different number of rings /
> iterations? What's the relation here? Would it be possible to extract
> specific values (for which we expect different behavior) to separate
> testcases?
>
> -Michał
>
>> +
>> +static struct kunit_case drm_suballoc_tests[] = {
>> +	KUNIT_CASE(drm_test_suballoc),
>> +	{}
>> +};
>> +
>> +static struct kunit_suite drm_suballoc_test_suite = {
>> +	.name = "drm_suballoc",
>> +	.test_cases = drm_suballoc_tests,
>> +};
>> +
>> +kunit_test_suite(drm_suballoc_test_suite);
>> +
>> +MODULE_AUTHOR("Intel Corporation");
>> +MODULE_DESCRIPTION("DRM suballocator Kunit test");
>> +MODULE_LICENSE("Dual MIT/GPL");
>> -- 
>> 2.34.1
>>

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2023-03-28  8:22 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-02  8:34 [PATCH RESEND] drm/tests: Suballocator test Thomas Hellström
2023-03-02  8:34 ` [Intel-xe] " Thomas Hellström
2023-03-02  8:34 ` [Intel-gfx] " Thomas Hellström
2023-03-02  8:40 ` [Intel-xe] ✗ CI.Patch_applied: failure for drm/tests: Suballocator test (rev2) Patchwork
2023-03-02  9:09 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning " Patchwork
2023-03-02  9:20 ` [Intel-gfx] ✗ Fi.CI.BAT: failure " Patchwork
2023-03-02 13:19 ` [PATCH RESEND] drm/tests: Suballocator test Christian König
2023-03-02 13:19   ` [Intel-xe] " Christian König
2023-03-02 13:19   ` [Intel-gfx] " Christian König
2023-03-02 16:39 ` kernel test robot
2023-03-02 16:39   ` kernel test robot
2023-03-02 16:39   ` [Intel-xe] " kernel test robot
2023-03-02 17:13 ` kernel test robot
2023-03-02 17:13   ` kernel test robot
2023-03-02 17:13   ` [Intel-xe] " kernel test robot
2023-03-26  9:42 ` Michał Winiarski
2023-03-26  9:42   ` [Intel-xe] " Michał Winiarski
2023-03-26  9:42   ` [Intel-gfx] " Michał Winiarski
2023-03-28  8:22   ` Thomas Hellström
2023-03-28  8:22     ` [Intel-xe] " Thomas Hellström
2023-03-28  8:22     ` [Intel-gfx] " Thomas Hellström

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.