* [igt-dev] [PATCH 1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper
@ 2022-10-18  3:00 vitaly.prosyak
  2022-10-18  3:00 ` [igt-dev] [PATCH 2/7] lib/amdgpu: add deadlock helpers vitaly.prosyak
                   ` (6 more replies)
  0 siblings, 7 replies; 10+ messages in thread
From: vitaly.prosyak @ 2022-10-18  3:00 UTC (permalink / raw)
  To: igt-dev; +Cc: pierre-eric.pelloux-prayer, marek.olsak

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
---
 lib/amdgpu/amd_memory.c | 58 +++++++++++++++++++++++++++++++++++++++++
 lib/amdgpu/amd_memory.h |  6 +++++
 2 files changed, 64 insertions(+)
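
For reference, a minimal usage sketch of the new helper (illustrative only,
not part of the patch): the _raw variant additionally accepts alloc_flags and
a VM mapping_flags value such as AMDGPU_VM_MTYPE_UC, which is how the deadlock
helpers later in this series use it. Here "device" stands for an
already-initialized amdgpu_device_handle:

    amdgpu_bo_handle bo;
    amdgpu_va_handle va;
    void *cpu;
    uint64_t mc_addr;
    int r;

    /* 4 KiB GTT buffer, CPU mapped and GPU mapped with an extra
     * per-mapping flag passed through mapping_flags. */
    r = amdgpu_bo_alloc_and_map_raw(device, 4096, 4096,
                                    AMDGPU_GEM_DOMAIN_GTT, 0,
                                    AMDGPU_VM_MTYPE_UC,
                                    &bo, &cpu, &mc_addr, &va);
    igt_assert_eq(r, 0);
    /* ... use cpu and mc_addr ... */
    amdgpu_bo_unmap_and_free(bo, va, mc_addr, 4096);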

diff --git a/lib/amdgpu/amd_memory.c b/lib/amdgpu/amd_memory.c
index 344551fcc..93e72583b 100644
--- a/lib/amdgpu/amd_memory.c
+++ b/lib/amdgpu/amd_memory.c
@@ -193,6 +193,64 @@ error_va_alloc:
 	return r;
 }
 
+int
+amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
+			unsigned alignment, unsigned heap, uint64_t alloc_flags,
+			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
+			uint64_t *mc_address, amdgpu_va_handle *va_handle)
+{
+	struct amdgpu_bo_alloc_request request = {};
+	amdgpu_bo_handle buf_handle;
+	amdgpu_va_handle handle;
+	uint64_t vmc_addr;
+	int r;
+
+	request.alloc_size = size;
+	request.phys_alignment = alignment;
+	request.preferred_heap = heap;
+	request.flags = alloc_flags;
+
+	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
+	if (r)
+		return r;
+
+	r = amdgpu_va_range_alloc(dev,
+				  amdgpu_gpu_va_range_general,
+				  size, alignment, 0, &vmc_addr,
+				  &handle, 0);
+	if (r)
+		goto error_va_alloc;
+
+	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
+				   AMDGPU_VM_PAGE_READABLE |
+				   AMDGPU_VM_PAGE_WRITEABLE |
+				   AMDGPU_VM_PAGE_EXECUTABLE |
+				   mapping_flags,
+				   AMDGPU_VA_OP_MAP);
+	if (r)
+		goto error_va_map;
+
+	r = amdgpu_bo_cpu_map(buf_handle, cpu);
+	if (r)
+		goto error_cpu_map;
+
+	*bo = buf_handle;
+	*mc_address = vmc_addr;
+	*va_handle = handle;
+
+	return 0;
+
+ error_cpu_map:
+	amdgpu_bo_cpu_unmap(buf_handle);
+
+ error_va_map:
+	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
+
+ error_va_alloc:
+	amdgpu_bo_free(buf_handle);
+	return r;
+}
+
 /**
  *
  * @param bo
diff --git a/lib/amdgpu/amd_memory.h b/lib/amdgpu/amd_memory.h
index 80bf979f4..c84365933 100644
--- a/lib/amdgpu/amd_memory.h
+++ b/lib/amdgpu/amd_memory.h
@@ -55,6 +55,12 @@ amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size,
 			amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address,
 			amdgpu_va_handle *va_handle);
 
+int
+amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
+			unsigned alignment, unsigned heap, uint64_t alloc_flags,
+			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
+			uint64_t *mc_address, amdgpu_va_handle *va_handle);
+
 void
 amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle,
 			 uint64_t mc_addr, uint64_t size);
-- 
2.25.1

* [igt-dev] [PATCH 2/7] lib/amdgpu: add deadlock helpers
  2022-10-18  3:00 [igt-dev] [PATCH 1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper vitaly.prosyak
@ 2022-10-18  3:00 ` vitaly.prosyak
  2022-10-18  7:30   ` Kamil Konieczny
  2022-10-18  3:00 ` [igt-dev] [PATCH 3/7] tests/amdgpu: add deadlock test for gfx, compute and sdma vitaly.prosyak
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 10+ messages in thread
From: vitaly.prosyak @ 2022-10-18  3:00 UTC (permalink / raw)
  To: igt-dev; +Cc: pierre-eric.pelloux-prayer, marek.olsak

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

To validate amdgpu reset functionality.

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
---
 lib/amdgpu/amd_deadlock_helpers.c | 260 ++++++++++++++++++++++++++++++
 lib/amdgpu/amd_deadlock_helpers.h |  34 ++++
 lib/meson.build                   |   3 +-
 3 files changed, 296 insertions(+), 1 deletion(-)
 create mode 100644 lib/amdgpu/amd_deadlock_helpers.c
 create mode 100644 lib/amdgpu/amd_deadlock_helpers.h
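
As a rough orientation (not part of the patch): both helpers hang a ring by
submitting an IB whose first packet is a WAIT_REG_MEM (gfx/compute) or an SDMA
POLL_REGMEM that polls a word inside the IB buffer itself; the optional worker
thread flips that word after roughly 100 ms, so each submission either
completes or returns -ECANCELED once the GPU is reset. A minimal caller, as
wired up in the next patch, reduces to:

    /* device_handle comes from amdgpu_device_initialize() in the test
     * fixture; 'true' starts the thread that releases the wait. */
    amdgpu_deadlock_helper(device_handle, AMDGPU_HW_IP_GFX, true);
    amdgpu_deadlock_helper(device_handle, AMDGPU_HW_IP_COMPUTE, true);
    amdgpu_deadlock_sdma(device_handle, true);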

diff --git a/lib/amdgpu/amd_deadlock_helpers.c b/lib/amdgpu/amd_deadlock_helpers.c
new file mode 100644
index 000000000..c6528c6ad
--- /dev/null
+++ b/lib/amdgpu/amd_deadlock_helpers.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *  *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include <amdgpu.h>
+#include "amdgpu_drm.h"
+#include "amd_PM4.h"
+#include "amd_sdma.h"
+#include <unistd.h>
+#include <pthread.h>
+#include "amd_memory.h"
+#include "amd_deadlock_helpers.h"
+#include "amd_ip_blocks.h"
+
+static int use_uc_mtype = 0;
+
+static void *write_mem_address(void *data)
+{
+#define WRITE_MEM_ADDRESS_DELAY_MS 100
+
+	int i;
+	uint32_t * ib_result_cpu = data;
+
+	/* useconds_t range is [0, 1,000,000] so use loop for waits > 1s */
+	for (i = 0; i < WRITE_MEM_ADDRESS_DELAY_MS; i++)
+		usleep(1000);
+
+	ib_result_cpu[256] = 0x1;
+	/* printf("ib_result_cpu[256] = 0x1;\n"); */
+
+	return 0;
+}
+
+void
+amdgpu_deadlock_helper(amdgpu_device_handle device_handle, unsigned ip_type, bool with_thread)
+{
+	amdgpu_context_handle context_handle;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint32_t *ib_result_cpu2;
+	uint64_t ib_result_mc_address;
+	struct amdgpu_cs_request ibs_request;
+	struct amdgpu_cs_ib_info ib_info;
+	struct amdgpu_cs_fence fence_status;
+	uint32_t expired;
+	int i, r;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	int bo_cmd_size = 4096;
+	pthread_t stress_thread = {0};
+	struct amdgpu_cmd_base * base_cmd = get_cmd_base();
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map_raw(device_handle, bo_cmd_size, bo_cmd_size,
+			AMDGPU_GEM_DOMAIN_GTT, 0, use_uc_mtype ? AMDGPU_VM_MTYPE_UC : 0,
+						    &ib_result_handle, &ib_result_cpu,
+						    &ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
+			       &bo_list);
+	igt_assert_eq(r, 0);
+
+	base_cmd->attach_buf(base_cmd, ib_result_cpu, bo_cmd_size);
+
+	if (with_thread) {
+			r = pthread_create(&stress_thread, NULL, &write_mem_address, ib_result_cpu);
+			igt_assert_eq(r, 0);
+	}
+
+	base_cmd->emit(base_cmd, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+
+	base_cmd->emit(base_cmd, (WAIT_REG_MEM_MEM_SPACE(1)  /* memory */|
+							  WAIT_REG_MEM_FUNCTION(4) /* != */|
+							  WAIT_REG_MEM_ENGINE(0)/* me */));
+
+	base_cmd->emit(base_cmd, (ib_result_mc_address + 256*4) & 0xfffffffc);
+	base_cmd->emit(base_cmd, ((ib_result_mc_address + 256*4) >> 32) & 0xffffffff);
+
+	base_cmd->emit(base_cmd, 0);/* reference value */
+	base_cmd->emit(base_cmd, 0xffffffff); /* and mask */
+	base_cmd->emit(base_cmd, 0x00000004);/* poll interval */
+	base_cmd->emit_repeat(base_cmd, 0xffff1000, 16 - base_cmd->cdw);
+
+
+	ib_result_cpu2 = ib_result_cpu;
+	ib_result_cpu2[256] = 0x0; /* the memory we wait on to change */
+
+
+
+	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+	ib_info.ib_mc_address = ib_result_mc_address;
+	ib_info.size = base_cmd->cdw;
+
+	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+	ibs_request.ip_type = ip_type;
+	ibs_request.ring = 0;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.resources = bo_list;
+	ibs_request.fence_info.handle = NULL;
+	for (i = 0; i < 200; i++) {
+		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
+		if (r != 0 && r != -ECANCELED)
+			igt_assert(0);
+	}
+
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle;
+	fence_status.ip_type = ip_type;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = ibs_request.seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+			AMDGPU_TIMEOUT_INFINITE,0, &expired);
+	if (r != 0 && r != -ECANCELED)
+		igt_assert(0);
+
+	if (with_thread)
+		pthread_join(stress_thread, NULL);
+
+	amdgpu_bo_list_destroy(bo_list);
+
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				     ib_result_mc_address, 4096);
+
+	amdgpu_cs_ctx_free(context_handle);
+
+	free_cmd_base(base_cmd);
+}
+
+void
+amdgpu_deadlock_sdma(amdgpu_device_handle device_handle, bool with_thread)
+{
+	amdgpu_context_handle context_handle;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint32_t *ib_result_cpu2;
+	uint64_t ib_result_mc_address;
+	struct amdgpu_cs_request ibs_request;
+	struct amdgpu_cs_ib_info ib_info;
+	struct amdgpu_cs_fence fence_status;
+	uint32_t expired;
+	int i, r;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	struct drm_amdgpu_info_hw_ip info;
+	uint32_t ring_id;
+	pthread_t stress_thread = {0};
+	int bo_cmd_size = 4096;
+	struct amdgpu_cmd_base * base_cmd;
+
+	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_DMA, 0, &info);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	igt_assert_eq(r, 0);
+
+	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
+
+		r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
+				AMDGPU_GEM_DOMAIN_GTT, 0, use_uc_mtype ? AMDGPU_VM_MTYPE_UC : 0,
+							    &ib_result_handle, &ib_result_cpu,
+							    &ib_result_mc_address, &va_handle);
+		igt_assert_eq(r, 0);
+
+		if (with_thread) {
+			r = pthread_create(&stress_thread, NULL, &write_mem_address, ib_result_cpu);
+			igt_assert_eq(r, 0);
+		}
+
+		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
+				       &bo_list);
+		igt_assert_eq(r, 0);
+
+		base_cmd = get_cmd_base();
+		base_cmd->attach_buf(base_cmd, ib_result_cpu, bo_cmd_size);
+
+		base_cmd->emit(base_cmd, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+					(0 << 26) | /* WAIT_REG_MEM */(4 << 28) | /* != */(1 << 31)
+					/* memory */);
+
+		base_cmd->emit(base_cmd, (ib_result_mc_address + 256*4) & 0xfffffffc);
+
+		base_cmd->emit(base_cmd, ((ib_result_mc_address + 256*4) >> 32) & 0xffffffff);
+
+		base_cmd->emit(base_cmd, 0); /* reference value */
+		base_cmd->emit(base_cmd, 0xffffffff); /* and mask */
+
+		base_cmd->emit(base_cmd,  4 | /* poll interval */(0xfff << 16)/* retry count */);
+
+		base_cmd->emit_repeat(base_cmd, 0, 16 - base_cmd->cdw);
+
+		ib_result_cpu2 = ib_result_cpu;
+		ib_result_cpu2[256] = 0x0; /* the memory we wait on to change */
+
+		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+		ib_info.ib_mc_address = ib_result_mc_address;
+		ib_info.size = base_cmd->cdw;
+
+		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+		ibs_request.ip_type = AMDGPU_HW_IP_DMA;
+		ibs_request.ring = ring_id;
+		ibs_request.number_of_ibs = 1;
+		ibs_request.ibs = &ib_info;
+		ibs_request.resources = bo_list;
+		ibs_request.fence_info.handle = NULL;
+
+		for (i = 0; i < 200; i++) {
+			r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
+			if (r != 0 && r != -ECANCELED)
+				igt_assert(0);
+		}
+
+		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+		fence_status.context = context_handle;
+		fence_status.ip_type = AMDGPU_HW_IP_DMA;
+		fence_status.ip_instance = 0;
+		fence_status.ring = ring_id;
+		fence_status.fence = ibs_request.seq_no;
+
+		r = amdgpu_cs_query_fence_status(&fence_status,
+				AMDGPU_TIMEOUT_INFINITE,0, &expired);
+		if (r != 0 && r != -ECANCELED)
+			igt_assert(0);
+
+		if (with_thread)
+			pthread_join(stress_thread, NULL);
+
+		r = amdgpu_bo_list_destroy(bo_list);
+		igt_assert_eq(r, 0);
+
+		amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+					     ib_result_mc_address, 4096);
+		free_cmd_base(base_cmd);
+	}
+	amdgpu_cs_ctx_free(context_handle);
+}
diff --git a/lib/amdgpu/amd_deadlock_helpers.h b/lib/amdgpu/amd_deadlock_helpers.h
new file mode 100644
index 000000000..91dcf8bb2
--- /dev/null
+++ b/lib/amdgpu/amd_deadlock_helpers.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#ifndef __AMD_DEADLOCK_HELPERS_H__
+#define __AMD_DEADLOCK_HELPERS_H__
+
+void
+amdgpu_deadlock_helper(amdgpu_device_handle device_handle, unsigned ip_type, bool with_thread);
+
+void
+amdgpu_deadlock_sdma(amdgpu_device_handle device_handle, bool with_thread);
+
+#endif
+
diff --git a/lib/meson.build b/lib/meson.build
index 8d6c8a244..47f9fdab4 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -138,7 +138,8 @@ if libdrm_amdgpu.found()
 		'amdgpu/amd_gfx_v8_0.c',
 		'amdgpu/amd_gfx_v9_0.c',
 		'amdgpu/amd_dispatch_helpers.c',
-		'amdgpu/amd_dispatch.c'
+		'amdgpu/amd_dispatch.c',
+		'amdgpu/amd_deadlock_helpers.c'
 	]
 endif
 
-- 
2.25.1

* [igt-dev] [PATCH 3/7] tests/amdgpu: add deadlock test for gfx, compute and sdma
  2022-10-18  3:00 [igt-dev] [PATCH 1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper vitaly.prosyak
  2022-10-18  3:00 ` [igt-dev] [PATCH 2/7] lib/amdgpu: add deadlock helpers vitaly.prosyak
@ 2022-10-18  3:00 ` vitaly.prosyak
  2022-10-18  3:00 ` [igt-dev] [PATCH 4/7] lib/amdgpu: add memory and reg.access helper vitaly.prosyak
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: vitaly.prosyak @ 2022-10-18  3:00 UTC (permalink / raw)
  To: igt-dev; +Cc: pierre-eric.pelloux-prayer, marek.olsak

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
---
 tests/amdgpu/amd_deadlock.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tests/amdgpu/amd_deadlock.c b/tests/amdgpu/amd_deadlock.c
index e1b508fcf..c7e8e05e9 100644
--- a/tests/amdgpu/amd_deadlock.c
+++ b/tests/amdgpu/amd_deadlock.c
@@ -26,7 +26,21 @@
 #include "lib/amdgpu/amd_memory.h"
 #include "lib/amdgpu/amd_command_submission.h"
 #include "lib/amdgpu/amd_dispatch.h"
+#include "lib/amdgpu/amd_deadlock_helpers.h"
+
+static bool with_thread = true;
+
+static void
+amdgpu_deadlock_gfx(amdgpu_device_handle device_handle)
+{
+	amdgpu_deadlock_helper(device_handle, AMDGPU_HW_IP_GFX, with_thread);
+}
 
+static void
+amdgpu_deadlock_compute(amdgpu_device_handle device_handle)
+{
+	amdgpu_deadlock_helper(device_handle, AMDGPU_HW_IP_COMPUTE, with_thread);
+}
 static void
 amdgpu_dispatch_hang_slow_gfx(amdgpu_device_handle device_handle)
 {
@@ -65,6 +79,15 @@ igt_main
 
 	}
 
+	igt_subtest("amdgpu_deadlock_gfx")
+	amdgpu_deadlock_gfx(device);
+
+	igt_subtest("amdgpu_deadlock_sdma")
+	amdgpu_deadlock_sdma(device, with_thread);
+
+	igt_subtest("amdgpu_deadlock_compute")
+	amdgpu_deadlock_compute(device);
+
 	igt_subtest("dispatch_hang_slow_compute")
 	amdgpu_dispatch_hang_slow_compute(device);
 
-- 
2.25.1

* [igt-dev] [PATCH 4/7] lib/amdgpu: add memory and reg.access helper
  2022-10-18  3:00 [igt-dev] [PATCH 1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper vitaly.prosyak
  2022-10-18  3:00 ` [igt-dev] [PATCH 2/7] lib/amdgpu: add deadlock helpers vitaly.prosyak
  2022-10-18  3:00 ` [igt-dev] [PATCH 3/7] tests/amdgpu: add deadlock test for gfx, compute and sdma vitaly.prosyak
@ 2022-10-18  3:00 ` vitaly.prosyak
  2022-10-18  3:00 ` [igt-dev] [PATCH 5/7] tests/amdgpu: add tests for invalid memory and register access vitaly.prosyak
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: vitaly.prosyak @ 2022-10-18  3:00 UTC (permalink / raw)
  To: igt-dev; +Cc: pierre-eric.pelloux-prayer, marek.olsak

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
---
 lib/amdgpu/amd_PM4.h              |  4 ++
 lib/amdgpu/amd_deadlock_helpers.c | 92 +++++++++++++++++++++++++++++--
 lib/amdgpu/amd_deadlock_helpers.h |  3 +
 3 files changed, 93 insertions(+), 6 deletions(-)
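
In short (not part of the patch): bad_access_helper() emits a single
PACKET3_WRITE_DATA either to a privileged GMC register
(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, when reg_access is non-zero) or to an
unmapped GPU address (0xdeadbee0, when reg_access is zero), then waits on the
fence and tolerates -ECANCELED if the kernel cancels the bad job. The intended
callers, added in the next patch, reduce to:

    bad_access_helper(device_handle, 1);    /* illegal register write */
    bad_access_helper(device_handle, 0);    /* illegal memory write */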

diff --git a/lib/amdgpu/amd_PM4.h b/lib/amdgpu/amd_PM4.h
index 2c2152c49..54d001532 100644
--- a/lib/amdgpu/amd_PM4.h
+++ b/lib/amdgpu/amd_PM4.h
@@ -212,4 +212,8 @@
 		 * 1 - pfp
 		 * 2 - ce
 		 */
+
+/* GMC registers */
+#define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR                   0x54f
+
 #endif
diff --git a/lib/amdgpu/amd_deadlock_helpers.c b/lib/amdgpu/amd_deadlock_helpers.c
index c6528c6ad..49602320a 100644
--- a/lib/amdgpu/amd_deadlock_helpers.c
+++ b/lib/amdgpu/amd_deadlock_helpers.c
@@ -169,7 +169,8 @@ amdgpu_deadlock_sdma(amdgpu_device_handle device_handle, bool with_thread)
 	struct drm_amdgpu_info_hw_ip info;
 	uint32_t ring_id;
 	pthread_t stress_thread = {0};
-	int bo_cmd_size = 4096;
+	const unsigned bo_cmd_size = 4096;
+	const unsigned alignment = 4096;
 	struct amdgpu_cmd_base * base_cmd;
 
 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_DMA, 0, &info);
@@ -180,14 +181,16 @@ amdgpu_deadlock_sdma(amdgpu_device_handle device_handle, bool with_thread)
 
 	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
 
-		r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
-				AMDGPU_GEM_DOMAIN_GTT, 0, use_uc_mtype ? AMDGPU_VM_MTYPE_UC : 0,
-							    &ib_result_handle, &ib_result_cpu,
-							    &ib_result_mc_address, &va_handle);
+		r = amdgpu_bo_alloc_and_map_raw(device_handle, bo_cmd_size, alignment,
+										AMDGPU_GEM_DOMAIN_GTT, 0, use_uc_mtype ?
+										AMDGPU_VM_MTYPE_UC : 0, &ib_result_handle,
+										&ib_result_cpu, &ib_result_mc_address,
+										&va_handle);
 		igt_assert_eq(r, 0);
 
 		if (with_thread) {
-			r = pthread_create(&stress_thread, NULL, &write_mem_address, ib_result_cpu);
+			r = pthread_create(&stress_thread, NULL, &write_mem_address,
+							   ib_result_cpu);
 			igt_assert_eq(r, 0);
 		}
 
@@ -258,3 +261,80 @@ amdgpu_deadlock_sdma(amdgpu_device_handle device_handle, bool with_thread)
 	}
 	amdgpu_cs_ctx_free(context_handle);
 }
+
+void
+bad_access_helper(amdgpu_device_handle device_handle, int reg_access)
+{
+	amdgpu_context_handle context_handle;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	struct amdgpu_cs_request ibs_request;
+	struct amdgpu_cs_ib_info ib_info;
+	struct amdgpu_cs_fence fence_status;
+	uint32_t expired;
+	const unsigned bo_cmd_size = 4096;
+	const unsigned alignment = 4096;
+	int r;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	struct amdgpu_cmd_base * base_cmd;
+	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map_raw(device_handle, bo_cmd_size, alignment,
+									AMDGPU_GEM_DOMAIN_GTT, 0, 0,
+									&ib_result_handle, &ib_result_cpu,
+									&ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+	base_cmd = get_cmd_base();
+	base_cmd->attach_buf(base_cmd, ib_result_cpu, bo_cmd_size);
+
+	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, &bo_list);
+	igt_assert_eq(r, 0);
+
+	base_cmd->emit(base_cmd, PACKET3(PACKET3_WRITE_DATA, 3));
+	base_cmd->emit(base_cmd, (reg_access ? WRITE_DATA_DST_SEL(0) :
+										   WRITE_DATA_DST_SEL(5))| WR_CONFIRM);
+
+	base_cmd->emit(base_cmd, reg_access ? mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR :
+					0xdeadbee0);
+	base_cmd->emit(base_cmd, 0 );
+	base_cmd->emit(base_cmd, 0xdeadbeef );
+	base_cmd->emit_repeat(base_cmd, 0xffff1000, 16 - base_cmd->cdw);
+
+	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+	ib_info.ib_mc_address = ib_result_mc_address;
+	ib_info.size = base_cmd->cdw;
+
+	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
+	ibs_request.ring = 0;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.resources = bo_list;
+	ibs_request.fence_info.handle = NULL;
+
+	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
+	if (r != 0 && r != -ECANCELED)
+		igt_assert(0);
+
+
+	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
+	fence_status.context = context_handle;
+	fence_status.ip_type = AMDGPU_HW_IP_GFX;
+	fence_status.ip_instance = 0;
+	fence_status.ring = 0;
+	fence_status.fence = ibs_request.seq_no;
+
+	r = amdgpu_cs_query_fence_status(&fence_status,
+			AMDGPU_TIMEOUT_INFINITE,0, &expired);
+	if (r != 0 && r != -ECANCELED)
+		igt_assert(0);
+
+	amdgpu_bo_list_destroy(bo_list);
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+					 ib_result_mc_address, 4096);
+	free_cmd_base(base_cmd);
+	amdgpu_cs_ctx_free(context_handle);
+}
diff --git a/lib/amdgpu/amd_deadlock_helpers.h b/lib/amdgpu/amd_deadlock_helpers.h
index 91dcf8bb2..0f2471321 100644
--- a/lib/amdgpu/amd_deadlock_helpers.h
+++ b/lib/amdgpu/amd_deadlock_helpers.h
@@ -30,5 +30,8 @@ amdgpu_deadlock_helper(amdgpu_device_handle device_handle, unsigned ip_type, boo
 void
 amdgpu_deadlock_sdma(amdgpu_device_handle device_handle, bool with_thread);
 
+void
+bad_access_helper(amdgpu_device_handle device_handle, int reg_access);
+
 #endif
 
-- 
2.25.1

* [igt-dev] [PATCH 5/7] tests/amdgpu: add tests for invalid memory and register access
  2022-10-18  3:00 [igt-dev] [PATCH 1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper vitaly.prosyak
                   ` (2 preceding siblings ...)
  2022-10-18  3:00 ` [igt-dev] [PATCH 4/7] lib/amdgpu: add memory and reg.access helper vitaly.prosyak
@ 2022-10-18  3:00 ` vitaly.prosyak
  2022-10-18  3:00 ` [igt-dev] [PATCH 6/7] tests/amdgpu: reuse predefined const GFX_COMPUTE_NOP vitaly.prosyak
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: vitaly.prosyak @ 2022-10-18  3:00 UTC (permalink / raw)
  To: igt-dev; +Cc: pierre-eric.pelloux-prayer, marek.olsak

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
---
 tests/amdgpu/amd_deadlock.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/amdgpu/amd_deadlock.c b/tests/amdgpu/amd_deadlock.c
index c7e8e05e9..4fefe6935 100644
--- a/tests/amdgpu/amd_deadlock.c
+++ b/tests/amdgpu/amd_deadlock.c
@@ -53,6 +53,18 @@ amdgpu_dispatch_hang_slow_compute(amdgpu_device_handle device_handle)
 	amdgpu_dispatch_hang_slow_helper(device_handle, AMDGPU_HW_IP_COMPUTE);
 }
 
+static void
+amdgpu_illegal_reg_access(amdgpu_device_handle device_handle)
+{
+	bad_access_helper(device_handle, 1);
+}
+
+static void
+amdgpu_illegal_mem_access(amdgpu_device_handle device_handle)
+{
+	bad_access_helper(device_handle, 0);
+}
+
 igt_main
 {
 	amdgpu_device_handle device;
@@ -79,6 +91,12 @@ igt_main
 
 	}
 
+	igt_subtest("amdgpu_illegal_reg_access")
+	amdgpu_illegal_reg_access(device);
+
+	igt_subtest("amdgpu_illegal_mem_access")
+	amdgpu_illegal_mem_access(device);
+
 	igt_subtest("amdgpu_deadlock_gfx")
 	amdgpu_deadlock_gfx(device);
 
-- 
2.25.1

* [igt-dev] [PATCH 6/7] tests/amdgpu: reuse predefined const GFX_COMPUTE_NOP
  2022-10-18  3:00 [igt-dev] [PATCH 1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper vitaly.prosyak
                   ` (3 preceding siblings ...)
  2022-10-18  3:00 ` [igt-dev] [PATCH 5/7] tests/amdgpu: add tests for invalid memory and register access vitaly.prosyak
@ 2022-10-18  3:00 ` vitaly.prosyak
  2022-10-18  3:00 ` [igt-dev] [PATCH 7/7] amdgpu/tests: PCI unplug 4 tests for different scenario vitaly.prosyak
  2022-10-18  3:13 ` [igt-dev] ✗ Fi.CI.BUILD: failure for series starting with [1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper Patchwork
  6 siblings, 0 replies; 10+ messages in thread
From: vitaly.prosyak @ 2022-10-18  3:00 UTC (permalink / raw)
  To: igt-dev; +Cc: pierre-eric.pelloux-prayer, marek.olsak

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
---
 lib/amdgpu/amd_PM4.h              | 1 +
 lib/amdgpu/amd_deadlock_helpers.c | 4 ++--
 lib/amdgpu/amd_dispatch.c         | 6 +++---
 lib/amdgpu/amd_sdma.h             | 2 ++
 tests/amdgpu/amd_basic.c          | 3 ---
 tests/amdgpu/amd_cs_nop.c         | 4 ++--
 tests/amdgpu/amd_prime.c          | 5 ++---
 7 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/lib/amdgpu/amd_PM4.h b/lib/amdgpu/amd_PM4.h
index 54d001532..d6cdbfeaf 100644
--- a/lib/amdgpu/amd_PM4.h
+++ b/lib/amdgpu/amd_PM4.h
@@ -216,4 +216,5 @@
 /* GMC registers */
 #define mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR                   0x54f
 
+#define GFX_COMPUTE_NOP  0xffff1000
 #endif
diff --git a/lib/amdgpu/amd_deadlock_helpers.c b/lib/amdgpu/amd_deadlock_helpers.c
index 49602320a..d22b4aad5 100644
--- a/lib/amdgpu/amd_deadlock_helpers.c
+++ b/lib/amdgpu/amd_deadlock_helpers.c
@@ -101,7 +101,7 @@ amdgpu_deadlock_helper(amdgpu_device_handle device_handle, unsigned ip_type, boo
 	base_cmd->emit(base_cmd, 0);/* reference value */
 	base_cmd->emit(base_cmd, 0xffffffff); /* and mask */
 	base_cmd->emit(base_cmd, 0x00000004);/* poll interval */
-	base_cmd->emit_repeat(base_cmd, 0xffff1000, 16 - base_cmd->cdw);
+	base_cmd->emit_repeat(base_cmd, GFX_COMPUTE_NOP, 16 - base_cmd->cdw);
 
 
 	ib_result_cpu2 = ib_result_cpu;
@@ -301,7 +301,7 @@ bad_access_helper(amdgpu_device_handle device_handle, int reg_access)
 					0xdeadbee0);
 	base_cmd->emit(base_cmd, 0 );
 	base_cmd->emit(base_cmd, 0xdeadbeef );
-	base_cmd->emit_repeat(base_cmd, 0xffff1000, 16 - base_cmd->cdw);
+	base_cmd->emit_repeat(base_cmd, GFX_COMPUTE_NOP, 16 - base_cmd->cdw);
 
 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
 	ib_info.ib_mc_address = ib_result_mc_address;
diff --git a/lib/amdgpu/amd_dispatch.c b/lib/amdgpu/amd_dispatch.c
index cf773c309..c476bc1d6 100644
--- a/lib/amdgpu/amd_dispatch.c
+++ b/lib/amdgpu/amd_dispatch.c
@@ -125,7 +125,7 @@ amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
 	base_cmd->emit(base_cmd, 1);
 	base_cmd->emit(base_cmd, 1);
 
-	base_cmd->emit_aligned(base_cmd, 7, 0xffff1000);
+	base_cmd->emit_aligned(base_cmd, 7, GFX_COMPUTE_NOP);
 	resources[0] = bo_dst;
 	resources[1] = bo_shader;
 	resources[2] = bo_cmd;
@@ -278,7 +278,7 @@ amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
 	base_cmd->emit(base_cmd, 1);
 	base_cmd->emit(base_cmd, 1);
 
-	base_cmd->emit_aligned(base_cmd, 7, 0xffff1000); /* type3 nop packet */
+	base_cmd->emit_aligned(base_cmd, 7, GFX_COMPUTE_NOP); /* type3 nop packet */
 
 	resources[0] = bo_shader;
 	resources[1] = bo_src;
@@ -448,7 +448,7 @@ amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
 	base_cmd->emit(base_cmd, 1);
 	base_cmd->emit(base_cmd, 1);
 
-	base_cmd->emit_aligned(base_cmd, 7, 0xffff1000); /* type3 nop packet */
+	base_cmd->emit_aligned(base_cmd, 7, GFX_COMPUTE_NOP); /* type3 nop packet */
 
 	resources[0] = bo_shader;
 	resources[1] = bo_src;
diff --git a/lib/amdgpu/amd_sdma.h b/lib/amdgpu/amd_sdma.h
index 8f018167c..69f7d8ac7 100644
--- a/lib/amdgpu/amd_sdma.h
+++ b/lib/amdgpu/amd_sdma.h
@@ -102,4 +102,6 @@
 		 */
 #              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)
 
+#define SDMA_NOP  0x0
+
 #endif
diff --git a/tests/amdgpu/amd_basic.c b/tests/amdgpu/amd_basic.c
index 9be034975..f5df4acaa 100644
--- a/tests/amdgpu/amd_basic.c
+++ b/tests/amdgpu/amd_basic.c
@@ -34,9 +34,6 @@
 
 #define BUFFER_SIZE (8 * 1024)
 
-#define GFX_COMPUTE_NOP  0xffff1000
-
-
 /**
  * MEM ALLOC TEST
  * @param device
diff --git a/tests/amdgpu/amd_cs_nop.c b/tests/amdgpu/amd_cs_nop.c
index 46f4b7c65..ea3f6aae8 100644
--- a/tests/amdgpu/amd_cs_nop.c
+++ b/tests/amdgpu/amd_cs_nop.c
@@ -26,9 +26,9 @@
 
 #include <amdgpu.h>
 #include <amdgpu_drm.h>
+#include "lib/amdgpu/amd_PM4.h"
+
 
-#define GFX_COMPUTE_NOP  0xffff1000
-#define SDMA_NOP  0x0
 
 static int
 amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size,
diff --git a/tests/amdgpu/amd_prime.c b/tests/amdgpu/amd_prime.c
index 248fbc354..62924f15b 100644
--- a/tests/amdgpu/amd_prime.c
+++ b/tests/amdgpu/amd_prime.c
@@ -30,9 +30,8 @@
 #include "i915/gem_create.h"
 #include "igt.h"
 #include "igt_vgem.h"
-
-#define GFX_COMPUTE_NOP  0xffff1000
-#define SDMA_NOP  0x0
+#include "lib/amdgpu/amd_sdma.h"
+#include "lib/amdgpu/amd_PM4.h"
 
 static int
 amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size,
-- 
2.25.1

* [igt-dev] [PATCH 7/7] amdgpu/tests: PCI unplug 4 tests for different scenario.
  2022-10-18  3:00 [igt-dev] [PATCH 1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper vitaly.prosyak
                   ` (4 preceding siblings ...)
  2022-10-18  3:00 ` [igt-dev] [PATCH 6/7] tests/amdgpu: reuse predefined const GFX_COMPUTE_NOP vitaly.prosyak
@ 2022-10-18  3:00 ` vitaly.prosyak
  2022-10-18  3:13 ` [igt-dev] ✗ Fi.CI.BUILD: failure for series starting with [1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper Patchwork
  6 siblings, 0 replies; 10+ messages in thread
From: vitaly.prosyak @ 2022-10-18  3:00 UTC (permalink / raw)
  To: igt-dev; +Cc: pierre-eric.pelloux-prayer, marek.olsak

From: Vitaly Prosyak <vitaly.prosyak@amd.com>

1. A simple test with basic unplug and rescan.
2. Test with command submission using a worker thread.
3. Test with an exported buffer object.
4. Test with an exported fence to another device.
   More than one GPU is required.

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
---
 lib/amdgpu/amd_pci_unplug.c   | 532 ++++++++++++++++++++++++++++++++++
 lib/amdgpu/amd_pci_unplug.h   |  65 +++++
 lib/amdgpu/xalloc.h           |  57 ++++
 lib/meson.build               |   4 +-
 tests/amdgpu/amd_pci_unplug.c |  58 ++++
 tests/amdgpu/meson.build      |   1 +
 6 files changed, 716 insertions(+), 1 deletion(-)
 create mode 100644 lib/amdgpu/amd_pci_unplug.c
 create mode 100644 lib/amdgpu/amd_pci_unplug.h
 create mode 100644 lib/amdgpu/xalloc.h
 create mode 100644 tests/amdgpu/amd_pci_unplug.c
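
For orientation (not part of the patch): each entry point fills a
struct amd_pci_unplug with the opened fds and the sysfs "remove" path,
triggers the unplug through sysfs, tears everything down and finally rescans
the PCI bus. The new tests/amdgpu/amd_pci_unplug.c below drives the helpers
roughly like this:

    struct amd_pci_unplug_setup setup = { .minor_version_req = 46 };
    struct amd_pci_unplug unplug = { 0 };

    /* plain unplug + rescan */
    amdgpu_hotunplug_simple(&setup, &unplug);
    /* unplug while a worker thread keeps submitting NOP IBs */
    amdgpu_hotunplug_with_cs(&setup, &unplug);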

diff --git a/lib/amdgpu/amd_pci_unplug.c b/lib/amdgpu/amd_pci_unplug.c
new file mode 100644
index 000000000..bc77e8eec
--- /dev/null
+++ b/lib/amdgpu/amd_pci_unplug.c
@@ -0,0 +1,532 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+*/
+#include <linux/limits.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <pthread.h>
+#include <sys/sysmacros.h>
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+#include "amd_PM4.h"
+#include "amd_pci_unplug.h"
+#include "amd_memory.h"
+#include "igt.h"
+#include "xalloc.h"
+#include "amd_ip_blocks.h"
+
+static int
+amdgpu_open_devices(bool open_render_node, int  max_cards_supported, int drm_amdgpu_fds[])
+{
+	drmDevicePtr devices[MAX_CARDS_SUPPORTED];
+	int i;
+	int drm_node;
+	int amd_index = 0;
+	int drm_count;
+	int fd;
+	drmVersionPtr version;
+
+	for (i = 0; i < max_cards_supported && i < MAX_CARDS_SUPPORTED; i++)
+		drm_amdgpu_fds[i] = -1;
+
+	drm_count = drmGetDevices2(0, devices, MAX_CARDS_SUPPORTED);
+
+	if (drm_count < 0) {
+		fprintf(stderr, "drmGetDevices2() returned an error %d\n", drm_count);
+		return 0;
+	}
+
+	for (i = 0; i < drm_count; i++) {
+		/* If this is not a PCI device, skip */
+		if (devices[i]->bustype != DRM_BUS_PCI)
+			continue;
+
+		/* If this is not an AMD GPU vendor ID, skip */
+		if (devices[i]->deviceinfo.pci->vendor_id != 0x1002)
+			continue;
+
+		if (open_render_node)
+			drm_node = DRM_NODE_RENDER;
+		else
+			drm_node = DRM_NODE_PRIMARY;
+
+		fd = -1;
+		if (devices[i]->available_nodes & 1 << drm_node)
+			fd = open(
+				devices[i]->nodes[drm_node],
+				O_RDWR | O_CLOEXEC);
+
+		/* This node is not available. */
+		if (fd < 0) continue;
+
+		version = drmGetVersion(fd);
+		if (!version) {
+			fprintf(stderr, "Warning: Cannot get version for %s." "Error is %s\n",
+				devices[i]->nodes[drm_node], strerror(errno));
+			close(fd);
+			continue;
+		}
+
+		if (strcmp(version->name, "amdgpu")) {
+			/* This is not AMDGPU driver, skip.*/
+			drmFreeVersion(version);
+			close(fd);
+			continue;
+		}
+
+		drmFreeVersion(version);
+
+		drm_amdgpu_fds[amd_index] = fd;
+		amd_index++;
+	}
+
+	drmFreeDevices(devices, drm_count);
+	return amd_index;
+}
+static bool
+amdgpu_node_is_drm(int maj, int min)
+{
+	char path[64];
+	struct stat sbuf;
+
+	snprintf(path, sizeof(path), "/sys/dev/char/%d:%d/device/drm", maj, min);
+	return stat(path, &sbuf) == 0;
+}
+
+static char *
+amdgpu_get_device_from_fd(int fd)
+{
+	struct stat sbuf;
+	char path[PATH_MAX + 1];
+	unsigned int maj, min;
+
+	if (fstat(fd, &sbuf))
+		return NULL;
+
+	maj = major(sbuf.st_rdev);
+	min = minor(sbuf.st_rdev);
+
+	if (!amdgpu_node_is_drm(maj, min) || !S_ISCHR(sbuf.st_mode))
+		return NULL;
+
+	snprintf(path, sizeof(path), "/sys/dev/char/%d:%d/device", maj, min);
+	return strdup(path);
+}
+
+static int
+amdgpu_hotunplug_trigger(const char *pathname)
+{
+	int len = -1;
+	int fd = -1;
+
+	fd = open(pathname, O_WRONLY);
+	if (fd <= 0 )
+		goto release;
+
+	len = write(fd, "1", 1);
+
+	close(fd);
+
+release:
+	return len;
+}
+
+static bool
+amdgpu_hotunplug_setup_test(bool render_mode, const struct amd_pci_unplug_setup *setup,
+							struct amd_pci_unplug *unplug)
+{
+	char *tmp_str = NULL;
+	bool ret = false;
+	int r;
+	uint32_t  major_version, minor_version;
+
+	unplug->num_devices = amdgpu_open_devices(render_mode, MAX_CARDS_SUPPORTED,
+											  unplug->drm_amdgpu_fds);
+	if (unplug->num_devices == 0 )
+		goto release;
+
+	if (setup->open_device && setup->open_device2 && unplug->num_devices < 2) {
+		/*Not enough board for the test*/
+		fprintf(stderr, "More than 1 GPU is required for this test\n");
+		goto release;
+	}
+
+	tmp_str = amdgpu_get_device_from_fd(unplug->drm_amdgpu_fds[0]);
+	abort_oom_if_null(tmp_str);
+	unplug->sysfs_remove = realloc(tmp_str, strlen(tmp_str) * 2);
+	abort_oom_if_null(unplug->sysfs_remove );
+	strcat(unplug->sysfs_remove, "/remove");
+
+	r = amdgpu_device_initialize(unplug->drm_amdgpu_fds[0], &major_version,
+									 &minor_version, &unplug->device_handle);
+	if (r != 0)
+		goto release;
+
+	if (minor_version < setup->minor_version_req)
+		goto release;
+
+	if (!setup->open_device) {
+			/* device handle is not always required for test */
+			/* but for drm version is required always */
+		amdgpu_device_deinitialize(unplug->device_handle);
+		unplug->device_handle = NULL;
+	}
+		/* TODO launch another process */
+	if (setup->open_device2) {
+		r = amdgpu_device_initialize(unplug->drm_amdgpu_fds[1], &major_version,
+						   &minor_version, &unplug->device_handle2);
+		if (r != 0)
+			goto release;
+		if (minor_version < setup->minor_version_req)
+			goto release;
+	}
+	ret = true;
+release:
+	return ret;
+}
+
+static void
+amdgpu_hotunplug_teardown_test(struct amd_pci_unplug *unplug)
+{
+	int i;
+	if (unplug->device_handle) {
+		amdgpu_device_deinitialize(unplug->device_handle);
+		unplug->device_handle = NULL;
+	}
+	if (unplug->device_handle2) {
+		amdgpu_device_deinitialize(unplug->device_handle2);
+		unplug->device_handle2 = NULL;
+	}
+	for (i = 0; i < unplug->num_devices; i++) {
+		if (unplug->drm_amdgpu_fds[i] >= 0 ) {
+			close(unplug->drm_amdgpu_fds[i]);
+			unplug->drm_amdgpu_fds[i] = -1;
+		}
+	}
+	if (unplug->sysfs_remove) {
+		free(unplug->sysfs_remove);
+		unplug->sysfs_remove = NULL;
+	}
+}
+
+static int
+amdgpu_hotunplug_remove(struct amd_pci_unplug *unplug)
+{
+	int r = amdgpu_hotunplug_trigger(unplug->sysfs_remove);
+	return r;
+}
+
+static int
+amdgpu_hotunplug_rescan(void)
+{
+	int r = amdgpu_hotunplug_trigger("/sys/bus/pci/rescan");
+	return r;
+}
+
+static int
+amdgpu_cs_sync(amdgpu_context_handle context, unsigned int ip_type,	int ring,
+				unsigned int seqno)
+{
+	struct amdgpu_cs_fence fence = {
+		.context = context,
+		.ip_type = ip_type,
+		.ring = ring,
+		.fence = seqno,
+	};
+	uint32_t expired;
+	int ret;
+
+	ret = amdgpu_cs_query_fence_status(&fence,
+					   AMDGPU_TIMEOUT_INFINITE,
+					   0, &expired);
+	return ret;
+}
+
+static void *
+amdgpu_nop_cs(void *handle)
+{
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	int r;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	amdgpu_context_handle context;
+	struct amdgpu_cs_request ibs_request;
+	struct amdgpu_cs_ib_info ib_info;
+	int bo_cmd_size = 4096;
+	struct amd_pci_unplug * unplug = handle;
+	amdgpu_device_handle device_handle = unplug->device_handle;
+
+	struct amdgpu_cmd_base * base_cmd = get_cmd_base();
+	r = amdgpu_cs_ctx_create(device_handle, &context);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+
+	memset(ib_result_cpu, 0, bo_cmd_size);
+	base_cmd->attach_buf(base_cmd, ib_result_cpu, bo_cmd_size);
+	base_cmd->emit_repeat(base_cmd, GFX_COMPUTE_NOP , 16);
+
+	r = amdgpu_bo_list_create(device_handle, 1, &ib_result_handle, NULL, &bo_list);
+	igt_assert_eq(r, 0);
+
+	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+	ib_info.ib_mc_address = ib_result_mc_address;
+	ib_info.size = base_cmd->cdw;
+
+	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
+	ibs_request.ring = 0;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.resources = bo_list;
+
+	while (unplug->do_cs)
+		amdgpu_cs_submit(context, 0, &ibs_request, 1);
+
+	amdgpu_cs_sync(context, AMDGPU_HW_IP_GFX, 0, ibs_request.seq_no);
+	amdgpu_bo_list_destroy(bo_list);
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, ib_result_mc_address,
+							4096);
+
+	amdgpu_cs_ctx_free(context);
+	free_cmd_base(base_cmd);
+
+	return NULL;
+}
+
+static pthread_t*
+amdgpu_create_cs_thread(struct amd_pci_unplug * unplug)
+{
+	int r;
+	pthread_t *thread = malloc(sizeof(*thread));
+	if (!thread)
+		return NULL;
+
+	unplug->do_cs = true;
+
+	r = pthread_create(thread, NULL, &amdgpu_nop_cs, unplug);
+	igt_assert_eq(r, 0);
+
+	/* Give thread enough time to start*/
+	usleep(1000000);
+	return thread;
+}
+
+static void
+amdgpu_wait_cs_thread(struct amd_pci_unplug * unplug, pthread_t *thread)
+{
+	unplug->do_cs = false;
+
+	pthread_join(*thread, NULL);
+	free(thread);
+}
+
+static void
+amdgpu_hotunplug_test(bool render_mode, const struct amd_pci_unplug_setup *setup,
+					  struct amd_pci_unplug * unplug,  bool with_cs)
+{
+	int r;
+	pthread_t *thread = NULL;
+
+	r = amdgpu_hotunplug_setup_test(render_mode, setup, unplug);
+	igt_assert_eq(r , 1);
+
+	if (with_cs)
+		thread = amdgpu_create_cs_thread(unplug);
+
+	r = amdgpu_hotunplug_remove(unplug);
+	igt_assert_eq(r > 0, 1);
+
+	if (with_cs)
+		amdgpu_wait_cs_thread(unplug, thread);
+
+	amdgpu_hotunplug_teardown_test(unplug);
+
+	r = amdgpu_hotunplug_rescan();
+	igt_assert_eq(r > 0, 1);
+}
+
+void
+amdgpu_hotunplug_simple(struct amd_pci_unplug_setup *setup,
+						struct amd_pci_unplug *unplug)
+{
+	memset(unplug, 0, sizeof(*unplug));
+	amdgpu_hotunplug_test(true, setup, unplug, false);
+}
+
+void
+amdgpu_hotunplug_with_cs(struct amd_pci_unplug_setup *setup,
+			 struct amd_pci_unplug *unplug)
+{
+	memset(unplug, 0, sizeof(*unplug));
+	setup->open_device = true;
+	amdgpu_hotunplug_test(true, setup, unplug, true);
+}
+
+void
+amdgpu_hotunplug_with_exported_bo(struct amd_pci_unplug_setup *setup,
+								  struct amd_pci_unplug *unplug)
+{
+	int r;
+	uint32_t dma_buf_fd;
+	unsigned int *ptr;
+	amdgpu_bo_handle bo_handle;
+
+	struct amdgpu_bo_alloc_request request = {
+		.alloc_size = 4096,
+		.phys_alignment = 4096,
+		.preferred_heap = AMDGPU_GEM_DOMAIN_GTT,
+		.flags = 0,
+	};
+	memset(unplug, 0, sizeof(*unplug));
+	setup->open_device = true;
+
+	r = amdgpu_hotunplug_setup_test(true, setup, unplug);
+	igt_assert_eq(r , 1);
+
+	r = amdgpu_bo_alloc(unplug->device_handle, &request, &bo_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_export(bo_handle, amdgpu_bo_handle_type_dma_buf_fd, &dma_buf_fd);
+	igt_assert_eq(r, 0);
+
+	ptr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, dma_buf_fd, 0);
+
+	r = amdgpu_hotunplug_remove(unplug);
+	igt_assert_eq(r > 0, 1);
+
+	amdgpu_bo_free(bo_handle);
+
+	amdgpu_hotunplug_teardown_test(unplug);
+
+	*ptr = 0xdeafbeef;
+
+	munmap(ptr, 4096);
+	close (dma_buf_fd);
+
+	r = amdgpu_hotunplug_rescan();
+	igt_assert_eq(r > 0, 1);
+}
+
+void
+amdgpu_hotunplug_with_exported_fence(struct amd_pci_unplug_setup *setup,
+									 struct amd_pci_unplug *unplug)
+{
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	uint32_t sync_obj_handle, sync_obj_handle2;
+	int r;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	amdgpu_context_handle context;
+	struct amdgpu_cs_request ibs_request;
+	struct amdgpu_cs_ib_info ib_info;
+	struct amdgpu_cs_fence fence_status = {0};
+	int shared_fd;
+	int bo_cmd_size = 4096;
+	struct amdgpu_cmd_base * base_cmd = get_cmd_base();
+
+	memset(unplug, 0, sizeof(*unplug));
+	setup->open_device = true;
+	setup->open_device2 = true;
+
+
+	r = amdgpu_hotunplug_setup_test(true, setup, unplug);
+	igt_assert_eq(r , 1);
+
+	r = amdgpu_cs_ctx_create(unplug->device_handle, &context);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(unplug->device_handle, bo_cmd_size, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+	memset(ib_result_cpu, 0, bo_cmd_size);
+	base_cmd->attach_buf(base_cmd, ib_result_cpu, bo_cmd_size);
+	base_cmd->emit_repeat(base_cmd, GFX_COMPUTE_NOP , 16);
+
+	r = amdgpu_bo_list_create(unplug->device_handle, 1, &ib_result_handle, NULL,
+							  &bo_list);
+	igt_assert_eq(r, 0);
+
+	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+	ib_info.ib_mc_address = ib_result_mc_address;
+	ib_info.size = base_cmd->cdw;
+
+	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
+	ibs_request.ring = 0;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.resources = bo_list;
+
+	r = amdgpu_cs_submit(context, 0, &ibs_request, 1);
+	igt_assert_eq(r, 0);
+
+	fence_status.context = context;
+	fence_status.ip_type = AMDGPU_HW_IP_GFX;
+	fence_status.ip_instance = 0;
+	fence_status.fence = ibs_request.seq_no;
+
+	r = amdgpu_cs_fence_to_handle(unplug->device_handle, &fence_status,
+						AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ, &sync_obj_handle);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_cs_export_syncobj(unplug->device_handle, sync_obj_handle, &shared_fd);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_cs_import_syncobj(unplug->device_handle2, shared_fd, &sync_obj_handle2);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_cs_destroy_syncobj(unplug->device_handle, sync_obj_handle);
+	igt_assert_eq(r, 0);
+
+	amdgpu_bo_list_destroy(bo_list);
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, ib_result_mc_address,
+							 4096);
+
+	amdgpu_cs_ctx_free(context);
+
+	r = amdgpu_hotunplug_remove(unplug);
+	igt_assert_eq(r > 0, 1);
+
+	r = amdgpu_cs_syncobj_wait(unplug->device_handle2, &sync_obj_handle2, 1, 100000000, 0, NULL);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_cs_destroy_syncobj(unplug->device_handle2, sync_obj_handle2);
+	igt_assert_eq(r, 0);
+
+	amdgpu_hotunplug_teardown_test(unplug);
+
+	r = amdgpu_hotunplug_rescan();
+	igt_assert_eq(r > 0, 1);
+	free_cmd_base(base_cmd);
+}
diff --git a/lib/amdgpu/amd_pci_unplug.h b/lib/amdgpu/amd_pci_unplug.h
new file mode 100644
index 000000000..4b7a176e8
--- /dev/null
+++ b/lib/amdgpu/amd_pci_unplug.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#ifndef AMD_PCI_UNPLUG_H
+#define AMD_PCI_UNPLUG_H
+
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+
+#define MAX_CARDS_SUPPORTED 4
+
+struct amd_pci_unplug_setup {
+	uint32_t  major_version_req;
+	uint32_t  minor_version_req;
+	bool open_device;
+	bool open_device2;
+};
+
+struct amd_pci_unplug {
+	char *sysfs_remove ;
+	int drm_amdgpu_fds[MAX_CARDS_SUPPORTED];
+	int num_devices;
+	amdgpu_device_handle device_handle;
+	amdgpu_device_handle device_handle2;
+	volatile bool do_cs;
+};
+
+void
+amdgpu_hotunplug_simple(struct amd_pci_unplug_setup *setup,
+						struct amd_pci_unplug *unplug);
+
+void
+amdgpu_hotunplug_with_cs(struct amd_pci_unplug_setup *setup,
+						 struct amd_pci_unplug *unplug);
+
+void
+amdgpu_hotunplug_with_exported_bo(struct amd_pci_unplug_setup *setup,
+								  struct amd_pci_unplug *unplug);
+
+void
+amdgpu_hotunplug_with_exported_fence(struct amd_pci_unplug_setup *setup,
+									 struct amd_pci_unplug *unplug);
+
+
+#endif
diff --git a/lib/amdgpu/xalloc.h b/lib/amdgpu/xalloc.h
new file mode 100644
index 000000000..840b065f6
--- /dev/null
+++ b/lib/amdgpu/xalloc.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2008 Kristian Høgsberg
+ * Copyright 2022 Collabora, Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef WESTON_XALLOC_H
+#define WESTON_XALLOC_H
+
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+static inline void *
+abort_oom_if_null(void *p)
+{
+	static const char oommsg[] = ": out of memory\n";
+	size_t written __attribute__((unused));
+
+	if (p)
+		return p;
+
+	written = write(STDERR_FILENO, program_invocation_short_name,
+		        strlen(program_invocation_short_name));
+	written = write(STDERR_FILENO, oommsg, strlen(oommsg));
+
+	abort();
+}
+
+#define xmalloc(s) (abort_oom_if_null(malloc(s)))
+#define xzalloc(s) (abort_oom_if_null(calloc(1, s)))
+#define xcalloc(n, s) (abort_oom_if_null(calloc(n, s)))
+#define xstrdup(s) (abort_oom_if_null(strdup(s)))
+#define xrealloc(p, s) (abort_oom_if_null(realloc(p, s)))
+
+#endif /* WESTON_XALLOC_H */
diff --git a/lib/meson.build b/lib/meson.build
index 47f9fdab4..590ac2bcc 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -139,7 +139,9 @@ if libdrm_amdgpu.found()
 		'amdgpu/amd_gfx_v9_0.c',
 		'amdgpu/amd_dispatch_helpers.c',
 		'amdgpu/amd_dispatch.c',
-		'amdgpu/amd_deadlock_helpers.c'
+		'amdgpu/amd_deadlock_helpers.c',
+		'amdgpu/amd_pci_unplug.c',
+		'amdgpu/xalloc.h'
 	]
 endif
 
diff --git a/tests/amdgpu/amd_pci_unplug.c b/tests/amdgpu/amd_pci_unplug.c
new file mode 100644
index 000000000..7164ee50b
--- /dev/null
+++ b/tests/amdgpu/amd_pci_unplug.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "drmtest.h"
+
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+#include "lib/amdgpu/amd_PM4.h"
+#include "lib/amdgpu/amd_pci_unplug.h"
+#include "lib/amdgpu/amd_ip_blocks.h"
+
+
+igt_main
+{
+
+	struct amd_pci_unplug_setup setup = {0};
+	struct amd_pci_unplug unplug = {0};
+
+	igt_fixture {
+		setup.minor_version_req = 46;
+	}
+
+	igt_subtest("amdgpu_hotunplug_simple")
+		amdgpu_hotunplug_simple(&setup, &unplug);
+
+	igt_subtest("amdgpu_hotunplug_with_cs")
+		amdgpu_hotunplug_with_cs(&setup, &unplug);
+
+		/*TODO about second GPU*/
+	igt_subtest("amdgpu_hotunplug_with_exported_bo")
+		amdgpu_hotunplug_with_exported_bo(&setup, &unplug);
+
+	igt_subtest("amdgpu_hotunplug_with_exported_fence")
+		amdgpu_hotunplug_with_exported_fence(&setup, &unplug);
+
+	igt_fixture { }
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index 36b83ab1c..48b916925 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -7,6 +7,7 @@ if libdrm_amdgpu.found()
 			  'amd_basic',
 			  'amd_bypass',
 			  'amd_deadlock',
+			  'amd_pci_unplug',
 			  'amd_color',
 			  'amd_cs_nop',
 			  'amd_hotplug',
-- 
2.25.1

* [igt-dev] ✗ Fi.CI.BUILD: failure for series starting with [1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper
  2022-10-18  3:00 [igt-dev] [PATCH 1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper vitaly.prosyak
                   ` (5 preceding siblings ...)
  2022-10-18  3:00 ` [igt-dev] [PATCH 7/7] amdgpu/tests: PCI unplug 4 tests for different scenario vitaly.prosyak
@ 2022-10-18  3:13 ` Patchwork
  6 siblings, 0 replies; 10+ messages in thread
From: Patchwork @ 2022-10-18  3:13 UTC (permalink / raw)
  To: vitaly.prosyak; +Cc: igt-dev

== Series Details ==

Series: series starting with [1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper
URL   : https://patchwork.freedesktop.org/series/109798/
State : failure

== Summary ==

Applying: lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper
Applying: lib/amdgpu: add deadlock helpers
Applying: tests/amdgpu: add deadlock test for gfx, compute and sdma
Applying: lib/amdgpu: add memory and reg.access helper
Using index info to reconstruct a base tree...
M	lib/amdgpu/amd_PM4.h
Falling back to patching base and 3-way merge...
Auto-merging lib/amdgpu/amd_PM4.h
CONFLICT (content): Merge conflict in lib/amdgpu/amd_PM4.h
Patch failed at 0004 lib/amdgpu: add memory and reg.access helper
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


* Re: [igt-dev] [PATCH 2/7] lib/amdgpu: add deadlock helpers
  2022-10-18  3:00 ` [igt-dev] [PATCH 2/7] lib/amdgpu: add deadlock helpers vitaly.prosyak
@ 2022-10-18  7:30   ` Kamil Konieczny
  2022-10-18 23:16     ` vitaly prosyak
  0 siblings, 1 reply; 10+ messages in thread
From: Kamil Konieczny @ 2022-10-18  7:30 UTC (permalink / raw)
  To: igt-dev; +Cc: pierre-eric.pelloux-prayer, marek.olsak

Hi Vitaly,

On 2022-10-17 at 23:00:50 -0400, vitaly.prosyak@amd.com wrote:
> From: Vitaly Prosyak <vitaly.prosyak@amd.com>
> 
> To validate amdgpu reset functionality.
--^
To validate
or
Helpers to validate

You introduce here new lib so maybe describe what functional
changes you added, are they only for deadlock detection ?

Read some helpfull notes about writing commit message and
descriptions:

https://www.ozlabs.org/~akpm/stuff/tpp.txt

https://kernelnewbies.org/PatchPhilosophy

https://elixir.bootlin.com/linux/latest/source/Documentation/process/submitting-patches.rst

https://www.kernel.org/doc/html/latest/process/submitting-patches.html#the-canonical-patch-format

> 
> Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
> ---
>  lib/amdgpu/amd_deadlock_helpers.c | 260 ++++++++++++++++++++++++++++++
>  lib/amdgpu/amd_deadlock_helpers.h |  34 ++++
>  lib/meson.build                   |   3 +-
>  3 files changed, 296 insertions(+), 1 deletion(-)
>  create mode 100644 lib/amdgpu/amd_deadlock_helpers.c
>  create mode 100644 lib/amdgpu/amd_deadlock_helpers.h
> 
> diff --git a/lib/amdgpu/amd_deadlock_helpers.c b/lib/amdgpu/amd_deadlock_helpers.c
> new file mode 100644
> index 000000000..c6528c6ad
> --- /dev/null
> +++ b/lib/amdgpu/amd_deadlock_helpers.c
> @@ -0,0 +1,260 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *  *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.

Use an SPDX license identifier here; see the .h and .c files in
lib/ that already use one.
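
For example, the quoted boilerplate above could collapse to something
like this (MIT matches the license text in this patch; match whatever
comment style the existing lib/ files use):

  // SPDX-License-Identifier: MIT
  /*
   * Copyright 2022 Advanced Micro Devices, Inc.
   */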

> + *
> + *
> + */
> +#include <amdgpu.h>
> +#include "amdgpu_drm.h"
> +#include "amd_PM4.h"
> +#include "amd_sdma.h"
> +#include <unistd.h>
> +#include <pthread.h>
> +#include "amd_memory.h"
> +#include "amd_deadlock_helpers.h"
> +#include "amd_ip_blocks.h"
> +
> +static int use_uc_mtype = 0;
> +
> +static void *write_mem_address(void *data)
> +{
> +#define WRITE_MEM_ADDRESS_DELAY_MS 100
> +
> +	int i;
> +	uint32_t * ib_result_cpu = data;
> +
> +	/* useconds_t range is [0, 1,000,000] so use loop for waits > 1s */
> +	for (i = 0; i < WRITE_MEM_ADDRESS_DELAY_MS; i++)
> +		usleep(1000);
> +
> +	ib_result_cpu[256] = 0x1;
> +	/* printf("ib_result_cpu[256] = 0x1;\n"); */
> +
> +	return 0;
> +}
> +
> +void
> +amdgpu_deadlock_helper(amdgpu_device_handle device_handle, unsigned ip_type, bool with_thread)
> +{
> +	amdgpu_context_handle context_handle;
> +	amdgpu_bo_handle ib_result_handle;
> +	void *ib_result_cpu;
> +	uint32_t *ib_result_cpu2;
> +	uint64_t ib_result_mc_address;
> +	struct amdgpu_cs_request ibs_request;
> +	struct amdgpu_cs_ib_info ib_info;
> +	struct amdgpu_cs_fence fence_status;
> +	uint32_t expired;
> +	int i, r;
> +	amdgpu_bo_list_handle bo_list;
> +	amdgpu_va_handle va_handle;
> +	int bo_cmd_size = 4096;
> +	pthread_t stress_thread = {0};
> +	struct amdgpu_cmd_base * base_cmd = get_cmd_base();
> +
> +	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
> +	igt_assert_eq(r, 0);
> +
> +	r = amdgpu_bo_alloc_and_map_raw(device_handle, bo_cmd_size, bo_cmd_size,
> +			AMDGPU_GEM_DOMAIN_GTT, 0, use_uc_mtype ? AMDGPU_VM_MTYPE_UC : 0,
> +						    &ib_result_handle, &ib_result_cpu,
> +						    &ib_result_mc_address, &va_handle);
> +	igt_assert_eq(r, 0);
> +
> +	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
> +			       &bo_list);
> +	igt_assert_eq(r, 0);
> +
> +	base_cmd->attach_buf(base_cmd, ib_result_cpu, bo_cmd_size);
> +
> +	if (with_thread) {
> +			r = pthread_create(&stress_thread, NULL, &write_mem_address, ib_result_cpu);
> +			igt_assert_eq(r, 0);
> +	}
> +
> +	base_cmd->emit(base_cmd, PACKET3(PACKET3_WAIT_REG_MEM, 5));
> +
> +	base_cmd->emit(base_cmd, (WAIT_REG_MEM_MEM_SPACE(1)  /* memory */|
> +							  WAIT_REG_MEM_FUNCTION(4) /* != */|
> +							  WAIT_REG_MEM_ENGINE(0)/* me */));
> +
> +	base_cmd->emit(base_cmd, (ib_result_mc_address + 256*4) & 0xfffffffc);
> +	base_cmd->emit(base_cmd, ((ib_result_mc_address + 256*4) >> 32) & 0xffffffff);
> +
> +	base_cmd->emit(base_cmd, 0);/* reference value */
> +	base_cmd->emit(base_cmd, 0xffffffff); /* and mask */
> +	base_cmd->emit(base_cmd, 0x00000004);/* poll interval */
> +	base_cmd->emit_repeat(base_cmd, 0xffff1000, 16 - base_cmd->cdw);
> +
> +
> +	ib_result_cpu2 = ib_result_cpu;
> +	ib_result_cpu2[256] = 0x0; /* the memory we wait on to change */
> +
> +
> +
> +	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
> +	ib_info.ib_mc_address = ib_result_mc_address;
> +	ib_info.size = base_cmd->cdw;
> +
> +	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
> +	ibs_request.ip_type = ip_type;
> +	ibs_request.ring = 0;
> +	ibs_request.number_of_ibs = 1;
> +	ibs_request.ibs = &ib_info;
> +	ibs_request.resources = bo_list;
> +	ibs_request.fence_info.handle = NULL;
> +	for (i = 0; i < 200; i++) {
> +		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
> +		if (r != 0 && r != -ECANCELED)
> +			igt_assert(0);
> +	}
> +
> +	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
> +	fence_status.context = context_handle;
> +	fence_status.ip_type = ip_type;
> +	fence_status.ip_instance = 0;
> +	fence_status.ring = 0;
> +	fence_status.fence = ibs_request.seq_no;
> +
> +	r = amdgpu_cs_query_fence_status(&fence_status,
> +			AMDGPU_TIMEOUT_INFINITE,0, &expired);
> +	if (r != 0 && r != -ECANCELED)
> +		igt_assert(0);
> +
> +	if (with_thread)
> +		pthread_join(stress_thread, NULL);
> +
> +	amdgpu_bo_list_destroy(bo_list);
> +
> +	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
> +				     ib_result_mc_address, 4096);
> +
> +	amdgpu_cs_ctx_free(context_handle);
> +
> +	free_cmd_base(base_cmd);
> +}
> +
> +void
> +amdgpu_deadlock_sdma(amdgpu_device_handle device_handle, bool with_thread)
> +{
> +	amdgpu_context_handle context_handle;
> +	amdgpu_bo_handle ib_result_handle;
> +	void *ib_result_cpu;
> +	uint32_t *ib_result_cpu2;
> +	uint64_t ib_result_mc_address;
> +	struct amdgpu_cs_request ibs_request;
> +	struct amdgpu_cs_ib_info ib_info;
> +	struct amdgpu_cs_fence fence_status;
> +	uint32_t expired;
> +	int i, r;
> +	amdgpu_bo_list_handle bo_list;
> +	amdgpu_va_handle va_handle;
> +	struct drm_amdgpu_info_hw_ip info;
> +	uint32_t ring_id;
> +	pthread_t stress_thread = {0};
> +	int bo_cmd_size = 4096;
> +	struct amdgpu_cmd_base * base_cmd;
> +
> +	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_DMA, 0, &info);
> +	igt_assert_eq(r, 0);
> +
> +	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
> +	igt_assert_eq(r, 0);
> +
> +	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
> +
> +		r = amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
> +				AMDGPU_GEM_DOMAIN_GTT, 0, use_uc_mtype ? AMDGPU_VM_MTYPE_UC : 0,
> +							    &ib_result_handle, &ib_result_cpu,
> +							    &ib_result_mc_address, &va_handle);
> +		igt_assert_eq(r, 0);
> +
> +		if (with_thread) {
> +			r = pthread_create(&stress_thread, NULL, &write_mem_address, ib_result_cpu);
> +			igt_assert_eq(r, 0);
> +		}
> +
> +		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
> +				       &bo_list);
> +		igt_assert_eq(r, 0);
> +
> +		base_cmd = get_cmd_base();
> +		base_cmd->attach_buf(base_cmd, ib_result_cpu, bo_cmd_size);
> +
> +		base_cmd->emit(base_cmd, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
> +					(0 << 26) | /* WAIT_REG_MEM */(4 << 28) | /* != */(1 << 31)
> +					/* memory */);
> +
> +		base_cmd->emit(base_cmd, (ib_result_mc_address + 256*4) & 0xfffffffc);
> +
> +		base_cmd->emit(base_cmd, ((ib_result_mc_address + 256*4) >> 32) & 0xffffffff);
> +
> +		base_cmd->emit(base_cmd, 0); /* reference value */
> +		base_cmd->emit(base_cmd, 0xffffffff); /* and mask */
> +
> +		base_cmd->emit(base_cmd,  4 | /* poll interval */(0xfff << 16)/* retry count */);
> +
> +		base_cmd->emit_repeat(base_cmd, 0, 16 - base_cmd->cdw);
> +
> +		ib_result_cpu2 = ib_result_cpu;
> +		ib_result_cpu2[256] = 0x0; /* the memory we wait on to change */
> +
> +		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
> +		ib_info.ib_mc_address = ib_result_mc_address;
> +		ib_info.size = base_cmd->cdw;
> +
> +		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
> +		ibs_request.ip_type = AMDGPU_HW_IP_DMA;
> +		ibs_request.ring = ring_id;
> +		ibs_request.number_of_ibs = 1;
> +		ibs_request.ibs = &ib_info;
> +		ibs_request.resources = bo_list;
> +		ibs_request.fence_info.handle = NULL;
> +
> +		for (i = 0; i < 200; i++) {
> +			r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
> +			if (r != 0 && r != -ECANCELED)
> +				igt_assert(0);
> +		}
> +
> +		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
> +		fence_status.context = context_handle;
> +		fence_status.ip_type = AMDGPU_HW_IP_DMA;
> +		fence_status.ip_instance = 0;
> +		fence_status.ring = ring_id;
> +		fence_status.fence = ibs_request.seq_no;
> +
> +		r = amdgpu_cs_query_fence_status(&fence_status,
> +				AMDGPU_TIMEOUT_INFINITE,0, &expired);
> +		if (r != 0 && r != -ECANCELED)
> +			igt_assert(0);
> +
> +		if (with_thread)
> +			pthread_join(stress_thread, NULL);
> +
> +		r = amdgpu_bo_list_destroy(bo_list);
> +		igt_assert_eq(r, 0);
> +
> +		amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
> +					     ib_result_mc_address, 4096);
> +		free_cmd_base(base_cmd);
> +	}
> +	amdgpu_cs_ctx_free(context_handle);
> +}
> diff --git a/lib/amdgpu/amd_deadlock_helpers.h b/lib/amdgpu/amd_deadlock_helpers.h
> new file mode 100644
> index 000000000..91dcf8bb2
> --- /dev/null
> +++ b/lib/amdgpu/amd_deadlock_helpers.h
> @@ -0,0 +1,34 @@
> +/*
> + * Copyright 2022 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + *
> + */

Same here, use SPDX.

Regards,
Kamil

> +#ifndef __AMD_DEADLOCK_HELPERS_H__
> +#define __AMD_DEADLOCK_HELPERS_H__
> +
> +void
> +amdgpu_deadlock_helper(amdgpu_device_handle device_handle, unsigned ip_type, bool with_thread);
> +
> +void
> +amdgpu_deadlock_sdma(amdgpu_device_handle device_handle, bool with_thread);
> +
> +#endif
> +
> diff --git a/lib/meson.build b/lib/meson.build
> index 8d6c8a244..47f9fdab4 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -138,7 +138,8 @@ if libdrm_amdgpu.found()
>  		'amdgpu/amd_gfx_v8_0.c',
>  		'amdgpu/amd_gfx_v9_0.c',
>  		'amdgpu/amd_dispatch_helpers.c',
> -		'amdgpu/amd_dispatch.c'
> +		'amdgpu/amd_dispatch.c',
> +		'amdgpu/amd_deadlock_helpers.c'
>  	]
>  endif
>  
> -- 
> 2.25.1
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [igt-dev] [PATCH 2/7] lib/amdgpu: add deadlock helpers
  2022-10-18  7:30   ` Kamil Konieczny
@ 2022-10-18 23:16     ` vitaly prosyak
  0 siblings, 0 replies; 10+ messages in thread
From: vitaly prosyak @ 2022-10-18 23:16 UTC (permalink / raw)
  To: Kamil Konieczny, igt-dev, Vitaly Prosyak,
	pierre-eric.pelloux-prayer, marek.olsak

Hi Kamil,

Thanks for the comments.

On 2022-10-18 03:30, Kamil Konieczny wrote:
> Hi Vitaly,
>
> On 2022-10-17 at 23:00:50 -0400, vitaly.prosyak@amd.com wrote:
>> From: Vitaly Prosyak <vitaly.prosyak@amd.com>
>>
>> To validate amdgpu reset functionality.
> --^
> Maybe phrase this as a full sentence, either
> 'To validate ...' expanded a bit, or
> 'Helpers to validate ...'.
>
> You introduce a new lib here, so maybe describe what functional
> changes you added; are they only for deadlock detection?
The directory lib/amdgpu was introduced earlier to hold ASIC-specific
registers and other code that can be shared between different tests,
plus misc helpers.

I will rework these commits based on the suggestions from Pierre-Eric.

These helpers exist to test GPU reset functionality.
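
For reference, the tests in patch 3/7 are expected to drive these
helpers roughly like this (sketch only; the device variable and the
subtest names are illustrative):

  igt_subtest("amdgpu-deadlock-gfx")
          amdgpu_deadlock_helper(device, AMDGPU_HW_IP_GFX, true);
  igt_subtest("amdgpu-deadlock-compute")
          amdgpu_deadlock_helper(device, AMDGPU_HW_IP_COMPUTE, true);
  igt_subtest("amdgpu-deadlock-sdma")
          amdgpu_deadlock_sdma(device, true);

The with_thread flag toggles whether the background writer eventually
satisfies the wait or the job timeout/reset path has to resolve it.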

>
> Read some helpful notes about writing commit messages and
> descriptions:
>
> https://www.ozlabs.org/~akpm/stuff/tpp.txt
>
> https://kernelnewbies.org/PatchPhilosophy
>
> https://elixir.bootlin.com/linux/latest/source/Documentation/process/submitting-patches.rst
>
> https://www.kernel.org/doc/html/latest/process/submitting-patches.html#the-canonical-patch-format
Thanks
>
>> Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
>> ---
>>   lib/amdgpu/amd_deadlock_helpers.c | 260 ++++++++++++++++++++++++++++++
>>   lib/amdgpu/amd_deadlock_helpers.h |  34 ++++
>>   lib/meson.build                   |   3 +-
>>   3 files changed, 296 insertions(+), 1 deletion(-)
>>   create mode 100644 lib/amdgpu/amd_deadlock_helpers.c
>>   create mode 100644 lib/amdgpu/amd_deadlock_helpers.h
>>
>> diff --git a/lib/amdgpu/amd_deadlock_helpers.c b/lib/amdgpu/amd_deadlock_helpers.c
>> new file mode 100644
>> index 000000000..c6528c6ad
>> --- /dev/null
>> +++ b/lib/amdgpu/amd_deadlock_helpers.c
>> @@ -0,0 +1,260 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *  *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
> Use an SPDX license identifier here; see the .h and .c files in
> lib/ that already use one.
Will do.
>
>> [...]
>> diff --git a/lib/amdgpu/amd_deadlock_helpers.h b/lib/amdgpu/amd_deadlock_helpers.h
>> new file mode 100644
>> index 000000000..91dcf8bb2
>> --- /dev/null
>> +++ b/lib/amdgpu/amd_deadlock_helpers.h
>> @@ -0,0 +1,34 @@
>> +/*
>> + * Copyright 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + *
>> + */
> Same here, use SPDX.
Will do.
>
> Regards,
> Kamil
>
Thanks Vitaly

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2022-10-18 23:17 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-18  3:00 [igt-dev] [PATCH 1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper vitaly.prosyak
2022-10-18  3:00 ` [igt-dev] [PATCH 2/7] lib/amdgpu: add deadlock helpers vitaly.prosyak
2022-10-18  7:30   ` Kamil Konieczny
2022-10-18 23:16     ` vitaly prosyak
2022-10-18  3:00 ` [igt-dev] [PATCH 3/7] tests/amdgpu: add deadlock test for gfx, compute and sdma vitaly.prosyak
2022-10-18  3:00 ` [igt-dev] [PATCH 4/7] lib/amdgpu: add memory and reg.access helper vitaly.prosyak
2022-10-18  3:00 ` [igt-dev] [PATCH 5/7] tests/amdgpu: add tests for invalid memory and register access vitaly.prosyak
2022-10-18  3:00 ` [igt-dev] [PATCH 6/7] tests/amdgpu: reuse predefined const GFX_COMPUTE_NOP vitaly.prosyak
2022-10-18  3:00 ` [igt-dev] [PATCH 7/7] amdgpu/tests: PCI unplug 4 tests for different scenario vitaly.prosyak
2022-10-18  3:13 ` [igt-dev] ✗ Fi.CI.BUILD: failure for series starting with [1/7] lib/amdgpu: add amdgpu_bo_alloc_and_map_raw helper Patchwork
