All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs
@ 2021-05-19  5:45 Felix Kuehling
  2021-05-19  5:45 ` [PATCH 2/3] drm/amdgpu: Use preemptible domain for KFD Felix Kuehling
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Felix Kuehling @ 2021-05-19  5:45 UTC (permalink / raw)
  To: amd-gfx

SG BOs such as dmabuf imports and userptr BOs do not consume system
resources directly. Instead they point to resources owned elsewhere.
They typically get evicted by DMABuf move notifiers of MMU notifiers.
If those notifiers don't need to wait for hardware fences (i.e. the SG
BOs are used in a preemptible context), then we don't need to limit
them to the GTT size and we don't need TTM to evict them.

Create a new domain for such preemptible SG BOs that does not impose
artificial size limits and TTM evictions.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile           |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |   8 +
 .../gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c   | 190 ++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |  37 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  11 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |   3 +-
 include/uapi/drm/amdgpu_drm.h                 |   7 +-
 8 files changed, 258 insertions(+), 9 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 6331a11299d0..6cf0fe871d6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -51,9 +51,10 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-	amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
-	amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
+	amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
+	amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
+	amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o \
+	amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
 	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
 	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
 	amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 311bcdc59eda..280cc0c0a9b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -246,6 +246,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 	if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
 		return -EINVAL;
 
+	/* preemptible domain not supported by current CS API */
+	if (args->in.domains & AMDGPU_GEM_DOMAIN_PREEMPT)
+		return -EINVAL;
+
 	if (!amdgpu_is_tmz(adev) && (flags & AMDGPU_GEM_CREATE_ENCRYPTED)) {
 		DRM_NOTE_ONCE("Cannot allocate secure buffer since TMZ is disabled\n");
 		return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 745fcf3ea450..5b538e746afa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -194,6 +194,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
 		c++;
 	}
 
+	if (domain & AMDGPU_GEM_DOMAIN_PREEMPT) {
+		places[c].fpfn = 0;
+		places[c].lpfn = 0;
+		places[c].mem_type = AMDGPU_PL_PREEMPT;
+		places[c].flags = 0;
+		c++;
+	}
+
 	if (!c) {
 		places[c].fpfn = 0;
 		places[c].lpfn = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
new file mode 100644
index 000000000000..b4185dc3c394
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright 2016-2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König, Felix Kuehling
+ */
+
+#include "amdgpu.h"
+
+static inline struct amdgpu_preempt_mgr *
+to_preempt_mgr(struct ttm_resource_manager *man)
+{
+	return container_of(man, struct amdgpu_preempt_mgr, manager);
+}
+
+/**
+ * DOC: mem_info_preempt_used
+ *
+ * The amdgpu driver provides a sysfs API for reporting current total amount of
+ * used preemptible memory.
+ * The file mem_info_preempt_used is used for this, and returns the current
+ * used size of the preemptible block, in bytes
+ */
+static ssize_t amdgpu_mem_info_preempt_used_show(struct device *dev,
+						 struct device_attribute *attr,
+						 char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = drm_to_adev(ddev);
+	struct ttm_resource_manager *man;
+
+	man = ttm_manager_type(&adev->mman.bdev, AMDGPU_PL_PREEMPT);
+	return sysfs_emit(buf, "%llu\n", amdgpu_preempt_mgr_usage(man));
+}
+
+static DEVICE_ATTR(mem_info_preempt_used, S_IRUGO,
+	           amdgpu_mem_info_preempt_used_show, NULL);
+
+/**
+ * amdgpu_preempt_mgr_new - allocate a new node
+ *
+ * @man: TTM memory type manager
+ * @tbo: TTM BO we need this range for
+ * @place: placement flags and restrictions
+ * @mem: the resulting mem object
+ *
+ * Dummy, just count the space used without allocating resources or any limit.
+ */
+static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man,
+				  struct ttm_buffer_object *tbo,
+				  const struct ttm_place *place,
+				  struct ttm_resource *mem)
+{
+	struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
+
+	atomic64_add(mem->num_pages, &mgr->used);
+
+	mem->mm_node = NULL;
+	mem->start = AMDGPU_BO_INVALID_OFFSET;
+	return 0;
+}
+
+/**
+ * amdgpu_preempt_mgr_del - free ranges
+ *
+ * @man: TTM memory type manager
+ * @mem: TTM memory object
+ *
+ * Free the allocated GTT again.
+ */
+static void amdgpu_preempt_mgr_del(struct ttm_resource_manager *man,
+				   struct ttm_resource *mem)
+{
+	struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
+
+	atomic64_sub(mem->num_pages, &mgr->used);
+}
+
+/**
+ * amdgpu_preempt_mgr_usage - return usage of PREEMPT domain
+ *
+ * @man: TTM memory type manager
+ *
+ * Return how many bytes are used in the GTT domain
+ */
+uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man)
+{
+	struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
+	s64 result = atomic64_read(&mgr->used);
+
+	return (result > 0 ? result : 0) * PAGE_SIZE;
+}
+
+/**
+ * amdgpu_preempt_mgr_debug - dump VRAM table
+ *
+ * @man: TTM memory type manager
+ * @printer: DRM printer to use
+ *
+ * Dump the table content using printk.
+ */
+static void amdgpu_preempt_mgr_debug(struct ttm_resource_manager *man,
+				     struct drm_printer *printer)
+{
+	struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
+
+	drm_printf(printer, "man size:%llu pages, preempt used:%lld pages\n",
+		   man->size, (u64)atomic64_read(&mgr->used));
+}
+
+static const struct ttm_resource_manager_func amdgpu_preempt_mgr_func = {
+	.alloc = amdgpu_preempt_mgr_new,
+	.free = amdgpu_preempt_mgr_del,
+	.debug = amdgpu_preempt_mgr_debug
+};
+
+/**
+ * amdgpu_preempt_mgr_init - init PREEMPT manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate and initialize the GTT manager.
+ */
+int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr;
+	struct ttm_resource_manager *man = &mgr->manager;
+	int ret;
+
+	man->use_tt = true;
+	man->func = &amdgpu_preempt_mgr_func;
+
+	ttm_resource_manager_init(man, (1 << 30));
+
+	atomic64_set(&mgr->used, 0);
+
+	ret = device_create_file(adev->dev, &dev_attr_mem_info_preempt_used);
+	if (ret) {
+		DRM_ERROR("Failed to create device file mem_info_preempt_used\n");
+		return ret;
+	}
+
+	ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT,
+			       &mgr->manager);
+	ttm_resource_manager_set_used(man, true);
+	return 0;
+}
+
+/**
+ * amdgpu_preempt_mgr_fini - free and destroy GTT manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the GTT manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
+{
+	struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr;
+	struct ttm_resource_manager *man = &mgr->manager;
+	int ret;
+
+	ttm_resource_manager_set_used(man, false);
+
+	ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+	if (ret)
+		return;
+
+	device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
+
+	ttm_resource_manager_cleanup(man);
+	ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index d856b204cd12..c6eb6f1c7082 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -158,6 +158,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 		}
 		break;
 	case TTM_PL_TT:
+	case AMDGPU_PL_PREEMPT:
 	default:
 		amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 		break;
@@ -217,6 +218,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 
 	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
 	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
+	BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT);
 
 	/* Map only what can't be accessed directly */
 	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
@@ -480,7 +482,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 	struct ttm_resource *old_mem = &bo->mem;
 	int r;
 
-	if (new_mem->mem_type == TTM_PL_TT) {
+	if (new_mem->mem_type == TTM_PL_TT ||
+	    new_mem->mem_type == AMDGPU_PL_PREEMPT) {
 		r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
 		if (r)
 			return r;
@@ -498,11 +501,13 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 		goto out;
 	}
 	if (old_mem->mem_type == TTM_PL_SYSTEM &&
-	    new_mem->mem_type == TTM_PL_TT) {
+	    (new_mem->mem_type == TTM_PL_TT ||
+	     new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
 		ttm_bo_move_null(bo, new_mem);
 		goto out;
 	}
-	if (old_mem->mem_type == TTM_PL_TT &&
+	if ((old_mem->mem_type == TTM_PL_TT ||
+	     old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
 	    new_mem->mem_type == TTM_PL_SYSTEM) {
 		r = ttm_bo_wait_ctx(bo, ctx);
 		if (r)
@@ -587,6 +592,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
 		/* system memory */
 		return 0;
 	case TTM_PL_TT:
+	case AMDGPU_PL_PREEMPT:
 		break;
 	case TTM_PL_VRAM:
 		mem->bus.offset = mem->start << PAGE_SHIFT;
@@ -1293,7 +1299,8 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
 	if (mem && mem->mem_type != TTM_PL_SYSTEM)
 		flags |= AMDGPU_PTE_VALID;
 
-	if (mem && mem->mem_type == TTM_PL_TT) {
+	if (mem && (mem->mem_type == TTM_PL_TT ||
+		    mem->mem_type == AMDGPU_PL_PREEMPT)) {
 		flags |= AMDGPU_PTE_SYSTEM;
 
 		if (ttm->caching == ttm_cached)
@@ -1367,6 +1374,15 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	}
 
 	switch (bo->mem.mem_type) {
+	case AMDGPU_PL_PREEMPT:
+		/* Preemptible BOs don't own system resources managed by the
+		 * driver (pages, VRAM, GART space). They point to resources
+		 * owned by someone else (e.g. pageable memory in user mode
+		 * or a DMABuf). They are used in a preemptible context so we
+		 * can guarantee no deadlocks and good QoS in case of MMU
+		 * notifiers or DMABuf move notifiers from the resource owner.
+		 */
+		return false;
 	case TTM_PL_TT:
 		if (amdgpu_bo_is_amdgpu_bo(bo) &&
 		    amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
@@ -1748,6 +1764,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
 		 (unsigned)(gtt_size / (1024 * 1024)));
 
+	/* Initialize preemptible memory pool */
+	r = amdgpu_preempt_mgr_init(adev);
+	if (r) {
+		DRM_ERROR("Failed initializing PREEMPT heap.\n");
+		return r;
+	}
+
 	/* Initialize various on-chip memory pools */
 	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
 	if (r) {
@@ -1792,6 +1815,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 
 	amdgpu_vram_mgr_fini(adev);
 	amdgpu_gtt_mgr_fini(adev);
+	amdgpu_preempt_mgr_fini(adev);
 	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
 	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
 	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
@@ -1986,6 +2010,11 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		return -EINVAL;
 	}
 
+	if (bo->tbo.mem.mem_type == AMDGPU_PL_PREEMPT) {
+		DRM_ERROR("Trying to clear preemptible memory.\n");
+		return -EINVAL;
+	}
+
 	if (bo->tbo.mem.mem_type == TTM_PL_TT) {
 		r = amdgpu_ttm_alloc_gart(&bo->tbo);
 		if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index eb84a69c4b74..2877a924086f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -31,6 +31,7 @@
 #define AMDGPU_PL_GDS		(TTM_PL_PRIV + 0)
 #define AMDGPU_PL_GWS		(TTM_PL_PRIV + 1)
 #define AMDGPU_PL_OA		(TTM_PL_PRIV + 2)
+#define AMDGPU_PL_PREEMPT	(TTM_PL_PRIV + 3)
 
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512
 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2
@@ -54,6 +55,11 @@ struct amdgpu_gtt_mgr {
 	atomic64_t available;
 };
 
+struct amdgpu_preempt_mgr {
+	struct ttm_resource_manager manager;
+	atomic64_t used;
+};
+
 struct amdgpu_mman {
 	struct ttm_device		bdev;
 	bool				initialized;
@@ -70,6 +76,7 @@ struct amdgpu_mman {
 
 	struct amdgpu_vram_mgr vram_mgr;
 	struct amdgpu_gtt_mgr gtt_mgr;
+	struct amdgpu_preempt_mgr preempt_mgr;
 
 	uint64_t		stolen_vga_size;
 	struct amdgpu_bo	*stolen_vga_memory;
@@ -97,6 +104,8 @@ struct amdgpu_copy_mem {
 
 int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
 void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
+int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
+void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev);
 int amdgpu_vram_mgr_init(struct amdgpu_device *adev);
 void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
 
@@ -104,6 +113,8 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
 uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man);
 int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man);
 
+uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
+
 u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
 int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
 			      struct ttm_resource *mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 55991f393481..da155c276c51 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1817,7 +1817,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 				bo = gem_to_amdgpu_bo(gobj);
 		}
 		mem = &bo->tbo.mem;
-		if (mem->mem_type == TTM_PL_TT)
+		if (mem->mem_type == TTM_PL_TT ||
+		    mem->mem_type == AMDGPU_PL_PREEMPT)
 			pages_addr = bo->tbo.ttm->dma_address;
 	}
 
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 9169df7fadee..4e4422bc54ea 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -97,6 +97,9 @@ extern "C" {
  *
  * %AMDGPU_GEM_DOMAIN_OA	Ordered append, used by 3D or Compute engines
  * for appending data.
+ *
+ * %AMDGPU_GEM_DOMAIN_PREEMPT	Userptrs and DMAbuf imports in preemptible
+ * contexts, which allow memory to be evicted without waiting for fences.
  */
 #define AMDGPU_GEM_DOMAIN_CPU		0x1
 #define AMDGPU_GEM_DOMAIN_GTT		0x2
@@ -104,12 +107,14 @@ extern "C" {
 #define AMDGPU_GEM_DOMAIN_GDS		0x8
 #define AMDGPU_GEM_DOMAIN_GWS		0x10
 #define AMDGPU_GEM_DOMAIN_OA		0x20
+#define AMDGPU_GEM_DOMAIN_PREEMPT	0x40
 #define AMDGPU_GEM_DOMAIN_MASK		(AMDGPU_GEM_DOMAIN_CPU | \
 					 AMDGPU_GEM_DOMAIN_GTT | \
 					 AMDGPU_GEM_DOMAIN_VRAM | \
 					 AMDGPU_GEM_DOMAIN_GDS | \
 					 AMDGPU_GEM_DOMAIN_GWS | \
-					 AMDGPU_GEM_DOMAIN_OA)
+					 AMDGPU_GEM_DOMAIN_OA | \
+					 AMDGPU_GEM_DOMAIN_PREEMPT)
 
 /* Flag that CPU access will be required for the case of VRAM domain */
 #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED	(1 << 0)
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] drm/amdgpu: Use preemptible domain for KFD
  2021-05-19  5:45 [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs Felix Kuehling
@ 2021-05-19  5:45 ` Felix Kuehling
  2021-05-19  5:51   ` Felix Kuehling
  2021-05-19  5:45 ` [PATCH 3/3] drm/amdgpu: Workaround IOMMU driver bug Felix Kuehling
  2021-05-19 10:04 ` [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs Christian König
  2 siblings, 1 reply; 7+ messages in thread
From: Felix Kuehling @ 2021-05-19  5:45 UTC (permalink / raw)
  To: amd-gfx

KFD userptr BOs and SG BOs used for DMA mappings can be preempted with
CWSR. Therefore we can place them in the preemptible domain and avoid
unwanted evictions.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 928e8d57cd08..2856ca1032db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -511,7 +511,7 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
 	drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
 				       ttm->num_pages);
 
-	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_PREEMPT);
 	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 	if (ret)
 		goto unmap_sg;
@@ -535,7 +535,7 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
 	struct ttm_operation_ctx ctx = {.interruptible = true};
 	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
 
-	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_PREEMPT);
 	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 }
 
@@ -1408,7 +1408,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
 		alloc_flags = 0;
 	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
-		domain = AMDGPU_GEM_DOMAIN_GTT;
+		domain = AMDGPU_GEM_DOMAIN_PREEMPT;
 		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
 		alloc_flags = 0;
 		if (!offset || !*offset)
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] drm/amdgpu: Workaround IOMMU driver bug
  2021-05-19  5:45 [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs Felix Kuehling
  2021-05-19  5:45 ` [PATCH 2/3] drm/amdgpu: Use preemptible domain for KFD Felix Kuehling
@ 2021-05-19  5:45 ` Felix Kuehling
  2021-05-19 10:04 ` [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs Christian König
  2 siblings, 0 replies; 7+ messages in thread
From: Felix Kuehling @ 2021-05-19  5:45 UTC (permalink / raw)
  To: amd-gfx

The intel IOMMU driver causes kernel oopses or internal errors flooding
kernel log when mapping larger SG tables. Limiting the size of userptr BOs
to 6GB seems to avoid this.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2856ca1032db..a82ca85a2d6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1408,6 +1408,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
 		alloc_flags = 0;
 	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+		/* workaround IOMMU driver bug */
+		if (size >= (6ULL << 30))
+			return -ENOMEM;
 		domain = AMDGPU_GEM_DOMAIN_PREEMPT;
 		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
 		alloc_flags = 0;
-- 
2.31.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] drm/amdgpu: Use preemptible domain for KFD
  2021-05-19  5:45 ` [PATCH 2/3] drm/amdgpu: Use preemptible domain for KFD Felix Kuehling
@ 2021-05-19  5:51   ` Felix Kuehling
  0 siblings, 0 replies; 7+ messages in thread
From: Felix Kuehling @ 2021-05-19  5:51 UTC (permalink / raw)
  To: amd-gfx, Pan, Xinhui

Hi Xinhui,

This should be another way to fix the userptr swapout problem you found. 
This is meant to address a long-stadinp problem with userptr BOs getting 
evicted (not swapped), which broke DMA mappings. This patch series is 
meant to completely avoid evictions of userptrs and dma mappings for 
preemptible KFD contexts. That should also avoid the swapout issue.

If there is an underlying problem with the way TTM handles swapout of 
userptrs, that would still affect userptrs in the graphics driver, though.

Regards,
   Felix

On 2021-05-19 1:45 a.m., Felix Kuehling wrote:
> KFD userptr BOs and SG BOs used for DMA mappings can be preempted with
> CWSR. Therefore we can place them in the preemptible domain and avoid
> unwanted evictions.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +++---
>   1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 928e8d57cd08..2856ca1032db 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -511,7 +511,7 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
>   	drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
>   				       ttm->num_pages);
>   
> -	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
> +	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_PREEMPT);
>   	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>   	if (ret)
>   		goto unmap_sg;
> @@ -535,7 +535,7 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
>   	struct ttm_operation_ctx ctx = {.interruptible = true};
>   	struct amdgpu_bo *bo = attachment->bo_va->base.bo;
>   
> -	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
> +	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_PREEMPT);
>   	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>   }
>   
> @@ -1408,7 +1408,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   		domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
>   		alloc_flags = 0;
>   	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
> -		domain = AMDGPU_GEM_DOMAIN_GTT;
> +		domain = AMDGPU_GEM_DOMAIN_PREEMPT;
>   		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
>   		alloc_flags = 0;
>   		if (!offset || !*offset)
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs
  2021-05-19  5:45 [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs Felix Kuehling
  2021-05-19  5:45 ` [PATCH 2/3] drm/amdgpu: Use preemptible domain for KFD Felix Kuehling
  2021-05-19  5:45 ` [PATCH 3/3] drm/amdgpu: Workaround IOMMU driver bug Felix Kuehling
@ 2021-05-19 10:04 ` Christian König
  2021-05-19 15:22   ` Felix Kuehling
  2 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2021-05-19 10:04 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx

Am 19.05.21 um 07:45 schrieb Felix Kuehling:
> SG BOs such as dmabuf imports and userptr BOs do not consume system
> resources directly. Instead they point to resources owned elsewhere.
> They typically get evicted by DMABuf move notifiers of MMU notifiers.
> If those notifiers don't need to wait for hardware fences (i.e. the SG
> BOs are used in a preemptible context), then we don't need to limit
> them to the GTT size and we don't need TTM to evict them.
>
> Create a new domain for such preemptible SG BOs that does not impose
> artificial size limits and TTM evictions.

Please don't create an GEM domain for this. This has just to much 
potential to be abused by userspace.

The kernel is the only place where we can decide if the BO is 
preemptible or not.

Christian.

>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/Makefile           |   7 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |   4 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |   8 +
>   .../gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c   | 190 ++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |  37 +++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  11 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |   3 +-
>   include/uapi/drm/amdgpu_drm.h                 |   7 +-
>   8 files changed, 258 insertions(+), 9 deletions(-)
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 6331a11299d0..6cf0fe871d6c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -51,9 +51,10 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>   	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
>   	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
>   	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
> -	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
> -	amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
> -	amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
> +	amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
> +	amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
> +	amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o \
> +	amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
>   	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
>   	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>   	amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 311bcdc59eda..280cc0c0a9b3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -246,6 +246,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
>   	if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
>   		return -EINVAL;
>   
> +	/* preemptible domain not supported by current CS API */
> +	if (args->in.domains & AMDGPU_GEM_DOMAIN_PREEMPT)
> +		return -EINVAL;
> +
>   	if (!amdgpu_is_tmz(adev) && (flags & AMDGPU_GEM_CREATE_ENCRYPTED)) {
>   		DRM_NOTE_ONCE("Cannot allocate secure buffer since TMZ is disabled\n");
>   		return -EINVAL;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 745fcf3ea450..5b538e746afa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -194,6 +194,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
>   		c++;
>   	}
>   
> +	if (domain & AMDGPU_GEM_DOMAIN_PREEMPT) {
> +		places[c].fpfn = 0;
> +		places[c].lpfn = 0;
> +		places[c].mem_type = AMDGPU_PL_PREEMPT;
> +		places[c].flags = 0;
> +		c++;
> +	}
> +
>   	if (!c) {
>   		places[c].fpfn = 0;
>   		places[c].lpfn = 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
> new file mode 100644
> index 000000000000..b4185dc3c394
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
> @@ -0,0 +1,190 @@
> +/*
> + * Copyright 2016-2021 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: Christian König, Felix Kuehling
> + */
> +
> +#include "amdgpu.h"
> +
> +static inline struct amdgpu_preempt_mgr *
> +to_preempt_mgr(struct ttm_resource_manager *man)
> +{
> +	return container_of(man, struct amdgpu_preempt_mgr, manager);
> +}
> +
> +/**
> + * DOC: mem_info_preempt_used
> + *
> + * The amdgpu driver provides a sysfs API for reporting current total amount of
> + * used preemptible memory.
> + * The file mem_info_preempt_used is used for this, and returns the current
> + * used size of the preemptible block, in bytes
> + */
> +static ssize_t amdgpu_mem_info_preempt_used_show(struct device *dev,
> +						 struct device_attribute *attr,
> +						 char *buf)
> +{
> +	struct drm_device *ddev = dev_get_drvdata(dev);
> +	struct amdgpu_device *adev = drm_to_adev(ddev);
> +	struct ttm_resource_manager *man;
> +
> +	man = ttm_manager_type(&adev->mman.bdev, AMDGPU_PL_PREEMPT);
> +	return sysfs_emit(buf, "%llu\n", amdgpu_preempt_mgr_usage(man));
> +}
> +
> +static DEVICE_ATTR(mem_info_preempt_used, S_IRUGO,
> +	           amdgpu_mem_info_preempt_used_show, NULL);
> +
> +/**
> + * amdgpu_preempt_mgr_new - allocate a new node
> + *
> + * @man: TTM memory type manager
> + * @tbo: TTM BO we need this range for
> + * @place: placement flags and restrictions
> + * @mem: the resulting mem object
> + *
> + * Dummy, just count the space used without allocating resources or any limit.
> + */
> +static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man,
> +				  struct ttm_buffer_object *tbo,
> +				  const struct ttm_place *place,
> +				  struct ttm_resource *mem)
> +{
> +	struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
> +
> +	atomic64_add(mem->num_pages, &mgr->used);
> +
> +	mem->mm_node = NULL;
> +	mem->start = AMDGPU_BO_INVALID_OFFSET;
> +	return 0;
> +}
> +
> +/**
> + * amdgpu_preempt_mgr_del - free ranges
> + *
> + * @man: TTM memory type manager
> + * @mem: TTM memory object
> + *
> + * Free the allocated GTT again.
> + */
> +static void amdgpu_preempt_mgr_del(struct ttm_resource_manager *man,
> +				   struct ttm_resource *mem)
> +{
> +	struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
> +
> +	atomic64_sub(mem->num_pages, &mgr->used);
> +}
> +
> +/**
> + * amdgpu_preempt_mgr_usage - return usage of PREEMPT domain
> + *
> + * @man: TTM memory type manager
> + *
> + * Return how many bytes are used in the GTT domain
> + */
> +uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man)
> +{
> +	struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
> +	s64 result = atomic64_read(&mgr->used);
> +
> +	return (result > 0 ? result : 0) * PAGE_SIZE;
> +}
> +
> +/**
> + * amdgpu_preempt_mgr_debug - dump VRAM table
> + *
> + * @man: TTM memory type manager
> + * @printer: DRM printer to use
> + *
> + * Dump the table content using printk.
> + */
> +static void amdgpu_preempt_mgr_debug(struct ttm_resource_manager *man,
> +				     struct drm_printer *printer)
> +{
> +	struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
> +
> +	drm_printf(printer, "man size:%llu pages, preempt used:%lld pages\n",
> +		   man->size, (u64)atomic64_read(&mgr->used));
> +}
> +
> +static const struct ttm_resource_manager_func amdgpu_preempt_mgr_func = {
> +	.alloc = amdgpu_preempt_mgr_new,
> +	.free = amdgpu_preempt_mgr_del,
> +	.debug = amdgpu_preempt_mgr_debug
> +};
> +
> +/**
> + * amdgpu_preempt_mgr_init - init PREEMPT manager and DRM MM
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Allocate and initialize the GTT manager.
> + */
> +int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
> +{
> +	struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr;
> +	struct ttm_resource_manager *man = &mgr->manager;
> +	int ret;
> +
> +	man->use_tt = true;
> +	man->func = &amdgpu_preempt_mgr_func;
> +
> +	ttm_resource_manager_init(man, (1 << 30));
> +
> +	atomic64_set(&mgr->used, 0);
> +
> +	ret = device_create_file(adev->dev, &dev_attr_mem_info_preempt_used);
> +	if (ret) {
> +		DRM_ERROR("Failed to create device file mem_info_preempt_used\n");
> +		return ret;
> +	}
> +
> +	ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT,
> +			       &mgr->manager);
> +	ttm_resource_manager_set_used(man, true);
> +	return 0;
> +}
> +
> +/**
> + * amdgpu_preempt_mgr_fini - free and destroy GTT manager
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Destroy and free the GTT manager, returns -EBUSY if ranges are still
> + * allocated inside it.
> + */
> +void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
> +{
> +	struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr;
> +	struct ttm_resource_manager *man = &mgr->manager;
> +	int ret;
> +
> +	ttm_resource_manager_set_used(man, false);
> +
> +	ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
> +	if (ret)
> +		return;
> +
> +	device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
> +
> +	ttm_resource_manager_cleanup(man);
> +	ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index d856b204cd12..c6eb6f1c7082 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -158,6 +158,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
>   		}
>   		break;
>   	case TTM_PL_TT:
> +	case AMDGPU_PL_PREEMPT:
>   	default:
>   		amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
>   		break;
> @@ -217,6 +218,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
>   
>   	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
>   	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
> +	BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT);
>   
>   	/* Map only what can't be accessed directly */
>   	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
> @@ -480,7 +482,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>   	struct ttm_resource *old_mem = &bo->mem;
>   	int r;
>   
> -	if (new_mem->mem_type == TTM_PL_TT) {
> +	if (new_mem->mem_type == TTM_PL_TT ||
> +	    new_mem->mem_type == AMDGPU_PL_PREEMPT) {
>   		r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
>   		if (r)
>   			return r;
> @@ -498,11 +501,13 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
>   		goto out;
>   	}
>   	if (old_mem->mem_type == TTM_PL_SYSTEM &&
> -	    new_mem->mem_type == TTM_PL_TT) {
> +	    (new_mem->mem_type == TTM_PL_TT ||
> +	     new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
>   		ttm_bo_move_null(bo, new_mem);
>   		goto out;
>   	}
> -	if (old_mem->mem_type == TTM_PL_TT &&
> +	if ((old_mem->mem_type == TTM_PL_TT ||
> +	     old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
>   	    new_mem->mem_type == TTM_PL_SYSTEM) {
>   		r = ttm_bo_wait_ctx(bo, ctx);
>   		if (r)
> @@ -587,6 +592,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
>   		/* system memory */
>   		return 0;
>   	case TTM_PL_TT:
> +	case AMDGPU_PL_PREEMPT:
>   		break;
>   	case TTM_PL_VRAM:
>   		mem->bus.offset = mem->start << PAGE_SHIFT;
> @@ -1293,7 +1299,8 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
>   	if (mem && mem->mem_type != TTM_PL_SYSTEM)
>   		flags |= AMDGPU_PTE_VALID;
>   
> -	if (mem && mem->mem_type == TTM_PL_TT) {
> +	if (mem && (mem->mem_type == TTM_PL_TT ||
> +		    mem->mem_type == AMDGPU_PL_PREEMPT)) {
>   		flags |= AMDGPU_PTE_SYSTEM;
>   
>   		if (ttm->caching == ttm_cached)
> @@ -1367,6 +1374,15 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>   	}
>   
>   	switch (bo->mem.mem_type) {
> +	case AMDGPU_PL_PREEMPT:
> +		/* Preemptible BOs don't own system resources managed by the
> +		 * driver (pages, VRAM, GART space). They point to resources
> +		 * owned by someone else (e.g. pageable memory in user mode
> +		 * or a DMABuf). They are used in a preemptible context so we
> +		 * can guarantee no deadlocks and good QoS in case of MMU
> +		 * notifiers or DMABuf move notifiers from the resource owner.
> +		 */
> +		return false;
>   	case TTM_PL_TT:
>   		if (amdgpu_bo_is_amdgpu_bo(bo) &&
>   		    amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
> @@ -1748,6 +1764,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>   	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
>   		 (unsigned)(gtt_size / (1024 * 1024)));
>   
> +	/* Initialize preemptible memory pool */
> +	r = amdgpu_preempt_mgr_init(adev);
> +	if (r) {
> +		DRM_ERROR("Failed initializing PREEMPT heap.\n");
> +		return r;
> +	}
> +
>   	/* Initialize various on-chip memory pools */
>   	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
>   	if (r) {
> @@ -1792,6 +1815,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
>   
>   	amdgpu_vram_mgr_fini(adev);
>   	amdgpu_gtt_mgr_fini(adev);
> +	amdgpu_preempt_mgr_fini(adev);
>   	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
>   	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
>   	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
> @@ -1986,6 +2010,11 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>   		return -EINVAL;
>   	}
>   
> +	if (bo->tbo.mem.mem_type == AMDGPU_PL_PREEMPT) {
> +		DRM_ERROR("Trying to clear preemptible memory.\n");
> +		return -EINVAL;
> +	}
> +
>   	if (bo->tbo.mem.mem_type == TTM_PL_TT) {
>   		r = amdgpu_ttm_alloc_gart(&bo->tbo);
>   		if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index eb84a69c4b74..2877a924086f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -31,6 +31,7 @@
>   #define AMDGPU_PL_GDS		(TTM_PL_PRIV + 0)
>   #define AMDGPU_PL_GWS		(TTM_PL_PRIV + 1)
>   #define AMDGPU_PL_OA		(TTM_PL_PRIV + 2)
> +#define AMDGPU_PL_PREEMPT	(TTM_PL_PRIV + 3)
>   
>   #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512
>   #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2
> @@ -54,6 +55,11 @@ struct amdgpu_gtt_mgr {
>   	atomic64_t available;
>   };
>   
> +struct amdgpu_preempt_mgr {
> +	struct ttm_resource_manager manager;
> +	atomic64_t used;
> +};
> +
>   struct amdgpu_mman {
>   	struct ttm_device		bdev;
>   	bool				initialized;
> @@ -70,6 +76,7 @@ struct amdgpu_mman {
>   
>   	struct amdgpu_vram_mgr vram_mgr;
>   	struct amdgpu_gtt_mgr gtt_mgr;
> +	struct amdgpu_preempt_mgr preempt_mgr;
>   
>   	uint64_t		stolen_vga_size;
>   	struct amdgpu_bo	*stolen_vga_memory;
> @@ -97,6 +104,8 @@ struct amdgpu_copy_mem {
>   
>   int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
>   void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
> +int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
> +void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev);
>   int amdgpu_vram_mgr_init(struct amdgpu_device *adev);
>   void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
>   
> @@ -104,6 +113,8 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem);
>   uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man);
>   int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man);
>   
> +uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
> +
>   u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
>   int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
>   			      struct ttm_resource *mem,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 55991f393481..da155c276c51 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1817,7 +1817,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   				bo = gem_to_amdgpu_bo(gobj);
>   		}
>   		mem = &bo->tbo.mem;
> -		if (mem->mem_type == TTM_PL_TT)
> +		if (mem->mem_type == TTM_PL_TT ||
> +		    mem->mem_type == AMDGPU_PL_PREEMPT)
>   			pages_addr = bo->tbo.ttm->dma_address;
>   	}
>   
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 9169df7fadee..4e4422bc54ea 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -97,6 +97,9 @@ extern "C" {
>    *
>    * %AMDGPU_GEM_DOMAIN_OA	Ordered append, used by 3D or Compute engines
>    * for appending data.
> + *
> + * %AMDGPU_GEM_DOMAIN_PREEMPT	Userptrs and DMAbuf imports in preemptible
> + * contexts, which allow memory to be evicted without waiting for fences.
>    */
>   #define AMDGPU_GEM_DOMAIN_CPU		0x1
>   #define AMDGPU_GEM_DOMAIN_GTT		0x2
> @@ -104,12 +107,14 @@ extern "C" {
>   #define AMDGPU_GEM_DOMAIN_GDS		0x8
>   #define AMDGPU_GEM_DOMAIN_GWS		0x10
>   #define AMDGPU_GEM_DOMAIN_OA		0x20
> +#define AMDGPU_GEM_DOMAIN_PREEMPT	0x40
>   #define AMDGPU_GEM_DOMAIN_MASK		(AMDGPU_GEM_DOMAIN_CPU | \
>   					 AMDGPU_GEM_DOMAIN_GTT | \
>   					 AMDGPU_GEM_DOMAIN_VRAM | \
>   					 AMDGPU_GEM_DOMAIN_GDS | \
>   					 AMDGPU_GEM_DOMAIN_GWS | \
> -					 AMDGPU_GEM_DOMAIN_OA)
> +					 AMDGPU_GEM_DOMAIN_OA | \
> +					 AMDGPU_GEM_DOMAIN_PREEMPT)
>   
>   /* Flag that CPU access will be required for the case of VRAM domain */
>   #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED	(1 << 0)

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs
  2021-05-19 10:04 ` [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs Christian König
@ 2021-05-19 15:22   ` Felix Kuehling
  2021-05-20 10:20     ` Christian König
  0 siblings, 1 reply; 7+ messages in thread
From: Felix Kuehling @ 2021-05-19 15:22 UTC (permalink / raw)
  To: Christian König, amd-gfx

Am 2021-05-19 um 6:04 a.m. schrieb Christian König:
> Am 19.05.21 um 07:45 schrieb Felix Kuehling:
>> SG BOs such as dmabuf imports and userptr BOs do not consume system
>> resources directly. Instead they point to resources owned elsewhere.
>> They typically get evicted by DMABuf move notifiers of MMU notifiers.
>> If those notifiers don't need to wait for hardware fences (i.e. the SG
>> BOs are used in a preemptible context), then we don't need to limit
>> them to the GTT size and we don't need TTM to evict them.
>>
>> Create a new domain for such preemptible SG BOs that does not impose
>> artificial size limits and TTM evictions.
>
> Please don't create an GEM domain for this. This has just to much
> potential to be abused by userspace.
>
> The kernel is the only place where we can decide if the BO is
> preemptible or not.

I did put a check in amdgpu_gem_create_ioctl to prevent user mode from
directly creating preemptible BOs.

Instead of a domain I can use a flag in the BO. But if I put it in a
flag (say AMDGPU_GEM_CREATE_PREEMPTIBLE), that's also accessible to user
mode and I need to filter it out in the ioctl API. I don't see how
that's any different.

Any other ideas?

Thanks,
  Felix


>
> Christian.
>
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Makefile           |   7 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |   4 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |   8 +
>>   .../gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c   | 190 ++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |  37 +++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  11 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |   3 +-
>>   include/uapi/drm/amdgpu_drm.h                 |   7 +-
>>   8 files changed, 258 insertions(+), 9 deletions(-)
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 6331a11299d0..6cf0fe871d6c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -51,9 +51,10 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>       atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
>>       amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
>>       amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
>> -    amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
>> amdgpu_atomfirmware.o \
>> -    amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
>> -    amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o
>> amdgpu_ras.o amdgpu_vm_cpu.o \
>> +    amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o
>> amdgpu_virt.o \
>> +    amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
>> +    amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o \
>> +    amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
>>       amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o
>> amdgpu_nbio.o \
>>       amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>>       amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 311bcdc59eda..280cc0c0a9b3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -246,6 +246,10 @@ int amdgpu_gem_create_ioctl(struct drm_device
>> *dev, void *data,
>>       if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
>>           return -EINVAL;
>>   +    /* preemptible domain not supported by current CS API */
>> +    if (args->in.domains & AMDGPU_GEM_DOMAIN_PREEMPT)
>> +        return -EINVAL;
>> +
>>       if (!amdgpu_is_tmz(adev) && (flags &
>> AMDGPU_GEM_CREATE_ENCRYPTED)) {
>>           DRM_NOTE_ONCE("Cannot allocate secure buffer since TMZ is
>> disabled\n");
>>           return -EINVAL;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index 745fcf3ea450..5b538e746afa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -194,6 +194,14 @@ void amdgpu_bo_placement_from_domain(struct
>> amdgpu_bo *abo, u32 domain)
>>           c++;
>>       }
>>   +    if (domain & AMDGPU_GEM_DOMAIN_PREEMPT) {
>> +        places[c].fpfn = 0;
>> +        places[c].lpfn = 0;
>> +        places[c].mem_type = AMDGPU_PL_PREEMPT;
>> +        places[c].flags = 0;
>> +        c++;
>> +    }
>> +
>>       if (!c) {
>>           places[c].fpfn = 0;
>>           places[c].lpfn = 0;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>> new file mode 100644
>> index 000000000000..b4185dc3c394
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>> @@ -0,0 +1,190 @@
>> +/*
>> + * Copyright 2016-2021 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person
>> obtaining a
>> + * copy of this software and associated documentation files (the
>> "Software"),
>> + * to deal in the Software without restriction, including without
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute,
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom
>> the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be
>> included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
>> EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>> DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>> OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>> USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + * Authors: Christian König, Felix Kuehling
>> + */
>> +
>> +#include "amdgpu.h"
>> +
>> +static inline struct amdgpu_preempt_mgr *
>> +to_preempt_mgr(struct ttm_resource_manager *man)
>> +{
>> +    return container_of(man, struct amdgpu_preempt_mgr, manager);
>> +}
>> +
>> +/**
>> + * DOC: mem_info_preempt_used
>> + *
>> + * The amdgpu driver provides a sysfs API for reporting current
>> total amount of
>> + * used preemptible memory.
>> + * The file mem_info_preempt_used is used for this, and returns the
>> current
>> + * used size of the preemptible block, in bytes
>> + */
>> +static ssize_t amdgpu_mem_info_preempt_used_show(struct device *dev,
>> +                         struct device_attribute *attr,
>> +                         char *buf)
>> +{
>> +    struct drm_device *ddev = dev_get_drvdata(dev);
>> +    struct amdgpu_device *adev = drm_to_adev(ddev);
>> +    struct ttm_resource_manager *man;
>> +
>> +    man = ttm_manager_type(&adev->mman.bdev, AMDGPU_PL_PREEMPT);
>> +    return sysfs_emit(buf, "%llu\n", amdgpu_preempt_mgr_usage(man));
>> +}
>> +
>> +static DEVICE_ATTR(mem_info_preempt_used, S_IRUGO,
>> +               amdgpu_mem_info_preempt_used_show, NULL);
>> +
>> +/**
>> + * amdgpu_preempt_mgr_new - allocate a new node
>> + *
>> + * @man: TTM memory type manager
>> + * @tbo: TTM BO we need this range for
>> + * @place: placement flags and restrictions
>> + * @mem: the resulting mem object
>> + *
>> + * Dummy, just count the space used without allocating resources or
>> any limit.
>> + */
>> +static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man,
>> +                  struct ttm_buffer_object *tbo,
>> +                  const struct ttm_place *place,
>> +                  struct ttm_resource *mem)
>> +{
>> +    struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
>> +
>> +    atomic64_add(mem->num_pages, &mgr->used);
>> +
>> +    mem->mm_node = NULL;
>> +    mem->start = AMDGPU_BO_INVALID_OFFSET;
>> +    return 0;
>> +}
>> +
>> +/**
>> + * amdgpu_preempt_mgr_del - free ranges
>> + *
>> + * @man: TTM memory type manager
>> + * @mem: TTM memory object
>> + *
>> + * Free the allocated GTT again.
>> + */
>> +static void amdgpu_preempt_mgr_del(struct ttm_resource_manager *man,
>> +                   struct ttm_resource *mem)
>> +{
>> +    struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
>> +
>> +    atomic64_sub(mem->num_pages, &mgr->used);
>> +}
>> +
>> +/**
>> + * amdgpu_preempt_mgr_usage - return usage of PREEMPT domain
>> + *
>> + * @man: TTM memory type manager
>> + *
>> + * Return how many bytes are used in the GTT domain
>> + */
>> +uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man)
>> +{
>> +    struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
>> +    s64 result = atomic64_read(&mgr->used);
>> +
>> +    return (result > 0 ? result : 0) * PAGE_SIZE;
>> +}
>> +
>> +/**
>> + * amdgpu_preempt_mgr_debug - dump VRAM table
>> + *
>> + * @man: TTM memory type manager
>> + * @printer: DRM printer to use
>> + *
>> + * Dump the table content using printk.
>> + */
>> +static void amdgpu_preempt_mgr_debug(struct ttm_resource_manager *man,
>> +                     struct drm_printer *printer)
>> +{
>> +    struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
>> +
>> +    drm_printf(printer, "man size:%llu pages, preempt used:%lld
>> pages\n",
>> +           man->size, (u64)atomic64_read(&mgr->used));
>> +}
>> +
>> +static const struct ttm_resource_manager_func
>> amdgpu_preempt_mgr_func = {
>> +    .alloc = amdgpu_preempt_mgr_new,
>> +    .free = amdgpu_preempt_mgr_del,
>> +    .debug = amdgpu_preempt_mgr_debug
>> +};
>> +
>> +/**
>> + * amdgpu_preempt_mgr_init - init PREEMPT manager and DRM MM
>> + *
>> + * @adev: amdgpu_device pointer
>> + *
>> + * Allocate and initialize the GTT manager.
>> + */
>> +int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
>> +{
>> +    struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr;
>> +    struct ttm_resource_manager *man = &mgr->manager;
>> +    int ret;
>> +
>> +    man->use_tt = true;
>> +    man->func = &amdgpu_preempt_mgr_func;
>> +
>> +    ttm_resource_manager_init(man, (1 << 30));
>> +
>> +    atomic64_set(&mgr->used, 0);
>> +
>> +    ret = device_create_file(adev->dev,
>> &dev_attr_mem_info_preempt_used);
>> +    if (ret) {
>> +        DRM_ERROR("Failed to create device file
>> mem_info_preempt_used\n");
>> +        return ret;
>> +    }
>> +
>> +    ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT,
>> +                   &mgr->manager);
>> +    ttm_resource_manager_set_used(man, true);
>> +    return 0;
>> +}
>> +
>> +/**
>> + * amdgpu_preempt_mgr_fini - free and destroy GTT manager
>> + *
>> + * @adev: amdgpu_device pointer
>> + *
>> + * Destroy and free the GTT manager, returns -EBUSY if ranges are still
>> + * allocated inside it.
>> + */
>> +void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
>> +{
>> +    struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr;
>> +    struct ttm_resource_manager *man = &mgr->manager;
>> +    int ret;
>> +
>> +    ttm_resource_manager_set_used(man, false);
>> +
>> +    ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
>> +    if (ret)
>> +        return;
>> +
>> +    device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
>> +
>> +    ttm_resource_manager_cleanup(man);
>> +    ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index d856b204cd12..c6eb6f1c7082 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -158,6 +158,7 @@ static void amdgpu_evict_flags(struct
>> ttm_buffer_object *bo,
>>           }
>>           break;
>>       case TTM_PL_TT:
>> +    case AMDGPU_PL_PREEMPT:
>>       default:
>>           amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
>>           break;
>> @@ -217,6 +218,7 @@ static int amdgpu_ttm_map_buffer(struct
>> ttm_buffer_object *bo,
>>         BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
>>              AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
>> +    BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT);
>>         /* Map only what can't be accessed directly */
>>       if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
>> @@ -480,7 +482,8 @@ static int amdgpu_bo_move(struct
>> ttm_buffer_object *bo, bool evict,
>>       struct ttm_resource *old_mem = &bo->mem;
>>       int r;
>>   -    if (new_mem->mem_type == TTM_PL_TT) {
>> +    if (new_mem->mem_type == TTM_PL_TT ||
>> +        new_mem->mem_type == AMDGPU_PL_PREEMPT) {
>>           r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
>>           if (r)
>>               return r;
>> @@ -498,11 +501,13 @@ static int amdgpu_bo_move(struct
>> ttm_buffer_object *bo, bool evict,
>>           goto out;
>>       }
>>       if (old_mem->mem_type == TTM_PL_SYSTEM &&
>> -        new_mem->mem_type == TTM_PL_TT) {
>> +        (new_mem->mem_type == TTM_PL_TT ||
>> +         new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
>>           ttm_bo_move_null(bo, new_mem);
>>           goto out;
>>       }
>> -    if (old_mem->mem_type == TTM_PL_TT &&
>> +    if ((old_mem->mem_type == TTM_PL_TT ||
>> +         old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
>>           new_mem->mem_type == TTM_PL_SYSTEM) {
>>           r = ttm_bo_wait_ctx(bo, ctx);
>>           if (r)
>> @@ -587,6 +592,7 @@ static int amdgpu_ttm_io_mem_reserve(struct
>> ttm_device *bdev,
>>           /* system memory */
>>           return 0;
>>       case TTM_PL_TT:
>> +    case AMDGPU_PL_PREEMPT:
>>           break;
>>       case TTM_PL_VRAM:
>>           mem->bus.offset = mem->start << PAGE_SHIFT;
>> @@ -1293,7 +1299,8 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt
>> *ttm, struct ttm_resource *mem)
>>       if (mem && mem->mem_type != TTM_PL_SYSTEM)
>>           flags |= AMDGPU_PTE_VALID;
>>   -    if (mem && mem->mem_type == TTM_PL_TT) {
>> +    if (mem && (mem->mem_type == TTM_PL_TT ||
>> +            mem->mem_type == AMDGPU_PL_PREEMPT)) {
>>           flags |= AMDGPU_PTE_SYSTEM;
>>             if (ttm->caching == ttm_cached)
>> @@ -1367,6 +1374,15 @@ static bool
>> amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>>       }
>>         switch (bo->mem.mem_type) {
>> +    case AMDGPU_PL_PREEMPT:
>> +        /* Preemptible BOs don't own system resources managed by the
>> +         * driver (pages, VRAM, GART space). They point to resources
>> +         * owned by someone else (e.g. pageable memory in user mode
>> +         * or a DMABuf). They are used in a preemptible context so we
>> +         * can guarantee no deadlocks and good QoS in case of MMU
>> +         * notifiers or DMABuf move notifiers from the resource owner.
>> +         */
>> +        return false;
>>       case TTM_PL_TT:
>>           if (amdgpu_bo_is_amdgpu_bo(bo) &&
>>               amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
>> @@ -1748,6 +1764,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>       DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
>>            (unsigned)(gtt_size / (1024 * 1024)));
>>   +    /* Initialize preemptible memory pool */
>> +    r = amdgpu_preempt_mgr_init(adev);
>> +    if (r) {
>> +        DRM_ERROR("Failed initializing PREEMPT heap.\n");
>> +        return r;
>> +    }
>> +
>>       /* Initialize various on-chip memory pools */
>>       r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS,
>> adev->gds.gds_size);
>>       if (r) {
>> @@ -1792,6 +1815,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
>>         amdgpu_vram_mgr_fini(adev);
>>       amdgpu_gtt_mgr_fini(adev);
>> +    amdgpu_preempt_mgr_fini(adev);
>>       ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
>>       ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
>>       ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
>> @@ -1986,6 +2010,11 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>           return -EINVAL;
>>       }
>>   +    if (bo->tbo.mem.mem_type == AMDGPU_PL_PREEMPT) {
>> +        DRM_ERROR("Trying to clear preemptible memory.\n");
>> +        return -EINVAL;
>> +    }
>> +
>>       if (bo->tbo.mem.mem_type == TTM_PL_TT) {
>>           r = amdgpu_ttm_alloc_gart(&bo->tbo);
>>           if (r)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index eb84a69c4b74..2877a924086f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -31,6 +31,7 @@
>>   #define AMDGPU_PL_GDS        (TTM_PL_PRIV + 0)
>>   #define AMDGPU_PL_GWS        (TTM_PL_PRIV + 1)
>>   #define AMDGPU_PL_OA        (TTM_PL_PRIV + 2)
>> +#define AMDGPU_PL_PREEMPT    (TTM_PL_PRIV + 3)
>>     #define AMDGPU_GTT_MAX_TRANSFER_SIZE    512
>>   #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS    2
>> @@ -54,6 +55,11 @@ struct amdgpu_gtt_mgr {
>>       atomic64_t available;
>>   };
>>   +struct amdgpu_preempt_mgr {
>> +    struct ttm_resource_manager manager;
>> +    atomic64_t used;
>> +};
>> +
>>   struct amdgpu_mman {
>>       struct ttm_device        bdev;
>>       bool                initialized;
>> @@ -70,6 +76,7 @@ struct amdgpu_mman {
>>         struct amdgpu_vram_mgr vram_mgr;
>>       struct amdgpu_gtt_mgr gtt_mgr;
>> +    struct amdgpu_preempt_mgr preempt_mgr;
>>         uint64_t        stolen_vga_size;
>>       struct amdgpu_bo    *stolen_vga_memory;
>> @@ -97,6 +104,8 @@ struct amdgpu_copy_mem {
>>     int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t
>> gtt_size);
>>   void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
>> +int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
>> +void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev);
>>   int amdgpu_vram_mgr_init(struct amdgpu_device *adev);
>>   void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
>>   @@ -104,6 +113,8 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct
>> ttm_resource *mem);
>>   uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man);
>>   int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man);
>>   +uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
>> +
>>   u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
>>   int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
>>                     struct ttm_resource *mem,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 55991f393481..da155c276c51 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1817,7 +1817,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>> *adev, struct amdgpu_bo_va *bo_va,
>>                   bo = gem_to_amdgpu_bo(gobj);
>>           }
>>           mem = &bo->tbo.mem;
>> -        if (mem->mem_type == TTM_PL_TT)
>> +        if (mem->mem_type == TTM_PL_TT ||
>> +            mem->mem_type == AMDGPU_PL_PREEMPT)
>>               pages_addr = bo->tbo.ttm->dma_address;
>>       }
>>   diff --git a/include/uapi/drm/amdgpu_drm.h
>> b/include/uapi/drm/amdgpu_drm.h
>> index 9169df7fadee..4e4422bc54ea 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -97,6 +97,9 @@ extern "C" {
>>    *
>>    * %AMDGPU_GEM_DOMAIN_OA    Ordered append, used by 3D or Compute
>> engines
>>    * for appending data.
>> + *
>> + * %AMDGPU_GEM_DOMAIN_PREEMPT    Userptrs and DMAbuf imports in
>> preemptible
>> + * contexts, which allow memory to be evicted without waiting for
>> fences.
>>    */
>>   #define AMDGPU_GEM_DOMAIN_CPU        0x1
>>   #define AMDGPU_GEM_DOMAIN_GTT        0x2
>> @@ -104,12 +107,14 @@ extern "C" {
>>   #define AMDGPU_GEM_DOMAIN_GDS        0x8
>>   #define AMDGPU_GEM_DOMAIN_GWS        0x10
>>   #define AMDGPU_GEM_DOMAIN_OA        0x20
>> +#define AMDGPU_GEM_DOMAIN_PREEMPT    0x40
>>   #define AMDGPU_GEM_DOMAIN_MASK        (AMDGPU_GEM_DOMAIN_CPU | \
>>                        AMDGPU_GEM_DOMAIN_GTT | \
>>                        AMDGPU_GEM_DOMAIN_VRAM | \
>>                        AMDGPU_GEM_DOMAIN_GDS | \
>>                        AMDGPU_GEM_DOMAIN_GWS | \
>> -                     AMDGPU_GEM_DOMAIN_OA)
>> +                     AMDGPU_GEM_DOMAIN_OA | \
>> +                     AMDGPU_GEM_DOMAIN_PREEMPT)
>>     /* Flag that CPU access will be required for the case of VRAM
>> domain */
>>   #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED    (1 << 0)
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs
  2021-05-19 15:22   ` Felix Kuehling
@ 2021-05-20 10:20     ` Christian König
  0 siblings, 0 replies; 7+ messages in thread
From: Christian König @ 2021-05-20 10:20 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx



Am 19.05.21 um 17:22 schrieb Felix Kuehling:
> Am 2021-05-19 um 6:04 a.m. schrieb Christian König:
>> Am 19.05.21 um 07:45 schrieb Felix Kuehling:
>>> SG BOs such as dmabuf imports and userptr BOs do not consume system
>>> resources directly. Instead they point to resources owned elsewhere.
>>> They typically get evicted by DMABuf move notifiers of MMU notifiers.
>>> If those notifiers don't need to wait for hardware fences (i.e. the SG
>>> BOs are used in a preemptible context), then we don't need to limit
>>> them to the GTT size and we don't need TTM to evict them.
>>>
>>> Create a new domain for such preemptible SG BOs that does not impose
>>> artificial size limits and TTM evictions.
>> Please don't create an GEM domain for this. This has just to much
>> potential to be abused by userspace.
>>
>> The kernel is the only place where we can decide if the BO is
>> preemptible or not.
> I did put a check in amdgpu_gem_create_ioctl to prevent user mode from
> directly creating preemptible BOs.
>
> Instead of a domain I can use a flag in the BO. But if I put it in a
> flag (say AMDGPU_GEM_CREATE_PREEMPTIBLE), that's also accessible to user
> mode and I need to filter it out in the ioctl API. I don't see how
> that's any different.
>
> Any other ideas?

Mhm, good question.

I think the amdgpu_bo_placement_from_domain() function needs to be able 
to figure that out on it's own.

So a BO flag sounds like the right approach to me. We already have 
plenty of those which can only be set/cleared in the kernel itself.

Christian.

>
> Thanks,
>    Felix
>
>
>> Christian.
>>
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/Makefile           |   7 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |   4 +
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |   8 +
>>>    .../gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c   | 190 ++++++++++++++++++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |  37 +++-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |  11 +
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |   3 +-
>>>    include/uapi/drm/amdgpu_drm.h                 |   7 +-
>>>    8 files changed, 258 insertions(+), 9 deletions(-)
>>>    create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> index 6331a11299d0..6cf0fe871d6c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> @@ -51,9 +51,10 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>>        atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
>>>        amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
>>>        amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
>>> -    amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
>>> amdgpu_atomfirmware.o \
>>> -    amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
>>> -    amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o
>>> amdgpu_ras.o amdgpu_vm_cpu.o \
>>> +    amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o
>>> amdgpu_virt.o \
>>> +    amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
>>> +    amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o \
>>> +    amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
>>>        amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o
>>> amdgpu_nbio.o \
>>>        amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>>>        amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index 311bcdc59eda..280cc0c0a9b3 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -246,6 +246,10 @@ int amdgpu_gem_create_ioctl(struct drm_device
>>> *dev, void *data,
>>>        if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
>>>            return -EINVAL;
>>>    +    /* preemptible domain not supported by current CS API */
>>> +    if (args->in.domains & AMDGPU_GEM_DOMAIN_PREEMPT)
>>> +        return -EINVAL;
>>> +
>>>        if (!amdgpu_is_tmz(adev) && (flags &
>>> AMDGPU_GEM_CREATE_ENCRYPTED)) {
>>>            DRM_NOTE_ONCE("Cannot allocate secure buffer since TMZ is
>>> disabled\n");
>>>            return -EINVAL;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> index 745fcf3ea450..5b538e746afa 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>> @@ -194,6 +194,14 @@ void amdgpu_bo_placement_from_domain(struct
>>> amdgpu_bo *abo, u32 domain)
>>>            c++;
>>>        }
>>>    +    if (domain & AMDGPU_GEM_DOMAIN_PREEMPT) {
>>> +        places[c].fpfn = 0;
>>> +        places[c].lpfn = 0;
>>> +        places[c].mem_type = AMDGPU_PL_PREEMPT;
>>> +        places[c].flags = 0;
>>> +        c++;
>>> +    }
>>> +
>>>        if (!c) {
>>>            places[c].fpfn = 0;
>>>            places[c].lpfn = 0;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>>> new file mode 100644
>>> index 000000000000..b4185dc3c394
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c
>>> @@ -0,0 +1,190 @@
>>> +/*
>>> + * Copyright 2016-2021 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person
>>> obtaining a
>>> + * copy of this software and associated documentation files (the
>>> "Software"),
>>> + * to deal in the Software without restriction, including without
>>> limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>> sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to whom
>>> the
>>> + * Software is furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be
>>> included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>>> EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>> MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
>>> EVENT SHALL
>>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>>> DAMAGES OR
>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>> OTHERWISE,
>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>>> USE OR
>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>> + *
>>> + * Authors: Christian König, Felix Kuehling
>>> + */
>>> +
>>> +#include "amdgpu.h"
>>> +
>>> +static inline struct amdgpu_preempt_mgr *
>>> +to_preempt_mgr(struct ttm_resource_manager *man)
>>> +{
>>> +    return container_of(man, struct amdgpu_preempt_mgr, manager);
>>> +}
>>> +
>>> +/**
>>> + * DOC: mem_info_preempt_used
>>> + *
>>> + * The amdgpu driver provides a sysfs API for reporting current
>>> total amount of
>>> + * used preemptible memory.
>>> + * The file mem_info_preempt_used is used for this, and returns the
>>> current
>>> + * used size of the preemptible block, in bytes
>>> + */
>>> +static ssize_t amdgpu_mem_info_preempt_used_show(struct device *dev,
>>> +                         struct device_attribute *attr,
>>> +                         char *buf)
>>> +{
>>> +    struct drm_device *ddev = dev_get_drvdata(dev);
>>> +    struct amdgpu_device *adev = drm_to_adev(ddev);
>>> +    struct ttm_resource_manager *man;
>>> +
>>> +    man = ttm_manager_type(&adev->mman.bdev, AMDGPU_PL_PREEMPT);
>>> +    return sysfs_emit(buf, "%llu\n", amdgpu_preempt_mgr_usage(man));
>>> +}
>>> +
>>> +static DEVICE_ATTR(mem_info_preempt_used, S_IRUGO,
>>> +               amdgpu_mem_info_preempt_used_show, NULL);
>>> +
>>> +/**
>>> + * amdgpu_preempt_mgr_new - allocate a new node
>>> + *
>>> + * @man: TTM memory type manager
>>> + * @tbo: TTM BO we need this range for
>>> + * @place: placement flags and restrictions
>>> + * @mem: the resulting mem object
>>> + *
>>> + * Dummy, just count the space used without allocating resources or
>>> any limit.
>>> + */
>>> +static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man,
>>> +                  struct ttm_buffer_object *tbo,
>>> +                  const struct ttm_place *place,
>>> +                  struct ttm_resource *mem)
>>> +{
>>> +    struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
>>> +
>>> +    atomic64_add(mem->num_pages, &mgr->used);
>>> +
>>> +    mem->mm_node = NULL;
>>> +    mem->start = AMDGPU_BO_INVALID_OFFSET;
>>> +    return 0;
>>> +}
>>> +
>>> +/**
>>> + * amdgpu_preempt_mgr_del - free ranges
>>> + *
>>> + * @man: TTM memory type manager
>>> + * @mem: TTM memory object
>>> + *
>>> + * Free the allocated GTT again.
>>> + */
>>> +static void amdgpu_preempt_mgr_del(struct ttm_resource_manager *man,
>>> +                   struct ttm_resource *mem)
>>> +{
>>> +    struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
>>> +
>>> +    atomic64_sub(mem->num_pages, &mgr->used);
>>> +}
>>> +
>>> +/**
>>> + * amdgpu_preempt_mgr_usage - return usage of PREEMPT domain
>>> + *
>>> + * @man: TTM memory type manager
>>> + *
>>> + * Return how many bytes are used in the GTT domain
>>> + */
>>> +uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man)
>>> +{
>>> +    struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
>>> +    s64 result = atomic64_read(&mgr->used);
>>> +
>>> +    return (result > 0 ? result : 0) * PAGE_SIZE;
>>> +}
>>> +
>>> +/**
>>> + * amdgpu_preempt_mgr_debug - dump VRAM table
>>> + *
>>> + * @man: TTM memory type manager
>>> + * @printer: DRM printer to use
>>> + *
>>> + * Dump the table content using printk.
>>> + */
>>> +static void amdgpu_preempt_mgr_debug(struct ttm_resource_manager *man,
>>> +                     struct drm_printer *printer)
>>> +{
>>> +    struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man);
>>> +
>>> +    drm_printf(printer, "man size:%llu pages, preempt used:%lld
>>> pages\n",
>>> +           man->size, (u64)atomic64_read(&mgr->used));
>>> +}
>>> +
>>> +static const struct ttm_resource_manager_func
>>> amdgpu_preempt_mgr_func = {
>>> +    .alloc = amdgpu_preempt_mgr_new,
>>> +    .free = amdgpu_preempt_mgr_del,
>>> +    .debug = amdgpu_preempt_mgr_debug
>>> +};
>>> +
>>> +/**
>>> + * amdgpu_preempt_mgr_init - init PREEMPT manager and DRM MM
>>> + *
>>> + * @adev: amdgpu_device pointer
>>> + *
>>> + * Allocate and initialize the GTT manager.
>>> + */
>>> +int amdgpu_preempt_mgr_init(struct amdgpu_device *adev)
>>> +{
>>> +    struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr;
>>> +    struct ttm_resource_manager *man = &mgr->manager;
>>> +    int ret;
>>> +
>>> +    man->use_tt = true;
>>> +    man->func = &amdgpu_preempt_mgr_func;
>>> +
>>> +    ttm_resource_manager_init(man, (1 << 30));
>>> +
>>> +    atomic64_set(&mgr->used, 0);
>>> +
>>> +    ret = device_create_file(adev->dev,
>>> &dev_attr_mem_info_preempt_used);
>>> +    if (ret) {
>>> +        DRM_ERROR("Failed to create device file
>>> mem_info_preempt_used\n");
>>> +        return ret;
>>> +    }
>>> +
>>> +    ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT,
>>> +                   &mgr->manager);
>>> +    ttm_resource_manager_set_used(man, true);
>>> +    return 0;
>>> +}
>>> +
>>> +/**
>>> + * amdgpu_preempt_mgr_fini - free and destroy GTT manager
>>> + *
>>> + * @adev: amdgpu_device pointer
>>> + *
>>> + * Destroy and free the GTT manager, returns -EBUSY if ranges are still
>>> + * allocated inside it.
>>> + */
>>> +void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev)
>>> +{
>>> +    struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr;
>>> +    struct ttm_resource_manager *man = &mgr->manager;
>>> +    int ret;
>>> +
>>> +    ttm_resource_manager_set_used(man, false);
>>> +
>>> +    ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
>>> +    if (ret)
>>> +        return;
>>> +
>>> +    device_remove_file(adev->dev, &dev_attr_mem_info_preempt_used);
>>> +
>>> +    ttm_resource_manager_cleanup(man);
>>> +    ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, NULL);
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index d856b204cd12..c6eb6f1c7082 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -158,6 +158,7 @@ static void amdgpu_evict_flags(struct
>>> ttm_buffer_object *bo,
>>>            }
>>>            break;
>>>        case TTM_PL_TT:
>>> +    case AMDGPU_PL_PREEMPT:
>>>        default:
>>>            amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
>>>            break;
>>> @@ -217,6 +218,7 @@ static int amdgpu_ttm_map_buffer(struct
>>> ttm_buffer_object *bo,
>>>          BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
>>>               AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
>>> +    BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT);
>>>          /* Map only what can't be accessed directly */
>>>        if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
>>> @@ -480,7 +482,8 @@ static int amdgpu_bo_move(struct
>>> ttm_buffer_object *bo, bool evict,
>>>        struct ttm_resource *old_mem = &bo->mem;
>>>        int r;
>>>    -    if (new_mem->mem_type == TTM_PL_TT) {
>>> +    if (new_mem->mem_type == TTM_PL_TT ||
>>> +        new_mem->mem_type == AMDGPU_PL_PREEMPT) {
>>>            r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
>>>            if (r)
>>>                return r;
>>> @@ -498,11 +501,13 @@ static int amdgpu_bo_move(struct
>>> ttm_buffer_object *bo, bool evict,
>>>            goto out;
>>>        }
>>>        if (old_mem->mem_type == TTM_PL_SYSTEM &&
>>> -        new_mem->mem_type == TTM_PL_TT) {
>>> +        (new_mem->mem_type == TTM_PL_TT ||
>>> +         new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
>>>            ttm_bo_move_null(bo, new_mem);
>>>            goto out;
>>>        }
>>> -    if (old_mem->mem_type == TTM_PL_TT &&
>>> +    if ((old_mem->mem_type == TTM_PL_TT ||
>>> +         old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
>>>            new_mem->mem_type == TTM_PL_SYSTEM) {
>>>            r = ttm_bo_wait_ctx(bo, ctx);
>>>            if (r)
>>> @@ -587,6 +592,7 @@ static int amdgpu_ttm_io_mem_reserve(struct
>>> ttm_device *bdev,
>>>            /* system memory */
>>>            return 0;
>>>        case TTM_PL_TT:
>>> +    case AMDGPU_PL_PREEMPT:
>>>            break;
>>>        case TTM_PL_VRAM:
>>>            mem->bus.offset = mem->start << PAGE_SHIFT;
>>> @@ -1293,7 +1299,8 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt
>>> *ttm, struct ttm_resource *mem)
>>>        if (mem && mem->mem_type != TTM_PL_SYSTEM)
>>>            flags |= AMDGPU_PTE_VALID;
>>>    -    if (mem && mem->mem_type == TTM_PL_TT) {
>>> +    if (mem && (mem->mem_type == TTM_PL_TT ||
>>> +            mem->mem_type == AMDGPU_PL_PREEMPT)) {
>>>            flags |= AMDGPU_PTE_SYSTEM;
>>>              if (ttm->caching == ttm_cached)
>>> @@ -1367,6 +1374,15 @@ static bool
>>> amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>>>        }
>>>          switch (bo->mem.mem_type) {
>>> +    case AMDGPU_PL_PREEMPT:
>>> +        /* Preemptible BOs don't own system resources managed by the
>>> +         * driver (pages, VRAM, GART space). They point to resources
>>> +         * owned by someone else (e.g. pageable memory in user mode
>>> +         * or a DMABuf). They are used in a preemptible context so we
>>> +         * can guarantee no deadlocks and good QoS in case of MMU
>>> +         * notifiers or DMABuf move notifiers from the resource owner.
>>> +         */
>>> +        return false;
>>>        case TTM_PL_TT:
>>>            if (amdgpu_bo_is_amdgpu_bo(bo) &&
>>>                amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
>>> @@ -1748,6 +1764,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>>        DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
>>>             (unsigned)(gtt_size / (1024 * 1024)));
>>>    +    /* Initialize preemptible memory pool */
>>> +    r = amdgpu_preempt_mgr_init(adev);
>>> +    if (r) {
>>> +        DRM_ERROR("Failed initializing PREEMPT heap.\n");
>>> +        return r;
>>> +    }
>>> +
>>>        /* Initialize various on-chip memory pools */
>>>        r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS,
>>> adev->gds.gds_size);
>>>        if (r) {
>>> @@ -1792,6 +1815,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
>>>          amdgpu_vram_mgr_fini(adev);
>>>        amdgpu_gtt_mgr_fini(adev);
>>> +    amdgpu_preempt_mgr_fini(adev);
>>>        ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
>>>        ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
>>>        ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
>>> @@ -1986,6 +2010,11 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>>>            return -EINVAL;
>>>        }
>>>    +    if (bo->tbo.mem.mem_type == AMDGPU_PL_PREEMPT) {
>>> +        DRM_ERROR("Trying to clear preemptible memory.\n");
>>> +        return -EINVAL;
>>> +    }
>>> +
>>>        if (bo->tbo.mem.mem_type == TTM_PL_TT) {
>>>            r = amdgpu_ttm_alloc_gart(&bo->tbo);
>>>            if (r)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> index eb84a69c4b74..2877a924086f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> @@ -31,6 +31,7 @@
>>>    #define AMDGPU_PL_GDS        (TTM_PL_PRIV + 0)
>>>    #define AMDGPU_PL_GWS        (TTM_PL_PRIV + 1)
>>>    #define AMDGPU_PL_OA        (TTM_PL_PRIV + 2)
>>> +#define AMDGPU_PL_PREEMPT    (TTM_PL_PRIV + 3)
>>>      #define AMDGPU_GTT_MAX_TRANSFER_SIZE    512
>>>    #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS    2
>>> @@ -54,6 +55,11 @@ struct amdgpu_gtt_mgr {
>>>        atomic64_t available;
>>>    };
>>>    +struct amdgpu_preempt_mgr {
>>> +    struct ttm_resource_manager manager;
>>> +    atomic64_t used;
>>> +};
>>> +
>>>    struct amdgpu_mman {
>>>        struct ttm_device        bdev;
>>>        bool                initialized;
>>> @@ -70,6 +76,7 @@ struct amdgpu_mman {
>>>          struct amdgpu_vram_mgr vram_mgr;
>>>        struct amdgpu_gtt_mgr gtt_mgr;
>>> +    struct amdgpu_preempt_mgr preempt_mgr;
>>>          uint64_t        stolen_vga_size;
>>>        struct amdgpu_bo    *stolen_vga_memory;
>>> @@ -97,6 +104,8 @@ struct amdgpu_copy_mem {
>>>      int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t
>>> gtt_size);
>>>    void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
>>> +int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
>>> +void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev);
>>>    int amdgpu_vram_mgr_init(struct amdgpu_device *adev);
>>>    void amdgpu_vram_mgr_fini(struct amdgpu_device *adev);
>>>    @@ -104,6 +113,8 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct
>>> ttm_resource *mem);
>>>    uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man);
>>>    int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man);
>>>    +uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man);
>>> +
>>>    u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
>>>    int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
>>>                      struct ttm_resource *mem,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 55991f393481..da155c276c51 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -1817,7 +1817,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>> *adev, struct amdgpu_bo_va *bo_va,
>>>                    bo = gem_to_amdgpu_bo(gobj);
>>>            }
>>>            mem = &bo->tbo.mem;
>>> -        if (mem->mem_type == TTM_PL_TT)
>>> +        if (mem->mem_type == TTM_PL_TT ||
>>> +            mem->mem_type == AMDGPU_PL_PREEMPT)
>>>                pages_addr = bo->tbo.ttm->dma_address;
>>>        }
>>>    diff --git a/include/uapi/drm/amdgpu_drm.h
>>> b/include/uapi/drm/amdgpu_drm.h
>>> index 9169df7fadee..4e4422bc54ea 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -97,6 +97,9 @@ extern "C" {
>>>     *
>>>     * %AMDGPU_GEM_DOMAIN_OA    Ordered append, used by 3D or Compute
>>> engines
>>>     * for appending data.
>>> + *
>>> + * %AMDGPU_GEM_DOMAIN_PREEMPT    Userptrs and DMAbuf imports in
>>> preemptible
>>> + * contexts, which allow memory to be evicted without waiting for
>>> fences.
>>>     */
>>>    #define AMDGPU_GEM_DOMAIN_CPU        0x1
>>>    #define AMDGPU_GEM_DOMAIN_GTT        0x2
>>> @@ -104,12 +107,14 @@ extern "C" {
>>>    #define AMDGPU_GEM_DOMAIN_GDS        0x8
>>>    #define AMDGPU_GEM_DOMAIN_GWS        0x10
>>>    #define AMDGPU_GEM_DOMAIN_OA        0x20
>>> +#define AMDGPU_GEM_DOMAIN_PREEMPT    0x40
>>>    #define AMDGPU_GEM_DOMAIN_MASK        (AMDGPU_GEM_DOMAIN_CPU | \
>>>                         AMDGPU_GEM_DOMAIN_GTT | \
>>>                         AMDGPU_GEM_DOMAIN_VRAM | \
>>>                         AMDGPU_GEM_DOMAIN_GDS | \
>>>                         AMDGPU_GEM_DOMAIN_GWS | \
>>> -                     AMDGPU_GEM_DOMAIN_OA)
>>> +                     AMDGPU_GEM_DOMAIN_OA | \
>>> +                     AMDGPU_GEM_DOMAIN_PREEMPT)
>>>      /* Flag that CPU access will be required for the case of VRAM
>>> domain */
>>>    #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED    (1 << 0)

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-05-20 10:20 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-19  5:45 [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs Felix Kuehling
2021-05-19  5:45 ` [PATCH 2/3] drm/amdgpu: Use preemptible domain for KFD Felix Kuehling
2021-05-19  5:51   ` Felix Kuehling
2021-05-19  5:45 ` [PATCH 3/3] drm/amdgpu: Workaround IOMMU driver bug Felix Kuehling
2021-05-19 10:04 ` [PATCH 1/3] drm/amdgpu: Add new domain for preemptible SG BOs Christian König
2021-05-19 15:22   ` Felix Kuehling
2021-05-20 10:20     ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.