All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug
@ 2021-10-08 17:31 Zack Rusin
  2021-10-08 17:31 ` [PATCH 1/5] drm/vmwgfx: Remove the deprecated lower mem limit Zack Rusin
                   ` (5 more replies)
  0 siblings, 6 replies; 18+ messages in thread
From: Zack Rusin @ 2021-10-08 17:31 UTC (permalink / raw)
  To: dri-devel; +Cc: Zack Rusin, Christian König, Thomas Hellström

This is a largely trivial set that makes vmwgfx support module reload
and PCI hot-unplug. It also makes IGT's core_hotunplug pass instead
of kernel oops'ing.

The one "ugly" change is the "Introduce a new placement for MOB page
tables". It seems vmwgfx has been violating a TTM assumption that
TTM_PL_SYSTEM buffers are never fenced for a while. Apart from a kernel
oops on module unload it didn't seem to wreak too much havoc, but we
shouldn't be abusing TTM. So to solve it we're introducing a new
placement, which is basically system, but can deal with fenced bo's.

Cc: Christian König <christian.koenig@amd.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>

Zack Rusin (5):
  drm/vmwgfx: Remove the deprecated lower mem limit
  drm/vmwgfx: Release ttm memory if probe fails
  drm/vmwgfx: Fail to initialize on broken configs
  drm/vmwgfx: Introduce a new placement for MOB page tables
  drm/vmwgfx: Switch the internal BO's to ttm_bo_type_kernel

 drivers/gpu/drm/vmwgfx/Makefile               |  2 +-
 drivers/gpu/drm/vmwgfx/ttm_memory.c           | 99 +------------------
 drivers/gpu/drm/vmwgfx/ttm_memory.h           |  6 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c            |  2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c           |  7 ++
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c           | 40 +++++---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h           | 12 ++-
 .../gpu/drm/vmwgfx/vmwgfx_system_manager.c    | 90 +++++++++++++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c    | 58 +++++------
 9 files changed, 164 insertions(+), 152 deletions(-)
 create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c

-- 
2.30.2


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/5] drm/vmwgfx: Remove the deprecated lower mem limit
  2021-10-08 17:31 [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug Zack Rusin
@ 2021-10-08 17:31 ` Zack Rusin
  2021-10-08 17:31 ` [PATCH 2/5] drm/vmwgfx: Release ttm memory if probe fails Zack Rusin
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 18+ messages in thread
From: Zack Rusin @ 2021-10-08 17:31 UTC (permalink / raw)
  To: dri-devel; +Cc: Zack Rusin, Martin Krastev

TTM during the transition to the new page allocator lost the ability
to constrain the allocations via the lower_mem_limit. The code has
been unused since the change:
256dd44bd897 ("drm/ttm: nuke old page allocator")
and there's no reason to keep it.

Fixes: 256dd44bd897 ("drm/ttm: nuke old page allocator")
Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Martin Krastev <krastevm@vmware.com>
---
 drivers/gpu/drm/vmwgfx/ttm_memory.c | 99 +----------------------------
 drivers/gpu/drm/vmwgfx/ttm_memory.h |  6 +-
 2 files changed, 2 insertions(+), 103 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.c b/drivers/gpu/drm/vmwgfx/ttm_memory.c
index edd17c30d5a5..2ced4c06ca45 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.c
@@ -34,7 +34,6 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/swap.h>
 
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
@@ -173,69 +172,7 @@ static struct kobj_type ttm_mem_zone_kobj_type = {
 	.sysfs_ops = &ttm_mem_zone_ops,
 	.default_attrs = ttm_mem_zone_attrs,
 };
-
-static struct attribute ttm_mem_global_lower_mem_limit = {
-	.name = "lower_mem_limit",
-	.mode = S_IRUGO | S_IWUSR
-};
-
-static ssize_t ttm_mem_global_show(struct kobject *kobj,
-				 struct attribute *attr,
-				 char *buffer)
-{
-	struct ttm_mem_global *glob =
-		container_of(kobj, struct ttm_mem_global, kobj);
-	uint64_t val = 0;
-
-	spin_lock(&glob->lock);
-	val = glob->lower_mem_limit;
-	spin_unlock(&glob->lock);
-	/* convert from number of pages to KB */
-	val <<= (PAGE_SHIFT - 10);
-	return snprintf(buffer, PAGE_SIZE, "%llu\n",
-			(unsigned long long) val);
-}
-
-static ssize_t ttm_mem_global_store(struct kobject *kobj,
-				  struct attribute *attr,
-				  const char *buffer,
-				  size_t size)
-{
-	int chars;
-	uint64_t val64;
-	unsigned long val;
-	struct ttm_mem_global *glob =
-		container_of(kobj, struct ttm_mem_global, kobj);
-
-	chars = sscanf(buffer, "%lu", &val);
-	if (chars == 0)
-		return size;
-
-	val64 = val;
-	/* convert from KB to number of pages */
-	val64 >>= (PAGE_SHIFT - 10);
-
-	spin_lock(&glob->lock);
-	glob->lower_mem_limit = val64;
-	spin_unlock(&glob->lock);
-
-	return size;
-}
-
-static struct attribute *ttm_mem_global_attrs[] = {
-	&ttm_mem_global_lower_mem_limit,
-	NULL
-};
-
-static const struct sysfs_ops ttm_mem_global_ops = {
-	.show = &ttm_mem_global_show,
-	.store = &ttm_mem_global_store,
-};
-
-static struct kobj_type ttm_mem_glob_kobj_type = {
-	.sysfs_ops = &ttm_mem_global_ops,
-	.default_attrs = ttm_mem_global_attrs,
-};
+static struct kobj_type ttm_mem_glob_kobj_type = {0};
 
 static bool ttm_zones_above_swap_target(struct ttm_mem_global *glob,
 					bool from_wq, uint64_t extra)
@@ -435,11 +372,6 @@ int ttm_mem_global_init(struct ttm_mem_global *glob, struct device *dev)
 
 	si_meminfo(&si);
 
-	spin_lock(&glob->lock);
-	/* set it as 0 by default to keep original behavior of OOM */
-	glob->lower_mem_limit = 0;
-	spin_unlock(&glob->lock);
-
 	ret = ttm_mem_init_kernel_zone(glob, &si);
 	if (unlikely(ret != 0))
 		goto out_no_zone;
@@ -527,35 +459,6 @@ void ttm_mem_global_free(struct ttm_mem_global *glob,
 }
 EXPORT_SYMBOL(ttm_mem_global_free);
 
-/*
- * check if the available mem is under lower memory limit
- *
- * a. if no swap disk at all or free swap space is under swap_mem_limit
- * but available system mem is bigger than sys_mem_limit, allow TTM
- * allocation;
- *
- * b. if the available system mem is less than sys_mem_limit but free
- * swap disk is bigger than swap_mem_limit, allow TTM allocation.
- */
-bool
-ttm_check_under_lowerlimit(struct ttm_mem_global *glob,
-			uint64_t num_pages,
-			struct ttm_operation_ctx *ctx)
-{
-	int64_t available;
-
-	/* We allow over commit during suspend */
-	if (ctx->force_alloc)
-		return false;
-
-	available = get_nr_swap_pages() + si_mem_available();
-	available -= num_pages;
-	if (available < glob->lower_mem_limit)
-		return true;
-
-	return false;
-}
-
 static int ttm_mem_global_reserve(struct ttm_mem_global *glob,
 				  struct ttm_mem_zone *single_zone,
 				  uint64_t amount, bool reserve)
diff --git a/drivers/gpu/drm/vmwgfx/ttm_memory.h b/drivers/gpu/drm/vmwgfx/ttm_memory.h
index c50dba774485..7b0d617ebcb1 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_memory.h
+++ b/drivers/gpu/drm/vmwgfx/ttm_memory.h
@@ -50,8 +50,6 @@
  * @work: The workqueue callback for the shrink queue.
  * @lock: Lock to protect the @shrink - and the memory accounting members,
  * that is, essentially the whole structure with some exceptions.
- * @lower_mem_limit: include lower limit of swap space and lower limit of
- * system memory.
  * @zones: Array of pointers to accounting zones.
  * @num_zones: Number of populated entries in the @zones array.
  * @zone_kernel: Pointer to the kernel zone.
@@ -69,7 +67,6 @@ extern struct ttm_mem_global {
 	struct workqueue_struct *swap_queue;
 	struct work_struct work;
 	spinlock_t lock;
-	uint64_t lower_mem_limit;
 	struct ttm_mem_zone *zones[TTM_MEM_MAX_ZONES];
 	unsigned int num_zones;
 	struct ttm_mem_zone *zone_kernel;
@@ -91,6 +88,5 @@ int ttm_mem_global_alloc_page(struct ttm_mem_global *glob,
 void ttm_mem_global_free_page(struct ttm_mem_global *glob,
 			      struct page *page, uint64_t size);
 size_t ttm_round_pot(size_t size);
-bool ttm_check_under_lowerlimit(struct ttm_mem_global *glob, uint64_t num_pages,
-				struct ttm_operation_ctx *ctx);
+
 #endif
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/5] drm/vmwgfx: Release ttm memory if probe fails
  2021-10-08 17:31 [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug Zack Rusin
  2021-10-08 17:31 ` [PATCH 1/5] drm/vmwgfx: Remove the deprecated lower mem limit Zack Rusin
@ 2021-10-08 17:31 ` Zack Rusin
  2021-10-08 17:31 ` [PATCH 3/5] drm/vmwgfx: Fail to initialize on broken configs Zack Rusin
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 18+ messages in thread
From: Zack Rusin @ 2021-10-08 17:31 UTC (permalink / raw)
  To: dri-devel; +Cc: Zack Rusin, Martin Krastev

The ttm mem global state was leaking if the vmwgfx driver load failed.

In case of a driver load failure we have to make sure we also release
the ttm mem global state.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Martin Krastev <krastevm@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index ab9a1750e1df..8d0b083ba267 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1617,34 +1617,40 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
 	if (ret)
-		return ret;
+		goto out_error;
 
 	ret = pcim_enable_device(pdev);
 	if (ret)
-		return ret;
+		goto out_error;
 
 	vmw = devm_drm_dev_alloc(&pdev->dev, &driver,
 				 struct vmw_private, drm);
-	if (IS_ERR(vmw))
-		return PTR_ERR(vmw);
+	if (IS_ERR(vmw)) {
+		ret = PTR_ERR(vmw);
+		goto out_error;
+	}
 
 	pci_set_drvdata(pdev, &vmw->drm);
 
 	ret = ttm_mem_global_init(&ttm_mem_glob, &pdev->dev);
 	if (ret)
-		return ret;
+		goto out_error;
 
 	ret = vmw_driver_load(vmw, ent->device);
 	if (ret)
-		return ret;
+		goto out_release;
 
 	ret = drm_dev_register(&vmw->drm, 0);
-	if (ret) {
-		vmw_driver_unload(&vmw->drm);
-		return ret;
-	}
+	if (ret)
+		goto out_unload;
 
 	return 0;
+out_unload:
+	vmw_driver_unload(&vmw->drm);
+out_release:
+	ttm_mem_global_release(&ttm_mem_glob);
+out_error:
+	return ret;
 }
 
 static int __init vmwgfx_init(void)
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/5] drm/vmwgfx: Fail to initialize on broken configs
  2021-10-08 17:31 [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug Zack Rusin
  2021-10-08 17:31 ` [PATCH 1/5] drm/vmwgfx: Remove the deprecated lower mem limit Zack Rusin
  2021-10-08 17:31 ` [PATCH 2/5] drm/vmwgfx: Release ttm memory if probe fails Zack Rusin
@ 2021-10-08 17:31 ` Zack Rusin
  2021-10-08 17:31 ` [PATCH 4/5] drm/vmwgfx: Introduce a new placement for MOB page tables Zack Rusin
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 18+ messages in thread
From: Zack Rusin @ 2021-10-08 17:31 UTC (permalink / raw)
  To: dri-devel; +Cc: Zack Rusin, Martin Krastev

Some of our hosts have a bug where rescanning a pci bus results in stale
fifo memory being mapped on the host. This makes any fifo communication
impossible, resulting in various kernel crashes.

Instead of unexpectedly crashing, predictably fail to load the driver
which will preserve the system.

Fixes: fb1d9738ca05 ("drm/vmwgfx: Add DRM driver for VMware Virtual GPU")
Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Martin Krastev <krastevm@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
index 67db472d3493..a3bfbb6c3e14 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c
@@ -145,6 +145,13 @@ struct vmw_fifo_state *vmw_fifo_create(struct vmw_private *dev_priv)
 		 (unsigned int) max,
 		 (unsigned int) min,
 		 (unsigned int) fifo->capabilities);
+
+	if (unlikely(min >= max)) {
+		drm_warn(&dev_priv->drm,
+			 "FIFO memory is not usable. Driver failed to initialize.");
+		return ERR_PTR(-ENXIO);
+	}
+
 	return fifo;
 }
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/5] drm/vmwgfx: Introduce a new placement for MOB page tables
  2021-10-08 17:31 [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug Zack Rusin
                   ` (2 preceding siblings ...)
  2021-10-08 17:31 ` [PATCH 3/5] drm/vmwgfx: Fail to initialize on broken configs Zack Rusin
@ 2021-10-08 17:31 ` Zack Rusin
  2021-10-12 18:57   ` Thomas Hellström
  2021-10-08 17:31 ` [PATCH 5/5] drm/vmwgfx: Switch the internal BO's to ttm_bo_type_kernel Zack Rusin
  2021-10-08 20:28 ` [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug Thomas Hellström
  5 siblings, 1 reply; 18+ messages in thread
From: Zack Rusin @ 2021-10-08 17:31 UTC (permalink / raw)
  To: dri-devel
  Cc: Zack Rusin, Martin Krastev, Christian König, Thomas Hellström

For larger (bigger than a page) and noncontiguous mobs we have
to create page tables that allow the host to find the memory.
Those page tables just used regular system memory. Unfortunately
in TTM those BO's are not allowed to be busy, thus they can't be
fenced, and we have to fence those bo's because we don't want
to destroy the page tables while the host is still executing
the command buffers which might be accessing them.

To solve it we introduce a new placement VMW_PL_SYSTEM which
is very similar to TTM_PL_SYSTEM except that it allows
fencing. This fixes kernel oopses during unloading of the driver
(and pci hot remove/add) which were caused by busy BO's in
TTM_PL_SYSTEM being present in the delayed deletion list in
TTM (the TTM_PL_SYSTEM manager is destroyed before the delayed
deletions are executed).

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Martin Krastev <krastevm@vmware.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/vmwgfx/Makefile               |  2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c           | 14 ++-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h           | 12 ++-
 .../gpu/drm/vmwgfx/vmwgfx_system_manager.c    | 90 +++++++++++++++++++
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c    | 58 ++++++------
 5 files changed, 138 insertions(+), 38 deletions(-)
 create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c

diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index bc323f7d4032..0188a312c38c 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -9,7 +9,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
 	    vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \
 	    vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \
 	    vmwgfx_validation.o vmwgfx_page_dirty.o vmwgfx_streamoutput.o \
-            vmwgfx_devcaps.o ttm_object.o ttm_memory.o
+	    vmwgfx_devcaps.o ttm_object.o ttm_memory.o vmwgfx_system_manager.o
 
 vmwgfx-$(CONFIG_DRM_FBDEV_EMULATION) += vmwgfx_fb.o
 vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 8d0b083ba267..daf65615308a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1071,6 +1071,12 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id)
 				 "3D will be disabled.\n");
 			dev_priv->has_mob = false;
 		}
+		if (vmw_sys_man_init(dev_priv) != 0) {
+			drm_info(&dev_priv->drm,
+				 "No MOB page table memory available. "
+				 "3D will be disabled.\n");
+			dev_priv->has_mob = false;
+		}
 	}
 
 	if (dev_priv->has_mob && (dev_priv->capabilities & SVGA_CAP_DX)) {
@@ -1121,8 +1127,10 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id)
 	vmw_overlay_close(dev_priv);
 	vmw_kms_close(dev_priv);
 out_no_kms:
-	if (dev_priv->has_mob)
+	if (dev_priv->has_mob) {
 		vmw_gmrid_man_fini(dev_priv, VMW_PL_MOB);
+		vmw_sys_man_fini(dev_priv);
+	}
 	if (dev_priv->has_gmr)
 		vmw_gmrid_man_fini(dev_priv, VMW_PL_GMR);
 	vmw_devcaps_destroy(dev_priv);
@@ -1172,8 +1180,10 @@ static void vmw_driver_unload(struct drm_device *dev)
 		vmw_gmrid_man_fini(dev_priv, VMW_PL_GMR);
 
 	vmw_release_device_early(dev_priv);
-	if (dev_priv->has_mob)
+	if (dev_priv->has_mob) {
 		vmw_gmrid_man_fini(dev_priv, VMW_PL_MOB);
+		vmw_sys_man_fini(dev_priv);
+	}
 	vmw_devcaps_destroy(dev_priv);
 	vmw_vram_manager_fini(dev_priv);
 	ttm_device_fini(&dev_priv->bdev);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a833751099b5..df19dfb3ce18 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -82,8 +82,9 @@
 			VMWGFX_NUM_GB_SURFACE +\
 			VMWGFX_NUM_GB_SCREEN_TARGET)
 
-#define VMW_PL_GMR (TTM_PL_PRIV + 0)
-#define VMW_PL_MOB (TTM_PL_PRIV + 1)
+#define VMW_PL_GMR      (TTM_PL_PRIV + 0)
+#define VMW_PL_MOB      (TTM_PL_PRIV + 1)
+#define VMW_PL_SYSTEM   (TTM_PL_PRIV + 2)
 
 #define VMW_RES_CONTEXT ttm_driver_type0
 #define VMW_RES_SURFACE ttm_driver_type1
@@ -1039,7 +1040,6 @@ extern struct ttm_placement vmw_vram_placement;
 extern struct ttm_placement vmw_vram_sys_placement;
 extern struct ttm_placement vmw_vram_gmr_placement;
 extern struct ttm_placement vmw_sys_placement;
-extern struct ttm_placement vmw_evictable_placement;
 extern struct ttm_placement vmw_srf_placement;
 extern struct ttm_placement vmw_mob_placement;
 extern struct ttm_placement vmw_nonfixed_placement;
@@ -1251,6 +1251,12 @@ int vmw_overlay_num_free_overlays(struct vmw_private *dev_priv);
 int vmw_gmrid_man_init(struct vmw_private *dev_priv, int type);
 void vmw_gmrid_man_fini(struct vmw_private *dev_priv, int type);
 
+/**
+ * System memory manager
+ */
+int vmw_sys_man_init(struct vmw_private *dev_priv);
+void vmw_sys_man_fini(struct vmw_private *dev_priv);
+
 /**
  * Prime - vmwgfx_prime.c
  */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c
new file mode 100644
index 000000000000..2b86e2d8aefe
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2021 VMware, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "vmwgfx_drv.h"
+
+#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_device.h>
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_resource.h>
+#include <linux/slab.h>
+
+
+static int vmw_sys_man_alloc(struct ttm_resource_manager *man,
+			     struct ttm_buffer_object *bo,
+			     const struct ttm_place *place,
+			     struct ttm_resource **res)
+{
+	*res = kzalloc(sizeof(**res), GFP_KERNEL);
+	if (!*res)
+		return -ENOMEM;
+
+	ttm_resource_init(bo, place, *res);
+	return 0;
+}
+
+static void vmw_sys_man_free(struct ttm_resource_manager *man,
+			     struct ttm_resource *res)
+{
+	kfree(res);
+}
+
+static const struct ttm_resource_manager_func vmw_sys_manager_func = {
+	.alloc = vmw_sys_man_alloc,
+	.free = vmw_sys_man_free,
+};
+
+int vmw_sys_man_init(struct vmw_private *dev_priv)
+{
+	struct ttm_device *bdev = &dev_priv->bdev;
+	struct ttm_resource_manager *man =
+			kzalloc(sizeof(*man), GFP_KERNEL);
+
+	if (unlikely(!man))
+		return -ENOMEM;
+
+	man->use_tt = true;
+	man->func = &vmw_sys_manager_func;
+
+	ttm_resource_manager_init(man, 0);
+	ttm_set_driver_manager(bdev, VMW_PL_SYSTEM, man);
+	ttm_resource_manager_set_used(man, true);
+	return 0;
+}
+
+void vmw_sys_man_fini(struct vmw_private *dev_priv)
+{
+	struct ttm_resource_manager *man = ttm_manager_type(&dev_priv->bdev,
+							    VMW_PL_SYSTEM);
+
+	ttm_resource_manager_evict_all(&dev_priv->bdev, man);
+
+	ttm_resource_manager_set_used(man, false);
+	ttm_resource_manager_cleanup(man);
+
+	ttm_set_driver_manager(&dev_priv->bdev, VMW_PL_SYSTEM, NULL);
+	kfree(man);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index e899a936a42a..b15228e7dbeb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -92,6 +92,13 @@ static const struct ttm_place gmr_vram_placement_flags[] = {
 	}
 };
 
+static const struct ttm_place vmw_sys_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.mem_type = VMW_PL_SYSTEM,
+	.flags = 0
+};
+
 struct ttm_placement vmw_vram_gmr_placement = {
 	.num_placement = 2,
 	.placement = vram_gmr_placement_flags,
@@ -113,28 +120,11 @@ struct ttm_placement vmw_sys_placement = {
 	.busy_placement = &sys_placement_flags
 };
 
-static const struct ttm_place evictable_placement_flags[] = {
-	{
-		.fpfn = 0,
-		.lpfn = 0,
-		.mem_type = TTM_PL_SYSTEM,
-		.flags = 0
-	}, {
-		.fpfn = 0,
-		.lpfn = 0,
-		.mem_type = TTM_PL_VRAM,
-		.flags = 0
-	}, {
-		.fpfn = 0,
-		.lpfn = 0,
-		.mem_type = VMW_PL_GMR,
-		.flags = 0
-	}, {
-		.fpfn = 0,
-		.lpfn = 0,
-		.mem_type = VMW_PL_MOB,
-		.flags = 0
-	}
+struct ttm_placement vmw_pt_sys_placement = {
+	.num_placement = 1,
+	.placement = &vmw_sys_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &vmw_sys_placement_flags
 };
 
 static const struct ttm_place nonfixed_placement_flags[] = {
@@ -156,13 +146,6 @@ static const struct ttm_place nonfixed_placement_flags[] = {
 	}
 };
 
-struct ttm_placement vmw_evictable_placement = {
-	.num_placement = 4,
-	.placement = evictable_placement_flags,
-	.num_busy_placement = 1,
-	.busy_placement = &sys_placement_flags
-};
-
 struct ttm_placement vmw_srf_placement = {
 	.num_placement = 1,
 	.num_busy_placement = 2,
@@ -484,6 +467,9 @@ static int vmw_ttm_bind(struct ttm_device *bdev,
 				    &vmw_be->vsgt, ttm->num_pages,
 				    vmw_be->gmr_id);
 		break;
+	case VMW_PL_SYSTEM:
+		/* Nothing to be done for a system bind */
+		break;
 	default:
 		BUG();
 	}
@@ -507,6 +493,8 @@ static void vmw_ttm_unbind(struct ttm_device *bdev,
 	case VMW_PL_MOB:
 		vmw_mob_unbind(vmw_be->dev_priv, vmw_be->mob);
 		break;
+	case VMW_PL_SYSTEM:
+		break;
 	default:
 		BUG();
 	}
@@ -624,6 +612,7 @@ static int vmw_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *
 
 	switch (mem->mem_type) {
 	case TTM_PL_SYSTEM:
+	case VMW_PL_SYSTEM:
 	case VMW_PL_GMR:
 	case VMW_PL_MOB:
 		return 0;
@@ -670,6 +659,11 @@ static void vmw_swap_notify(struct ttm_buffer_object *bo)
 	(void) ttm_bo_wait(bo, false, false);
 }
 
+static bool vmw_memtype_is_system(uint32_t mem_type)
+{
+	return mem_type == TTM_PL_SYSTEM || mem_type == VMW_PL_SYSTEM;
+}
+
 static int vmw_move(struct ttm_buffer_object *bo,
 		    bool evict,
 		    struct ttm_operation_ctx *ctx,
@@ -680,7 +674,7 @@ static int vmw_move(struct ttm_buffer_object *bo,
 	struct ttm_resource_manager *new_man = ttm_manager_type(bo->bdev, new_mem->mem_type);
 	int ret;
 
-	if (new_man->use_tt && new_mem->mem_type != TTM_PL_SYSTEM) {
+	if (new_man->use_tt && !vmw_memtype_is_system(new_mem->mem_type)) {
 		ret = vmw_ttm_bind(bo->bdev, bo->ttm, new_mem);
 		if (ret)
 			return ret;
@@ -689,7 +683,7 @@ static int vmw_move(struct ttm_buffer_object *bo,
 	vmw_move_notify(bo, bo->resource, new_mem);
 
 	if (old_man->use_tt && new_man->use_tt) {
-		if (bo->resource->mem_type == TTM_PL_SYSTEM) {
+		if (vmw_memtype_is_system(bo->resource->mem_type)) {
 			ttm_bo_move_null(bo, new_mem);
 			return 0;
 		}
@@ -736,7 +730,7 @@ int vmw_bo_create_and_populate(struct vmw_private *dev_priv,
 	int ret;
 
 	ret = vmw_bo_create_kernel(dev_priv, bo_size,
-				   &vmw_sys_placement,
+				   &vmw_pt_sys_placement,
 				   &bo);
 	if (unlikely(ret != 0))
 		return ret;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 5/5] drm/vmwgfx: Switch the internal BO's to ttm_bo_type_kernel
  2021-10-08 17:31 [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug Zack Rusin
                   ` (3 preceding siblings ...)
  2021-10-08 17:31 ` [PATCH 4/5] drm/vmwgfx: Introduce a new placement for MOB page tables Zack Rusin
@ 2021-10-08 17:31 ` Zack Rusin
  2021-10-08 20:28 ` [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug Thomas Hellström
  5 siblings, 0 replies; 18+ messages in thread
From: Zack Rusin @ 2021-10-08 17:31 UTC (permalink / raw)
  To: dri-devel; +Cc: Zack Rusin, Martin Krastev

There's never a need to access our internal kernel bo's from
user-space. Those objects are used exclusively for internal
support to guest backed surfaces (in otable setup and mob
page tables) and there's no need to have them be of device
type, i.e. mmappable from user-space.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Martin Krastev <krastevm@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index fd007f1c1776..c97a3d5e90ce 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -494,7 +494,7 @@ int vmw_bo_create_kernel(struct vmw_private *dev_priv, unsigned long size,
 	drm_vma_node_reset(&bo->base.vma_node);
 
 	ret = ttm_bo_init_reserved(&dev_priv->bdev, bo, size,
-				   ttm_bo_type_device, placement, 0,
+				   ttm_bo_type_kernel, placement, 0,
 				   &ctx, NULL, NULL, NULL);
 	if (unlikely(ret))
 		goto error_account;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug
  2021-10-08 17:31 [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug Zack Rusin
                   ` (4 preceding siblings ...)
  2021-10-08 17:31 ` [PATCH 5/5] drm/vmwgfx: Switch the internal BO's to ttm_bo_type_kernel Zack Rusin
@ 2021-10-08 20:28 ` Thomas Hellström
  2021-10-08 20:40   ` Zack Rusin
  5 siblings, 1 reply; 18+ messages in thread
From: Thomas Hellström @ 2021-10-08 20:28 UTC (permalink / raw)
  To: Zack Rusin, dri-devel; +Cc: Christian König

On Fri, 2021-10-08 at 13:31 -0400, Zack Rusin wrote:
> This is a largely trivial set that makes vmwgfx support module reload
> and PCI hot-unplug. It also makes IGT's core_hotunplug pass instead
> of kernel oops'ing.
> 
> The one "ugly" change is the "Introduce a new placement for MOB page
> tables". It seems vmwgfx has been violating a TTM assumption that
> TTM_PL_SYSTEM buffers are never fenced for a while. Apart from a
> kernel
> oops on module unload it didn't seem to wreak too much havoc, but we
> shouldn't be abusing TTM. So to solve it we're introducing a new
> placement, which is basically system, but can deal with fenced bo's.
> 
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>

Hi, Zack,

What part of TTM doesn't allow fenced system memory currently? It was
certainly designed to allow that and vmwgfx has been relying on that
since the introduction of MOBs IIRC. Also i915 is currently relying on
that.

/Thomas



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug
  2021-10-08 20:28 ` [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug Thomas Hellström
@ 2021-10-08 20:40   ` Zack Rusin
  2021-10-08 21:13     ` Thomas Hellström
  0 siblings, 1 reply; 18+ messages in thread
From: Zack Rusin @ 2021-10-08 20:40 UTC (permalink / raw)
  To: dri-devel, thomas.hellstrom; +Cc: christian.koenig

On Fri, 2021-10-08 at 22:28 +0200, Thomas Hellström wrote:
> On Fri, 2021-10-08 at 13:31 -0400, Zack Rusin wrote:
> > This is a largely trivial set that makes vmwgfx support module reload
> > and PCI hot-unplug. It also makes IGT's core_hotunplug pass instead
> > of kernel oops'ing.
> > 
> > The one "ugly" change is the "Introduce a new placement for MOB page
> > tables". It seems vmwgfx has been violating a TTM assumption that
> > TTM_PL_SYSTEM buffers are never fenced for a while. Apart from a
> > kernel
> > oops on module unload it didn't seem to wreak too much havoc, but we
> > shouldn't be abusing TTM. So to solve it we're introducing a new
> > placement, which is basically system, but can deal with fenced bo's.
> > 
> > Cc: Christian König <christian.koenig@amd.com>
> > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> 
> Hi, Zack,
> 
> What part of TTM doesn't allow fenced system memory currently? It was
> certainly designed to allow that and vmwgfx has been relying on that
> since the introduction of MOBs IIRC. Also i915 is currently relying on
> that.

It's the shutdown. BO's allocated through the ttm system manager might
be busy during ttm_bo_put which results in them being scheduled for a
delayed deletion. The ttm system manager is disabled before the final
delayed deletion is ran in ttm_device_fini. This results in crashes
during freeing of the BO resources because they're trying to remove
themselves from a no longer existent ttm_resource_manager (e.g. in
IGT's core_hotunplug on vmwgfx)

During review of the trivial patch that was fixing it in ttm Christian
said that system domain buffers must be idle or otherwise a number of
assumptions in ttm breaks:
https://lists.freedesktop.org/archives/dri-devel/2021-September/324027.html
And later clarified that in fact system domain buffers being fenced is
illegal from a design point of view:
https://lists.freedesktop.org/archives/dri-devel/2021-September/324697.html

z

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug
  2021-10-08 20:40   ` Zack Rusin
@ 2021-10-08 21:13     ` Thomas Hellström
  2021-10-11  8:17       ` Christian König
  0 siblings, 1 reply; 18+ messages in thread
From: Thomas Hellström @ 2021-10-08 21:13 UTC (permalink / raw)
  To: Zack Rusin, dri-devel; +Cc: christian.koenig

On Fri, 2021-10-08 at 20:40 +0000, Zack Rusin wrote:
> On Fri, 2021-10-08 at 22:28 +0200, Thomas Hellström wrote:
> > On Fri, 2021-10-08 at 13:31 -0400, Zack Rusin wrote:
> > > This is a largely trivial set that makes vmwgfx support module
> > > reload
> > > and PCI hot-unplug. It also makes IGT's core_hotunplug pass
> > > instead
> > > of kernel oops'ing.
> > > 
> > > The one "ugly" change is the "Introduce a new placement for MOB
> > > page
> > > tables". It seems vmwgfx has been violating a TTM assumption that
> > > TTM_PL_SYSTEM buffers are never fenced for a while. Apart from a
> > > kernel
> > > oops on module unload it didn't seem to wreak too much havoc, but
> > > we
> > > shouldn't be abusing TTM. So to solve it we're introducing a new
> > > placement, which is basically system, but can deal with fenced
> > > bo's.
> > > 
> > > Cc: Christian König <christian.koenig@amd.com>
> > > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > 
> > Hi, Zack,
> > 
> > What part of TTM doesn't allow fenced system memory currently? It
> > was
> > certainly designed to allow that and vmwgfx has been relying on
> > that
> > since the introduction of MOBs IIRC. Also i915 is currently relying
> > on
> > that.
> 
> It's the shutdown. BO's allocated through the ttm system manager
> might
> be busy during ttm_bo_put which results in them being scheduled for a
> delayed deletion. The ttm system manager is disabled before the final
> delayed deletion is ran in ttm_device_fini. This results in crashes
> during freeing of the BO resources because they're trying to remove
> themselves from a no longer existent ttm_resource_manager (e.g. in
> IGT's core_hotunplug on vmwgfx)
> 
> During review of the trivial patch that was fixing it in ttm
> Christian
> said that system domain buffers must be idle or otherwise a number of
> assumptions in ttm breaks:
> https://lists.freedesktop.org/archives/dri-devel/2021-September/324027.html
> And later clarified that in fact system domain buffers being fenced
> is
> illegal from a design point of view:
> https://lists.freedesktop.org/archives/dri-devel/2021-September/324697.html

Hmm, this looks very odd, because I remember reminding Christian as
late as this spring that both vmwgfx and i915 sets up GPU bindings to
system buffers, as part of the review of a patch series pushing a
couple of changes to the swapout path that apparently had missed this.

This more sounds like there have been changes to TTM happening not
taking into account or knowing that TTM was designed for system buffers
bound to GPU and that there were drivers actually doing that. 

And there is still older code around clearly implying system buffers
can be fenced, like ttm_bo_swapout(), and that there is dma fences
signaling completion on work that has never touched the GPU, not to
mention async eviction where a bo may be evicted to system but has tons
of outstanding fenced work in the pipe.

So if there has been a design change WRT this I believe it should have
been brought up on dri-devel to have it properly discussed so affected
drivers could agree on the different options.

Perhaps Christian can enlighten us here. Christian?

/Thomas


> 
> z



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug
  2021-10-08 21:13     ` Thomas Hellström
@ 2021-10-11  8:17       ` Christian König
  2021-10-11 12:04         ` Thomas Hellström
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2021-10-11  8:17 UTC (permalink / raw)
  To: Thomas Hellström, Zack Rusin, dri-devel

Am 08.10.21 um 23:13 schrieb Thomas Hellström:
> On Fri, 2021-10-08 at 20:40 +0000, Zack Rusin wrote:
>> On Fri, 2021-10-08 at 22:28 +0200, Thomas Hellström wrote:
>>> On Fri, 2021-10-08 at 13:31 -0400, Zack Rusin wrote:
>>>> This is a largely trivial set that makes vmwgfx support module
>>>> reload
>>>> and PCI hot-unplug. It also makes IGT's core_hotunplug pass
>>>> instead
>>>> of kernel oops'ing.
>>>>
>>>> The one "ugly" change is the "Introduce a new placement for MOB
>>>> page
>>>> tables". It seems vmwgfx has been violating a TTM assumption that
>>>> TTM_PL_SYSTEM buffers are never fenced for a while. Apart from a
>>>> kernel
>>>> oops on module unload it didn't seem to wreak too much havoc, but
>>>> we
>>>> shouldn't be abusing TTM. So to solve it we're introducing a new
>>>> placement, which is basically system, but can deal with fenced
>>>> bo's.
>>>>
>>>> Cc: Christian König <christian.koenig@amd.com>
>>>> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>> Hi, Zack,
>>>
>>> What part of TTM doesn't allow fenced system memory currently? It
>>> was
>>> certainly designed to allow that and vmwgfx has been relying on
>>> that
>>> since the introduction of MOBs IIRC. Also i915 is currently relying
>>> on
>>> that.
>> It's the shutdown. BO's allocated through the ttm system manager
>> might
>> be busy during ttm_bo_put which results in them being scheduled for a
>> delayed deletion. The ttm system manager is disabled before the final
>> delayed deletion is ran in ttm_device_fini. This results in crashes
>> during freeing of the BO resources because they're trying to remove
>> themselves from a no longer existent ttm_resource_manager (e.g. in
>> IGT's core_hotunplug on vmwgfx)
>>
>> During review of the trivial patch that was fixing it in ttm
>> Christian
>> said that system domain buffers must be idle or otherwise a number of
>> assumptions in ttm breaks:
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Farchives%2Fdri-devel%2F2021-September%2F324027.html&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C2391a82208e6464c8db208d98aa08dd2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637693244449717755%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=BZ3C00rZDDdpKNoGa0PYwoHeM89uVzN1Md4iN2qkGB0%3D&amp;reserved=0
>> And later clarified that in fact system domain buffers being fenced
>> is
>> illegal from a design point of view:
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Farchives%2Fdri-devel%2F2021-September%2F324697.html&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C2391a82208e6464c8db208d98aa08dd2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637693244449717755%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=3eXNqeh7Ifqe6lllRMvdfJX%2F7rX7%2FqH3wldNE5AodMc%3D&amp;reserved=0
> Hmm, this looks very odd, because I remember reminding Christian as
> late as this spring that both vmwgfx and i915 sets up GPU bindings to
> system buffers, as part of the review of a patch series pushing a
> couple of changes to the swapout path that apparently had missed this.

Well that was the trigger to look more deeply into this and as far as I 
can tell TTM was never capable of doing this correctly.

> This more sounds like there have been changes to TTM happening not
> taking into account or knowing that TTM was designed for system buffers
> bound to GPU and that there were drivers actually doing that.
>
> And there is still older code around clearly implying system buffers
> can be fenced, like ttm_bo_swapout(), and that there is dma fences
> signaling completion on work that has never touched the GPU, not to
> mention async eviction where a bo may be evicted to system but has tons
> of outstanding fenced work in the pipe.
>
> So if there has been a design change WRT this I believe it should have
> been brought up on dri-devel to have it properly discussed so affected
> drivers could agree on the different options.
>
> Perhaps Christian can enlighten us here. Christian?

There are multiple occasions where we assume that BOs in the system 
domain are not accessible by the GPU, swapout and teardown are just two 
examples.

When Dave reorganized the buffer move code we also had to insert waits 
for moves to complete for anything which goes into the SYSTEM domain.

Apart from that it certainly makes sense to have that restriction. 
Memory which is accessed by the hardware and not directly evictable must 
be accounted differently.

Regards,
Christian.

>
> /Thomas
>
>
>> z
>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug
  2021-10-11  8:17       ` Christian König
@ 2021-10-11 12:04         ` Thomas Hellström
  2021-10-12  8:27           ` Christian König
  0 siblings, 1 reply; 18+ messages in thread
From: Thomas Hellström @ 2021-10-11 12:04 UTC (permalink / raw)
  To: Christian König, Zack Rusin, dri-devel

On Mon, 2021-10-11 at 10:17 +0200, Christian König wrote:
> Am 08.10.21 um 23:13 schrieb Thomas Hellström:
> > On Fri, 2021-10-08 at 20:40 +0000, Zack Rusin wrote:
> > > On Fri, 2021-10-08 at 22:28 +0200, Thomas Hellström wrote:
> > > > On Fri, 2021-10-08 at 13:31 -0400, Zack Rusin wrote:
> > > > > This is a largely trivial set that makes vmwgfx support
> > > > > module
> > > > > reload
> > > > > and PCI hot-unplug. It also makes IGT's core_hotunplug pass
> > > > > instead
> > > > > of kernel oops'ing.
> > > > > 
> > > > > The one "ugly" change is the "Introduce a new placement for
> > > > > MOB
> > > > > page
> > > > > tables". It seems vmwgfx has been violating a TTM assumption
> > > > > that
> > > > > TTM_PL_SYSTEM buffers are never fenced for a while. Apart
> > > > > from a
> > > > > kernel
> > > > > oops on module unload it didn't seem to wreak too much havoc,
> > > > > but
> > > > > we
> > > > > shouldn't be abusing TTM. So to solve it we're introducing a
> > > > > new
> > > > > placement, which is basically system, but can deal with
> > > > > fenced
> > > > > bo's.
> > > > > 
> > > > > Cc: Christian König <christian.koenig@amd.com>
> > > > > Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > > Hi, Zack,
> > > > 
> > > > What part of TTM doesn't allow fenced system memory currently?
> > > > It
> > > > was
> > > > certainly designed to allow that and vmwgfx has been relying on
> > > > that
> > > > since the introduction of MOBs IIRC. Also i915 is currently
> > > > relying
> > > > on
> > > > that.
> > > It's the shutdown. BO's allocated through the ttm system manager
> > > might
> > > be busy during ttm_bo_put which results in them being scheduled
> > > for a
> > > delayed deletion. The ttm system manager is disabled before the
> > > final
> > > delayed deletion is ran in ttm_device_fini. This results in
> > > crashes
> > > during freeing of the BO resources because they're trying to
> > > remove
> > > themselves from a no longer existent ttm_resource_manager (e.g.
> > > in
> > > IGT's core_hotunplug on vmwgfx)
> > > 
> > > During review of the trivial patch that was fixing it in ttm
> > > Christian
> > > said that system domain buffers must be idle or otherwise a
> > > number of
> > > assumptions in ttm breaks:
> > > 
> > > 

> > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Farchives%2Fdri-devel%2F2021-September%2F324027.html&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C2391a82208e6464c8db208d98aa08dd2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637693244449717755%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=BZ3C00rZDDdpKNoGa0PYwoHeM89uVzN1Md4iN2qkGB0%3D&amp;reserved=0
> > > And later clarified that in fact system domain buffers being
> > > fenced
> > > is
> > > illegal from a design point of view:
> > > 
> > > 

> > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Farchives%2Fdri-devel%2F2021-September%2F324697.html&amp;data=04%7C01%7Cchristian.koenig%40amd.com%7C2391a82208e6464c8db208d98aa08dd2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637693244449717755%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&amp;sdata=3eXNqeh7Ifqe6lllRMvdfJX%2F7rX7%2FqH3wldNE5AodMc%3D&amp;reserved=0
> > Hmm, this looks very odd, because I remember reminding Christian as
> > late as this spring that both vmwgfx and i915 sets up GPU bindings
> > to
> > system buffers, as part of the review of a patch series pushing a
> > couple of changes to the swapout path that apparently had missed
> > this.
> 
> Well that was the trigger to look more deeply into this and as far as
> I 
> can tell TTM was never capable of doing this correctly.

So apart from the teardown, which appears to be an oversight when the
system manager was introduced, where do we currently fail with this? 

> 
> > This more sounds like there have been changes to TTM happening not
> > taking into account or knowing that TTM was designed for system
> > buffers
> > bound to GPU and that there were drivers actually doing that.
> > 
> > And there is still older code around clearly implying system
> > buffers
> > can be fenced, like ttm_bo_swapout(), and that there is dma fences
> > signaling completion on work that has never touched the GPU, not to
> > mention async eviction where a bo may be evicted to system but has
> > tons
> > of outstanding fenced work in the pipe.
> > 
> > So if there has been a design change WRT this I believe it should
> > have
> > been brought up on dri-devel to have it properly discussed so
> > affected
> > drivers could agree on the different options.
> > 
> > Perhaps Christian can enlighten us here. Christian?
> 
> There are multiple occasions where we assume that BOs in the system 
> domain are not accessible by the GPU, swapout and teardown are just
> two 
> examples.
> 

At swapout we *do* wait for idle after moving to system, It's relying
on the swap_notifier to unbind. That's why the swap_notifier is there,
so swapout is working perfectly fine.


> When Dave reorganized the buffer move code we also had to insert
> waits 
> for moves to complete for anything which goes into the SYSTEM domain.
> 
> Apart from that it certainly makes sense to have that restriction. 
> Memory which is accessed by the hardware and not directly evictable
> must 
> be accounted differently.

Could you elaborate a bit on this? From a swapout point of view, it
looks to me like SYSTEM is treated just like TT by TTM? Or is the
accounting you mention something amdgpu-specific and more related to
the amd GEM domains than to the TTM memory types?

Note that TTM was never designed to deal with GPU binding, but to
provide a set of placements or memory-types where the memory can be
mapped by the CPU and bound by the GPU. TT was a special case solely
because of the mappable apertures. A bind mechanism had to be provided
for TTM to be able to map TT buffers, and most drivers used that bind
mechanism for convenience.

So now if this is going to be changed, I think we need to understand
why and think this through really thoroughly:

* What is not working and why (the teardown seems to be a trivial fix).
* How did we end up here,
* What's the cost of fixing that up compared to refactoring the drivers
that rely on bindable system memory,
* What's the justification of a system type at all if it's not GPU-
bindable, meaning it's basically equivalent to swapped-out shmem with
the exception that it's mappable?

It's probably a non-trivial effort to refactor i915 to not use system
for gpu-binding and in that case I think we need some solid
justification why we need to do that rather than fix up what's not
working with TTM + bindable system:

So could you please elaborate (assuming that the teardown is fixable)
on the other parts that don't work.

Thanks

/Thomas


> 
> Regards,
> Christian.
> 
> > 
> > /Thomas
> > 
> > 
> > > z
> > 
> 



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug
  2021-10-11 12:04         ` Thomas Hellström
@ 2021-10-12  8:27           ` Christian König
  2021-10-12  9:10             ` Thomas Hellström
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2021-10-12  8:27 UTC (permalink / raw)
  To: Thomas Hellström, Zack Rusin, dri-devel

Am 11.10.21 um 14:04 schrieb Thomas Hellström:
> [SNIP]
>>> Hmm, this looks very odd, because I remember reminding Christian as
>>> late as this spring that both vmwgfx and i915 sets up GPU bindings
>>> to
>>> system buffers, as part of the review of a patch series pushing a
>>> couple of changes to the swapout path that apparently had missed
>>> this.
>> Well that was the trigger to look more deeply into this and as far as
>> I
>> can tell TTM was never capable of doing this correctly.
> So apart from the teardown, which appear to be an oversight when the
> system manager was introduced where do whe fail currently with this?

During validation for example. Moving BOs into the system domain means 
that they are potentially swapped out.

In other words when drivers are accessing BOs in the system domain they 
always need to take care of TTM_TT_FLAG_SWAPPED manually.

>>> This more sounds like there have been changes to TTM happening not
>>> taking into account or knowing that TTM was designed for system
>>> buffers
>>> bound to GPU and that there were drivers actually doing that.
>>>
>>> And there is still older code around clearly implying system
>>> buffers
>>> can be fenced, like ttm_bo_swapout(), and that there is dma fences
>>> signaling completion on work that has never touched the GPU, not to
>>> mention async eviction where a bo may be evicted to system but has
>>> tons
>>> of outstanding fenced work in the pipe.
>>>
>>> So if there has been a design change WRT this I believe it should
>>> have
>>> been brought up on dri-devel to have it properly discussed so
>>> affected
>>> drivers could agree on the different options.
>>>
>>> Perhaps Christian can enlighten us here. Christian?
>> There are multiple occasions where we assume that BOs in the system
>> domain are not accessible by the GPU, swapout and teardown are just
>> two
>> examples.
>>
> At swapout we *do* wait for idle after moving to system, It's relying
> on the swap_notifier to unbind. That's why the swap_notifier is there,
> so swapout is working perfectly fine.

You can of course define that BOs are not swapable or call 
ttm_bo_validate() with a system domain placement and then make sure they 
are swapped in manually, but that is extremely hacky and bad design.

As far as I know that's what i915 does, but that doesn't mean that this 
is a good idea.

Additional to that I've already noted that I think this swap_notify 
callback is not a good idea either. We should rather have a separate 
TTM_PL_SWAPPED domain for this so that drivers are cleanly informed 
about the state change.

>> When Dave reorganized the buffer move code we also had to insert
>> waits
>> for moves to complete for anything which goes into the SYSTEM domain.
>>
>> Apart from that it certainly makes sense to have that restriction.
>> Memory which is accessed by the hardware and not directly evictable
>> must
>> be accounted differently.
> Could you elaborate a bit on this? From a swapout point of view, it
> looks to me like SYSTEM is treated just like TT by TTM? Or is the
> accounting you mention something amdgpu-specific and more related to
> the amd GEM domains than to the TTM memory types?

No, that is something the Android people came up with to improve the 
shrinker behavior.

> Note that TTM was never designed to deal with GPU binding, but to
> provide a set of placements or memory-types where the memory can be
> mapped by the CPU and bound by the GPU. TT was a special case solely
> because of the mappable apertures. A bind mechanism had to be provided
> for TTM to be able to map TT buffers, and most drivers used that bound
> mechanism for convenience.

Well that's certainly not correct. Before Dave moved this back into the 
drivers TTM had bind/unbind callbacks for the translation tables.

It's just that vmwgfx was an exception and all other drivers were using 
that correctly. This vmwgfx exception is now what Zack is trying to fix 
here.

> So now if this is going to be changed, I think we need to understand
> why and think this through really thoroughly:
>
> * What is not working and why (the teardown seems to be a trivial fix).
> * How did we end up here,
> * What's the cost of fixing that up compared to refactoring the drivers
> that rely on bindable system memory,
> * What's the justification of a system type at all if it's not GPU-
> bindable, meaning it's basically equivalent to swapped-out shmem with
> the exception that it's mappable?

Well, once more that isn't correct. This is nothing new and as far as I 
know that behavior has existed for as long as TTM has.

It's just that vmwgfx was a little bit special and broken in some aspects.

> It's probably a non-trivial effort to refactor i915 to not use system
> for gpu-binding and in that case I think we need some solid
> justification why we need to do that rather than fix up what's not
> working with TTM + bindable system:
>
> So could you please elaborate (assuming that the teardown is fixable)
> on the other parts that don't work.

Well you can of course design i915 however you like, you just need to be 
aware that when it breaks you need to keep the pieces.

Regards,
Christian.

>
> Thanks
>
> /Thomas
>
>
>> Regards,
>> Christian.
>>
>>> /Thomas
>>>
>>>
>>>> z
>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug
  2021-10-12  8:27           ` Christian König
@ 2021-10-12  9:10             ` Thomas Hellström
  2021-10-12 17:34               ` Zack Rusin
  0 siblings, 1 reply; 18+ messages in thread
From: Thomas Hellström @ 2021-10-12  9:10 UTC (permalink / raw)
  To: Christian König, Zack Rusin, dri-devel

On Tue, 2021-10-12 at 10:27 +0200, Christian König wrote:
> Am 11.10.21 um 14:04 schrieb Thomas Hellström:
> > [SNIP]
> > > > Hmm, this looks very odd, because I remember reminding
> > > > Christian as
> > > > late as this spring that both vmwgfx and i915 sets up GPU
> > > > bindings
> > > > to
> > > > system buffers, as part of the review of a patch series pushing
> > > > a
> > > > couple of changes to the swapout path that apparently had
> > > > missed
> > > > this.
> > > Well that was the trigger to look more deeply into this and as
> > > far as
> > > I
> > > can tell TTM was never capable of doing this correctly.
> > So apart from the teardown, which appear to be an oversight when
> > the
> > system manager was introduced where do whe fail currently with
> > this?
> 
> During validation for example. Moving BOs into the system domain
> means 
> that they are potentially swapped out.
> 
> In other words when drivers are accessing BOs in the system domain
> they 
> always need to take care of TTM_TT_FLAG_SWAPPED manually.

Yes, that's true. Initially TTMs were populated on a page basis. All
users of a particular page was required to validate that it was
present. This was later changed to per-tt population by Jerome I think
and somewhere along the line that requirement on the user to validate
appears to have gotten lost as well.

> 
> > > > This more sounds like there have been changes to TTM happening
> > > > not
> > > > taking into account or knowing that TTM was designed for system
> > > > buffers
> > > > bound to GPU and that there were drivers actually doing that.
> > > > 
> > > > And there is still older code around clearly implying system
> > > > buffers
> > > > can be fenced, like ttm_bo_swapout(), and that there is dma
> > > > fences
> > > > signaling completion on work that has never touched the GPU,
> > > > not to
> > > > mention async eviction where a bo may be evicted to system but
> > > > has
> > > > tons
> > > > of outstanding fenced work in the pipe.
> > > > 
> > > > So if there has been a design change WRT this I believe it
> > > > should
> > > > have
> > > > been brought up on dri-devel to have it properly discussed so
> > > > affected
> > > > drivers could agree on the different options.
> > > > 
> > > > Perhaps Christian can enlighten us here. Christian?
> > > There are multiple occasions where we assume that BOs in the
> > > system
> > > domain are not accessible by the GPU, swapout and teardown are
> > > just
> > > two
> > > examples.
> > > 
> > At swapout we *do* wait for idle after moving to system, It's
> > relying
> > on the swap_notifier to unbind. That's why the swap_notifier is
> > there,
> > so swapout is working perfectly fine.
> 
> You can of course define that BOs are not swapable or call 
> ttm_bo_validate() with a system domain placement and then make sure
> they 
> are swapped in manually, but that is extremely hacky and bad design.
> 
> As far as I know that's what i915 does, but that doesn't mean that
> this 
> is a good idea.

It might be that it was not a good idea, but this was the initial
design for TTM. 

> 
> Additional to that I've already noted that I think this swap_notify 
> callback is not a good idea either. We should rather have a separate 
> TTM_PL_SWAPPED domain for this so that drivers are cleanly informed 
> about the state change.
> 
> > > When Dave reorganized the buffer move code we also had to insert
> > > waits
> > > for moves to complete for anything which goes into the SYSTEM
> > > domain.
> > > 
> > > Apart from that it certainly makes sense to have that
> > > restriction.
> > > Memory which is accessed by the hardware and not directly
> > > evictable
> > > must
> > > be accounted differently.
> > Could you elaborate a bit on this? From a swapout point of view, it
> > looks to me like SYSTEM is treated just like TT by TTM? Or is the
> > accounting you mention something amdgpu-specific and more related
> > to
> > the amd GEM domains than to the TTM memory types?
> 
> No, that is something the Android people came up with to improve the 
> shrinker behavior.
> 
> > Note that TTM was never designed to deal with GPU binding, but to
> > provide a set of placements or memory-types where the memory can be
> > mapped by the CPU and bound by the GPU. TT was a special case
> > solely
> > because of the mappable apertures. A bind mechanism had to be
> > provided
> > for TTM to be able to map TT buffers, and most drivers used that
> > bound
> > mechanism for convenience.
> 
> Well that's certainly not correct. Before Dave moved this back into
> the 
> drivers TTM had bind/unbind callbacks for the translation tables.

Yes it had, and as discussed when Dave removed them, these were solely
intended to be used from TTM's point of view to keep track of when data
was in mappable apertures, so that TTM could point CPU mappings
correctly. 

Now most later drivers found that convenient and used those also to
bind other memory types, like vmwgfx does for GMR and MOB memory for
example, but that never meant it was a required use-pattern. Don't have
time to dig commit messages up but IIRC this was mentioned a number of
times during the years.

> 
> It's just that vmwgfx was an exception and all other drivers where
> using 
> that correctly. This vmwgfx exception is now what Zack is trying to
> fix 
> here.

While vmwgfx is binding page-tables to system it also used the bind /
unbind mechanisms for other memory types for convenience. Other drivers
most probably used copy-paste and weren't aware of the feature.

> 
> > So now if this is going to be changed, I think we need to
> > understand
> > why and think this through really thoroughly:
> > 
> > * What is not working and why (the teardown seems to be a trivial
> > fix).
> > * How did we end up here,
> > * What's the cost of fixing that up compared to refactoring the
> > drivers
> > that rely on bindable system memory,
> > * What's the justification of a system type at all if it's not GPU-
> > bindable, meaning it's basically equivalent to swapped-out shmem
> > with
> > the exception that it's mappable?
> 
> Well, once more that isn't correct. This is nothing new and as far as
> I 
> know that behavior existing as long as TTM existed.

I'm not sure whats incorrect? I'm trying to explain what the initial
design was, and it may of course have been bad and the one you propose
a better one and if required we certainly need to fix i915 to align
with a new one.

What worries me though, that if you perceive the design differently and
change things in TTM according to that perception that breaks drivers
that rely on the initial design and then force drivers to change
claiming they are incorrect without a thorough discussion on dri-devel,
that's IMHO not good.

So I guess we don't have much choice other than to refactor i915 to
align to not gpu-bind system, but could we meanwhile at least fix that
teardown ordering while that's being worked on?

/Thomas



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug
  2021-10-12  9:10             ` Thomas Hellström
@ 2021-10-12 17:34               ` Zack Rusin
  2021-10-13 12:50                 ` Daniel Vetter
  0 siblings, 1 reply; 18+ messages in thread
From: Zack Rusin @ 2021-10-12 17:34 UTC (permalink / raw)
  To: dri-devel, christian.koenig, thomas.hellstrom

On Tue, 2021-10-12 at 11:10 +0200, Thomas Hellström wrote:
> On Tue, 2021-10-12 at 10:27 +0200, Christian König wrote:
> > Am 11.10.21 um 14:04 schrieb Thomas Hellström:
> > 
> > > > 
> 
> > > So now if this is going to be changed, I think we need to
> > > understand
> > > why and think this through really thoroughly:
> > > 
> > > * What is not working and why (the teardown seems to be a trivial
> > > fix).
> > > * How did we end up here,
> > > * What's the cost of fixing that up compared to refactoring the
> > > drivers
> > > that rely on bindable system memory,
> > > * What's the justification of a system type at all if it's not
> > > GPU-
> > > bindable, meaning it's basically equivalent to swapped-out shmem
> > > with
> > > the exception that it's mappable?
> > 
> > Well, once more that isn't correct. This is nothing new and as far
> > as
> > I 
> > know that behavior existing as long as TTM existed.
> 
> I'm not sure whats incorrect? I'm trying to explain what the initial
> design was, and it may of course have been bad and the one you
> propose
> a better one and if required we certainly need to fix i915 to align
> with a new one.
> 
> What worries me though, that if you perceive the design differently
> and
> change things in TTM according to that perception that breaks drivers
> that rely on the initial design and then force drivers to change
> claiming they are incorrect without a thorough discussion on dri-
> devel,
> that's IMHO not good.

We should probably do that in a separate thread so that this
fundamentally important discussion is easier to find and reference in
the future. It looks like we're settling on a decision here so I'd
appreciate an Acked-by for the patch 4/5 just so it doesn't look like I
was making things up to someone looking at git history in the future.

It seems that in general TTM was designed to be able to handle an
amazing number of special/corner cases at a cost of complexity which
meant that over the years very few people understood it and the code
handling those cases sometimes broke. It sounds like Christian is now
trying to rein it in and make the code a lot more focused.

Working on other OS'es for the last few years, certainly made me
appreciate simple frameworks that move complexity towards drivers that
actually need them, e.g. it's of course anecdotal but I found wddm gpu
virtual addressing models (iommu/gpummu) a lot easier to grok.

On the flip side that does mean that vmwgfx and i915 need to redo some
code. For vmwgfx it's probably a net positive anyway as we've been
using TTM for, what is really nowadays, an integrated GPU so maybe it's
time for us to think about transition to gem.

z


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/5] drm/vmwgfx: Introduce a new placement for MOB page tables
  2021-10-08 17:31 ` [PATCH 4/5] drm/vmwgfx: Introduce a new placement for MOB page tables Zack Rusin
@ 2021-10-12 18:57   ` Thomas Hellström
  2021-10-13  4:09     ` Zack Rusin
  0 siblings, 1 reply; 18+ messages in thread
From: Thomas Hellström @ 2021-10-12 18:57 UTC (permalink / raw)
  To: Zack Rusin, dri-devel; +Cc: Martin Krastev, Christian König


On 10/8/21 19:31, Zack Rusin wrote:
> For larger (bigger than a page) and noncontiguous mobs we have
> to create page tables that allow the host to find the memory.
> Those page tables just used regular system memory. Unfortunately
> in TTM those BO's are not allowed to be busy thus can't be
> fenced and we have to fence those bo's  because we don't want
> to destroy the page tables while the host is still executing
> the command buffers which might be accessing them.
>
> To solve it we introduce a new placement VMW_PL_SYSTEM which
> is very similar to TTM_PL_SYSTEM except that it allows
> fencing. This fixes kernel oops'es during unloading of the driver
> (and pci hot remove/add) which were caused by busy BO's in
> TTM_PL_SYSTEM being present in the delayed deletion list in
> TTM (TTM_PL_SYSTEM manager is destroyed before the delayed
> deletions are executed)
>
> Signed-off-by: Zack Rusin <zackr@vmware.com>
> Reviewed-by: Martin Krastev <krastevm@vmware.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>

In general looks good to me. Some suggestions below:


> ---
>   drivers/gpu/drm/vmwgfx/Makefile               |  2 +-
>   drivers/gpu/drm/vmwgfx/vmwgfx_drv.c           | 14 ++-
>   drivers/gpu/drm/vmwgfx/vmwgfx_drv.h           | 12 ++-
>   .../gpu/drm/vmwgfx/vmwgfx_system_manager.c    | 90 +++++++++++++++++++
>   drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c    | 58 ++++++------
>   5 files changed, 138 insertions(+), 38 deletions(-)
>   create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c
>
> diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
> index bc323f7d4032..0188a312c38c 100644
> --- a/drivers/gpu/drm/vmwgfx/Makefile
> +++ b/drivers/gpu/drm/vmwgfx/Makefile
> @@ -9,7 +9,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
>   	    vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \
>   	    vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \
>   	    vmwgfx_validation.o vmwgfx_page_dirty.o vmwgfx_streamoutput.o \
> -            vmwgfx_devcaps.o ttm_object.o ttm_memory.o
> +	    vmwgfx_devcaps.o ttm_object.o ttm_memory.o vmwgfx_system_manager.o
>   
>   vmwgfx-$(CONFIG_DRM_FBDEV_EMULATION) += vmwgfx_fb.o
>   vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index 8d0b083ba267..daf65615308a 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -1071,6 +1071,12 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id)
>   				 "3D will be disabled.\n");
>   			dev_priv->has_mob = false;
>   		}
> +		if (vmw_sys_man_init(dev_priv) != 0) {
> +			drm_info(&dev_priv->drm,
> +				 "No MOB page table memory available. "
> +				 "3D will be disabled.\n");
> +			dev_priv->has_mob = false;
> +		}
>   	}
>   
>   	if (dev_priv->has_mob && (dev_priv->capabilities & SVGA_CAP_DX)) {
> @@ -1121,8 +1127,10 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id)
>   	vmw_overlay_close(dev_priv);
>   	vmw_kms_close(dev_priv);
>   out_no_kms:
> -	if (dev_priv->has_mob)
> +	if (dev_priv->has_mob) {
>   		vmw_gmrid_man_fini(dev_priv, VMW_PL_MOB);
> +		vmw_sys_man_fini(dev_priv);
> +	}
>   	if (dev_priv->has_gmr)
>   		vmw_gmrid_man_fini(dev_priv, VMW_PL_GMR);
>   	vmw_devcaps_destroy(dev_priv);
> @@ -1172,8 +1180,10 @@ static void vmw_driver_unload(struct drm_device *dev)
>   		vmw_gmrid_man_fini(dev_priv, VMW_PL_GMR);
>   
>   	vmw_release_device_early(dev_priv);
> -	if (dev_priv->has_mob)
> +	if (dev_priv->has_mob) {
>   		vmw_gmrid_man_fini(dev_priv, VMW_PL_MOB);
> +		vmw_sys_man_fini(dev_priv);
> +	}
>   	vmw_devcaps_destroy(dev_priv);
>   	vmw_vram_manager_fini(dev_priv);
>   	ttm_device_fini(&dev_priv->bdev);
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> index a833751099b5..df19dfb3ce18 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
> @@ -82,8 +82,9 @@
>   			VMWGFX_NUM_GB_SURFACE +\
>   			VMWGFX_NUM_GB_SCREEN_TARGET)
>   
> -#define VMW_PL_GMR (TTM_PL_PRIV + 0)
> -#define VMW_PL_MOB (TTM_PL_PRIV + 1)
> +#define VMW_PL_GMR      (TTM_PL_PRIV + 0)
> +#define VMW_PL_MOB      (TTM_PL_PRIV + 1)
> +#define VMW_PL_SYSTEM   (TTM_PL_PRIV + 2)
>   
>   #define VMW_RES_CONTEXT ttm_driver_type0
>   #define VMW_RES_SURFACE ttm_driver_type1
> @@ -1039,7 +1040,6 @@ extern struct ttm_placement vmw_vram_placement;
>   extern struct ttm_placement vmw_vram_sys_placement;
>   extern struct ttm_placement vmw_vram_gmr_placement;
>   extern struct ttm_placement vmw_sys_placement;
> -extern struct ttm_placement vmw_evictable_placement;
>   extern struct ttm_placement vmw_srf_placement;
>   extern struct ttm_placement vmw_mob_placement;
>   extern struct ttm_placement vmw_nonfixed_placement;
> @@ -1251,6 +1251,12 @@ int vmw_overlay_num_free_overlays(struct vmw_private *dev_priv);
>   int vmw_gmrid_man_init(struct vmw_private *dev_priv, int type);
>   void vmw_gmrid_man_fini(struct vmw_private *dev_priv, int type);
>   
> +/**
> + * System memory manager
> + */
> +int vmw_sys_man_init(struct vmw_private *dev_priv);
> +void vmw_sys_man_fini(struct vmw_private *dev_priv);
> +
>   /**
>    * Prime - vmwgfx_prime.c
>    */
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c
> new file mode 100644
> index 000000000000..2b86e2d8aefe
> --- /dev/null
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_system_manager.c
> @@ -0,0 +1,90 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/*
> + * Copyright 2021 VMware, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person
> + * obtaining a copy of this software and associated documentation
> + * files (the "Software"), to deal in the Software without
> + * restriction, including without limitation the rights to use, copy,
> + * modify, merge, publish, distribute, sublicense, and/or sell copies
> + * of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> + * included in all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + */
> +
> +#include "vmwgfx_drv.h"
> +
> +#include <drm/ttm/ttm_bo_driver.h>
> +#include <drm/ttm/ttm_device.h>
> +#include <drm/ttm/ttm_placement.h>
> +#include <drm/ttm/ttm_resource.h>
> +#include <linux/slab.h>
> +
> +
> +static int vmw_sys_man_alloc(struct ttm_resource_manager *man,
> +			     struct ttm_buffer_object *bo,
> +			     const struct ttm_place *place,
> +			     struct ttm_resource **res)
> +{
> +	*res = kzalloc(sizeof(**res), GFP_KERNEL);
> +	if (!*res)
> +		return -ENOMEM;
> +
> +	ttm_resource_init(bo, place, *res);
> +	return 0;
> +}
> +
> +static void vmw_sys_man_free(struct ttm_resource_manager *man,
> +			     struct ttm_resource *res)
> +{
> +	kfree(res);
> +}
> +
> +static const struct ttm_resource_manager_func vmw_sys_manager_func = {
> +	.alloc = vmw_sys_man_alloc,
> +	.free = vmw_sys_man_free,
> +};
> +
> +int vmw_sys_man_init(struct vmw_private *dev_priv)
> +{
> +	struct ttm_device *bdev = &dev_priv->bdev;
> +	struct ttm_resource_manager *man =
> +			kzalloc(sizeof(*man), GFP_KERNEL);
> +
> +	if (unlikely(!man))
> +		return -ENOMEM;
Nit: Branch prediction hints are typically not used in driver code. And 
here the potential benefit indeed appears small :)
> +
> +	man->use_tt = true;
> +	man->func = &vmw_sys_manager_func;
> +
> +	ttm_resource_manager_init(man, 0);
> +	ttm_set_driver_manager(bdev, VMW_PL_SYSTEM, man);
> +	ttm_resource_manager_set_used(man, true);
> +	return 0;
> +}
> +
> +void vmw_sys_man_fini(struct vmw_private *dev_priv)
> +{
> +	struct ttm_resource_manager *man = ttm_manager_type(&dev_priv->bdev,
> +							    VMW_PL_SYSTEM);
> +
> +	ttm_resource_manager_evict_all(&dev_priv->bdev, man);
> +
> +	ttm_resource_manager_set_used(man, false);
> +	ttm_resource_manager_cleanup(man);
> +
> +	ttm_set_driver_manager(&dev_priv->bdev, VMW_PL_SYSTEM, NULL);
> +	kfree(man);
> +}

I seem to recognize the general pattern here from the ttm_sys_manager, 
Any chance we could add what's needed to the ttm_sys_manager and make 
the code reusable? That's the _fini function and the memory type choice 
I guess. I figure i915 will need exactly the same.

/Thomas



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/5] drm/vmwgfx: Introduce a new placement for MOB page tables
  2021-10-12 18:57   ` Thomas Hellström
@ 2021-10-13  4:09     ` Zack Rusin
  0 siblings, 0 replies; 18+ messages in thread
From: Zack Rusin @ 2021-10-13  4:09 UTC (permalink / raw)
  To: dri-devel, thomas.hellstrom; +Cc: christian.koenig, Martin Krastev

On Tue, 2021-10-12 at 20:57 +0200, Thomas Hellström wrote:
> > +void vmw_sys_man_fini(struct vmw_private *dev_priv)
> > +{
> > +       struct ttm_resource_manager *man =
> > ttm_manager_type(&dev_priv->bdev,
> > +                                                          
> > VMW_PL_SYSTEM);
> > +
> > +       ttm_resource_manager_evict_all(&dev_priv->bdev, man);
> > +
> > +       ttm_resource_manager_set_used(man, false);
> > +       ttm_resource_manager_cleanup(man);
> > +
> > +       ttm_set_driver_manager(&dev_priv->bdev, VMW_PL_SYSTEM,
> > NULL);
> > +       kfree(man);
> > +}
> 
> I seem to recognize the general pattern here from the
> ttm_sys_manager, 
> Any chance we could add what's needed to the ttm_sys_manager and make
> the code reusable? That's the _fini function and the memory type
> choice 
> I guess. I figure i915 will need exactly the same.

I think technically we could share this entire thing. I'm not sure how
many GPU specific features one needs for "system memory placement but
allowing fencing".

As to sharing just the fini, I'd be happy to share any code we can but
it'd be probably be only between vmwgfx and i915 because the default
sys_man doesn't need the evict_all and it's allocated as part of its
parent struct so it doesn't need to free. We could trivially add void
ttm_fencable_sys_man_fini(struct ttm_device *dev, u32 mem_type) but I'd
probably wait until i915 is ready to use it to avoid adding shared code
that's only used by a single driver :)

z

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug
  2021-10-12 17:34               ` Zack Rusin
@ 2021-10-13 12:50                 ` Daniel Vetter
  2021-10-13 14:56                   ` Zack Rusin
  0 siblings, 1 reply; 18+ messages in thread
From: Daniel Vetter @ 2021-10-13 12:50 UTC (permalink / raw)
  To: Zack Rusin; +Cc: dri-devel, christian.koenig, thomas.hellstrom

On Tue, Oct 12, 2021 at 05:34:50PM +0000, Zack Rusin wrote:
> On Tue, 2021-10-12 at 11:10 +0200, Thomas Hellström wrote:
> > On Tue, 2021-10-12 at 10:27 +0200, Christian König wrote:
> > > Am 11.10.21 um 14:04 schrieb Thomas Hellström:
> > > 
> > > > > 
> > 
> > > > So now if this is going to be changed, I think we need to
> > > > understand
> > > > why and think this through really thoroughly:
> > > > 
> > > > * What is not working and why (the teardown seems to be a trivial
> > > > fix).
> > > > * How did we end up here,
> > > > * What's the cost of fixing that up compared to refactoring the
> > > > drivers
> > > > that rely on bindable system memory,
> > > > * What's the justification of a system type at all if it's not
> > > > GPU-
> > > > bindable, meaning it's basically equivalent to swapped-out shmem
> > > > with
> > > > the exception that it's mappable?
> > > 
> > > Well, once more that isn't correct. This is nothing new and as far
> > > as
> > > I 
> > > know that behavior existing as long as TTM existed.
> > 
> > I'm not sure what's incorrect? I'm trying to explain what the initial
> > design was, and it may of course have been bad and the one you
> > propose
> > a better one and if required we certainly need to fix i915 to align
> > with a new one.
> > 
> > What worries me though, that if you perceive the design differently
> > and
> > change things in TTM according to that perception that breaks drivers
> > that rely on the initial design and then force drivers to change
> > claiming they are incorrect without a thorough discussion on dri-
> > devel,
> > that's IMHO not good.
> 
> We should probably do that in a separate thread so that this,
> fundamentally important, discussion is easier to find and reference in
> the future. It looks like we're settling on a decision here so I'd
> appreciate an Acked-by for the patch 4/5 just so it doesn't look like I
> was making things up to someone looking at git history in the future.

Jumping in sidesways and late, and also without real context on the
decision itself:

The way to properly formalize this is
- type a kerneldoc patch which writes down the rules we agree on, whether
  that's uapi, or internal helper api like for ttm, or on types or
  whatever
- get acks from everyone who participated + everyone who might care
- merge it

> It seems that in general TTM was designed to be able to handle an
> amazing number of special/corner cases at a cost of complexity which
> meant that over the years very few people understood it and the code
> handling those cases sometimes broke. It sounds like Christian is now
> trying to rein it in and make the code a lot more focused.
> 
> Working on other OS'es for the last few years, certainly made me
> appreciate simple frameworks that move complexity towards drivers that
> actually need them, e.g. it's of course anecdotal but I found wddm gpu
> virtual addressing models (iommu/gpummu) a lot easier to grok.
> 
> On the flip side that does mean that vmwgfx and i915 need to redo some
> code. For vmwgfx it's probably a net positive anyway as we've been
> using TTM for, what is really nowadays, an integrated GPU so maybe it's
> time for us to think about transition to gem.

Aside, but we're looking at adopting ttm for integrated gpu too. The
execbuf utils and dynamic memory management helpers for pure gem just
aren't quite there yet, and improving ttm a bit in this area looks
reasonable (like adding a unified memory aware shrinker like we have in
i915-gem).

Also I thought vmwgfx is using ttm to also manage some id spaces, you'd
have to hand-roll that.

Anyway entirely orthogonal.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug
  2021-10-13 12:50                 ` Daniel Vetter
@ 2021-10-13 14:56                   ` Zack Rusin
  0 siblings, 0 replies; 18+ messages in thread
From: Zack Rusin @ 2021-10-13 14:56 UTC (permalink / raw)
  To: daniel; +Cc: dri-devel, christian.koenig, thomas.hellstrom

On Wed, 2021-10-13 at 14:50 +0200, Daniel Vetter wrote:
> On Tue, Oct 12, 2021 at 05:34:50PM +0000, Zack Rusin wrote:
> 
> > On the flip side that does mean that vmwgfx and i915 need to redo
> > some
> > code. For vmwgfx it's probably a net positive anyway as we've been
> > using TTM for, what is really nowadays, an integrated GPU so maybe
> > it's
> > time for us to think about transition to gem.
> 
> Aside, but we're looking at adopting ttm for integrated gpu too. The
> execbuf utils and dynamic memory management helpers for pure gem just
> aren't quite there yet, and improving ttm a bit in this area looks
> reasonable (like adding a unified memory aware shrinker like we have
> in
> i915-gem).
> 

That would certainly be a big help. The situation that I want to avoid
is having vmwgfx using TTM for what no other driver is using it for.


> Also I thought vmwgfx is using ttm to also manage some id spaces,
> you'd
> have to hand-roll that.

Yes, it's work either way, it's likely less code with GEM but we'd lose
support for 3D on older hardware where our device did have dedicated
VRAM. 

Nowadays memory management in our device is rather trivial: every GPU
object is just kernel virtual memory. To allow our virtual device to
write into that memory we send it an identifier to be able to name the
object (we use id's but it could be just the kernel virtual address as
integer) plus a page table because, of course, vmx can't read guest
kernel's page tables so we need to map the kernel virtual address space
to physical addresses so that the host can write into them. So mm in
vmwgfx shouldn't require performance enhancing drugs to understand and
drug usage while writing vmwgfx code should remain purely recreational
;)

z


^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2021-10-13 14:56 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-08 17:31 [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug Zack Rusin
2021-10-08 17:31 ` [PATCH 1/5] drm/vmwgfx: Remove the deprecated lower mem limit Zack Rusin
2021-10-08 17:31 ` [PATCH 2/5] drm/vmwgfx: Release ttm memory if probe fails Zack Rusin
2021-10-08 17:31 ` [PATCH 3/5] drm/vmwgfx: Fail to initialize on broken configs Zack Rusin
2021-10-08 17:31 ` [PATCH 4/5] drm/vmwgfx: Introduce a new placement for MOB page tables Zack Rusin
2021-10-12 18:57   ` Thomas Hellström
2021-10-13  4:09     ` Zack Rusin
2021-10-08 17:31 ` [PATCH 5/5] drm/vmwgfx: Switch the internal BO's to ttm_bo_type_kernel Zack Rusin
2021-10-08 20:28 ` [PATCH 0/5] drm/vmwgfx: Support module unload and hotunplug Thomas Hellström
2021-10-08 20:40   ` Zack Rusin
2021-10-08 21:13     ` Thomas Hellström
2021-10-11  8:17       ` Christian König
2021-10-11 12:04         ` Thomas Hellström
2021-10-12  8:27           ` Christian König
2021-10-12  9:10             ` Thomas Hellström
2021-10-12 17:34               ` Zack Rusin
2021-10-13 12:50                 ` Daniel Vetter
2021-10-13 14:56                   ` Zack Rusin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.