* Explicit fencing on multiple timelines, again
@ 2016-10-07  9:45 Chris Wilson
  2016-10-07  9:45 ` [PATCH 01/42] drm/i915: Allow disabling error capture Chris Wilson
                   ` (44 more replies)
  0 siblings, 45 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:45 UTC (permalink / raw)
  To: intel-gfx

After a hiatus to fix up some fallout from the reset work, here we are again.
The biggest features needing review are the changes to memory management
that allow deferred freeing (to prevent recursion whilst unbinding an
active object), and the interaction with atomic modesets.
-Chris

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 01/42] drm/i915: Allow disabling error capture
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
@ 2016-10-07  9:45 ` Chris Wilson
  2016-10-07  9:45 ` [PATCH 02/42] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
                   ` (43 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:45 UTC (permalink / raw)
  To: intel-gfx

We currently capture the GPU state after we detect a hang. This is vital
for us to both triage and debug hangs in the wild (post-mortem
debugging). However, it comes at the cost of running some potentially
dangerous code (since it has to make very few assumptions about the state
of the driver) that is quite resource intensive.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig          | 10 ++++++++++
 drivers/gpu/drm/i915/i915_debugfs.c   |  6 ++++++
 drivers/gpu/drm/i915/i915_drv.h       | 16 ++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c |  7 +++++++
 drivers/gpu/drm/i915/i915_params.c    |  9 +++++++++
 drivers/gpu/drm/i915/i915_params.h    |  1 +
 drivers/gpu/drm/i915/i915_sysfs.c     |  8 ++++++++
 drivers/gpu/drm/i915/intel_display.c  |  4 ++++
 drivers/gpu/drm/i915/intel_overlay.c  |  4 ++++
 9 files changed, 65 insertions(+)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7769e469118f..10a6ac11b6a9 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -46,6 +46,16 @@ config DRM_I915_PRELIMINARY_HW_SUPPORT
 
 	  If in doubt, say "N".
 
+config DRM_I915_CAPTURE_ERROR
+	bool "Enable capturing GPU state following a hang"
+	depends on DRM_I915
+	default y
+	help
+	  This option enables capturing the GPU state when a hang is detected.
+	  This information is vital for triaging hangs and assists in debugging.
+
+	  If in doubt, say "Y".
+
 config DRM_I915_USERPTR
 	bool "Always enable userptr support"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 20689f1cd719..e4b5ba771bea 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -960,6 +960,8 @@ static int i915_hws_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 static ssize_t
 i915_error_state_write(struct file *filp,
 		       const char __user *ubuf,
@@ -1042,6 +1044,8 @@ static const struct file_operations i915_error_state_fops = {
 	.release = i915_error_state_release,
 };
 
+#endif
+
 static int
 i915_next_seqno_get(void *data, u64 *val)
 {
@@ -5398,7 +5402,9 @@ static const struct i915_debugfs_files {
 	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
 	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
 	{"i915_error_state", &i915_error_state_fops},
+#endif
 	{"i915_next_seqno", &i915_next_seqno_fops},
 	{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
 	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a219a3534750..e24f9ccf4467 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3544,6 +3544,8 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
 #endif
 
 /* i915_gpu_error.c */
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
 int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
@@ -3564,6 +3566,20 @@ void i915_error_state_get(struct drm_device *dev,
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
 void i915_destroy_error_state(struct drm_device *dev);
 
+#else
+
+static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
+					    u32 engine_mask,
+					    const char *error_msg)
+{
+}
+
+static inline void i915_destroy_error_state(struct drm_device *dev)
+{
+}
+
+#endif
+
 void i915_get_engine_instdone(struct drm_i915_private *dev_priv,
 			      enum intel_engine_id engine_id,
 			      struct intel_instdone *instdone);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b5b58692ac5a..9b395ffa3b6a 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -30,6 +30,8 @@
 #include <generated/utsrelease.h>
 #include "i915_drv.h"
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 static const char *engine_str(int engine)
 {
 	switch (engine) {
@@ -1464,6 +1466,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
 	struct drm_i915_error_state *error;
 	unsigned long flags;
 
+	if (!i915.error_capture)
+		return;
+
 	if (READ_ONCE(dev_priv->gpu_error.first_error))
 		return;
 
@@ -1549,6 +1554,8 @@ void i915_destroy_error_state(struct drm_device *dev)
 		kref_put(&error->ref, i915_error_state_free);
 }
 
+#endif
+
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
 {
 	switch (type) {
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 768ad89d9cd4..e72a41223535 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -47,6 +47,7 @@ struct i915_params i915 __read_mostly = {
 	.load_detect_test = 0,
 	.force_reset_modeset_test = 0,
 	.reset = true,
+	.error_capture = true,
 	.invert_brightness = 0,
 	.disable_display = 0,
 	.enable_cmd_parser = 1,
@@ -115,6 +116,14 @@ MODULE_PARM_DESC(vbt_sdvo_panel_type,
 module_param_named_unsafe(reset, i915.reset, bool, 0600);
 MODULE_PARM_DESC(reset, "Attempt GPU resets (default: true)");
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+module_param_named(error_capture, i915.error_capture, bool, 0600);
+MODULE_PARM_DESC(error_capture,
+	"Record the GPU state following a hang. "
+	"This information in /sys/class/drm/card<N>/error is vital for "
+	"triaging and debugging hangs.");
+#endif
+
 module_param_named_unsafe(enable_hangcheck, i915.enable_hangcheck, bool, 0644);
 MODULE_PARM_DESC(enable_hangcheck,
 	"Periodically check GPU activity for detecting hangs. "
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 3a0dd78ddb38..94efc899c1ef 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -59,6 +59,7 @@ struct i915_params {
 	bool load_detect_test;
 	bool force_reset_modeset_test;
 	bool reset;
+	bool error_capture;
 	bool disable_display;
 	bool verbose_state_checks;
 	bool nuclear_pageflip;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 1012eeea1324..c9b71f676a57 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -514,6 +514,8 @@ static const struct attribute *vlv_attrs[] = {
 	NULL,
 };
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *attr, char *buf,
 				loff_t off, size_t count)
@@ -571,6 +573,8 @@ static struct bin_attribute error_state_attr = {
 	.write = error_state_write,
 };
 
+#endif
+
 void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
@@ -617,17 +621,21 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 	if (ret)
 		DRM_ERROR("RPS sysfs setup failed\n");
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
 	ret = sysfs_create_bin_file(&kdev->kobj,
 				    &error_state_attr);
 	if (ret)
 		DRM_ERROR("error_state sysfs setup failed\n");
+#endif
 }
 
 void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
 {
 	struct device *kdev = dev_priv->drm.primary->kdev;
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
 	sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
+#endif
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		sysfs_remove_files(&kdev->kobj, vlv_attrs);
 	else
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a366656bcec5..4b3d315c5254 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -17096,6 +17096,8 @@ int intel_modeset_vga_set_state(struct drm_device *dev, bool state)
 	return 0;
 }
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 struct intel_display_error_state {
 
 	u32 power_well_driver;
@@ -17278,3 +17280,5 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m,
 		err_printf(m, "  VSYNC: %08x\n", error->transcoder[i].vsync);
 	}
 }
+
+#endif
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a24bc8c7889f..7c392547711f 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -1470,6 +1470,8 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
 	kfree(dev_priv->overlay);
 }
 
+#ifdef CONFIG_DRM_I915_CAPTURE_ERROR
+
 struct intel_overlay_error_state {
 	struct overlay_registers regs;
 	unsigned long base;
@@ -1587,3 +1589,5 @@ intel_overlay_print_error_state(struct drm_i915_error_state_buf *m,
 	P(UVSCALEV);
 #undef P
 }
+
+#endif
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 02/42] drm/i915: Stop the machine whilst capturing the GPU crash dump
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
  2016-10-07  9:45 ` [PATCH 01/42] drm/i915: Allow disabling error capture Chris Wilson
@ 2016-10-07  9:45 ` Chris Wilson
  2016-10-07 10:11   ` Joonas Lahtinen
  2016-10-07  9:45 ` [PATCH 03/42] drm/i915: Always use the GTT for error capture Chris Wilson
                   ` (42 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:45 UTC (permalink / raw)
  To: intel-gfx

The error state is purposefully racy as we expect it to be called at any
time and so have avoided any locking whilst capturing the crash dump.
However, with multi-engine GPUs and multiple CPUs, those races can
manifest into OOPSes as we attempt to chase dangling pointers freed on
other CPUs. Under discussion are lots of ways to slow down normal
operation in order to protect the post-mortem error capture, but what if
we take the opposite approach and freeze the machine whilst the error
capture runs (note the GPU may still be running, but as long as we don't
process any of the results the driver's bookkeeping will be static).

Note that by itself, this is not a complete fix. It also depends on
the compiler barriers in list_add/list_del to prevent traversing the
lists into the void. We also depend on only requiring state from
carefully controlled sources - i.e. all the state we require for
post-mortem debugging should be reachable from the request itself so
that we only have to worry about retrieving the request carefully. Once
we have the request, we know that all pointers from it are intact.

v2: Avoid drm_clflush_pages() inside stop_machine() as it may use
stop_machine() itself for its wbinvd fallback.
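
As a minimal sketch of the stop_machine() pattern this builds on
(capture_cb is an illustrative name; the real callback is the capture()
function added in the diff below): the callback runs on one CPU while
every other CPU spins with interrupts disabled, so the driver's
bookkeeping cannot change underneath it.

	#include <linux/stop_machine.h>

	static int capture_cb(void *data)
	{
		struct drm_i915_error_state *error = data;

		/* All other CPUs are held in a tight loop with interrupts
		 * disabled for the duration of this callback.
		 */

		/* ... snapshot registers, requests and buffers into *error ... */

		return 0;
	}

	/* caller, from i915_capture_error_state(): */
	stop_machine(capture_cb, error, NULL);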

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig          |  1 +
 drivers/gpu/drm/i915/i915_drv.h       |  2 ++
 drivers/gpu/drm/i915/i915_gpu_error.c | 46 +++++++++++++++++++++--------------
 3 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 10a6ac11b6a9..0f46a9c04c0e 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -4,6 +4,7 @@ config DRM_I915
 	depends on X86 && PCI
 	select INTEL_GTT
 	select INTERVAL_TREE
+	select STOP_MACHINE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e24f9ccf4467..303ffb7c882a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -746,6 +746,8 @@ struct drm_i915_error_state {
 	struct kref ref;
 	struct timeval time;
 
+	struct drm_i915_private *i915;
+
 	char error_msg[128];
 	bool simulated;
 	int iommu;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9b395ffa3b6a..69d4cffe4a32 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -28,6 +28,7 @@
  */
 
 #include <generated/utsrelease.h>
+#include <linux/stop_machine.h>
 #include "i915_drv.h"
 
 #ifdef CONFIG_DRM_I915_CAPTURE_ERROR
@@ -746,14 +747,12 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 
 	dst->page_count = num_pages;
 	while (num_pages--) {
-		unsigned long flags;
 		void *d;
 
 		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
 		if (d == NULL)
 			goto unwind;
 
-		local_irq_save(flags);
 		if (use_ggtt) {
 			void __iomem *s;
 
@@ -772,15 +771,10 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 
 			page = i915_gem_object_get_page(src, i);
 
-			drm_clflush_pages(&page, 1);
-
 			s = kmap_atomic(page);
 			memcpy(d, s, PAGE_SIZE);
 			kunmap_atomic(s);
-
-			drm_clflush_pages(&page, 1);
 		}
-		local_irq_restore(flags);
 
 		dst->pages[i++] = d;
 		reloc_offset += PAGE_SIZE;
@@ -1449,6 +1443,31 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
 	       sizeof(error->device_info));
 }
 
+static int capture(void *data)
+{
+	struct drm_i915_error_state *error = data;
+
+	/* Ensure that what we readback from memory matches what the GPU sees */
+	wbinvd();
+
+	i915_capture_gen_state(error->i915, error);
+	i915_capture_reg_state(error->i915, error);
+	i915_gem_record_fences(error->i915, error);
+	i915_gem_record_rings(error->i915, error);
+	i915_capture_active_buffers(error->i915, error);
+	i915_capture_pinned_buffers(error->i915, error);
+
+	do_gettimeofday(&error->time);
+
+	error->overlay = intel_overlay_capture_error_state(error->i915);
+	error->display = intel_display_capture_error_state(error->i915);
+
+	/* And make sure we don't leave trash in the CPU cache */
+	wbinvd();
+
+	return 0;
+}
+
 /**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
@@ -1480,18 +1499,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
 	}
 
 	kref_init(&error->ref);
+	error->i915 = dev_priv;
 
-	i915_capture_gen_state(dev_priv, error);
-	i915_capture_reg_state(dev_priv, error);
-	i915_gem_record_fences(dev_priv, error);
-	i915_gem_record_rings(dev_priv, error);
-	i915_capture_active_buffers(dev_priv, error);
-	i915_capture_pinned_buffers(dev_priv, error);
-
-	do_gettimeofday(&error->time);
-
-	error->overlay = intel_overlay_capture_error_state(dev_priv);
-	error->display = intel_display_capture_error_state(dev_priv);
+	stop_machine(capture, error, NULL);
 
 	i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
 	DRM_INFO("%s\n", error->error_msg);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 03/42] drm/i915: Always use the GTT for error capture
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
  2016-10-07  9:45 ` [PATCH 01/42] drm/i915: Allow disabling error capture Chris Wilson
  2016-10-07  9:45 ` [PATCH 02/42] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
@ 2016-10-07  9:45 ` Chris Wilson
  2016-10-07  9:45 ` [PATCH 04/42] drm/i915: Consolidate error object printing Chris Wilson
                   ` (41 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:45 UTC (permalink / raw)
  To: intel-gfx

Since the GTT provides universal access to any GPU page, we can use it
to reduce our plethora of read methods to just one. It also has the
important characteristic of being exactly what the GPU sees - if there
are incoherency problems, seeing the batch as executed (rather than as
trapped inside the CPU cache) is important.
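
Condensed from the diff below, the new read path rebinds a single reserved
GGTT slot (ggtt->error_capture) to each backing page of the object in turn
and copies through the aperture (sketch only; dst_page stands in for the
destination buffer, and error handling is elided):

	for_each_sgt_dma(dma, iter, vma->pages) {
		void __iomem *s;

		/* Point the reserved PTE at the next backing page */
		ggtt->base.insert_page(&ggtt->base, dma, slot,
				       I915_CACHE_NONE, 0);

		/* Read the page exactly as the GPU would see it */
		s = io_mapping_map_atomic_wc(&ggtt->mappable, slot);
		memcpy(dst_page, (void __force *)s, PAGE_SIZE);
		io_mapping_unmap_atomic(s);
	}
	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);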

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  43 ++++++++----
 drivers/gpu/drm/i915/i915_gem_gtt.h   |   2 +
 drivers/gpu/drm/i915/i915_gpu_error.c | 120 ++++++++++++----------------------
 3 files changed, 74 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0bb4232f66bc..2d846aa39ca5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2717,6 +2717,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 	 */
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	unsigned long hole_start, hole_end;
+	struct i915_hw_ppgtt *ppgtt;
 	struct drm_mm_node *entry;
 	int ret;
 
@@ -2724,6 +2725,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 	if (ret)
 		return ret;
 
+	/* Reserve a mappable slot for our lockless error capture */
+	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
+						  &ggtt->error_capture,
+						  4096, 0, -1,
+						  0, ggtt->mappable_end,
+						  0, 0);
+	if (ret)
+		return ret;
+
 	/* Clear any non-preallocated blocks */
 	drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
@@ -2738,25 +2748,21 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 			       true);
 
 	if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
-		struct i915_hw_ppgtt *ppgtt;
-
 		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-		if (!ppgtt)
-			return -ENOMEM;
+		if (!ppgtt) {
+			ret = -ENOMEM;
+			goto err;
+		}
 
 		ret = __hw_ppgtt_init(ppgtt, dev_priv);
-		if (ret) {
-			kfree(ppgtt);
-			return ret;
-		}
+		if (ret)
+			goto err_ppgtt;
 
-		if (ppgtt->base.allocate_va_range)
+		if (ppgtt->base.allocate_va_range) {
 			ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
 							    ppgtt->base.total);
-		if (ret) {
-			ppgtt->base.cleanup(&ppgtt->base);
-			kfree(ppgtt);
-			return ret;
+			if (ret)
+				goto err_ppgtt_cleanup;
 		}
 
 		ppgtt->base.clear_range(&ppgtt->base,
@@ -2770,6 +2776,14 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 	}
 
 	return 0;
+
+err_ppgtt_cleanup:
+	ppgtt->base.cleanup(&ppgtt->base);
+err_ppgtt:
+	kfree(ppgtt);
+err:
+	drm_mm_remove_node(&ggtt->error_capture);
+	return ret;
 }
 
 /**
@@ -2788,6 +2802,9 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 
 	i915_gem_cleanup_stolen(&dev_priv->drm);
 
+	if (drm_mm_node_allocated(&ggtt->error_capture))
+		drm_mm_remove_node(&ggtt->error_capture);
+
 	if (drm_mm_initialized(&ggtt->base.mm)) {
 		intel_vgt_deballoon(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index ec78be2f8c77..bd93fb8f99d2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -450,6 +450,8 @@ struct i915_ggtt {
 	bool do_idle_maps;
 
 	int mtrr;
+
+	struct drm_mm_node error_capture;
 };
 
 struct i915_hw_ppgtt {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 69d4cffe4a32..be82eadac9bb 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -658,7 +658,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj)
 		return;
 
 	for (page = 0; page < obj->page_count; page++)
-		kfree(obj->pages[page]);
+		free_page((unsigned long)obj->pages[page]);
 
 	kfree(obj);
 }
@@ -695,98 +695,69 @@ static void i915_error_state_free(struct kref *error_ref)
 	kfree(error);
 }
 
+static int compress_page(void *src, struct drm_i915_error_object *dst)
+{
+	unsigned long page;
+
+	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+	if (!page)
+		return -ENOMEM;
+
+	dst->pages[dst->page_count++] = (void *)page;
+
+	memcpy((void *)page, src, PAGE_SIZE);
+	return 0;
+}
+
 static struct drm_i915_error_object *
-i915_error_object_create(struct drm_i915_private *dev_priv,
+i915_error_object_create(struct drm_i915_private *i915,
 			 struct i915_vma *vma)
 {
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	struct drm_i915_gem_object *src;
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	const u64 slot = ggtt->error_capture.start;
 	struct drm_i915_error_object *dst;
-	int num_pages;
-	bool use_ggtt;
-	int i = 0;
-	u64 reloc_offset;
+	unsigned long num_pages;
+	struct sgt_iter iter;
+	dma_addr_t dma;
 
 	if (!vma)
 		return NULL;
 
-	src = vma->obj;
-	if (!src->pages)
-		return NULL;
-
-	num_pages = src->base.size >> PAGE_SHIFT;
-
-	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
+	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
+	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
+		      GFP_ATOMIC | __GFP_NOWARN);
 	if (!dst)
 		return NULL;
 
 	dst->gtt_offset = vma->node.start;
 	dst->gtt_size = vma->node.size;
+	dst->page_count = 0;
 
-	reloc_offset = dst->gtt_offset;
-	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
-		   (vma->flags & I915_VMA_GLOBAL_BIND) &&
-		   reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
-
-	/* Cannot access stolen address directly, try to use the aperture */
-	if (src->stolen) {
-		use_ggtt = true;
-
-		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
-			goto unwind;
-
-		reloc_offset = vma->node.start;
-		if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
-			goto unwind;
-	}
+	for_each_sgt_dma(dma, iter, vma->pages) {
+		void __iomem *s;
+		int ret;
 
-	/* Cannot access snooped pages through the aperture */
-	if (use_ggtt && src->cache_level != I915_CACHE_NONE &&
-	    !HAS_LLC(dev_priv))
-		goto unwind;
+		ggtt->base.insert_page(&ggtt->base, dma, slot,
+				       I915_CACHE_NONE, 0);
 
-	dst->page_count = num_pages;
-	while (num_pages--) {
-		void *d;
+		s = io_mapping_map_atomic_wc(&ggtt->mappable, slot);
+		ret = compress_page((void * __force)s, dst);
+		io_mapping_unmap_atomic(s);
 
-		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
-		if (d == NULL)
+		if (ret)
 			goto unwind;
-
-		if (use_ggtt) {
-			void __iomem *s;
-
-			/* Simply ignore tiling or any overlapping fence.
-			 * It's part of the error state, and this hopefully
-			 * captures what the GPU read.
-			 */
-
-			s = io_mapping_map_atomic_wc(&ggtt->mappable,
-						     reloc_offset);
-			memcpy_fromio(d, s, PAGE_SIZE);
-			io_mapping_unmap_atomic(s);
-		} else {
-			struct page *page;
-			void *s;
-
-			page = i915_gem_object_get_page(src, i);
-
-			s = kmap_atomic(page);
-			memcpy(d, s, PAGE_SIZE);
-			kunmap_atomic(s);
-		}
-
-		dst->pages[i++] = d;
-		reloc_offset += PAGE_SIZE;
 	}
-
-	return dst;
+	goto out;
 
 unwind:
-	while (i--)
-		kfree(dst->pages[i]);
+	while (dst->page_count--)
+		free_page((unsigned long)dst->pages[dst->page_count]);
 	kfree(dst);
-	return NULL;
+	dst = NULL;
+
+out:
+	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
+	return dst;
 }
 
 /* The error capture is special as tries to run underneath the normal
@@ -1447,9 +1418,6 @@ static int capture(void *data)
 {
 	struct drm_i915_error_state *error = data;
 
-	/* Ensure that what we readback from memory matches what the GPU sees */
-	wbinvd();
-
 	i915_capture_gen_state(error->i915, error);
 	i915_capture_reg_state(error->i915, error);
 	i915_gem_record_fences(error->i915, error);
@@ -1462,9 +1430,6 @@ static int capture(void *data)
 	error->overlay = intel_overlay_capture_error_state(error->i915);
 	error->display = intel_display_capture_error_state(error->i915);
 
-	/* And make sure we don't leave trash in the CPU cache */
-	wbinvd();
-
 	return 0;
 }
 
@@ -1541,7 +1506,6 @@ void i915_error_state_get(struct drm_device *dev,
 	if (error_priv->error)
 		kref_get(&error_priv->error->ref);
 	spin_unlock_irq(&dev_priv->gpu_error.lock);
-
 }
 
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 04/42] drm/i915: Consolidate error object printing
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (2 preceding siblings ...)
  2016-10-07  9:45 ` [PATCH 03/42] drm/i915: Always use the GTT for error capture Chris Wilson
@ 2016-10-07  9:45 ` Chris Wilson
  2016-10-07  9:45 ` [PATCH 05/42] drm/i915: Compress GPU objects in error state Chris Wilson
                   ` (40 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:45 UTC (permalink / raw)
  To: intel-gfx

Leave all the pretty printing to userspace and simplify the error
capture to only have a single common object printer. It makes the kernel
code more compact, and the refactoring allows us to apply more complex
transformations like compressing the output.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 100 +++++++++-------------------------
 1 file changed, 25 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index be82eadac9bb..bf2498297341 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -343,10 +343,22 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 }
 
 static void print_error_obj(struct drm_i915_error_state_buf *m,
+			    struct intel_engine_cs *engine,
+			    const char *name,
 			    struct drm_i915_error_object *obj)
 {
 	int page, offset, elt;
 
+	if (!obj)
+		return;
+
+	if (name) {
+		err_printf(m, "%s --- %s = 0x%08x %08x\n",
+			   engine ? engine->name : "global", name,
+			   upper_32_bits(obj->gtt_offset),
+			   lower_32_bits(obj->gtt_offset));
+	}
+
 	for (page = offset = 0; page < obj->page_count; page++) {
 		for (elt = 0; elt < PAGE_SIZE/4; elt++) {
 			err_printf(m, "%08x :  %08x\n", offset,
@@ -372,8 +384,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	struct drm_i915_error_state *error = error_priv->error;
 	struct drm_i915_error_object *obj;
-	int i, j, offset, elt;
 	int max_hangcheck_score;
+	int i, j;
 
 	if (!error) {
 		err_printf(m, "no error state collected\n");
@@ -493,15 +505,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
 				   upper_32_bits(obj->gtt_offset),
 				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, obj);
-		}
-
-		obj = ee->wa_batchbuffer;
-		if (obj) {
-			err_printf(m, "%s (w/a) --- gtt_offset = 0x%08x\n",
-				   dev_priv->engine[i].name,
-				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, obj);
+			print_error_obj(m, &dev_priv->engine[i], NULL, obj);
 		}
 
 		if (ee->num_requests) {
@@ -533,77 +537,23 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			}
 		}
 
-		if ((obj = ee->ringbuffer)) {
-			err_printf(m, "%s --- ringbuffer = 0x%08x\n",
-				   dev_priv->engine[i].name,
-				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, obj);
-		}
+		print_error_obj(m, &dev_priv->engine[i],
+				"ringbuffer", ee->ringbuffer);
 
-		if ((obj = ee->hws_page)) {
-			u64 hws_offset = obj->gtt_offset;
-			u32 *hws_page = &obj->pages[0][0];
+		print_error_obj(m, &dev_priv->engine[i],
+				"HW Status", ee->hws_page);
 
-			if (i915.enable_execlists) {
-				hws_offset += LRC_PPHWSP_PN * PAGE_SIZE;
-				hws_page = &obj->pages[LRC_PPHWSP_PN][0];
-			}
-			err_printf(m, "%s --- HW Status = 0x%08llx\n",
-				   dev_priv->engine[i].name, hws_offset);
-			offset = 0;
-			for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
-				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-					   offset,
-					   hws_page[elt],
-					   hws_page[elt+1],
-					   hws_page[elt+2],
-					   hws_page[elt+3]);
-				offset += 16;
-			}
-		}
+		print_error_obj(m, &dev_priv->engine[i],
+				"HW context", ee->ctx);
 
-		obj = ee->wa_ctx;
-		if (obj) {
-			u64 wa_ctx_offset = obj->gtt_offset;
-			u32 *wa_ctx_page = &obj->pages[0][0];
-			struct intel_engine_cs *engine = &dev_priv->engine[RCS];
-			u32 wa_ctx_size = (engine->wa_ctx.indirect_ctx.size +
-					   engine->wa_ctx.per_ctx.size);
-
-			err_printf(m, "%s --- WA ctx batch buffer = 0x%08llx\n",
-				   dev_priv->engine[i].name, wa_ctx_offset);
-			offset = 0;
-			for (elt = 0; elt < wa_ctx_size; elt += 4) {
-				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-					   offset,
-					   wa_ctx_page[elt + 0],
-					   wa_ctx_page[elt + 1],
-					   wa_ctx_page[elt + 2],
-					   wa_ctx_page[elt + 3]);
-				offset += 16;
-			}
-		}
+		print_error_obj(m, &dev_priv->engine[i],
+				"WA context", ee->wa_ctx);
 
-		if ((obj = ee->ctx)) {
-			err_printf(m, "%s --- HW Context = 0x%08x\n",
-				   dev_priv->engine[i].name,
-				   lower_32_bits(obj->gtt_offset));
-			print_error_obj(m, obj);
-		}
+		print_error_obj(m, &dev_priv->engine[i],
+				"WA batchbuffer", ee->wa_batchbuffer);
 	}
 
-	if ((obj = error->semaphore)) {
-		err_printf(m, "Semaphore page = 0x%08x\n",
-			   lower_32_bits(obj->gtt_offset));
-		for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
-			err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-				   elt * 4,
-				   obj->pages[0][elt],
-				   obj->pages[0][elt+1],
-				   obj->pages[0][elt+2],
-				   obj->pages[0][elt+3]);
-		}
-	}
+	print_error_obj(m, NULL, "Semaphores", error->semaphore);
 
 	if (error->overlay)
 		intel_overlay_print_error_state(m, error->overlay);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 05/42] drm/i915: Compress GPU objects in error state
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (3 preceding siblings ...)
  2016-10-07  9:45 ` [PATCH 04/42] drm/i915: Consolidate error object printing Chris Wilson
@ 2016-10-07  9:45 ` Chris Wilson
  2016-10-07  9:45 ` [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request Chris Wilson
                   ` (39 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:45 UTC (permalink / raw)
  To: intel-gfx

Our error states are quickly growing, pinning kernel memory with them.
The majority of the space is taken up by the error objects. These
compress well using zlib, and without being decoded they are mostly
meaningless anyway, so compressing them does not hinder quickly scanning
the error state for familiar patterns.

v2: Make the zlib dependency optional
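
A userspace-side sketch (not part of this patch; decode_u32() is a
hypothetical helper) for undoing the ascii85 encoding that the new
print_error_obj() emits: 'z' stands for an all-zero u32, otherwise five
characters, most significant digit first, encode one u32. With
CONFIG_DRM_I915_COMPRESS_ERROR the decoded words form a zlib stream
(marked by ':'), otherwise the raw page contents (marked by '~'):

	#include <stdint.h>

	static const char *decode_u32(const char *in, uint32_t *out)
	{
		uint32_t v = 0;
		int i;

		if (*in == 'z') {	/* shorthand for a zero word */
			*out = 0;
			return in + 1;
		}

		for (i = 0; i < 5; i++)
			v = v * 85 + (uint32_t)(*in++ - '!');

		*out = v;
		return in;
	}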

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig          |  12 +++
 drivers/gpu/drm/i915/i915_drv.h       |   3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 176 ++++++++++++++++++++++++++++++----
 3 files changed, 169 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 0f46a9c04c0e..92ecced1bc8f 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -57,6 +57,18 @@ config DRM_I915_CAPTURE_ERROR
 
 	  If in doubt, say "Y".
 
+config DRM_I915_COMPRESS_ERROR
+	bool "Compress GPU error state"
+	depends on DRM_I915_CAPTURE_ERROR
+	select ZLIB_DEFLATE
+	default y
+	help
+	  This option selects ZLIB_DEFLATE if it isn't already
+	  selected and causes any error state captured upon a GPU hang
+	  to be compressed using zlib.
+
+	  If in doubt, say "Y".
+
 config DRM_I915_USERPTR
 	bool "Always enable userptr support"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 303ffb7c882a..54b1b931573c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -820,9 +820,10 @@ struct drm_i915_error_state {
 		struct intel_instdone instdone;
 
 		struct drm_i915_error_object {
-			int page_count;
 			u64 gtt_offset;
 			u64 gtt_size;
+			int page_count;
+			int unused;
 			u32 *pages[0];
 		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index bf2498297341..6c22be28ba01 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -29,6 +29,7 @@
 
 #include <generated/utsrelease.h>
 #include <linux/stop_machine.h>
+#include <linux/zlib.h>
 #include "i915_drv.h"
 
 #ifdef CONFIG_DRM_I915_CAPTURE_ERROR
@@ -175,6 +176,110 @@ static void i915_error_puts(struct drm_i915_error_state_buf *e,
 #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
 #define err_puts(e, s) i915_error_puts(e, s)
 
+#ifdef CONFIG_DRM_I915_COMPRESS_ERROR
+
+static bool compress_init(struct z_stream_s *zstream)
+{
+	memset(zstream, 0, sizeof(*zstream));
+
+	zstream->workspace =
+		kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
+			GFP_ATOMIC | __GFP_NOWARN);
+	if (!zstream->workspace)
+		return false;
+
+	if (zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
+		kfree(zstream->workspace);
+		return false;
+	}
+
+	return true;
+}
+
+static int compress_page(struct z_stream_s *zstream,
+			 void *src,
+			 struct drm_i915_error_object *dst)
+{
+	zstream->next_in = src;
+	zstream->avail_in = PAGE_SIZE;
+
+	do {
+		if (zstream->avail_out == 0) {
+			unsigned long page;
+
+			page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+			if (!page)
+				return -ENOMEM;
+
+			dst->pages[dst->page_count++] = (void *)page;
+
+			zstream->next_out = (void *)page;
+			zstream->avail_out = PAGE_SIZE;
+		}
+
+		if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
+			return -EIO;
+	} while (zstream->avail_in);
+
+	/* Fallback to uncompressed if we increase size? */
+	if (0 && zstream->total_out > zstream->total_in)
+		return -E2BIG;
+
+	return 0;
+}
+
+static void compress_fini(struct z_stream_s *zstream,
+			  struct drm_i915_error_object *dst)
+{
+	if (dst) {
+		zlib_deflate(zstream, Z_FINISH);
+		dst->unused = zstream->avail_out;
+	}
+
+	zlib_deflateEnd(zstream);
+	kfree(zstream->workspace);
+}
+
+static void err_compression_marker(struct drm_i915_error_state_buf *m)
+{
+	err_puts(m, ":");
+}
+
+#else
+
+static bool compress_init(struct z_stream_s *zstream)
+{
+	return true;
+}
+
+static int compress_page(struct z_stream_s *zstream,
+			 void *src,
+			 struct drm_i915_error_object *dst)
+{
+	unsigned long page;
+
+	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+	if (!page)
+		return -ENOMEM;
+
+	dst->pages[dst->page_count++] =
+		memcpy((void *)page, src, PAGE_SIZE);
+
+	return 0;
+}
+
+static void compress_fini(struct z_stream_s *zstream,
+			  struct drm_i915_error_object *dst)
+{
+}
+
+static void err_compression_marker(struct drm_i915_error_state_buf *m)
+{
+	err_puts(m, "~");
+}
+
+#endif
+
 static void print_error_buffers(struct drm_i915_error_state_buf *m,
 				const char *name,
 				struct drm_i915_error_buffer *err,
@@ -342,12 +447,36 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 	va_end(args);
 }
 
+static int
+ascii85_encode_len(int len)
+{
+	return DIV_ROUND_UP(len, 4);
+}
+
+static bool
+ascii85_encode(u32 in, char *out)
+{
+	int i;
+
+	if (in == 0)
+		return false;
+
+	out[5] = '\0';
+	for (i = 5; i--; ) {
+		out[i] = '!' + in % 85;
+		in /= 85;
+	}
+
+	return true;
+}
+
 static void print_error_obj(struct drm_i915_error_state_buf *m,
 			    struct intel_engine_cs *engine,
 			    const char *name,
 			    struct drm_i915_error_object *obj)
 {
-	int page, offset, elt;
+	char out[6];
+	int page;
 
 	if (!obj)
 		return;
@@ -359,13 +488,23 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
 			   lower_32_bits(obj->gtt_offset));
 	}
 
-	for (page = offset = 0; page < obj->page_count; page++) {
-		for (elt = 0; elt < PAGE_SIZE/4; elt++) {
-			err_printf(m, "%08x :  %08x\n", offset,
-				   obj->pages[page][elt]);
-			offset += 4;
+	err_compression_marker(m);
+	for (page = 0; page < obj->page_count; page++) {
+		int i, len;
+
+		len = PAGE_SIZE;
+		if (page == obj->page_count - 1)
+			len -= obj->unused;
+		len = ascii85_encode_len(len);
+
+		for (i = 0; i < len; i++) {
+			if (ascii85_encode(obj->pages[page][i], out))
+				err_puts(m, out);
+			else
+				err_puts(m, "z");
 		}
 	}
+	err_puts(m, "\n");
 }
 
 static void err_print_capabilities(struct drm_i915_error_state_buf *m,
@@ -645,20 +784,6 @@ static void i915_error_state_free(struct kref *error_ref)
 	kfree(error);
 }
 
-static int compress_page(void *src, struct drm_i915_error_object *dst)
-{
-	unsigned long page;
-
-	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
-	if (!page)
-		return -ENOMEM;
-
-	dst->pages[dst->page_count++] = (void *)page;
-
-	memcpy((void *)page, src, PAGE_SIZE);
-	return 0;
-}
-
 static struct drm_i915_error_object *
 i915_error_object_create(struct drm_i915_private *i915,
 			 struct i915_vma *vma)
@@ -666,6 +791,7 @@ i915_error_object_create(struct drm_i915_private *i915,
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	const u64 slot = ggtt->error_capture.start;
 	struct drm_i915_error_object *dst;
+	struct z_stream_s zstream;
 	unsigned long num_pages;
 	struct sgt_iter iter;
 	dma_addr_t dma;
@@ -674,6 +800,7 @@ i915_error_object_create(struct drm_i915_private *i915,
 		return NULL;
 
 	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
+	num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
 	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
 		      GFP_ATOMIC | __GFP_NOWARN);
 	if (!dst)
@@ -682,6 +809,12 @@ i915_error_object_create(struct drm_i915_private *i915,
 	dst->gtt_offset = vma->node.start;
 	dst->gtt_size = vma->node.size;
 	dst->page_count = 0;
+	dst->unused = 0;
+
+	if (!compress_init(&zstream)) {
+		kfree(dst);
+		return NULL;
+	}
 
 	for_each_sgt_dma(dma, iter, vma->pages) {
 		void __iomem *s;
@@ -691,7 +824,7 @@ i915_error_object_create(struct drm_i915_private *i915,
 				       I915_CACHE_NONE, 0);
 
 		s = io_mapping_map_atomic_wc(&ggtt->mappable, slot);
-		ret = compress_page((void * __force)s, dst);
+		ret = compress_page(&zstream, (void  __force *)s, dst);
 		io_mapping_unmap_atomic(s);
 
 		if (ret)
@@ -706,6 +839,7 @@ unwind:
 	dst = NULL;
 
 out:
+	compress_fini(&zstream, dst);
 	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
 	return dst;
 }
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (4 preceding siblings ...)
  2016-10-07  9:45 ` [PATCH 05/42] drm/i915: Compress GPU objects in error state Chris Wilson
@ 2016-10-07  9:45 ` Chris Wilson
  2016-10-07  9:56   ` Joonas Lahtinen
  2016-10-07 15:51   ` Tvrtko Ursulin
  2016-10-07  9:46 ` [PATCH 07/42] drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate Chris Wilson
                   ` (38 subsequent siblings)
  44 siblings, 2 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:45 UTC (permalink / raw)
  To: intel-gfx

We will need to wait on DMA completion (as signaled via struct fence)
before executing our i915_gem_request. Therefore we want to expose a
method for adding the await on the fence itself to the request.
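
A hypothetical caller (not part of this patch; the function name and the
source of the external fence are illustrative) would simply hand the
foreign fence to the new helper before submitting the request:

	static int await_external_fence(struct drm_i915_gem_request *req,
					struct fence *external)
	{
		/* i915 fences are chained directly via
		 * i915_gem_request_await_request(); foreign fences, and each
		 * member of a fence-array, are hooked up through the
		 * request's submit fence with a 10 second timeout.
		 */
		return i915_gem_request_await_fence(req, external);
	}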

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 40 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
 2 files changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8832f8ec1583..e1f7a32d4080 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/prefetch.h>
+#include <linux/fence-array.h>
 
 #include "i915_drv.h"
 
@@ -495,6 +496,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 	return 0;
 }
 
+int
+i915_gem_request_await_fence(struct drm_i915_gem_request *req,
+			     struct fence *fence)
+{
+	struct fence_array *array;
+	int ret;
+	int i;
+
+	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return 0;
+
+	if (fence_is_i915(fence))
+		return i915_gem_request_await_request(req, to_request(fence));
+
+	if (!fence_is_array(fence)) {
+		ret = i915_sw_fence_await_dma_fence(&req->submit,
+						    fence, 10*HZ,
+						    GFP_KERNEL);
+		return ret < 0 ? ret : 0;
+	}
+
+	array = to_fence_array(fence);
+	for (i = 0; i < array->num_fences; i++) {
+		struct fence *child = array->fences[i];
+
+		if (fence_is_i915(child))
+			ret = i915_gem_request_await_request(req,
+							     to_request(child));
+		else
+			ret = i915_sw_fence_await_dma_fence(&req->submit,
+							    child, 10*HZ,
+							    GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * i915_gem_request_await_object - set this request to (async) wait upon a bo
  *
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 974bd7bcc801..c85a3d82febf 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -214,6 +214,8 @@ int
 i915_gem_request_await_object(struct drm_i915_gem_request *to,
 			      struct drm_i915_gem_object *obj,
 			      bool write);
+int i915_gem_request_await_fence(struct drm_i915_gem_request *req,
+				 struct fence *fence);
 
 void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 07/42] drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (5 preceding siblings ...)
  2016-10-07  9:45 ` [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07 16:10   ` Tvrtko Ursulin
  2016-10-07  9:46 ` [PATCH 08/42] drm/i915: Rearrange i915_wait_request() accounting with callers Chris Wilson
                   ` (37 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

In forthcoming patches, we want to be able to dynamically allocate the
wait_queue_t used whilst awaiting. This is more convenient if we extend
i915_sw_fence_await_sw_fence() to perform the allocation for us when we
pass in a gfp mask instead of a preallocated struct.
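
For reference, the two calling conventions after this patch (sketch only;
fence, signaler, wq and err are assumed to be declared by the caller):

	/* Caller supplies the wait_queue_t, as before: */
	err = i915_sw_fence_await_sw_fence(fence, signaler, &wq);

	/* Or let the helper allocate it; with a blocking gfp mask it falls
	 * back to waiting synchronously if the allocation fails:
	 */
	err = i915_sw_fence_await_sw_fence_gfp(fence, signaler, GFP_KERNEL);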

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_sw_fence.c | 40 ++++++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_sw_fence.h |  8 ++++++++
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 1e5cbc585ca2..dfb59dced0ce 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -13,6 +13,8 @@
 
 #include "i915_sw_fence.h"
 
+#define I915_SW_FENCE_FLAG_ALLOC BIT(0)
+
 static DEFINE_SPINLOCK(i915_sw_fence_lock);
 
 static int __i915_sw_fence_notify(struct i915_sw_fence *fence,
@@ -135,6 +137,8 @@ static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *
 	list_del(&wq->task_list);
 	__i915_sw_fence_complete(wq->private, key);
 	i915_sw_fence_put(wq->private);
+	if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
+		kfree(wq);
 	return 0;
 }
 
@@ -192,9 +196,9 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 	return err;
 }
 
-int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
-				 struct i915_sw_fence *signaler,
-				 wait_queue_t *wq)
+static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
+					  struct i915_sw_fence *signaler,
+					  wait_queue_t *wq, gfp_t gfp)
 {
 	unsigned long flags;
 	int pending;
@@ -206,8 +210,22 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 	if (unlikely(i915_sw_fence_check_if_after(fence, signaler)))
 		return -EINVAL;
 
+	pending = 0;
+	if (!wq) {
+		wq = kmalloc(sizeof(*wq), gfp);
+		if (!wq) {
+			if (!gfpflags_allow_blocking(gfp))
+				return -ENOMEM;
+
+			i915_sw_fence_wait(signaler);
+			return 0;
+		}
+
+		pending |= I915_SW_FENCE_FLAG_ALLOC;
+	}
+
 	INIT_LIST_HEAD(&wq->task_list);
-	wq->flags = 0;
+	wq->flags = pending;
 	wq->func = i915_sw_fence_wake;
 	wq->private = i915_sw_fence_get(fence);
 
@@ -226,6 +244,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 	return pending;
 }
 
+int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
+				 struct i915_sw_fence *signaler,
+				 wait_queue_t *wq)
+{
+	return __i915_sw_fence_await_sw_fence(fence, signaler, wq, 0);
+}
+
+int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
+				     struct i915_sw_fence *signaler,
+				     gfp_t gfp)
+{
+	return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
+}
+
 struct dma_fence_cb {
 	struct fence_cb base;
 	struct i915_sw_fence *fence;
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index 373141602ca4..5861c69f24d4 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -46,6 +46,9 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence);
 int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 				 struct i915_sw_fence *after,
 				 wait_queue_t *wq);
+int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
+				     struct i915_sw_fence *after,
+				     gfp_t gfp);
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 				  struct fence *dma,
 				  unsigned long timeout,
@@ -62,4 +65,9 @@ static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence)
 	return atomic_read(&fence->pending) < 0;
 }
 
+static inline void i915_sw_fence_wait(struct i915_sw_fence *fence)
+{
+	wait_event(fence->wait, i915_sw_fence_done(fence));
+}
+
 #endif /* _I915_SW_FENCE_H_ */
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH 08/42] drm/i915: Rearrange i915_wait_request() accounting with callers
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (6 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 07/42] drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:58   ` Joonas Lahtinen
  2016-10-07  9:46 ` [PATCH 09/42] drm/i915: Remove unused i915_gem_active_wait() in favour of _unlocked() Chris Wilson
                   ` (36 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Our low-level wait routine has evolved from our generic wait interface,
which handled unlocked waits, RPS boosting, and time tracking. If we
push our GEM fence tracking to use reservation_objects (required for
handling multiple timelines), we lose the ability to pass the required
information down to i915_wait_request(). However, if we push the extra
functionality from i915_wait_request() to the individual callsites
(i915_gem_object_wait_rendering and i915_gem_wait_ioctl) that make use
of those extras, we can both simplify our low-level wait and prepare for
extending the GEM interface to use reservation_objects.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |   7 +-
 drivers/gpu/drm/i915/i915_gem.c         | 301 +++++++++++++++++++++++---------
 drivers/gpu/drm/i915/i915_gem_request.c | 119 ++-----------
 drivers/gpu/drm/i915/i915_gem_request.h |  32 ++--
 drivers/gpu/drm/i915/i915_gem_userptr.c |  12 +-
 drivers/gpu/drm/i915/intel_display.c    |  27 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c |  14 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |   3 +-
 8 files changed, 283 insertions(+), 232 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 54b1b931573c..ee25e265416f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3301,9 +3301,10 @@ int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void i915_gem_resume(struct drm_device *dev);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
-int __must_check
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
-			       bool readonly);
+int i915_gem_object_wait(struct drm_i915_gem_object *obj,
+			 unsigned int flags,
+			 long timeout,
+			 struct intel_rps_client *rps);
 int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
 				  bool write);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a89a88922448..7fa5cb764739 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -292,7 +292,12 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	 * must wait for all rendering to complete to the object (as unbinding
 	 * must anyway), and retire the requests.
 	 */
-	ret = i915_gem_object_wait_rendering(obj, false);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_ALL,
+				   MAX_SCHEDULE_TIMEOUT,
+				   NULL);
 	if (ret)
 		return ret;
 
@@ -311,88 +316,171 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	return ret;
 }
 
-/**
- * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT or access from the CPU.
- * @obj: i915 gem object
- * @readonly: waiting for just read access or read-write access
- */
-int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
-			       bool readonly)
+static long
+i915_gem_object_wait_fence(struct fence *fence,
+			   unsigned int flags,
+			   long timeout,
+			   struct intel_rps_client *rps)
 {
-	struct reservation_object *resv;
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
+	struct drm_i915_gem_request *rq;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
 
-	if (!readonly) {
-		active = obj->last_read;
-		active_mask = i915_gem_object_get_active(obj);
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
+	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return timeout;
+
+	if (!fence_is_i915(fence))
+		return fence_wait_timeout(fence,
+					  flags & I915_WAIT_INTERRUPTIBLE,
+					  timeout);
+
+	rq = to_request(fence);
+	if (i915_gem_request_completed(rq))
+		goto out;
+
+	/* This client is about to stall waiting for the GPU. In many cases
+	 * this is undesirable and limits the throughput of the system, as
+	 * many clients cannot continue processing user input/output whilst
+	 * blocked. RPS autotuning may take tens of milliseconds to respond
+	 * to the GPU load and thus incurs additional latency for the client.
+	 * We can circumvent that by promoting the GPU frequency to maximum
+	 * before we wait. This makes the GPU throttle up much more quickly
+	 * (good for benchmarks and user experience, e.g. window animations),
+	 * but at a cost of spending more power processing the workload
+	 * (bad for battery). Not all clients even want their results
+	 * immediately and for them we should just let the GPU select its own
+	 * frequency to maximise efficiency. To prevent a single client from
+	 * forcing the clocks too high for the whole system, we only allow
+	 * each client to waitboost once in a busy period.
+	 */
+	if (rps) {
+		if (INTEL_GEN(rq->i915) >= 6)
+			gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies);
+		else
+			rps = NULL;
 	}
 
-	for_each_active(active_mask, idx) {
+	timeout = i915_wait_request(rq, flags, timeout);
+
+out:
+	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
+		i915_gem_request_retire_upto(rq);
+
+	if (rps && rq->fence.seqno == rq->engine->last_submitted_seqno) {
+		/* The GPU is now idle and this client has stalled.
+		 * Since no other client has submitted a request in the
+		 * meantime, assume that this client is the only one
+		 * supplying work to the GPU but is unable to keep that
+		 * work supplied because it is waiting. Since the GPU is
+		 * then never kept fully busy, RPS autoclocking will
+		 * keep the clocks relatively low, causing further delays.
+		 * Compensate by giving the synchronous client credit for
+		 * a waitboost next time.
+		 */
+		spin_lock(&rq->i915->rps.client_lock);
+		list_del_init(&rps->link);
+		spin_unlock(&rq->i915->rps.client_lock);
+	}
+
+	return timeout;
+}
+
+static long
+i915_gem_object_wait_reservation(struct reservation_object *resv,
+				 unsigned int flags,
+				 long timeout,
+				 struct intel_rps_client *rps)
+{
+	struct fence *excl;
+
+	if (flags & I915_WAIT_ALL) {
+		struct fence **shared;
+		unsigned int count, i;
 		int ret;
 
-		ret = i915_gem_active_wait(&active[idx],
-					   &obj->base.dev->struct_mutex);
+		ret = reservation_object_get_fences_rcu(resv,
+							&excl, &count, &shared);
 		if (ret)
 			return ret;
-	}
 
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv) {
-		long err;
+		for (i = 0; i < count; i++) {
+			timeout = i915_gem_object_wait_fence(shared[i],
+							     flags, timeout,
+							     rps);
+			if (timeout <= 0)
+				break;
 
-		err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
-							  MAX_SCHEDULE_TIMEOUT);
-		if (err < 0)
-			return err;
+			fence_put(shared[i]);
+		}
+
+		for (; i < count; i++)
+			fence_put(shared[i]);
+		kfree(shared);
+	} else {
+		excl = reservation_object_get_excl_rcu(resv);
 	}
 
-	return 0;
+	if (excl && timeout > 0)
+		timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);
+
+	fence_put(excl);
+
+	return timeout;
 }
 
-/* A nonblocking variant of the above wait. Must be called prior to
- * acquiring the mutex for the object, as the object state may change
- * during this call. A reference must be held by the caller for the object.
+/**
+ * Waits for rendering to the object to be completed
+ * @obj: i915 gem object
+ * @flags: how to wait (under a lock, for all rendering or just for writes etc)
+ * @timeout: how long to wait
+ * @rps: client (user process) to charge for any waitboosting
  */
-static __must_check int
-__unsafe_wait_rendering(struct drm_i915_gem_object *obj,
-			struct intel_rps_client *rps,
-			bool readonly)
+int
+i915_gem_object_wait(struct drm_i915_gem_object *obj,
+		     unsigned int flags,
+		     long timeout,
+		     struct intel_rps_client *rps)
 {
+	struct reservation_object *resv;
 	struct i915_gem_active *active;
 	unsigned long active_mask;
 	int idx;
 
-	active_mask = __I915_BO_ACTIVE(obj);
-	if (!active_mask)
-		return 0;
+	might_sleep();
+#if IS_ENABLED(CONFIG_LOCKDEP)
+	GEM_BUG_ON(!!lockdep_is_held(&obj->base.dev->struct_mutex) !=
+		   !!(flags & I915_WAIT_LOCKED));
+#endif
+	GEM_BUG_ON(timeout < 0);
 
-	if (!readonly) {
+	if (flags & I915_WAIT_ALL) {
 		active = obj->last_read;
+		active_mask = i915_gem_object_get_active(obj);
 	} else {
 		active_mask = 1;
 		active = &obj->last_write;
 	}
 
 	for_each_active(active_mask, idx) {
-		int ret;
-
-		ret = i915_gem_active_wait_unlocked(&active[idx],
-						    I915_WAIT_INTERRUPTIBLE,
-						    NULL, rps);
-		if (ret)
-			return ret;
+		struct drm_i915_gem_request *request;
+
+		request = i915_gem_active_get_unlocked(&active[idx]);
+		if (request) {
+			timeout = i915_gem_object_wait_fence(&request->fence,
+							     flags, timeout,
+							     rps);
+			i915_gem_request_put(request);
+		}
+		if (timeout < 0)
+			return timeout;
 	}
 
-	return 0;
+	resv = i915_gem_object_get_dmabuf_resv(obj);
+	if (resv)
+		timeout = i915_gem_object_wait_reservation(resv,
+							   flags, timeout,
+							   rps);
+	return timeout < 0 ? timeout : 0;
 }
 
 static struct intel_rps_client *to_rps_client(struct drm_file *file)
@@ -449,12 +537,18 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 	struct drm_device *dev = obj->base.dev;
 	void *vaddr = obj->phys_handle->vaddr + args->offset;
 	char __user *user_data = u64_to_user_ptr(args->data_ptr);
-	int ret = 0;
+	int ret;
 
 	/* We manually control the domain here and pretend that it
 	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
 	 */
-	ret = i915_gem_object_wait_rendering(obj, false);
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_ALL,
+				   MAX_SCHEDULE_TIMEOUT,
+				   to_rps_client(file_priv));
 	if (ret)
 		return ret;
 
@@ -614,12 +708,17 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 {
 	int ret;
 
-	*needs_clflush = 0;
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
+	*needs_clflush = 0;
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
-	ret = i915_gem_object_wait_rendering(obj, true);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED,
+				   MAX_SCHEDULE_TIMEOUT,
+				   NULL);
 	if (ret)
 		return ret;
 
@@ -661,11 +760,18 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 {
 	int ret;
 
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
 	*needs_clflush = 0;
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
-	ret = i915_gem_object_wait_rendering(obj, false);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_ALL,
+				   MAX_SCHEDULE_TIMEOUT,
+				   NULL);
 	if (ret)
 		return ret;
 
@@ -1050,7 +1156,10 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 
 	trace_i915_gem_object_pread(obj, args->offset, args->size);
 
-	ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT,
+				   to_rps_client(file));
 	if (ret)
 		goto err;
 
@@ -1450,7 +1559,11 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 
 	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 
-	ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_ALL,
+				   MAX_SCHEDULE_TIMEOUT,
+				   to_rps_client(file));
 	if (ret)
 		goto err;
 
@@ -1537,7 +1650,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	 * We will repeat the flush holding the lock in the normal manner
 	 * to catch cases where we are gazumped.
 	 */
-	ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write_domain ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT,
+				   to_rps_client(file));
 	if (ret)
 		goto err;
 
@@ -1773,7 +1890,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	 * repeat the flush holding the lock in the normal manner to catch cases
 	 * where we are gazumped.
 	 */
-	ret = __unsafe_wait_rendering(obj, NULL, !write);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT,
+				   NULL);
 	if (ret)
 		goto err;
 
@@ -2800,10 +2920,9 @@ int
 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
 	struct drm_i915_gem_wait *args = data;
-	struct intel_rps_client *rps = to_rps_client(file);
 	struct drm_i915_gem_object *obj;
-	unsigned long active;
-	int idx, ret = 0;
+	ktime_t start;
+	long ret;
 
 	if (args->flags != 0)
 		return -EINVAL;
@@ -2812,14 +2931,17 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (!obj)
 		return -ENOENT;
 
-	active = __I915_BO_ACTIVE(obj);
-	for_each_active(active, idx) {
-		s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
-		ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
-						    I915_WAIT_INTERRUPTIBLE,
-						    timeout, rps);
-		if (ret)
-			break;
+	start = ktime_get();
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
+				   args->timeout_ns < 0 ? MAX_SCHEDULE_TIMEOUT : nsecs_to_jiffies(args->timeout_ns),
+				   to_rps_client(file));
+
+	if (args->timeout_ns > 0) {
+		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
+		if (args->timeout_ns < 0)
+			args->timeout_ns = 0;
 	}
 
 	i915_gem_object_put_unlocked(obj);
@@ -3234,7 +3356,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
-	ret = i915_gem_object_wait_rendering(obj, !write);
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT,
+				   NULL);
 	if (ret)
 		return ret;
 
@@ -3351,7 +3479,12 @@ restart:
 		 * If we wait upon the object, we know that all the bound
 		 * VMA are no longer active.
 		 */
-		ret = i915_gem_object_wait_rendering(obj, false);
+		ret = i915_gem_object_wait(obj,
+					   I915_WAIT_INTERRUPTIBLE |
+					   I915_WAIT_LOCKED |
+					   I915_WAIT_ALL,
+					   MAX_SCHEDULE_TIMEOUT,
+					   NULL);
 		if (ret)
 			return ret;
 
@@ -3606,7 +3739,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
-	ret = i915_gem_object_wait_rendering(obj, !write);
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT,
+				   NULL);
 	if (ret)
 		return ret;
 
@@ -3662,11 +3801,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
 	struct drm_i915_gem_request *request, *target = NULL;
-	int ret;
-
-	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
-	if (ret)
-		return ret;
+	long ret;
 
 	/* ABI: return -EIO if already wedged */
 	if (i915_terminally_wedged(&dev_priv->gpu_error))
@@ -3693,10 +3828,12 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	if (target == NULL)
 		return 0;
 
-	ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
+	ret = i915_wait_request(target,
+				I915_WAIT_INTERRUPTIBLE,
+				MAX_SCHEDULE_TIMEOUT);
 	i915_gem_request_put(target);
 
-	return ret;
+	return ret < 0 ? ret : 0;
 }
 
 static bool
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e1f7a32d4080..9b2ac082d5da 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -59,31 +59,9 @@ static bool i915_fence_enable_signaling(struct fence *fence)
 
 static signed long i915_fence_wait(struct fence *fence,
 				   bool interruptible,
-				   signed long timeout_jiffies)
+				   signed long timeout)
 {
-	s64 timeout_ns, *timeout;
-	int ret;
-
-	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
-		timeout_ns = jiffies_to_nsecs(timeout_jiffies);
-		timeout = &timeout_ns;
-	} else {
-		timeout = NULL;
-	}
-
-	ret = i915_wait_request(to_request(fence),
-				interruptible, timeout,
-				NO_WAITBOOST);
-	if (ret == -ETIME)
-		return 0;
-
-	if (ret < 0)
-		return ret;
-
-	if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
-		timeout_jiffies = nsecs_to_jiffies(timeout_ns);
-
-	return timeout_jiffies;
+	return i915_wait_request(to_request(fence), interruptible, timeout);
 }
 
 static void i915_fence_value_str(struct fence *fence, char *str, int size)
@@ -166,7 +144,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	struct i915_gem_active *active, *next;
 
 	trace_i915_gem_request_retire(request);
-	list_del(&request->link);
+	list_del_init(&request->link);
 
 	/* We know the GPU must have read the request to have
 	 * sent us the seqno + interrupt, so use the position
@@ -224,7 +202,8 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 	struct drm_i915_gem_request *tmp;
 
 	lockdep_assert_held(&req->i915->drm.struct_mutex);
-	GEM_BUG_ON(list_empty(&req->link));
+	if (list_empty(&req->link))
+		return;
 
 	do {
 		tmp = list_first_entry(&engine->request_list,
@@ -786,60 +765,30 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
  * Returns 0 if the request was found within the alloted time. Else returns the
  * errno with remaining time filled in timeout argument.
  */
-int i915_wait_request(struct drm_i915_gem_request *req,
-		      unsigned int flags,
-		      s64 *timeout,
-		      struct intel_rps_client *rps)
+long i915_wait_request(struct drm_i915_gem_request *req,
+		       unsigned int flags,
+		       long timeout)
 {
 	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
 		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(reset);
 	struct intel_wait wait;
-	unsigned long timeout_remain;
-	int ret = 0;
 
 	might_sleep();
 #if IS_ENABLED(CONFIG_LOCKDEP)
 	GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
 		   !!(flags & I915_WAIT_LOCKED));
 #endif
+	GEM_BUG_ON(timeout < 0);
 
 	if (i915_gem_request_completed(req))
-		return 0;
-
-	timeout_remain = MAX_SCHEDULE_TIMEOUT;
-	if (timeout) {
-		if (WARN_ON(*timeout < 0))
-			return -EINVAL;
+		return timeout;
 
-		if (*timeout == 0)
-			return -ETIME;
-
-		/* Record current time in case interrupted, or wedged */
-		timeout_remain = nsecs_to_jiffies_timeout(*timeout);
-		*timeout += ktime_get_raw_ns();
-	}
+	if (!timeout)
+		return -ETIME;
 
 	trace_i915_gem_request_wait_begin(req);
 
-	/* This client is about to stall waiting for the GPU. In many cases
-	 * this is undesirable and limits the throughput of the system, as
-	 * many clients cannot continue processing user input/output whilst
-	 * blocked. RPS autotuning may take tens of milliseconds to respond
-	 * to the GPU load and thus incurs additional latency for the client.
-	 * We can circumvent that by promoting the GPU frequency to maximum
-	 * before we wait. This makes the GPU throttle up much more quickly
-	 * (good for benchmarks and user experience, e.g. window animations),
-	 * but at a cost of spending more power processing the workload
-	 * (bad for battery). Not all clients even want their results
-	 * immediately and for them we should just let the GPU select its own
-	 * frequency to maximise efficiency. To prevent a single client from
-	 * forcing the clocks too high for the whole system, we only allow
-	 * each client to waitboost once in a busy period.
-	 */
-	if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6)
-		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
-
 	/* Optimistic short spin before touching IRQs */
 	if (i915_spin_request(req, state, 5))
 		goto complete;
@@ -858,16 +807,17 @@ int i915_wait_request(struct drm_i915_gem_request *req,
 
 	for (;;) {
 		if (signal_pending_state(state, current)) {
-			ret = -ERESTARTSYS;
+			timeout = -ERESTARTSYS;
 			break;
 		}
 
-		timeout_remain = io_schedule_timeout(timeout_remain);
-		if (timeout_remain == 0) {
-			ret = -ETIME;
+		if (!timeout) {
+			timeout = -ETIME;
 			break;
 		}
 
+		timeout = io_schedule_timeout(timeout);
+
 		if (intel_wait_complete(&wait))
 			break;
 
@@ -914,40 +864,7 @@ wakeup:
 complete:
 	trace_i915_gem_request_wait_end(req);
 
-	if (timeout) {
-		*timeout -= ktime_get_raw_ns();
-		if (*timeout < 0)
-			*timeout = 0;
-
-		/*
-		 * Apparently ktime isn't accurate enough and occasionally has a
-		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
-		 * things up to make the test happy. We allow up to 1 jiffy.
-		 *
-		 * This is a regrssion from the timespec->ktime conversion.
-		 */
-		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
-			*timeout = 0;
-	}
-
-	if (IS_RPS_USER(rps) &&
-	    req->fence.seqno == req->engine->last_submitted_seqno) {
-		/* The GPU is now idle and this client has stalled.
-		 * Since no other client has submitted a request in the
-		 * meantime, assume that this client is the only one
-		 * supplying work to the GPU but is unable to keep that
-		 * work supplied because it is waiting. Since the GPU is
-		 * then never kept fully busy, RPS autoclocking will
-		 * keep the clocks relatively low, causing further delays.
-		 * Compensate by giving the synchronous client credit for
-		 * a waitboost next time.
-		 */
-		spin_lock(&req->i915->rps.client_lock);
-		list_del_init(&rps->link);
-		spin_unlock(&req->i915->rps.client_lock);
-	}
-
-	return ret;
+	return timeout;
 }
 
 static bool engine_retire_requests(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index c85a3d82febf..f1ce390c9244 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -228,13 +228,13 @@ struct intel_rps_client;
 #define IS_RPS_CLIENT(p) (!IS_ERR(p))
 #define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
 
-int i915_wait_request(struct drm_i915_gem_request *req,
-		      unsigned int flags,
-		      s64 *timeout,
-		      struct intel_rps_client *rps)
+long i915_wait_request(struct drm_i915_gem_request *req,
+		       unsigned int flags,
+		       long timeout)
 	__attribute__((nonnull(1)));
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
+#define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
 
@@ -583,14 +583,16 @@ static inline int __must_check
 i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
 {
 	struct drm_i915_gem_request *request;
+	long ret;
 
 	request = i915_gem_active_peek(active, mutex);
 	if (!request)
 		return 0;
 
-	return i915_wait_request(request,
-				 I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
-				 NULL, NULL);
+	ret = i915_wait_request(request,
+				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				MAX_SCHEDULE_TIMEOUT);
+	return ret < 0 ? ret : 0;
 }
 
 /**
@@ -617,20 +619,18 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
  */
 static inline int
 i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
-			      unsigned int flags,
-			      s64 *timeout,
-			      struct intel_rps_client *rps)
+			      unsigned int flags)
 {
 	struct drm_i915_gem_request *request;
-	int ret = 0;
+	long ret = 0;
 
 	request = i915_gem_active_get_unlocked(active);
 	if (request) {
-		ret = i915_wait_request(request, flags, timeout, rps);
+		ret = i915_wait_request(request, flags, MAX_SCHEDULE_TIMEOUT);
 		i915_gem_request_put(request);
 	}
 
-	return ret;
+	return ret < 0 ? ret : 0;
 }
 
 /**
@@ -647,7 +647,7 @@ i915_gem_active_retire(struct i915_gem_active *active,
 		       struct mutex *mutex)
 {
 	struct drm_i915_gem_request *request;
-	int ret;
+	long ret;
 
 	request = i915_gem_active_raw(active, mutex);
 	if (!request)
@@ -655,8 +655,8 @@ i915_gem_active_retire(struct i915_gem_active *active,
 
 	ret = i915_wait_request(request,
 				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
-				NULL, NULL);
-	if (ret)
+				MAX_SCHEDULE_TIMEOUT);
+	if (ret < 0)
 		return ret;
 
 	list_del_init(&active->link);
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index e537930c64b5..1c891b92ac80 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -61,23 +61,13 @@ struct i915_mmu_object {
 	bool attached;
 };
 
-static void wait_rendering(struct drm_i915_gem_object *obj)
-{
-	unsigned long active = __I915_BO_ACTIVE(obj);
-	int idx;
-
-	for_each_active(active, idx)
-		i915_gem_active_wait_unlocked(&obj->last_read[idx],
-					      0, NULL, NULL);
-}
-
 static void cancel_userptr(struct work_struct *work)
 {
 	struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
 	struct drm_i915_gem_object *obj = mo->obj;
 	struct drm_device *dev = obj->base.dev;
 
-	wait_rendering(obj);
+	i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL);
 
 	mutex_lock(&dev->struct_mutex);
 	/* Cancel any active worker and force us to re-evaluate gup */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4b3d315c5254..5ee7bab6e560 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12042,7 +12042,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
 
 	if (work->flip_queued_req)
 		WARN_ON(i915_wait_request(work->flip_queued_req,
-					  0, NULL, NO_WAITBOOST));
+					  0, MAX_SCHEDULE_TIMEOUT) < 0);
 
 	/* For framebuffer backed by dmabuf, wait for fence */
 	resv = i915_gem_object_get_dmabuf_resv(obj);
@@ -14089,19 +14089,21 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
 		for_each_plane_in_state(state, plane, plane_state, i) {
 			struct intel_plane_state *intel_plane_state =
 				to_intel_plane_state(plane_state);
+			long timeout;
 
 			if (!intel_plane_state->wait_req)
 				continue;
 
-			ret = i915_wait_request(intel_plane_state->wait_req,
-						I915_WAIT_INTERRUPTIBLE,
-						NULL, NULL);
-			if (ret) {
+			timeout = i915_wait_request(intel_plane_state->wait_req,
+						    I915_WAIT_INTERRUPTIBLE,
+						    MAX_SCHEDULE_TIMEOUT);
+			if (timeout < 0) {
 				/* Any hang should be swallowed by the wait */
-				WARN_ON(ret == -EIO);
+				WARN_ON(timeout == -EIO);
 				mutex_lock(&dev->struct_mutex);
 				drm_atomic_helper_cleanup_planes(dev, state);
 				mutex_unlock(&dev->struct_mutex);
+				ret = timeout;
 				break;
 			}
 		}
@@ -14303,7 +14305,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 	bool hw_check = intel_state->modeset;
 	unsigned long put_domains[I915_MAX_PIPES] = {};
 	unsigned crtc_vblank_mask = 0;
-	int i, ret;
+	int i;
 
 	for_each_plane_in_state(state, plane, plane_state, i) {
 		struct intel_plane_state *intel_plane_state =
@@ -14312,11 +14314,10 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 		if (!intel_plane_state->wait_req)
 			continue;
 
-		ret = i915_wait_request(intel_plane_state->wait_req,
-					0, NULL, NULL);
 		/* EIO should be eaten, and we can't get interrupted in the
 		 * worker, and blocking commits have waited already. */
-		WARN_ON(ret);
+		WARN_ON(i915_wait_request(intel_plane_state->wait_req,
+					  0, MAX_SCHEDULE_TIMEOUT) < 0);
 	}
 
 	drm_atomic_helper_wait_for_dependencies(state);
@@ -14679,7 +14680,11 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		 * can safely continue.
 		 */
 		if (needs_modeset(crtc_state))
-			ret = i915_gem_object_wait_rendering(old_obj, true);
+			ret = i915_gem_object_wait(old_obj,
+						   I915_WAIT_INTERRUPTIBLE |
+						   I915_WAIT_LOCKED,
+						   MAX_SCHEDULE_TIMEOUT,
+						   NULL);
 		if (ret) {
 			/* GPU hangs should have been swallowed by the wait */
 			WARN_ON(ret == -EIO);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 729f373782e2..b60c6f09fbfd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2156,7 +2156,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 {
 	struct intel_ring *ring = req->ring;
 	struct drm_i915_gem_request *target;
-	int ret;
+	long timeout;
+
+	lockdep_assert_held(&req->i915->drm.struct_mutex);
 
 	intel_ring_update_space(ring);
 	if (ring->space >= bytes)
@@ -2186,11 +2188,11 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
 	if (WARN_ON(&target->ring_link == &ring->request_list))
 		return -ENOSPC;
 
-	ret = i915_wait_request(target,
-				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
-				NULL, NO_WAITBOOST);
-	if (ret)
-		return ret;
+	timeout = i915_wait_request(target,
+				    I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				    MAX_SCHEDULE_TIMEOUT);
+	if (timeout < 0)
+		return timeout;
 
 	i915_gem_request_retire_upto(target);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 124f4646958d..dbb7176534af 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -530,8 +530,7 @@ static inline int intel_engine_idle(struct intel_engine_cs *engine,
 				    unsigned int flags)
 {
 	/* Wait upon the last request to be completed */
-	return i915_gem_active_wait_unlocked(&engine->last_request,
-					     flags, NULL, NULL);
+	return i915_gem_active_wait_unlocked(&engine->last_request, flags);
 }
 
 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 09/42] drm/i915: Remove unused i915_gem_active_wait() in favour of _unlocked()
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (7 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 08/42] drm/i915: Rearrange i915_wait_request() accounting with callers Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 10/42] drm/i915: Defer active reference until required Chris Wilson
                   ` (35 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Since we only use the more generic unlocked variant, just rename it to
the normal i915_gem_active_wait(). The temporary cost is that we need to
always acquire the reference in an RCU-safe manner, but the benefit is
that we will combine the common paths.
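
As a rough illustration (this sketch is not part of the patch), a caller
that previously went through the _unlocked() variant now simply reads as
below; example_wait_for_engine() is a made-up name, and the body mirrors
the intel_engine_idle() hunk:

  static int example_wait_for_engine(struct intel_engine_cs *engine)
  {
          /* No struct_mutex needed: the request reference is taken under
           * RCU inside i915_gem_active_wait() and dropped after the wait.
           */
          return i915_gem_active_wait(&engine->last_request,
                                      I915_WAIT_INTERRUPTIBLE);
  }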

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.h | 34 +++------------------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 2 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index f1ce390c9244..45998eedda2c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -569,40 +569,13 @@ i915_gem_active_is_idle(const struct i915_gem_active *active,
 }
 
 /**
- * i915_gem_active_wait - waits until the request is completed
- * @active - the active request on which to wait
- *
- * i915_gem_active_wait() waits until the request is completed before
- * returning. Note that it does not guarantee that the request is
- * retired first, see i915_gem_active_retire().
- *
- * i915_gem_active_wait() returns immediately if the active
- * request is already complete.
- */
-static inline int __must_check
-i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
-{
-	struct drm_i915_gem_request *request;
-	long ret;
-
-	request = i915_gem_active_peek(active, mutex);
-	if (!request)
-		return 0;
-
-	ret = i915_wait_request(request,
-				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
-				MAX_SCHEDULE_TIMEOUT);
-	return ret < 0 ? ret : 0;
-}
-
-/**
- * i915_gem_active_wait_unlocked - waits until the request is completed
+ * i915_gem_active_wait - waits until the request is completed
  * @active - the active request on which to wait
  * @flags - how to wait
  * @timeout - how long to wait at most
  * @rps - userspace client to charge for a waitboost
  *
- * i915_gem_active_wait_unlocked() waits until the request is completed before
+ * i915_gem_active_wait() waits until the request is completed before
  * returning, without requiring any locks to be held. Note that it does not
  * retire any requests before returning.
  *
@@ -618,8 +591,7 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
  * Returns 0 if successful, or a negative error code.
  */
 static inline int
-i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
-			      unsigned int flags)
+i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags)
 {
 	struct drm_i915_gem_request *request;
 	long ret = 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index dbb7176534af..a0510d4940b2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -530,7 +530,7 @@ static inline int intel_engine_idle(struct intel_engine_cs *engine,
 				    unsigned int flags)
 {
 	/* Wait upon the last request to be completed */
-	return i915_gem_active_wait_unlocked(&engine->last_request, flags);
+	return i915_gem_active_wait(&engine->last_request, flags);
 }
 
 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 10/42] drm/i915: Defer active reference until required
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (8 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 09/42] drm/i915: Remove unused i915_gem_active_wait() in favour of _unlocked() Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07 16:35   ` Tvrtko Ursulin
  2016-10-07  9:46 ` [PATCH 11/42] drm/i915: Introduce an internal allocator for disposable private objects Chris Wilson
                   ` (34 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

We only need the active reference to keep the object alive after the
handle has been deleted (so as to prevent a synchronous gem_close). Why
then pay the price of a kref on every execbuf when we can insert that
final active ref just in time for the handle deletion?
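
As a minimal sketch (not part of the patch itself), the pattern the
converted release paths follow looks like the below; example_free_ring()
is a made-up name, but the calls mirror intel_ring_free() in this series:

  static void example_free_ring(struct intel_ring *ring)
  {
          struct drm_i915_gem_object *obj = ring->vma->obj;

          i915_vma_close(ring->vma);

          /* Puts the object now if it is idle, otherwise marks it with
           * I915_BO_ACTIVE_REF so the final put happens on retirement.
           */
          __i915_gem_object_release_unless_active(obj);

          kfree(ring);
  }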

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h              | 28 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem.c              | 22 +++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_context.c      |  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  2 --
 drivers/gpu/drm/i915/i915_gem_gtt.c          |  7 ++++++-
 drivers/gpu/drm/i915/i915_gem_render_state.c |  3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 15 ++++++++++++---
 8 files changed, 71 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ee25e265416f..fee5cc92e2f2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2232,6 +2232,12 @@ struct drm_i915_gem_object {
 	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
 
 	/**
+	 * Have we taken a reference for the object for incomplete GPU
+	 * activity?
+	 */
+#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
+
+	/**
 	 * This is set if the object has been written to since last bound
 	 * to the GTT
 	 */
@@ -2399,6 +2405,28 @@ i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
 	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
 }
 
+static inline bool
+i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
+{
+	return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
+}
+
+static inline void
+i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	__set_bit(I915_BO_ACTIVE_REF, &obj->flags);
+}
+
+static inline void
+i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	__clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
+}
+
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
+
 static inline unsigned int
 i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7fa5cb764739..b560263bf446 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2618,7 +2618,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
 		list_move_tail(&obj->global_list,
 			       &request->i915->mm.bound_list);
 
-	i915_gem_object_put(obj);
+	if (i915_gem_object_has_active_reference(obj)) {
+		i915_gem_object_clear_active_reference(obj);
+		i915_gem_object_put(obj);
+	}
 }
 
 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
@@ -2889,6 +2892,12 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
 	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
 		if (vma->vm->file == fpriv)
 			i915_vma_close(vma);
+
+	if (i915_gem_object_is_active(obj) &&
+	    !i915_gem_object_has_active_reference(obj)) {
+		i915_gem_object_set_active_reference(obj);
+		i915_gem_object_get(obj);
+	}
 	mutex_unlock(&obj->base.dev->struct_mutex);
 }
 
@@ -4365,6 +4374,17 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	intel_runtime_pm_put(dev_priv);
 }
 
+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
+	if (i915_gem_object_is_active(obj))
+		i915_gem_object_set_active_reference(obj);
+	else
+		i915_gem_object_put(obj);
+}
+
 int i915_gem_suspend(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index ed989596d9a3..cb25cad3318c 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -73,7 +73,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 		list_for_each_entry_safe(obj, next,
 					 &pool->cache_list[n],
 					 batch_pool_link)
-			i915_gem_object_put(obj);
+			__i915_gem_object_release_unless_active(obj);
 
 		INIT_LIST_HEAD(&pool->cache_list[n]);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index df10f4e95736..1d2ab73a8f43 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -155,7 +155,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
 		if (ce->ring)
 			intel_ring_free(ce->ring);
 
-		i915_vma_put(ce->state);
+		__i915_gem_object_release_unless_active(ce->state->obj);
 	}
 
 	put_pid(ctx->pid);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 72c7c1855e70..0deecd4e3b6c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1299,8 +1299,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!i915_gem_object_is_active(obj))
-		i915_gem_object_get(obj);
 	i915_gem_object_set_active(obj, idx);
 	i915_gem_active_set(&obj->last_read[idx], req);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2d846aa39ca5..1c95da8424cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3712,11 +3712,16 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 void i915_vma_unpin_and_release(struct i915_vma **p_vma)
 {
 	struct i915_vma *vma;
+	struct drm_i915_gem_object *obj;
 
 	vma = fetch_and_zero(p_vma);
 	if (!vma)
 		return;
 
+	obj = vma->obj;
+
 	i915_vma_unpin(vma);
-	i915_vma_put(vma);
+	i915_vma_close(vma);
+
+	__i915_gem_object_release_unless_active(obj);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 95b7e9afd5f8..09cf4874c45f 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -224,7 +224,8 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
 	i915_vma_move_to_active(so.vma, req, 0);
 err_unpin:
 	i915_vma_unpin(so.vma);
+	i915_vma_close(so.vma);
 err_obj:
-	i915_gem_object_put(obj);
+	__i915_gem_object_release_unless_active(obj);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b60c6f09fbfd..f3dfb7ca625d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1763,14 +1763,19 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine)
 static void cleanup_status_page(struct intel_engine_cs *engine)
 {
 	struct i915_vma *vma;
+	struct drm_i915_gem_object *obj;
 
 	vma = fetch_and_zero(&engine->status_page.vma);
 	if (!vma)
 		return;
 
+	obj = vma->obj;
+
 	i915_vma_unpin(vma);
-	i915_gem_object_unpin_map(vma->obj);
-	i915_vma_put(vma);
+	i915_vma_close(vma);
+
+	i915_gem_object_unpin_map(obj);
+	__i915_gem_object_release_unless_active(obj);
 }
 
 static int init_status_page(struct intel_engine_cs *engine)
@@ -1968,7 +1973,11 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 void
 intel_ring_free(struct intel_ring *ring)
 {
-	i915_vma_put(ring->vma);
+	struct drm_i915_gem_object *obj = ring->vma->obj;
+
+	i915_vma_close(ring->vma);
+	__i915_gem_object_release_unless_active(obj);
+
 	kfree(ring);
 }
 
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 11/42] drm/i915: Introduce an internal allocator for disposable private objects
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (9 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 10/42] drm/i915: Defer active reference until required Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07 10:01   ` Joonas Lahtinen
  2016-10-07 16:52   ` Tvrtko Ursulin
  2016-10-07  9:46 ` [PATCH 12/42] drm/i915: Reuse the active golden render state batch Chris Wilson
                   ` (33 subsequent siblings)
  44 siblings, 2 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Quite a few of our objects used for internal hardware programming do not
benefit from being swappable or from being zero initialised. As such
they do not benefit from using a shmemfs backing storage and since they
are internal and never directly exposed to the user, we do not need to
worry about providing a filp. For these we can use a
drm_i915_gem_object wrapper around an sg_table of plain struct page.
They are not swap-backed and not automatically pinned. If they are
reaped by the shrinker, the pages are released and the contents
discarded. For the internal use case this is fine: for example,
ringbuffers are pinned from the time they are written by a request
until the hardware has finished reading them. Once idle, they can be
discarded entirely. As such they are a good
match for execlist ringbuffers and a small variety of other internal
objects.

In the first iteration, this is limited to the scratch batch buffers we
use (for command parsing and state initialisation).
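
As a hedged sketch (not part of the patch), allocating and mapping one
of these internal-only buffers might look like the below;
example_map_scratch() is a made-up helper, the calls mirror
init_status_page() in this patch and assume struct_mutex is held:

  static void *example_map_scratch(struct drm_i915_private *i915,
                                   struct drm_i915_gem_object **out)
  {
          struct drm_i915_gem_object *obj;
          void *vaddr;

          /* Not shmem backed, not zeroed, never exposed to userspace. */
          obj = i915_gem_object_create_internal(&i915->drm, 4096);
          if (IS_ERR(obj))
                  return obj;

          /* Pinning the map keeps the volatile pages resident; once
           * unpinned, the shrinker may discard the contents entirely.
           */
          vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
          if (IS_ERR(vaddr)) {
                  i915_gem_object_put(obj);
                  return vaddr;
          }

          *out = obj;
          return vaddr;
  }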

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile                |   1 +
 drivers/gpu/drm/i915/i915_drv.h              |   5 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  28 ++---
 drivers/gpu/drm/i915/i915_gem_internal.c     | 161 +++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c       |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  14 ++-
 7 files changed, 189 insertions(+), 24 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a998c2bce70a..b94a90f34d2d 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem_execbuffer.o \
 	  i915_gem_fence.o \
 	  i915_gem_gtt.o \
+	  i915_gem_internal.o \
 	  i915_gem.o \
 	  i915_gem_render_state.o \
 	  i915_gem_request.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fee5cc92e2f2..bad97f1e5265 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3538,6 +3538,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 					       u32 gtt_offset,
 					       u32 size);
 
+/* i915_gem_internal.c */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_device *dev,
+				unsigned int size);
+
 /* i915_gem_shrinker.c */
 unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
 			      unsigned long target,
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index cb25cad3318c..3934c9103cf2 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -97,9 +97,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 			size_t size)
 {
 	struct drm_i915_gem_object *obj = NULL;
-	struct drm_i915_gem_object *tmp, *next;
+	struct drm_i915_gem_object *tmp;
 	struct list_head *list;
-	int n;
+	int n, ret;
 
 	lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
 
@@ -112,19 +112,12 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 		n = ARRAY_SIZE(pool->cache_list) - 1;
 	list = &pool->cache_list[n];
 
-	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
+	list_for_each_entry(tmp, list, batch_pool_link) {
 		/* The batches are strictly LRU ordered */
 		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
 					     &tmp->base.dev->struct_mutex))
 			break;
 
-		/* While we're looping, do some clean up */
-		if (tmp->madv == __I915_MADV_PURGED) {
-			list_del(&tmp->batch_pool_link);
-			i915_gem_object_put(tmp);
-			continue;
-		}
-
 		if (tmp->base.size >= size) {
 			obj = tmp;
 			break;
@@ -132,19 +125,16 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 	}
 
 	if (obj == NULL) {
-		int ret;
-
-		obj = i915_gem_object_create(&pool->engine->i915->drm, size);
+		obj = i915_gem_object_create_internal(&pool->engine->i915->drm,
+						      size);
 		if (IS_ERR(obj))
 			return obj;
-
-		ret = i915_gem_object_get_pages(obj);
-		if (ret)
-			return ERR_PTR(ret);
-
-		obj->madv = I915_MADV_DONTNEED;
 	}
 
+	ret = i915_gem_object_get_pages(obj);
+	if (ret)
+		return ERR_PTR(ret);
+
 	list_move_tail(&obj->batch_pool_link, list);
 	i915_gem_object_pin_pages(obj);
 	return obj;
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
new file mode 100644
index 000000000000..534a61c1aba2
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2014-2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drmP.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+
+static void internal_free_pages(struct sg_table *st)
+{
+	struct sg_page_iter sg_iter;
+
+	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
+		put_page(sg_page_iter_page(&sg_iter));
+
+	sg_free_table(st);
+	kfree(st);
+}
+
+static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
+{
+	const unsigned int npages = obj->base.size / PAGE_SIZE;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	unsigned long last_pfn = 0;	/* suppress gcc warning */
+	gfp_t gfp;
+	int i;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, npages, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	sg = st->sgl;
+	st->nents = 0;
+
+	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
+	if (IS_CRESTLINE(obj->base.dev) || IS_BROADWATER(obj->base.dev)) {
+		/* 965gm cannot relocate objects above 4GiB. */
+		gfp &= ~__GFP_HIGHMEM;
+		gfp |= __GFP_DMA32;
+	}
+
+	for (i = 0; i < npages; i++) {
+		struct page *page;
+
+		page = alloc_page(gfp);
+		if (!page)
+			goto err;
+
+#ifdef CONFIG_SWIOTLB
+		if (swiotlb_nr_tbl()) {
+			st->nents++;
+			sg_set_page(sg, page, PAGE_SIZE, 0);
+			sg = sg_next(sg);
+			continue;
+		}
+#endif
+		if (!i || page_to_pfn(page) != last_pfn + 1) {
+			if (i)
+				sg = sg_next(sg);
+			st->nents++;
+			sg_set_page(sg, page, PAGE_SIZE, 0);
+		} else {
+			sg->length += PAGE_SIZE;
+		}
+		last_pfn = page_to_pfn(page);
+	}
+#ifdef CONFIG_SWIOTLB
+	if (!swiotlb_nr_tbl())
+#endif
+		sg_mark_end(sg);
+	obj->pages = st;
+
+	if (i915_gem_gtt_prepare_object(obj)) {
+		obj->pages = NULL;
+		goto err;
+	}
+
+	/* Mark the pages as dontneed whilst they are still pinned. As soon
+	 * as they are unpinned they are allowed to be reaped by the shrinker,
+	 * and the caller is expected to repopulate - the contents of this
+	 * object are only valid whilst active and pinned.
+	 */
+	obj->madv = I915_MADV_DONTNEED;
+	return 0;
+
+err:
+	sg_mark_end(sg);
+	internal_free_pages(st);
+	return -ENOMEM;
+}
+
+static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj)
+{
+	internal_free_pages(obj->pages);
+
+	obj->dirty = 0;
+	obj->madv = I915_MADV_WILLNEED;
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
+	.get_pages = i915_gem_object_get_pages_internal,
+	.put_pages = i915_gem_object_put_pages_internal,
+};
+
+/**
+ * Creates a new object that wraps some internal memory for private use.
+ * This object is not backed by swappable storage, and as such its contents
+ * are volatile and only valid whilst pinned. If the object is reaped by the
+ * shrinker, its pages and data will be discarded. Equally, it is not a full
+ * GEM object and so not valid for access from userspace. This makes it useful
+ * for hardware interfaces like ringbuffers (which are pinned from the time
+ * the request is written to the time the hardware stops accessing it), but
+ * not for contexts (which need to be preserved when not active for later
+ * reuse). Note that it is not cleared upon allocation.
+ */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_device *dev,
+				unsigned int size)
+{
+	struct drm_i915_gem_object *obj;
+
+	obj = i915_gem_object_alloc(dev);
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	drm_gem_private_object_init(dev, &obj->base, size);
+	i915_gem_object_init(obj, &i915_gem_object_internal_ops);
+
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+	obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE;
+
+	return obj;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 09cf4874c45f..c009ee1c27fd 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -187,7 +187,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
 	if (so.rodata->batch_items * 4 > 4096)
 		return -EINVAL;
 
-	obj = i915_gem_object_create(&req->i915->drm, 4096);
+	obj = i915_gem_object_create_internal(&req->i915->drm, 4096);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 480584c09306..99737b842d73 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -251,7 +251,7 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
 
 	obj = i915_gem_object_create_stolen(&engine->i915->drm, size);
 	if (!obj)
-		obj = i915_gem_object_create(&engine->i915->drm, size);
+		obj = i915_gem_object_create_internal(&engine->i915->drm, size);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("Failed to allocate scratch page\n");
 		return PTR_ERR(obj);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f3dfb7ca625d..b5d9c03f84ce 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1783,9 +1783,10 @@ static int init_status_page(struct intel_engine_cs *engine)
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	unsigned int flags;
+	void *vaddr;
 	int ret;
 
-	obj = i915_gem_object_create(&engine->i915->drm, 4096);
+	obj = i915_gem_object_create_internal(&engine->i915->drm, 4096);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("Failed to allocate status page\n");
 		return PTR_ERR(obj);
@@ -1818,15 +1819,22 @@ static int init_status_page(struct intel_engine_cs *engine)
 	if (ret)
 		goto err;
 
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		ret = PTR_ERR(vaddr);
+		goto err_unpin;
+	}
+
 	engine->status_page.vma = vma;
 	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
-	engine->status_page.page_addr =
-		i915_gem_object_pin_map(obj, I915_MAP_WB);
+	engine->status_page.page_addr = memset(vaddr, 0, 4096);
 
 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
 			 engine->name, i915_ggtt_offset(vma));
 	return 0;
 
+err_unpin:
+	i915_vma_unpin(vma);
 err:
 	i915_gem_object_put(obj);
 	return ret;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 12/42] drm/i915: Reuse the active golden render state batch
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (10 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 11/42] drm/i915: Introduce an internal allocator for disposable private objects Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 13/42] drm/i915: Markup GEM API with lockdep asserts Chris Wilson
                   ` (32 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

The golden render state is constant, but we recreate the batch setting
it up for every new context. If we keep that batch in a volatile cache
we can safely reuse it whenever we need to initialise a new context. We
mark the pages as purgeable and use the shrinker to recover pages from
the batch whenever we face memory pressure, recreating that batch afresh
on the next new context.
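
A rough sketch of the resulting split (illustration only, not part of
the patch); example_engine_setup() and example_new_context() are made-up
names, the real callers being intel_engine_init_common() and the
context-init paths:

  static int example_engine_setup(struct intel_engine_cs *engine)
  {
          /* Once per engine: allocate the volatile batch object; its
           * contents are written lazily and may be reaped whilst unpinned.
           */
          return i915_gem_render_state_init(engine);
  }

  static int example_new_context(struct drm_i915_gem_request *req)
  {
          /* Per new context: pin the batch, rewriting it only if it was
           * discarded or rebound, then replay it from the request.
           */
          return i915_gem_render_state_emit(req);
  }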

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_render_state.c | 184 +++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_render_state.h |   4 +-
 drivers/gpu/drm/i915/intel_engine_cs.c       |   5 +
 drivers/gpu/drm/i915/intel_lrc.c             |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c      |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h      |   3 +
 6 files changed, 129 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index c009ee1c27fd..fc3421c59906 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -28,17 +28,19 @@
 #include "i915_drv.h"
 #include "intel_renderstate.h"
 
-struct render_state {
+struct intel_render_state {
 	const struct intel_renderstate_rodata *rodata;
 	struct i915_vma *vma;
-	u32 aux_batch_size;
-	u32 aux_batch_offset;
+	u32 batch_offset;
+	u32 batch_size;
+	u32 aux_offset;
+	u32 aux_size;
 };
 
 static const struct intel_renderstate_rodata *
-render_state_get_rodata(const struct drm_i915_gem_request *req)
+render_state_get_rodata(const struct intel_engine_cs *engine)
 {
-	switch (INTEL_GEN(req->i915)) {
+	switch (INTEL_GEN(engine->i915)) {
 	case 6:
 		return &gen6_null_state;
 	case 7:
@@ -63,29 +65,27 @@ render_state_get_rodata(const struct drm_i915_gem_request *req)
  */
 #define OUT_BATCH(batch, i, val)				\
 	do {							\
-		if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {	\
-			ret = -ENOSPC;				\
-			goto err_out;				\
-		}						\
+		if ((i) >= PAGE_SIZE / sizeof(u32))		\
+			goto err;				\
 		(batch)[(i)++] = (val);				\
 	} while(0)
 
-static int render_state_setup(struct render_state *so)
+static int render_state_setup(struct intel_render_state *so,
+			      struct drm_i915_private *i915)
 {
-	struct drm_device *dev = so->vma->vm->dev;
 	const struct intel_renderstate_rodata *rodata = so->rodata;
-	const bool has_64bit_reloc = INTEL_GEN(dev) >= 8;
+	const bool has_64bit_reloc = INTEL_GEN(i915) >= 8;
+	struct drm_i915_gem_object *obj = so->vma->obj;
 	unsigned int i = 0, reloc_index = 0;
-	struct page *page;
+	unsigned int needs_clflush;
 	u32 *d;
 	int ret;
 
-	ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true);
+	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
 	if (ret)
 		return ret;
 
-	page = i915_gem_object_get_dirty_page(so->vma->obj, 0);
-	d = kmap(page);
+	d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0));
 
 	while (i < rodata->batch_items) {
 		u32 s = rodata->batch[i];
@@ -95,10 +95,8 @@ static int render_state_setup(struct render_state *so)
 			s = lower_32_bits(r);
 			if (has_64bit_reloc) {
 				if (i + 1 >= rodata->batch_items ||
-				    rodata->batch[i + 1] != 0) {
-					ret = -EINVAL;
-					goto err_out;
-				}
+				    rodata->batch[i + 1] != 0)
+					goto err;
 
 				d[i++] = s;
 				s = upper_32_bits(r);
@@ -110,12 +108,20 @@ static int render_state_setup(struct render_state *so)
 		d[i++] = s;
 	}
 
+	if (rodata->reloc[reloc_index] != -1) {
+		DRM_ERROR("only %d relocs resolved\n", reloc_index);
+		goto err;
+	}
+
+	so->batch_offset = so->vma->node.start;
+	so->batch_size = rodata->batch_items * sizeof(u32);
+
 	while (i % CACHELINE_DWORDS)
 		OUT_BATCH(d, i, MI_NOOP);
 
-	so->aux_batch_offset = i * sizeof(u32);
+	so->aux_offset = i * sizeof(u32);
 
-	if (HAS_POOLED_EU(dev)) {
+	if (HAS_POOLED_EU(i915)) {
 		/*
 		 * We always program 3x6 pool config but depending upon which
 		 * subslice is disabled HW drops down to appropriate config
@@ -143,89 +149,131 @@ static int render_state_setup(struct render_state *so)
 	}
 
 	OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
-	so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
-
+	so->aux_size = i * sizeof(u32) - so->aux_offset;
+	so->aux_offset += so->batch_offset;
 	/*
 	 * Since we are sending length, we need to strictly conform to
 	 * all requirements. For Gen2 this must be a multiple of 8.
 	 */
-	so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
-
-	kunmap(page);
-
-	ret = i915_gem_object_set_to_gtt_domain(so->vma->obj, false);
-	if (ret)
-		return ret;
-
-	if (rodata->reloc[reloc_index] != -1) {
-		DRM_ERROR("only %d relocs resolved\n", reloc_index);
-		return -EINVAL;
-	}
+	so->aux_size = ALIGN(so->aux_size, 8);
 
-	return 0;
+	if (needs_clflush)
+		drm_clflush_virt_range(d, i * sizeof(u32));
+	kunmap_atomic(d);
 
-err_out:
-	kunmap(page);
+	ret = i915_gem_object_set_to_gtt_domain(obj, false);
+out:
+	i915_gem_obj_finish_shmem_access(obj);
 	return ret;
+
+err:
+	kunmap_atomic(d);
+	ret = -EINVAL;
+	goto out;
 }
 
 #undef OUT_BATCH
 
-int i915_gem_render_state_init(struct drm_i915_gem_request *req)
+int i915_gem_render_state_init(struct intel_engine_cs *engine)
 {
-	struct render_state so;
+	struct intel_render_state *so;
+	const struct intel_renderstate_rodata *rodata;
 	struct drm_i915_gem_object *obj;
 	int ret;
 
-	if (WARN_ON(req->engine->id != RCS))
-		return -ENOENT;
+	if (engine->id != RCS)
+		return 0;
 
-	so.rodata = render_state_get_rodata(req);
-	if (!so.rodata)
+	rodata = render_state_get_rodata(engine);
+	if (!rodata)
 		return 0;
 
-	if (so.rodata->batch_items * 4 > 4096)
+	if (rodata->batch_items * 4 > 4096)
 		return -EINVAL;
 
-	obj = i915_gem_object_create_internal(&req->i915->drm, 4096);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
+	so = kmalloc(sizeof(*so), GFP_KERNEL);
+	if (!so)
+		return -ENOMEM;
 
-	so.vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL);
-	if (IS_ERR(so.vma)) {
-		ret = PTR_ERR(so.vma);
-		goto err_obj;
+	obj = i915_gem_object_create_internal(&engine->i915->drm, 4096);
+	if (IS_ERR(obj)) {
+		ret = PTR_ERR(obj);
+		goto err_free;
 	}
 
-	ret = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL);
-	if (ret)
+	so->vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
+	if (IS_ERR(so->vma)) {
+		ret = PTR_ERR(so->vma);
 		goto err_obj;
+	}
+
+	so->rodata = rodata;
+	engine->render_state = so;
+	return 0;
 
-	ret = render_state_setup(&so);
+err_obj:
+	i915_gem_object_put(obj);
+err_free:
+	kfree(so);
+	return ret;
+}
+
+int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
+{
+	struct intel_render_state *so;
+	int ret;
+
+	so = req->engine->render_state;
+	if (!so)
+		return 0;
+
+	/* Recreate the page after shrinking */
+	if (!so->vma->obj->pages)
+		so->batch_offset = -1;
+
+	ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (ret)
-		goto err_unpin;
+		return ret;
 
-	ret = req->engine->emit_bb_start(req, so.vma->node.start,
-					 so.rodata->batch_items * 4,
+	if (so->vma->node.start != so->batch_offset) {
+		ret = render_state_setup(so, req->i915);
+		if (ret)
+			goto err_unpin;
+	}
+
+	ret = req->engine->emit_bb_start(req,
+					 so->batch_offset, so->batch_size,
 					 I915_DISPATCH_SECURE);
 	if (ret)
 		goto err_unpin;
 
-	if (so.aux_batch_size > 8) {
+	if (so->aux_size > 8) {
 		ret = req->engine->emit_bb_start(req,
-						 (so.vma->node.start +
-						  so.aux_batch_offset),
-						 so.aux_batch_size,
+						 so->aux_offset, so->aux_size,
 						 I915_DISPATCH_SECURE);
 		if (ret)
 			goto err_unpin;
 	}
 
-	i915_vma_move_to_active(so.vma, req, 0);
+	i915_vma_move_to_active(so->vma, req, 0);
 err_unpin:
-	i915_vma_unpin(so.vma);
-	i915_vma_close(so.vma);
-err_obj:
-	__i915_gem_object_release_unless_active(obj);
+	i915_vma_unpin(so->vma);
 	return ret;
 }
+
+void i915_gem_render_state_fini(struct intel_engine_cs *engine)
+{
+	struct intel_render_state *so;
+	struct drm_i915_gem_object *obj;
+
+	so = fetch_and_zero(&engine->render_state);
+	if (!so)
+		return;
+
+	obj = so->vma->obj;
+
+	i915_vma_close(so->vma);
+	__i915_gem_object_release_unless_active(obj);
+
+	kfree(so);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h
index 18cce3f06e9c..87481845799d 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.h
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
@@ -26,6 +26,8 @@
 
 struct drm_i915_gem_request;
 
-int i915_gem_render_state_init(struct drm_i915_gem_request *req);
+int i915_gem_render_state_init(struct intel_engine_cs *engine);
+int i915_gem_render_state_emit(struct drm_i915_gem_request *req);
+void i915_gem_render_state_fini(struct intel_engine_cs *engine);
 
 #endif /* _I915_GEM_RENDER_STATE_H_ */
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 99737b842d73..b48c51964cb3 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -301,6 +301,10 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
+	ret = i915_gem_render_state_init(engine);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
@@ -315,6 +319,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
 	intel_engine_cleanup_scratch(engine);
 
+	i915_gem_render_state_fini(engine);
 	intel_engine_fini_breadcrumbs(engine);
 	intel_engine_cleanup_cmd_parser(engine);
 	i915_gem_batch_pool_fini(&engine->batch_pool);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 10fcea57e4dd..02c48d390148 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1637,7 +1637,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
 	if (ret)
 		DRM_ERROR("MOCS failed to program: expect performance issues.\n");
 
-	return i915_gem_render_state_init(req);
+	return i915_gem_render_state_emit(req);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b5d9c03f84ce..ec06d75a42aa 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -648,7 +648,7 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
 	if (ret != 0)
 		return ret;
 
-	ret = i915_gem_render_state_init(req);
+	ret = i915_gem_render_state_emit(req);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a0510d4940b2..4289fcda4b3f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -157,6 +157,7 @@ struct i915_ctx_workarounds {
 };
 
 struct drm_i915_gem_request;
+struct intel_render_state;
 
 struct intel_engine_cs {
 	struct drm_i915_private *i915;
@@ -184,6 +185,8 @@ struct intel_engine_cs {
 	unsigned int irq_shift;
 	struct intel_ring *buffer;
 
+	struct intel_render_state *render_state;
+
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
 	 * heavyweight seqno dance, we delegate the task (of being the
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 13/42] drm/i915: Markup GEM API with lockdep asserts
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (11 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 12/42] drm/i915: Reuse the active golden render state batch Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage Chris Wilson
                   ` (31 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Add lockdep_assert_held(struct_mutex) to the API preamble of the
internal GEM interfaces.
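
As a rough illustration of the pattern being added (a sketch only, not
part of the patch; the function name is made up), with lockdep enabled
the assert becomes a runtime check that the caller really holds the
struct_mutex, and compiles away otherwise:

static void i915_gem_example_op(struct drm_i915_gem_object *obj)
{
	/* Document and verify the locking contract: this helper must
	 * only be called with the device's struct_mutex held.
	 */
	lockdep_assert_held(&obj->base.dev->struct_mutex);

	/* ... body that manipulates GEM state protected by struct_mutex ... */
}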

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c              | 21 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_evict.c        |  5 ++++-
 drivers/gpu/drm/i915/i915_gem_gtt.c          |  2 ++
 drivers/gpu/drm/i915/i915_gem_render_state.c |  2 ++
 drivers/gpu/drm/i915/i915_gem_request.c      |  6 ++++++
 5 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b560263bf446..ada837e393a7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -104,6 +104,8 @@ i915_gem_wait_for_error(struct i915_gpu_error *error)
 {
 	int ret;
 
+	might_sleep();
+
 	if (!i915_reset_in_progress(error))
 		return 0;
 
@@ -2295,6 +2297,8 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
 	const struct drm_i915_gem_object_ops *ops = obj->ops;
 
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
 	if (obj->pages == NULL)
 		return 0;
 
@@ -2466,6 +2470,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	const struct drm_i915_gem_object_ops *ops = obj->ops;
 	int ret;
 
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
 	if (obj->pages)
 		return 0;
 
@@ -2746,6 +2752,8 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
 
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
 	i915_gem_retire_requests(dev_priv);
 
 	for_each_engine(engine, dev_priv)
@@ -2974,6 +2982,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 	unsigned long active;
 	int ret;
 
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
 	/* First wait upon any activity as retiring the request may
 	 * have side-effects such as unpinning or even unbinding this vma.
 	 */
@@ -3366,6 +3376,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_LOCKED |
@@ -3444,6 +3455,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret = 0;
 
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
 	if (obj->cache_level == cache_level)
 		goto out;
 
@@ -3654,6 +3667,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	u32 old_read_domains, old_write_domain;
 	int ret;
 
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
 	 */
@@ -3720,6 +3735,8 @@ err_unpin_display:
 void
 i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
 {
+	lockdep_assert_held(&vma->vm->dev->struct_mutex);
+
 	if (WARN_ON(vma->obj->pin_display == 0))
 		return;
 
@@ -3749,6 +3766,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	int ret;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_LOCKED |
@@ -3904,6 +3922,7 @@ int __i915_vma_do_pin(struct i915_vma *vma,
 	unsigned int bound = vma->flags;
 	int ret;
 
+	lockdep_assert_held(&vma->vm->dev->struct_mutex);
 	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
 	GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
 
@@ -3944,6 +3963,8 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
 	vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view);
 	if (IS_ERR(vma))
 		return vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 5b6f81c1dbca..61f716c8768c 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -102,6 +102,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	struct i915_vma *vma, *next;
 	int ret;
 
+	lockdep_assert_held(&vm->dev->struct_mutex);
 	trace_i915_gem_evict(vm, min_size, alignment, flags);
 
 	/*
@@ -212,6 +213,8 @@ i915_gem_evict_for_vma(struct i915_vma *target)
 {
 	struct drm_mm_node *node, *next;
 
+	lockdep_assert_held(&target->vm->dev->struct_mutex);
+
 	list_for_each_entry_safe(node, next,
 			&target->vm->mm.head_node.node_list,
 			node_list) {
@@ -265,7 +268,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 	struct i915_vma *vma, *next;
 	int ret;
 
-	WARN_ON(!mutex_is_locked(&vm->dev->struct_mutex));
+	lockdep_assert_held(&vm->dev->struct_mutex);
 	trace_i915_gem_evict_vm(vm);
 
 	if (do_idle) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1c95da8424cb..78b692e5b998 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3427,6 +3427,7 @@ i915_vma_create(struct drm_i915_gem_object *obj,
 		struct i915_address_space *vm,
 		const struct i915_ggtt_view *view)
 {
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
 	GEM_BUG_ON(view && !i915_is_ggtt(vm));
 	GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view));
 
@@ -3454,6 +3455,7 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
 	GEM_BUG_ON(view && !i915_is_ggtt(vm));
 
 	vma = i915_gem_obj_to_vma(obj, vm, view);
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index fc3421c59906..adb00c5125ad 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -223,6 +223,8 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
 	struct intel_render_state *so;
 	int ret;
 
+	lockdep_assert_held(&req->i915->drm.struct_mutex);
+
 	so = req->engine->render_state;
 	if (!so)
 		return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 9b2ac082d5da..5ad990add14d 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -143,6 +143,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
 	struct i915_gem_active *active, *next;
 
+	lockdep_assert_held(&request->i915->drm.struct_mutex);
+	GEM_BUG_ON(!i915_gem_request_completed(request));
+
 	trace_i915_gem_request_retire(request);
 	list_del_init(&request->link);
 
@@ -267,6 +270,8 @@ int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int ret;
 
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
 	if (seqno == 0)
 		return -EINVAL;
 
@@ -603,6 +608,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	u32 reserved_tail;
 	int ret;
 
+	lockdep_assert_held(&request->i915->drm.struct_mutex);
 	trace_i915_gem_request_add(request);
 
 	/*
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (12 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 13/42] drm/i915: Markup GEM API with lockdep asserts Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07 10:12   ` Joonas Lahtinen
                     ` (2 more replies)
  2016-10-07  9:46 ` [PATCH 15/42] drm/i915: Use radixtree to jump start intel_partial_pages() Chris Wilson
                   ` (30 subsequent siblings)
  44 siblings, 3 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

A while ago we switched from a contiguous array of pages to an sglist,
as the latter is both more convenient for mapping to hardware and avoids
the need for a vmalloc array of pages on every object. However, certain
GEM API calls (like pwrite, pread and relocation processing) still want
access to individual struct pages. A quick hack was to introduce a cache
of the last access so that finding the following page was quick - this
works so long as the caller wants sequential access. Walking backwards,
or multiple callers interleaving, still hits a slow linear search for
each page. One solution is to store each successful lookup in a radix
tree.
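
A minimal sketch of the idea (hypothetical helper names, standard kernel
radix tree API; the patch below additionally stores exceptional entries
for the pages inside each multi-page chunk): every scatterlist chunk the
linear walk passes is recorded under its starting page index, so a later
random lookup can jump straight to the right entry instead of re-walking
the list.

#include <linux/radix-tree.h>
#include <linux/scatterlist.h>

struct sg_lookup_cache {
	struct radix_tree_root radix;	/* page index -> struct scatterlist * */
};

static void sg_lookup_cache_init(struct sg_lookup_cache *cache)
{
	INIT_RADIX_TREE(&cache->radix, GFP_KERNEL);
}

/* Remember where page index @idx lives; a failed insert just means we
 * fall back to the linear walk next time, so the error can be ignored.
 */
static void sg_lookup_cache_remember(struct sg_lookup_cache *cache,
				     unsigned long idx,
				     struct scatterlist *sg)
{
	radix_tree_insert(&cache->radix, idx, sg);
}

/* Returns the cached sg entry starting at @idx, or NULL to take the slow path. */
static struct scatterlist *
sg_lookup_cache_find(struct sg_lookup_cache *cache, unsigned long idx)
{
	return radix_tree_lookup(&cache->radix, idx);
}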

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |  57 ++++--------
 drivers/gpu/drm/i915/i915_gem.c         | 149 ++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_stolen.c  |   4 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |   4 +-
 4 files changed, 154 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bad97f1e5265..a96b446d8db4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2278,9 +2278,12 @@ struct drm_i915_gem_object {
 
 	struct sg_table *pages;
 	int pages_pin_count;
-	struct get_page {
-		struct scatterlist *sg;
-		int last;
+	struct i915_gem_object_page_iter {
+		struct scatterlist *sg_pos;
+		unsigned long sg_idx;
+
+		struct radix_tree_root radix;
+		struct mutex lock;
 	} get_page;
 	void *mapping;
 
@@ -3168,45 +3171,21 @@ static inline int __sg_page_count(struct scatterlist *sg)
 	return sg->length >> PAGE_SHIFT;
 }
 
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n);
-
-static inline dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
-{
-	if (n < obj->get_page.last) {
-		obj->get_page.sg = obj->pages->sgl;
-		obj->get_page.last = 0;
-	}
-
-	while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
-		obj->get_page.last += __sg_page_count(obj->get_page.sg++);
-		if (unlikely(sg_is_chain(obj->get_page.sg)))
-			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
-	}
-
-	return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT);
-}
-
-static inline struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
-{
-	if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
-		return NULL;
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned long n, unsigned int *offset);
 
-	if (n < obj->get_page.last) {
-		obj->get_page.sg = obj->pages->sgl;
-		obj->get_page.last = 0;
-	}
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj,
+			 unsigned long n);
 
-	while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
-		obj->get_page.last += __sg_page_count(obj->get_page.sg++);
-		if (unlikely(sg_is_chain(obj->get_page.sg)))
-			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
-	}
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned long n);
 
-	return nth_page(sg_page(obj->get_page.sg), n - obj->get_page.last);
-}
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n);
 
 static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ada837e393a7..af7d51f16658 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2292,6 +2292,15 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 	kfree(obj->pages);
 }
 
+static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+
+	radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0)
+		radix_tree_delete(&obj->get_page.radix, iter.index);
+}
+
 int
 i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
@@ -2324,6 +2333,8 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 		obj->mapping = NULL;
 	}
 
+	__i915_gem_object_reset_page_iter(obj);
+
 	ops->put_pages(obj);
 	obj->pages = NULL;
 
@@ -2488,8 +2499,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 
 	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 
-	obj->get_page.sg = obj->pages->sgl;
-	obj->get_page.last = 0;
+	obj->get_page.sg_pos = obj->pages->sgl;
+	obj->get_page.sg_idx = 0;
 
 	return 0;
 }
@@ -4242,6 +4253,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
 	obj->madv = I915_MADV_WILLNEED;
+	INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL);
+	mutex_init(&obj->get_page.lock);
 
 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
 }
@@ -4904,21 +4917,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 	}
 }
 
-/* Like i915_gem_object_get_page(), but mark the returned page dirty */
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
-{
-	struct page *page;
-
-	/* Only default objects have per-page dirty tracking */
-	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
-		return NULL;
-
-	page = i915_gem_object_get_page(obj, n);
-	set_page_dirty(page);
-	return page;
-}
-
 /* Allocate a new GEM object and fill it with the supplied data */
 struct drm_i915_gem_object *
 i915_gem_object_create_from_data(struct drm_device *dev,
@@ -4959,3 +4957,120 @@ fail:
 	i915_gem_object_put(obj);
 	return ERR_PTR(ret);
 }
+
+static struct scatterlist *
+__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+			 unsigned long n, unsigned int *offset)
+{
+	struct scatterlist *sg = obj->pages->sgl;
+	int idx = 0;
+
+	while (idx + __sg_page_count(sg) <= n) {
+		idx += __sg_page_count(sg++);
+		if (unlikely(sg_is_chain(sg)))
+			sg = sg_chain_ptr(sg);
+	}
+
+	*offset = n - idx;
+	return sg;
+}
+
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned long n,
+		       unsigned int *offset)
+{
+	struct i915_gem_object_page_iter *iter = &obj->get_page;
+	struct scatterlist *sg;
+
+	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
+	GEM_BUG_ON(obj->pages_pin_count == 0);
+
+	if (n < READ_ONCE(iter->sg_idx))
+		goto lookup;
+
+	mutex_lock(&iter->lock);
+	if (n >= iter->sg_idx &&
+	    n < iter->sg_idx + __sg_page_count(iter->sg_pos)) {
+		sg = iter->sg_pos;
+		*offset = n - iter->sg_idx;
+		mutex_unlock(&iter->lock);
+		return sg;
+	}
+
+	while (iter->sg_idx <= n) {
+		unsigned long exception;
+		unsigned int count, i;
+
+		radix_tree_insert(&iter->radix,
+				  iter->sg_idx,
+				  iter->sg_pos);
+
+		exception =
+			RADIX_TREE_EXCEPTIONAL_ENTRY |
+			iter->sg_idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
+		count = __sg_page_count(iter->sg_pos);
+		for (i = 1; i < count; i++)
+			radix_tree_insert(&iter->radix,
+					  iter->sg_idx + i,
+					  (void *)exception);
+
+		iter->sg_idx += count;
+		iter->sg_pos = __sg_next(iter->sg_pos);
+	}
+	mutex_unlock(&iter->lock);
+
+lookup:
+	rcu_read_lock();
+	sg = radix_tree_lookup(&iter->radix, n);
+	rcu_read_unlock();
+
+	if (unlikely(!sg))
+		return __i915_gem_object_get_sg(obj, n, offset);
+
+	*offset = 0;
+	if (unlikely(radix_tree_exception(sg))) {
+		unsigned long base =
+			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
+		sg = radix_tree_lookup(&iter->radix, base);
+		*offset = n - base;
+	}
+	return sg;
+}
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned long n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return nth_page(sg_page(sg), offset);
+}
+
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned long n)
+{
+	struct page *page;
+
+	page = i915_gem_object_get_page(obj, n);
+	if (!obj->dirty)
+		set_page_dirty(page);
+
+	return page;
+}
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 59989e8ee5dc..24bad4e60ef0 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -594,8 +594,8 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
 	if (obj->pages == NULL)
 		goto cleanup;
 
-	obj->get_page.sg = obj->pages->sgl;
-	obj->get_page.last = 0;
+	obj->get_page.sg_pos = obj->pages->sgl;
+	obj->get_page.sg_idx = 0;
 
 	i915_gem_object_pin_pages(obj);
 	obj->stolen = stolen;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 1c891b92ac80..cb95789da76e 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -526,8 +526,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 			if (ret == 0) {
 				list_add_tail(&obj->global_list,
 					      &to_i915(dev)->mm.unbound_list);
-				obj->get_page.sg = obj->pages->sgl;
-				obj->get_page.last = 0;
+				obj->get_page.sg_pos = obj->pages->sgl;
+				obj->get_page.sg_idx = 0;
 				pinned = 0;
 			}
 		}
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 15/42] drm/i915: Use radixtree to jump start intel_partial_pages()
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (13 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07 13:46   ` John Harrison
  2016-10-07  9:46 ` [PATCH 16/42] drm/i915: Refactor object page API Chris Wilson
                   ` (29 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

We can use the radixtree indexing of obj->pages to jump straight to the
scatterlist entry at the start of the desired partial range, instead of
walking the list from the first page.
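
For illustration, the call pattern used below in intel_partial_pages()
(a sketch; variable names are only indicative, the helper is the one
added in the previous patch):

	unsigned int offset;	/* page offset into the returned chunk */
	struct scatterlist *sg;

	/* Jump to the chunk covering the first page of the partial view,
	 * rather than iterating over every page before it.
	 */
	sg = i915_gem_object_get_sg(obj, view->params.partial.offset, &offset);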

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 38 +++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 78b692e5b998..7e0c98576e72 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3562,35 +3562,45 @@ intel_partial_pages(const struct i915_ggtt_view *view,
 		    struct drm_i915_gem_object *obj)
 {
 	struct sg_table *st;
-	struct scatterlist *sg;
-	struct sg_page_iter obj_sg_iter;
+	struct scatterlist *sg, *iter;
+	unsigned int count = view->params.partial.size;
+	unsigned int offset;
 	int ret = -ENOMEM;
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (!st)
 		goto err_st_alloc;
 
-	ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
+	ret = sg_alloc_table(st, count, GFP_KERNEL);
 	if (ret)
 		goto err_sg_alloc;
 
+	iter = i915_gem_object_get_sg(obj,
+				      view->params.partial.offset,
+				      &offset);
+
 	sg = st->sgl;
 	st->nents = 0;
-	for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
-		view->params.partial.offset)
-	{
-		if (st->nents >= view->params.partial.size)
-			break;
+	do {
+		unsigned int len =
+			min(iter->length, (count - offset) << PAGE_SHIFT);
 
-		sg_set_page(sg, NULL, PAGE_SIZE, 0);
-		sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
-		sg_dma_len(sg) = PAGE_SIZE;
+		sg_set_page(sg, NULL, len, 0);
+		sg_dma_address(sg) =
+			sg_dma_address(iter) + (offset << PAGE_SHIFT);
+		sg_dma_len(sg) = len;
 
-		sg = sg_next(sg);
 		st->nents++;
-	}
+		count -= len >> PAGE_SHIFT;
+		if (count == 0) {
+			sg_mark_end(sg);
+			return st;
+		}
 
-	return st;
+		sg = __sg_next(sg);
+		iter = __sg_next(iter);
+		offset = 0;
+	} while (1);
 
 err_sg_alloc:
 	kfree(st);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 16/42] drm/i915: Refactor object page API
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (14 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 15/42] drm/i915: Use radixtree to jump start intel_partial_pages() Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-10 10:54   ` John Harrison
                     ` (2 more replies)
  2016-10-07  9:46 ` [PATCH 17/42] drm/i915: Pass around sg_table to get_pages/put_pages backend Chris Wilson
                   ` (28 subsequent siblings)
  44 siblings, 3 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

The plan is to move obtaining the backing storage for the object out
from under struct_mutex (i.e. give it its own locking). The first step
is to update the API so that normal users only call pin/unpin whilst
working on the backing storage.
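
A sketch of the resulting caller pattern (hypothetical function, error
handling trimmed; pin/unpin and obj->mm.pages are the names introduced
by this patch): callers pin the pages for as long as they operate on the
backing storage and unpin when done, instead of calling get_pages
directly.

static int example_read_backing_store(struct drm_i915_gem_object *obj)
{
	int ret;

	/* Acquire the backing storage (allocating it if necessary) and
	 * hold a pin so it cannot be reaped by the shrinker meanwhile.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	/* ... operate on obj->mm.pages while the pin is held ... */

	i915_gem_object_unpin_pages(obj);
	return 0;
}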

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c       |   2 +-
 drivers/gpu/drm/i915/i915_debugfs.c          |  17 +--
 drivers/gpu/drm/i915/i915_drv.h              |  89 ++++++++----
 drivers/gpu/drm/i915/i915_gem.c              | 207 +++++++++++++--------------
 drivers/gpu/drm/i915/i915_gem_batch_pool.c   |   3 +-
 drivers/gpu/drm/i915/i915_gem_dmabuf.c       |  14 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |   2 +-
 drivers/gpu/drm/i915/i915_gem_fence.c        |   4 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c          |  10 +-
 drivers/gpu/drm/i915/i915_gem_internal.c     |  19 +--
 drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
 drivers/gpu/drm/i915/i915_gem_shrinker.c     |  10 +-
 drivers/gpu/drm/i915/i915_gem_stolen.c       |  24 ++--
 drivers/gpu/drm/i915/i915_gem_tiling.c       |   8 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c      |  30 ++--
 drivers/gpu/drm/i915/i915_gpu_error.c        |   4 +-
 drivers/gpu/drm/i915/intel_lrc.c             |   6 +-
 17 files changed, 234 insertions(+), 217 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 70980f82a15b..8d20020cb9f9 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1290,7 +1290,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 	}
 
 	if (ret == 0 && needs_clflush_after)
-		drm_clflush_virt_range(shadow_batch_obj->mapping, batch_len);
+		drm_clflush_virt_range(shadow_batch_obj->mm.mapping, batch_len);
 	i915_gem_object_unpin_map(shadow_batch_obj);
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e4b5ba771bea..b807ddf65e04 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -112,7 +112,7 @@ static char get_global_flag(struct drm_i915_gem_object *obj)
 
 static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
 {
-	return obj->mapping ? 'M' : ' ';
+	return obj->mm.mapping ? 'M' : ' ';
 }
 
 static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
@@ -158,8 +158,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   i915_gem_active_get_seqno(&obj->last_write,
 					     &obj->base.dev->struct_mutex),
 		   i915_cache_level_str(dev_priv, obj->cache_level),
-		   obj->dirty ? " dirty" : "",
-		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
+		   obj->mm.dirty ? " dirty" : "",
+		   obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
 	if (obj->base.name)
 		seq_printf(m, " (name: %d)", obj->base.name);
 	list_for_each_entry(vma, &obj->vma_list, obj_link) {
@@ -411,12 +411,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		size += obj->base.size;
 		++count;
 
-		if (obj->madv == I915_MADV_DONTNEED) {
+		if (obj->mm.madv == I915_MADV_DONTNEED) {
 			purgeable_size += obj->base.size;
 			++purgeable_count;
 		}
 
-		if (obj->mapping) {
+		if (obj->mm.mapping) {
 			mapped_count++;
 			mapped_size += obj->base.size;
 		}
@@ -433,12 +433,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 			++dpy_count;
 		}
 
-		if (obj->madv == I915_MADV_DONTNEED) {
+		if (obj->mm.madv == I915_MADV_DONTNEED) {
 			purgeable_size += obj->base.size;
 			++purgeable_count;
 		}
 
-		if (obj->mapping) {
+		if (obj->mm.mapping) {
 			mapped_count++;
 			mapped_size += obj->base.size;
 		}
@@ -2018,7 +2018,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
 		seq_printf(m, "\tBound in GGTT at 0x%08x\n",
 			   i915_ggtt_offset(vma));
 
-	if (i915_gem_object_get_pages(vma->obj)) {
+	if (i915_gem_object_pin_pages(vma->obj)) {
 		seq_puts(m, "\tFailed to get pages for context object\n\n");
 		return;
 	}
@@ -2037,6 +2037,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
 		kunmap_atomic(reg_state);
 	}
 
+	i915_gem_object_unpin_pages(vma->obj);
 	seq_putc(m, '\n');
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a96b446d8db4..3c22d49005fe 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2238,17 +2238,6 @@ struct drm_i915_gem_object {
 #define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
 
 	/**
-	 * This is set if the object has been written to since last bound
-	 * to the GTT
-	 */
-	unsigned int dirty:1;
-
-	/**
-	 * Advice: are the backing pages purgeable?
-	 */
-	unsigned int madv:2;
-
-	/**
 	 * Whether the current gtt mapping needs to be mappable (and isn't just
 	 * mappable by accident). Track pin and fault separate for a more
 	 * accurate mappable working set.
@@ -2276,16 +2265,31 @@ struct drm_i915_gem_object {
 	unsigned int bind_count;
 	unsigned int pin_display;
 
-	struct sg_table *pages;
-	int pages_pin_count;
-	struct i915_gem_object_page_iter {
-		struct scatterlist *sg_pos;
-		unsigned long sg_idx;
+	struct {
+		unsigned int pages_pin_count;
+
+		struct sg_table *pages;
+		void *mapping;
+
+		struct i915_gem_object_page_iter {
+			struct scatterlist *sg_pos;
+			unsigned long sg_idx;
 
-		struct radix_tree_root radix;
-		struct mutex lock;
-	} get_page;
-	void *mapping;
+			struct radix_tree_root radix;
+			struct mutex lock;
+		} get_page;
+
+		/**
+		 * Advice: are the backing pages purgeable?
+		 */
+		unsigned int madv:2;
+
+		/**
+		 * This is set if the object has been written to since the
+		 * pages were last acquired.
+		 */
+		unsigned int dirty:1;
+	} mm;
 
 	/** Breadcrumb of last rendering to the buffer.
 	 * There can only be one writer, but we allow for multiple readers.
@@ -3160,13 +3164,10 @@ void i915_vma_close(struct i915_vma *vma);
 void i915_vma_destroy(struct i915_vma *vma);
 
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
-int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 
-int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
-
-static inline int __sg_page_count(struct scatterlist *sg)
+static inline int __sg_page_count(const struct scatterlist *sg)
 {
 	return sg->length >> PAGE_SHIFT;
 }
@@ -3187,18 +3188,48 @@ dma_addr_t
 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
 				unsigned long n);
 
-static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+
+static inline int __must_check
+i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
-	BUG_ON(obj->pages == NULL);
-	obj->pages_pin_count++;
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	if (obj->mm.pages_pin_count++)
+		return 0;
+
+	return __i915_gem_object_get_pages(obj);
+}
+
+static inline void
+__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	GEM_BUG_ON(!obj->mm.pages);
+	obj->mm.pages_pin_count++;
+}
+
+static inline bool
+i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
+{
+	return obj->mm.pages_pin_count;
+}
+
+static inline void
+__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+	GEM_BUG_ON(!obj->mm.pages);
+	obj->mm.pages_pin_count--;
 }
 
 static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
-	BUG_ON(obj->pages_pin_count == 0);
-	obj->pages_pin_count--;
+	__i915_gem_object_unpin_pages(obj);
 }
 
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
+
 enum i915_map_type {
 	I915_MAP_WB = 0,
 	I915_MAP_WC,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index af7d51f16658..df774ddf62ae 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -216,7 +216,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 	sg_dma_address(sg) = obj->phys_handle->busaddr;
 	sg_dma_len(sg) = obj->base.size;
 
-	obj->pages = st;
+	obj->mm.pages = st;
 	return 0;
 }
 
@@ -225,7 +225,7 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
 {
 	int ret;
 
-	BUG_ON(obj->madv == __I915_MADV_PURGED);
+	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
 
 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
 	if (WARN_ON(ret)) {
@@ -235,10 +235,10 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	}
 
-	if (obj->madv == I915_MADV_DONTNEED)
-		obj->dirty = 0;
+	if (obj->mm.madv == I915_MADV_DONTNEED)
+		obj->mm.dirty = false;
 
-	if (obj->dirty) {
+	if (obj->mm.dirty) {
 		struct address_space *mapping = obj->base.filp->f_mapping;
 		char *vaddr = obj->phys_handle->vaddr;
 		int i;
@@ -257,22 +257,23 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
 			kunmap_atomic(dst);
 
 			set_page_dirty(page);
-			if (obj->madv == I915_MADV_WILLNEED)
+			if (obj->mm.madv == I915_MADV_WILLNEED)
 				mark_page_accessed(page);
 			put_page(page);
 			vaddr += PAGE_SIZE;
 		}
-		obj->dirty = 0;
+		obj->mm.dirty = false;
 	}
 
-	sg_free_table(obj->pages);
-	kfree(obj->pages);
+	sg_free_table(obj->mm.pages);
+	kfree(obj->mm.pages);
 }
 
 static void
 i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
 {
 	drm_pci_free(obj->base.dev, obj->phys_handle);
+	i915_gem_object_unpin_pages(obj);
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
@@ -506,7 +507,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 		return 0;
 	}
 
-	if (obj->madv != I915_MADV_WILLNEED)
+	if (obj->mm.madv != I915_MADV_WILLNEED)
 		return -EFAULT;
 
 	if (obj->base.filp == NULL)
@@ -516,7 +517,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	ret = i915_gem_object_put_pages(obj);
+	ret = __i915_gem_object_put_pages(obj);
 	if (ret)
 		return ret;
 
@@ -528,7 +529,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 	obj->phys_handle = phys;
 	obj->ops = &i915_gem_phys_ops;
 
-	return i915_gem_object_get_pages(obj);
+	return i915_gem_object_pin_pages(obj);
 }
 
 static int
@@ -724,12 +725,10 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	ret = i915_gem_object_get_pages(obj);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		return ret;
 
-	i915_gem_object_pin_pages(obj);
-
 	i915_gem_object_flush_gtt_write_domain(obj);
 
 	/* If we're not in the cpu read domain, set ourself into the gtt
@@ -777,12 +776,10 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	ret = i915_gem_object_get_pages(obj);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		return ret;
 
-	i915_gem_object_pin_pages(obj);
-
 	i915_gem_object_flush_gtt_write_domain(obj);
 
 	/* If we're not in the cpu write domain, set ourself into the
@@ -812,7 +809,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 		obj->cache_dirty = true;
 
 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-	obj->dirty = 1;
+	obj->mm.dirty = true;
 	/* return with the pages pinned */
 	return 0;
 
@@ -949,13 +946,11 @@ i915_gem_gtt_pread(struct drm_device *dev,
 		if (ret)
 			goto out;
 
-		ret = i915_gem_object_get_pages(obj);
+		ret = i915_gem_object_pin_pages(obj);
 		if (ret) {
 			remove_mappable_node(&node);
 			goto out;
 		}
-
-		i915_gem_object_pin_pages(obj);
 	}
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
@@ -1062,7 +1057,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
 	offset = args->offset;
 	remain = args->size;
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
+	for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents,
 			 offset >> PAGE_SHIFT) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 
@@ -1254,13 +1249,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 		if (ret)
 			goto out;
 
-		ret = i915_gem_object_get_pages(obj);
+		ret = i915_gem_object_pin_pages(obj);
 		if (ret) {
 			remove_mappable_node(&node);
 			goto out;
 		}
-
-		i915_gem_object_pin_pages(obj);
 	}
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
@@ -1268,7 +1261,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 		goto out_unpin;
 
 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-	obj->dirty = true;
+	obj->mm.dirty = true;
 
 	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = args->offset;
@@ -1439,7 +1432,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 	offset = args->offset;
 	remain = args->size;
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
+	for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents,
 			 offset >> PAGE_SHIFT) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 		int partial_cacheline_write;
@@ -2228,7 +2221,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 	 * backing pages, *now*.
 	 */
 	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
-	obj->madv = __I915_MADV_PURGED;
+	obj->mm.madv = __I915_MADV_PURGED;
 }
 
 /* Try to discard unwanted pages */
@@ -2237,7 +2230,7 @@ i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
 {
 	struct address_space *mapping;
 
-	switch (obj->madv) {
+	switch (obj->mm.madv) {
 	case I915_MADV_DONTNEED:
 		i915_gem_object_truncate(obj);
 	case __I915_MADV_PURGED:
@@ -2258,7 +2251,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 	struct page *page;
 	int ret;
 
-	BUG_ON(obj->madv == __I915_MADV_PURGED);
+	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
 
 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
 	if (WARN_ON(ret)) {
@@ -2274,22 +2267,22 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_save_bit_17_swizzle(obj);
 
-	if (obj->madv == I915_MADV_DONTNEED)
-		obj->dirty = 0;
+	if (obj->mm.madv == I915_MADV_DONTNEED)
+		obj->mm.dirty = false;
 
-	for_each_sgt_page(page, sgt_iter, obj->pages) {
-		if (obj->dirty)
+	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
+		if (obj->mm.dirty)
 			set_page_dirty(page);
 
-		if (obj->madv == I915_MADV_WILLNEED)
+		if (obj->mm.madv == I915_MADV_WILLNEED)
 			mark_page_accessed(page);
 
 		put_page(page);
 	}
-	obj->dirty = 0;
+	obj->mm.dirty = false;
 
-	sg_free_table(obj->pages);
-	kfree(obj->pages);
+	sg_free_table(obj->mm.pages);
+	kfree(obj->mm.pages);
 }
 
 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
@@ -2297,21 +2290,20 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 	struct radix_tree_iter iter;
 	void **slot;
 
-	radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0)
-		radix_tree_delete(&obj->get_page.radix, iter.index);
+	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
+		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
 }
 
-int
-i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
 	const struct drm_i915_gem_object_ops *ops = obj->ops;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	if (obj->pages == NULL)
+	if (!obj->mm.pages)
 		return 0;
 
-	if (obj->pages_pin_count)
+	if (i915_gem_object_has_pinned_pages(obj))
 		return -EBUSY;
 
 	GEM_BUG_ON(obj->bind_count);
@@ -2321,22 +2313,22 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 	 * lists early. */
 	list_del(&obj->global_list);
 
-	if (obj->mapping) {
+	if (obj->mm.mapping) {
 		void *ptr;
 
-		ptr = ptr_mask_bits(obj->mapping);
+		ptr = ptr_mask_bits(obj->mm.mapping);
 		if (is_vmalloc_addr(ptr))
 			vunmap(ptr);
 		else
 			kunmap(kmap_to_page(ptr));
 
-		obj->mapping = NULL;
+		obj->mm.mapping = NULL;
 	}
 
 	__i915_gem_object_reset_page_iter(obj);
 
 	ops->put_pages(obj);
-	obj->pages = NULL;
+	obj->mm.pages = NULL;
 
 	i915_gem_object_invalidate(obj);
 
@@ -2431,7 +2423,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	if (!swiotlb_nr_tbl())
 #endif
 		sg_mark_end(sg);
-	obj->pages = st;
+	obj->mm.pages = st;
 
 	ret = i915_gem_gtt_prepare_object(obj);
 	if (ret)
@@ -2442,7 +2434,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 
 	if (i915_gem_object_is_tiled(obj) &&
 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
-		i915_gem_object_pin_pages(obj);
+		__i915_gem_object_pin_pages(obj);
 
 	return 0;
 
@@ -2474,8 +2466,7 @@ err_pages:
  * either as a result of memory pressure (reaping pages under the shrinker)
  * or as the object is itself released.
  */
-int
-i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	const struct drm_i915_gem_object_ops *ops = obj->ops;
@@ -2483,24 +2474,25 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	if (obj->pages)
+	if (obj->mm.pages)
 		return 0;
 
-	if (obj->madv != I915_MADV_WILLNEED) {
+	if (obj->mm.madv != I915_MADV_WILLNEED) {
 		DRM_DEBUG("Attempting to obtain a purgeable object\n");
+		__i915_gem_object_unpin_pages(obj);
 		return -EFAULT;
 	}
 
-	BUG_ON(obj->pages_pin_count);
-
 	ret = ops->get_pages(obj);
-	if (ret)
+	if (ret) {
+		__i915_gem_object_unpin_pages(obj);
 		return ret;
+	}
 
 	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 
-	obj->get_page.sg_pos = obj->pages->sgl;
-	obj->get_page.sg_idx = 0;
+	obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
+	obj->mm.get_page.sg_idx = 0;
 
 	return 0;
 }
@@ -2510,7 +2502,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
 				 enum i915_map_type type)
 {
 	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
-	struct sg_table *sgt = obj->pages;
+	struct sg_table *sgt = obj->mm.pages;
 	struct sgt_iter sgt_iter;
 	struct page *page;
 	struct page *stack_pages[32];
@@ -2564,14 +2556,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
 
-	ret = i915_gem_object_get_pages(obj);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		return ERR_PTR(ret);
 
-	i915_gem_object_pin_pages(obj);
-	pinned = obj->pages_pin_count > 1;
+	pinned = obj->mm.pages_pin_count > 1;
 
-	ptr = ptr_unpack_bits(obj->mapping, has_type);
+	ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
 	if (ptr && has_type != type) {
 		if (pinned) {
 			ret = -EBUSY;
@@ -2583,7 +2574,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 		else
 			kunmap(kmap_to_page(ptr));
 
-		ptr = obj->mapping = NULL;
+		ptr = obj->mm.mapping = NULL;
 	}
 
 	if (!ptr) {
@@ -2593,7 +2584,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 			goto err;
 		}
 
-		obj->mapping = ptr_pack_bits(ptr, type);
+		obj->mm.mapping = ptr_pack_bits(ptr, type);
 	}
 
 	return ptr;
@@ -3030,7 +3021,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 		goto destroy;
 
 	GEM_BUG_ON(obj->bind_count == 0);
-	GEM_BUG_ON(!obj->pages);
+	GEM_BUG_ON(!obj->mm.pages);
 
 	if (i915_vma_is_map_and_fenceable(vma)) {
 		/* release the fence reg _after_ flushing */
@@ -3054,7 +3045,7 @@ int i915_vma_unbind(struct i915_vma *vma)
 	drm_mm_remove_node(&vma->node);
 	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
 
-	if (vma->pages != obj->pages) {
+	if (vma->pages != obj->mm.pages) {
 		GEM_BUG_ON(!vma->pages);
 		sg_free_table(vma->pages);
 		kfree(vma->pages);
@@ -3186,12 +3177,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 		return -E2BIG;
 	}
 
-	ret = i915_gem_object_get_pages(obj);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		return ret;
 
-	i915_gem_object_pin_pages(obj);
-
 	if (flags & PIN_OFFSET_FIXED) {
 		u64 offset = flags & PIN_OFFSET_MASK;
 		if (offset & (alignment - 1) || offset > end - size) {
@@ -3270,7 +3259,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * to GPU, and we can ignore the cache flush because it'll happen
 	 * again at bind time.
 	 */
-	if (obj->pages == NULL)
+	if (!obj->mm.pages)
 		return false;
 
 	/*
@@ -3294,7 +3283,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	}
 
 	trace_i915_gem_object_clflush(obj);
-	drm_clflush_sg(obj->pages);
+	drm_clflush_sg(obj->mm.pages);
 	obj->cache_dirty = false;
 
 	return true;
@@ -3408,7 +3397,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	 * continue to assume that the obj remained out of the CPU cached
 	 * domain.
 	 */
-	ret = i915_gem_object_get_pages(obj);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		return ret;
 
@@ -3432,7 +3421,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (write) {
 		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
 		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
-		obj->dirty = 1;
+		obj->mm.dirty = true;
 	}
 
 	trace_i915_gem_object_change_domain(obj,
@@ -3441,6 +3430,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* And bump the LRU for this access */
 	i915_gem_object_bump_inactive_ggtt(obj);
+	i915_gem_object_unpin_pages(obj);
 
 	return 0;
 }
@@ -4210,23 +4200,23 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 		goto unlock;
 	}
 
-	if (obj->pages &&
+	if (obj->mm.pages &&
 	    i915_gem_object_is_tiled(obj) &&
 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
-		if (obj->madv == I915_MADV_WILLNEED)
-			i915_gem_object_unpin_pages(obj);
+		if (obj->mm.madv == I915_MADV_WILLNEED)
+			__i915_gem_object_unpin_pages(obj);
 		if (args->madv == I915_MADV_WILLNEED)
-			i915_gem_object_pin_pages(obj);
+			__i915_gem_object_pin_pages(obj);
 	}
 
-	if (obj->madv != __I915_MADV_PURGED)
-		obj->madv = args->madv;
+	if (obj->mm.madv != __I915_MADV_PURGED)
+		obj->mm.madv = args->madv;
 
 	/* if the object is no longer attached, discard its backing storage */
-	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
+	if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages)
 		i915_gem_object_truncate(obj);
 
-	args->retained = obj->madv != __I915_MADV_PURGED;
+	args->retained = obj->mm.madv != __I915_MADV_PURGED;
 
 	i915_gem_object_put(obj);
 unlock:
@@ -4252,9 +4242,10 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 	obj->ops = ops;
 
 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
-	obj->madv = I915_MADV_WILLNEED;
-	INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL);
-	mutex_init(&obj->get_page.lock);
+
+	obj->mm.madv = I915_MADV_WILLNEED;
+	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL);
+	mutex_init(&obj->mm.get_page.lock);
 
 	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
 }
@@ -4331,7 +4322,7 @@ static bool discard_backing_storage(struct drm_i915_gem_object *obj)
 	 * back the contents from the GPU.
 	 */
 
-	if (obj->madv != I915_MADV_WILLNEED)
+	if (obj->mm.madv != I915_MADV_WILLNEED)
 		return false;
 
 	if (obj->base.filp == NULL)
@@ -4373,32 +4364,27 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	}
 	GEM_BUG_ON(obj->bind_count);
 
-	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
-	 * before progressing. */
-	if (obj->stolen)
-		i915_gem_object_unpin_pages(obj);
-
 	WARN_ON(atomic_read(&obj->frontbuffer_bits));
 
-	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
+	if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED &&
 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
 	    i915_gem_object_is_tiled(obj))
-		i915_gem_object_unpin_pages(obj);
+		__i915_gem_object_unpin_pages(obj);
 
-	if (WARN_ON(obj->pages_pin_count))
-		obj->pages_pin_count = 0;
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
+	if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
+		obj->mm.pages_pin_count = 0;
 	if (discard_backing_storage(obj))
-		obj->madv = I915_MADV_DONTNEED;
-	i915_gem_object_put_pages(obj);
+		obj->mm.madv = I915_MADV_DONTNEED;
+	__i915_gem_object_put_pages(obj);
 
-	BUG_ON(obj->pages);
+	GEM_BUG_ON(obj->mm.pages);
 
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
-	if (obj->ops->release)
-		obj->ops->release(obj);
-
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4935,14 +4921,13 @@ i915_gem_object_create_from_data(struct drm_device *dev,
 	if (ret)
 		goto fail;
 
-	ret = i915_gem_object_get_pages(obj);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		goto fail;
 
-	i915_gem_object_pin_pages(obj);
-	sg = obj->pages;
+	sg = obj->mm.pages;
 	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
-	obj->dirty = 1;		/* Backing store is now out of date */
+	obj->mm.dirty = true; /* Backing store is now out of date */
 	i915_gem_object_unpin_pages(obj);
 
 	if (WARN_ON(bytes != size)) {
@@ -4962,7 +4947,7 @@ static struct scatterlist *
 __i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
 			 unsigned long n, unsigned int *offset)
 {
-	struct scatterlist *sg = obj->pages->sgl;
+	struct scatterlist *sg = obj->mm.pages->sgl;
 	int idx = 0;
 
 	while (idx + __sg_page_count(sg) <= n) {
@@ -4980,11 +4965,11 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
 		       unsigned long n,
 		       unsigned int *offset)
 {
-	struct i915_gem_object_page_iter *iter = &obj->get_page;
+	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
 	struct scatterlist *sg;
 
 	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
-	GEM_BUG_ON(obj->pages_pin_count == 0);
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 
 	if (n < READ_ONCE(iter->sg_idx))
 		goto lookup;
@@ -5058,7 +5043,7 @@ i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
 	struct page *page;
 
 	page = i915_gem_object_get_page(obj, n);
-	if (!obj->dirty)
+	if (!obj->mm.dirty)
 		set_page_dirty(page);
 
 	return page;
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 3934c9103cf2..6b656822bb3a 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -131,11 +131,10 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 			return obj;
 	}
 
-	ret = i915_gem_object_get_pages(obj);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		return ERR_PTR(ret);
 
 	list_move_tail(&obj->batch_pool_link, list);
-	i915_gem_object_pin_pages(obj);
 	return obj;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 97c9d68b45df..10441dc72e73 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -48,12 +48,10 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
 	if (ret)
 		goto err;
 
-	ret = i915_gem_object_get_pages(obj);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		goto err_unlock;
 
-	i915_gem_object_pin_pages(obj);
-
 	/* Copy sg so that we make an independent mapping */
 	st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
 	if (st == NULL) {
@@ -61,13 +59,13 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
 		goto err_unpin;
 	}
 
-	ret = sg_alloc_table(st, obj->pages->nents, GFP_KERNEL);
+	ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
 	if (ret)
 		goto err_free;
 
-	src = obj->pages->sgl;
+	src = obj->mm.pages->sgl;
 	dst = st->sgl;
-	for (i = 0; i < obj->pages->nents; i++) {
+	for (i = 0; i < obj->mm.pages->nents; i++) {
 		sg_set_page(dst, sg_page(src), src->length, 0);
 		dst = sg_next(dst);
 		src = sg_next(src);
@@ -299,14 +297,14 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
 	if (IS_ERR(sg))
 		return PTR_ERR(sg);
 
-	obj->pages = sg;
+	obj->mm.pages = sg;
 	return 0;
 }
 
 static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj)
 {
 	dma_buf_unmap_attachment(obj->base.import_attach,
-				 obj->pages, DMA_BIDIRECTIONAL);
+				 obj->mm.pages, DMA_BIDIRECTIONAL);
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0deecd4e3b6c..062e098b0909 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1290,7 +1290,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-	obj->dirty = 1; /* be paranoid  */
+	obj->mm.dirty = true; /* be paranoid  */
 
 	/* Add a reference if we're newly entering the active list.
 	 * The order in which we add operations to the retirement queue is
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 8df1fa7234e8..398856160656 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -648,7 +648,7 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
 		return;
 
 	i = 0;
-	for_each_sgt_page(page, sgt_iter, obj->pages) {
+	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
 		char new_bit_17 = page_to_phys(page) >> 17;
 		if ((new_bit_17 & 0x1) !=
 		    (test_bit(i, obj->bit_17) != 0)) {
@@ -687,7 +687,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
 
 	i = 0;
 
-	for_each_sgt_page(page, sgt_iter, obj->pages) {
+	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
 		if (page_to_phys(page) & (1 << 17))
 			__set_bit(i, obj->bit_17);
 		else
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7e0c98576e72..3c711e0b8a3f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -175,7 +175,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 {
 	u32 pte_flags = 0;
 
-	vma->pages = vma->obj->pages;
+	vma->pages = vma->obj->mm.pages;
 
 	/* Currently applicable only to VLV */
 	if (vma->obj->gt_ro)
@@ -2295,7 +2295,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
 {
 	if (!dma_map_sg(&obj->base.dev->pdev->dev,
-			obj->pages->sgl, obj->pages->nents,
+			obj->mm.pages->sgl, obj->mm.pages->nents,
 			PCI_DMA_BIDIRECTIONAL))
 		return -ENOSPC;
 
@@ -2685,7 +2685,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
 		}
 	}
 
-	dma_unmap_sg(kdev, obj->pages->sgl, obj->pages->nents,
+	dma_unmap_sg(kdev, obj->mm.pages->sgl, obj->mm.pages->nents,
 		     PCI_DMA_BIDIRECTIONAL);
 }
 
@@ -3526,7 +3526,7 @@ intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
 
 	/* Populate source page list from the object. */
 	i = 0;
-	for_each_sgt_dma(dma_addr, sgt_iter, obj->pages)
+	for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
 		page_addr_list[i++] = dma_addr;
 
 	GEM_BUG_ON(i != n_pages);
@@ -3617,7 +3617,7 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
 		return 0;
 
 	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
-		vma->pages = vma->obj->pages;
+		vma->pages = vma->obj->mm.pages;
 	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
 		vma->pages =
 			intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index 534a61c1aba2..24eb347f9e0a 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -28,10 +28,11 @@
 
 static void internal_free_pages(struct sg_table *st)
 {
-	struct sg_page_iter sg_iter;
+	struct sgt_iter iter;
+	struct page *page;
 
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
-		put_page(sg_page_iter_page(&sg_iter));
+	for_each_sgt_page(page, iter, st)
+		put_page(page);
 
 	sg_free_table(st);
 	kfree(st);
@@ -94,10 +95,10 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 	if (!swiotlb_nr_tbl())
 #endif
 		sg_mark_end(sg);
-	obj->pages = st;
+	obj->mm.pages = st;
 
 	if (i915_gem_gtt_prepare_object(obj)) {
-		obj->pages = NULL;
+		obj->mm.pages = NULL;
 		goto err;
 	}
 
@@ -106,7 +107,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 	 * and the caller is expected to repopulate - the contents of this
 	 * object are only valid whilst active and pinned.
 	 */
-	obj->madv = I915_MADV_DONTNEED;
+	obj->mm.madv = I915_MADV_DONTNEED;
 	return 0;
 
 err:
@@ -117,10 +118,10 @@ err:
 
 static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj)
 {
-	internal_free_pages(obj->pages);
+	internal_free_pages(obj->mm.pages);
 
-	obj->dirty = 0;
-	obj->madv = I915_MADV_WILLNEED;
+	obj->mm.dirty = false;
+	obj->mm.madv = I915_MADV_WILLNEED;
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index adb00c5125ad..64d9712ec789 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -230,7 +230,7 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
 		return 0;
 
 	/* Recreate the page after shrinking */
-	if (!so->vma->obj->pages)
+	if (!so->vma->obj->mm.pages)
 		so->batch_offset = -1;
 
 	ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 1c237d02f30b..e0e6d68fe470 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -78,7 +78,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	 * to the GPU, simply unbinding from the GPU is not going to succeed
 	 * in releasing our pin count on the pages themselves.
 	 */
-	if (obj->pages_pin_count > obj->bind_count)
+	if (obj->mm.pages_pin_count > obj->bind_count)
 		return false;
 
 	if (any_vma_pinned(obj))
@@ -88,7 +88,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	 * discard the contents (because the user has marked them as being
 	 * purgeable) or if we can move their contents out to swap.
 	 */
-	return swap_available() || obj->madv == I915_MADV_DONTNEED;
+	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
 }
 
 /**
@@ -175,11 +175,11 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 			list_move_tail(&obj->global_list, &still_in_list);
 
 			if (flags & I915_SHRINK_PURGEABLE &&
-			    obj->madv != I915_MADV_DONTNEED)
+			    obj->mm.madv != I915_MADV_DONTNEED)
 				continue;
 
 			if (flags & I915_SHRINK_VMAPS &&
-			    !is_vmalloc_addr(obj->mapping))
+			    !is_vmalloc_addr(obj->mm.mapping))
 				continue;
 
 			if ((flags & I915_SHRINK_ACTIVE) == 0 &&
@@ -193,7 +193,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 
 			/* For the unbound phase, this should be a no-op! */
 			i915_gem_object_unbind(obj);
-			if (i915_gem_object_put_pages(obj) == 0)
+			if (__i915_gem_object_put_pages(obj) == 0)
 				count += obj->base.size >> PAGE_SHIFT;
 
 			i915_gem_object_put(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 24bad4e60ef0..6d139d6b4609 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -554,16 +554,17 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
 static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj)
 {
 	/* Should only be called during free */
-	sg_free_table(obj->pages);
-	kfree(obj->pages);
+	sg_free_table(obj->mm.pages);
+	kfree(obj->mm.pages);
 }
 
-
 static void
 i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 
+	__i915_gem_object_unpin_pages(obj);
+
 	if (obj->stolen) {
 		i915_gem_stolen_remove_node(dev_priv, obj->stolen);
 		kfree(obj->stolen);
@@ -589,15 +590,16 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
 	drm_gem_private_object_init(dev, &obj->base, stolen->size);
 	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
 
-	obj->pages = i915_pages_create_for_stolen(dev,
-						  stolen->start, stolen->size);
-	if (obj->pages == NULL)
+	obj->mm.pages = i915_pages_create_for_stolen(dev,
+						     stolen->start,
+						     stolen->size);
+	if (!obj->mm.pages)
 		goto cleanup;
 
-	obj->get_page.sg_pos = obj->pages->sgl;
-	obj->get_page.sg_idx = 0;
+	obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
+	obj->mm.get_page.sg_idx = 0;
 
-	i915_gem_object_pin_pages(obj);
+	__i915_gem_object_pin_pages(obj);
 	obj->stolen = stolen;
 
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
@@ -717,14 +719,14 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 		goto err;
 	}
 
-	vma->pages = obj->pages;
+	vma->pages = obj->mm.pages;
 	vma->flags |= I915_VMA_GLOBAL_BIND;
 	__i915_vma_set_map_and_fenceable(vma);
 	list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
 	obj->bind_count++;
 
 	list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
-	i915_gem_object_pin_pages(obj);
+	__i915_gem_object_pin_pages(obj);
 
 	return obj;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index a14b1e3d4c78..7286de7bd25e 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -260,13 +260,13 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		if (!err) {
 			struct i915_vma *vma;
 
-			if (obj->pages &&
-			    obj->madv == I915_MADV_WILLNEED &&
+			if (obj->mm.pages &&
+			    obj->mm.madv == I915_MADV_WILLNEED &&
 			    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
 				if (args->tiling_mode == I915_TILING_NONE)
-					i915_gem_object_unpin_pages(obj);
+					__i915_gem_object_unpin_pages(obj);
 				if (!i915_gem_object_is_tiled(obj))
-					i915_gem_object_pin_pages(obj);
+					__i915_gem_object_pin_pages(obj);
 			}
 
 			list_for_each_entry(vma, &obj->vma_list, obj_link) {
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index cb95789da76e..0fdd1d6723d1 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -73,10 +73,10 @@ static void cancel_userptr(struct work_struct *work)
 	/* Cancel any active worker and force us to re-evaluate gup */
 	obj->userptr.work = NULL;
 
-	if (obj->pages != NULL) {
+	if (obj->mm.pages) {
 		/* We are inside a kthread context and can't be interrupted */
 		WARN_ON(i915_gem_object_unbind(obj));
-		WARN_ON(i915_gem_object_put_pages(obj));
+		WARN_ON(__i915_gem_object_put_pages(obj));
 	}
 
 	i915_gem_object_put(obj);
@@ -432,15 +432,15 @@ __i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj,
 {
 	int ret;
 
-	ret = st_set_pages(&obj->pages, pvec, num_pages);
+	ret = st_set_pages(&obj->mm.pages, pvec, num_pages);
 	if (ret)
 		return ret;
 
 	ret = i915_gem_gtt_prepare_object(obj);
 	if (ret) {
-		sg_free_table(obj->pages);
-		kfree(obj->pages);
-		obj->pages = NULL;
+		sg_free_table(obj->mm.pages);
+		kfree(obj->mm.pages);
+		obj->mm.pages = NULL;
 	}
 
 	return ret;
@@ -526,8 +526,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 			if (ret == 0) {
 				list_add_tail(&obj->global_list,
 					      &to_i915(dev)->mm.unbound_list);
-				obj->get_page.sg_pos = obj->pages->sgl;
-				obj->get_page.sg_idx = 0;
+				obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
+				obj->mm.get_page.sg_idx = 0;
 				pinned = 0;
 			}
 		}
@@ -668,22 +668,22 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
 	BUG_ON(obj->userptr.work != NULL);
 	__i915_gem_userptr_set_active(obj, false);
 
-	if (obj->madv != I915_MADV_WILLNEED)
-		obj->dirty = 0;
+	if (obj->mm.madv != I915_MADV_WILLNEED)
+		obj->mm.dirty = false;
 
 	i915_gem_gtt_finish_object(obj);
 
-	for_each_sgt_page(page, sgt_iter, obj->pages) {
-		if (obj->dirty)
+	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
+		if (obj->mm.dirty)
 			set_page_dirty(page);
 
 		mark_page_accessed(page);
 		put_page(page);
 	}
-	obj->dirty = 0;
+	obj->mm.dirty = false;
 
-	sg_free_table(obj->pages);
-	kfree(obj->pages);
+	sg_free_table(obj->mm.pages);
+	kfree(obj->mm.pages);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6c22be28ba01..628d5cdf9200 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -881,8 +881,8 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->write_domain = obj->base.write_domain;
 	err->fence_reg = vma->fence ? vma->fence->id : -1;
 	err->tiling = i915_gem_object_get_tiling(obj);
-	err->dirty = obj->dirty;
-	err->purgeable = obj->madv != I915_MADV_WILLNEED;
+	err->dirty = obj->mm.dirty;
+	err->purgeable = obj->mm.madv != I915_MADV_WILLNEED;
 	err->userptr = obj->userptr.mm != NULL;
 	err->cache_level = obj->cache_level;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 02c48d390148..6acecfc41f5e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -744,7 +744,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
 	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
 		i915_ggtt_offset(ce->ring->vma);
 
-	ce->state->obj->dirty = true;
+	ce->state->obj->mm.dirty = true;
 
 	/* Invalidate GuC TLB. */
 	if (i915.enable_guc_submission) {
@@ -2042,7 +2042,7 @@ populate_lr_context(struct i915_gem_context *ctx,
 		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
 		return ret;
 	}
-	ctx_obj->dirty = true;
+	ctx_obj->mm.dirty = true;
 
 	/* The second page of the context object contains some fields which must
 	 * be set up prior to the first execution. */
@@ -2179,7 +2179,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 			reg[CTX_RING_HEAD+1] = 0;
 			reg[CTX_RING_TAIL+1] = 0;
 
-			ce->state->obj->dirty = true;
+			ce->state->obj->mm.dirty = true;
 			i915_gem_object_unpin_map(ce->state->obj);
 
 			ce->ring->head = ce->ring->tail = 0;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 17/42] drm/i915: Pass around sg_table to get_pages/put_pages backend
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (15 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 16/42] drm/i915: Refactor object page API Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-14  9:12   ` Joonas Lahtinen
  2016-10-14  9:28   ` Tvrtko Ursulin
  2016-10-07  9:46 ` [PATCH 18/42] drm/i915: Move object backing storage manipulation to its own locking Chris Wilson
                   ` (27 subsequent siblings)
  44 siblings, 2 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

The plan is to move obj->mm.pages out from under the struct_mutex into
its own per-object lock. We need to prune any assumption that the
struct_mutex is held from the get_pages/put_pages backends, and to make
that easier we pass around the sg_table to operate on explicitly rather
than reaching it indirectly via the obj.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
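Reviewer note, not part of the patch: a condensed sketch of the reshaped
backend contract for anyone skimming the hunks below. The backend now
returns the sg_table (or an ERR_PTR) instead of writing obj->mm.pages
itself; the caller installs the result via __i915_gem_object_set_pages().
The example_* names are illustrative only, not symbols added by this
series.

static struct sg_table *
example_get_pages(struct drm_i915_gem_object *obj)
{
	struct sg_table *st;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		return ERR_PTR(-ENOMEM);

	if (sg_alloc_table(st, obj->base.size / PAGE_SIZE, GFP_KERNEL)) {
		kfree(st);
		return ERR_PTR(-ENOMEM);
	}

	/* ... populate the table from the backing store ... */

	return st;
}

static void
example_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
{
	/* The caller has already detached pages from obj->mm.pages. */
	sg_free_table(pages);
	kfree(pages);
}
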
 drivers/gpu/drm/i915/i915_drv.h          |  36 +++++--
 drivers/gpu/drm/i915/i915_gem.c          | 172 +++++++++++++++----------------
 drivers/gpu/drm/i915/i915_gem_dmabuf.c   |  20 ++--
 drivers/gpu/drm/i915/i915_gem_fence.c    |  18 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.c      |  19 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.h      |   6 +-
 drivers/gpu/drm/i915/i915_gem_internal.c |  22 ++--
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  11 +-
 drivers/gpu/drm/i915/i915_gem_stolen.c   |  43 ++++----
 drivers/gpu/drm/i915/i915_gem_userptr.c  |  88 ++++++++--------
 10 files changed, 227 insertions(+), 208 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3c22d49005fe..271e63c8f037 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2175,8 +2175,8 @@ struct drm_i915_gem_object_ops {
 	 * being released or under memory pressure (where we attempt to
 	 * reap pages for the shrinker).
 	 */
-	int (*get_pages)(struct drm_i915_gem_object *);
-	void (*put_pages)(struct drm_i915_gem_object *);
+	struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
+	void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
 
 	int (*dmabuf_export)(struct drm_i915_gem_object *);
 	void (*release)(struct drm_i915_gem_object *);
@@ -2313,8 +2313,6 @@ struct drm_i915_gem_object {
 	struct i915_gem_userptr {
 		uintptr_t ptr;
 		unsigned read_only :1;
-		unsigned workers :4;
-#define I915_GEM_USERPTR_MAX_WORKERS 15
 
 		struct i915_mm_struct *mm;
 		struct i915_mmu_object *mmu_object;
@@ -2376,6 +2374,19 @@ __deprecated
 extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
 
 static inline bool
+i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
+{
+	return atomic_read(&obj->base.refcount.refcount) == 0;
+}
+
+#if IS_ENABLED(CONFIG_LOCKDEP)
+#define lockdep_assert_held_unless(lock, cond) \
+	GEM_BUG_ON(!lockdep_is_held(lock) && !(cond))
+#else
+#define lockdep_assert_held_unless(lock, cond)
+#endif
+
+static inline bool
 i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
 {
 	return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
@@ -3188,6 +3199,8 @@ dma_addr_t
 i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
 				unsigned long n);
 
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages);
 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 
 static inline int __must_check
@@ -3203,7 +3216,8 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 static inline void
 __i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	lockdep_assert_held_unless(&obj->base.dev->struct_mutex,
+				   i915_gem_object_is_dead(obj));
 	GEM_BUG_ON(!obj->mm.pages);
 	obj->mm.pages_pin_count++;
 }
@@ -3217,7 +3231,8 @@ i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
 static inline void
 __i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	lockdep_assert_held_unless(&obj->base.dev->struct_mutex,
+				   i915_gem_object_is_dead(obj));
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 	GEM_BUG_ON(!obj->mm.pages);
 	obj->mm.pages_pin_count--;
@@ -3228,7 +3243,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 	__i915_gem_object_unpin_pages(obj);
 }
 
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
+void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
+void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
 
 enum i915_map_type {
 	I915_MAP_WB = 0,
@@ -3451,8 +3467,10 @@ i915_vma_unpin_fence(struct i915_vma *vma)
 void i915_gem_restore_fences(struct drm_device *dev);
 
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
-void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj);
-void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
+void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
+				       struct sg_table *pages);
+void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
+					 struct sg_table *pages);
 
 /* i915_gem_context.c */
 int __must_check i915_gem_context_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index df774ddf62ae..620fe8ac6477 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -169,7 +169,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static int
+static struct sg_table *
 i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
 	struct address_space *mapping = obj->base.filp->f_mapping;
@@ -179,7 +179,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 	int i;
 
 	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 
 	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
 		struct page *page;
@@ -187,7 +187,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 
 		page = shmem_read_mapping_page(mapping, i);
 		if (IS_ERR(page))
-			return PTR_ERR(page);
+			return ERR_CAST(page);
 
 		src = kmap_atomic(page);
 		memcpy(vaddr, src, PAGE_SIZE);
@@ -202,11 +202,11 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (st == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
 		kfree(st);
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	sg = st->sgl;
@@ -216,28 +216,30 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 	sg_dma_address(sg) = obj->phys_handle->busaddr;
 	sg_dma_len(sg) = obj->base.size;
 
-	obj->mm.pages = st;
-	return 0;
+	return st;
 }
 
 static void
-i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
+__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj)
 {
-	int ret;
-
 	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
 
-	ret = i915_gem_object_set_to_cpu_domain(obj, true);
-	if (WARN_ON(ret)) {
-		/* In the event of a disaster, abandon all caches and
-		 * hope for the best.
-		 */
-		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
-	}
-
 	if (obj->mm.madv == I915_MADV_DONTNEED)
 		obj->mm.dirty = false;
 
+	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
+		i915_gem_clflush_object(obj, false);
+
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
+			       struct sg_table *pages)
+{
+	__i915_gem_object_release_shmem(obj);
+
 	if (obj->mm.dirty) {
 		struct address_space *mapping = obj->base.filp->f_mapping;
 		char *vaddr = obj->phys_handle->vaddr;
@@ -265,8 +267,8 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
 		obj->mm.dirty = false;
 	}
 
-	sg_free_table(obj->mm.pages);
-	kfree(obj->mm.pages);
+	sg_free_table(pages);
+	kfree(pages);
 }
 
 static void
@@ -517,9 +519,9 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	ret = __i915_gem_object_put_pages(obj);
-	if (ret)
-		return ret;
+	__i915_gem_object_put_pages(obj);
+	if (obj->mm.pages)
+		return -EBUSY;
 
 	/* create a new object */
 	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
@@ -535,7 +537,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 static int
 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 		     struct drm_i915_gem_pwrite *args,
-		     struct drm_file *file_priv)
+		     struct drm_file *file)
 {
 	struct drm_device *dev = obj->base.dev;
 	void *vaddr = obj->phys_handle->vaddr + args->offset;
@@ -551,7 +553,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 				   I915_WAIT_LOCKED |
 				   I915_WAIT_ALL,
 				   MAX_SCHEDULE_TIMEOUT,
-				   to_rps_client(file_priv));
+				   to_rps_client(file));
 	if (ret)
 		return ret;
 
@@ -2225,8 +2227,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 }
 
 /* Try to discard unwanted pages */
-static void
-i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
+void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
 {
 	struct address_space *mapping;
 
@@ -2245,32 +2246,20 @@ i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
 }
 
 static void
-i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
+i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
+			      struct sg_table *pages)
 {
 	struct sgt_iter sgt_iter;
 	struct page *page;
-	int ret;
 
-	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
-
-	ret = i915_gem_object_set_to_cpu_domain(obj, true);
-	if (WARN_ON(ret)) {
-		/* In the event of a disaster, abandon all caches and
-		 * hope for the best.
-		 */
-		i915_gem_clflush_object(obj, true);
-		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
-	}
+	__i915_gem_object_release_shmem(obj);
 
-	i915_gem_gtt_finish_object(obj);
+	i915_gem_gtt_finish_pages(obj, pages);
 
 	if (i915_gem_object_needs_bit17_swizzle(obj))
-		i915_gem_object_save_bit_17_swizzle(obj);
-
-	if (obj->mm.madv == I915_MADV_DONTNEED)
-		obj->mm.dirty = false;
+		i915_gem_object_save_bit_17_swizzle(obj, pages);
 
-	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
+	for_each_sgt_page(page, sgt_iter, pages) {
 		if (obj->mm.dirty)
 			set_page_dirty(page);
 
@@ -2281,8 +2270,8 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 	}
 	obj->mm.dirty = false;
 
-	sg_free_table(obj->mm.pages);
-	kfree(obj->mm.pages);
+	sg_free_table(pages);
+	kfree(pages);
 }
 
 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
@@ -2294,24 +2283,22 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
 }
 
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
+void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
-	const struct drm_i915_gem_object_ops *ops = obj->ops;
+	struct sg_table *pages;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	if (!obj->mm.pages)
-		return 0;
-
 	if (i915_gem_object_has_pinned_pages(obj))
-		return -EBUSY;
+		return;
 
 	GEM_BUG_ON(obj->bind_count);
 
 	/* ->put_pages might need to allocate memory for the bit17 swizzle
 	 * array, hence protect them from being reaped by removing them from gtt
 	 * lists early. */
-	list_del(&obj->global_list);
+	pages = fetch_and_zero(&obj->mm.pages);
+	GEM_BUG_ON(!pages);
 
 	if (obj->mm.mapping) {
 		void *ptr;
@@ -2327,15 +2314,10 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 
 	__i915_gem_object_reset_page_iter(obj);
 
-	ops->put_pages(obj);
-	obj->mm.pages = NULL;
-
-	i915_gem_object_invalidate(obj);
-
-	return 0;
+	obj->ops->put_pages(obj, pages);
 }
 
-static int
+static struct sg_table *
 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
@@ -2353,17 +2335,17 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	 * wasn't in the GTT, there shouldn't be any way it could have been in
 	 * a GPU cache
 	 */
-	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
-	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
+	GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
+	GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (st == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	page_count = obj->base.size / PAGE_SIZE;
 	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
 		kfree(st);
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	/* Get the list of pages out of our struct file.  They'll be pinned
@@ -2423,20 +2405,19 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	if (!swiotlb_nr_tbl())
 #endif
 		sg_mark_end(sg);
-	obj->mm.pages = st;
 
-	ret = i915_gem_gtt_prepare_object(obj);
+	ret = i915_gem_gtt_prepare_pages(obj, st);
 	if (ret)
 		goto err_pages;
 
 	if (i915_gem_object_needs_bit17_swizzle(obj))
-		i915_gem_object_do_bit_17_swizzle(obj);
+		i915_gem_object_do_bit_17_swizzle(obj, st);
 
 	if (i915_gem_object_is_tiled(obj) &&
 	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
 		__i915_gem_object_pin_pages(obj);
 
-	return 0;
+	return st;
 
 err_pages:
 	sg_mark_end(sg);
@@ -2456,7 +2437,35 @@ err_pages:
 	if (ret == -ENOSPC)
 		ret = -ENOMEM;
 
-	return ret;
+	return ERR_PTR(ret);
+}
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages)
+{
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	obj->mm.get_page.sg_pos = pages->sgl;
+	obj->mm.get_page.sg_idx = 0;
+
+	obj->mm.pages = pages;
+}
+
+static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct sg_table *pages;
+
+	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
+		DRM_DEBUG("Attempting to obtain a purgeable object\n");
+		return -EFAULT;
+	}
+
+	pages = obj->ops->get_pages(obj);
+	if (unlikely(IS_ERR(pages)))
+		return PTR_ERR(pages);
+
+	__i915_gem_object_set_pages(obj, pages);
+	return 0;
 }
 
 /* Ensure that the associated pages are gathered from the backing storage
@@ -2468,33 +2477,18 @@ err_pages:
  */
 int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	const struct drm_i915_gem_object_ops *ops = obj->ops;
-	int ret;
+	int err;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
 	if (obj->mm.pages)
 		return 0;
 
-	if (obj->mm.madv != I915_MADV_WILLNEED) {
-		DRM_DEBUG("Attempting to obtain a purgeable object\n");
-		__i915_gem_object_unpin_pages(obj);
-		return -EFAULT;
-	}
-
-	ret = ops->get_pages(obj);
-	if (ret) {
+	err = ____i915_gem_object_get_pages(obj);
+	if (err)
 		__i915_gem_object_unpin_pages(obj);
-		return ret;
-	}
 
-	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
-
-	obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
-	obj->mm.get_page.sg_idx = 0;
-
-	return 0;
+	return err;
 }
 
 /* The 'mapping' part of i915_gem_object_pin_map() below */
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 10441dc72e73..2abd524aba14 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -289,22 +289,18 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 	return dma_buf;
 }
 
-static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
+static struct sg_table *
+i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
 {
-	struct sg_table *sg;
-
-	sg = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL);
-	if (IS_ERR(sg))
-		return PTR_ERR(sg);
-
-	obj->mm.pages = sg;
-	return 0;
+	return dma_buf_map_attachment(obj->base.import_attach,
+				      DMA_BIDIRECTIONAL);
 }
 
-static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj)
+static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
+					     struct sg_table *pages)
 {
-	dma_buf_unmap_attachment(obj->base.import_attach,
-				 obj->mm.pages, DMA_BIDIRECTIONAL);
+	dma_buf_unmap_attachment(obj->base.import_attach, pages,
+				 DMA_BIDIRECTIONAL);
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 398856160656..834d649496f3 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -628,6 +628,7 @@ i915_gem_swizzle_page(struct page *page)
 /**
  * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
  * @obj: i915 GEM buffer object
+ * @pages: the scattergather list of physical pages
  *
  * This function fixes up the swizzling in case any page frame number for this
  * object has changed in bit 17 since that state has been saved with
@@ -638,7 +639,8 @@ i915_gem_swizzle_page(struct page *page)
  * by swapping them out and back in again).
  */
 void
-i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
+i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
+				  struct sg_table *pages)
 {
 	struct sgt_iter sgt_iter;
 	struct page *page;
@@ -648,10 +650,9 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
 		return;
 
 	i = 0;
-	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
+	for_each_sgt_page(page, sgt_iter, pages) {
 		char new_bit_17 = page_to_phys(page) >> 17;
-		if ((new_bit_17 & 0x1) !=
-		    (test_bit(i, obj->bit_17) != 0)) {
+		if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
 			i915_gem_swizzle_page(page);
 			set_page_dirty(page);
 		}
@@ -662,21 +663,22 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
 /**
  * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
  * @obj: i915 GEM buffer object
+ * @pages: the scattergather list of physical pages
  *
  * This function saves the bit 17 of each page frame number so that swizzling
  * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
  * be called before the backing storage can be unpinned.
  */
 void
-i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
+i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
+				    struct sg_table *pages)
 {
 	struct sgt_iter sgt_iter;
 	struct page *page;
-	int page_count = obj->base.size >> PAGE_SHIFT;
 	int i;
 
 	if (obj->bit_17 == NULL) {
-		obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
+		obj->bit_17 = kcalloc(BITS_TO_LONGS(obj->base.size >> PAGE_SHIFT),
 				      sizeof(long), GFP_KERNEL);
 		if (obj->bit_17 == NULL) {
 			DRM_ERROR("Failed to allocate memory for bit 17 "
@@ -687,7 +689,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
 
 	i = 0;
 
-	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
+	for_each_sgt_page(page, sgt_iter, pages) {
 		if (page_to_phys(page) & (1 << 17))
 			__set_bit(i, obj->bit_17);
 		else
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 3c711e0b8a3f..578026584f42 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2292,14 +2292,15 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
 	i915_ggtt_flush(dev_priv);
 }
 
-int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
+int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
+			       struct sg_table *pages)
 {
-	if (!dma_map_sg(&obj->base.dev->pdev->dev,
-			obj->mm.pages->sgl, obj->mm.pages->nents,
-			PCI_DMA_BIDIRECTIONAL))
-		return -ENOSPC;
+	if (dma_map_sg(&obj->base.dev->pdev->dev,
+		       pages->sgl, pages->nents,
+		       PCI_DMA_BIDIRECTIONAL))
+		return 0;
 
-	return 0;
+	return -ENOSPC;
 }
 
 static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
@@ -2671,7 +2672,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
 					 true);
 }
 
-void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
+void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
+			       struct sg_table *pages)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct device *kdev = &dev_priv->drm.pdev->dev;
@@ -2685,8 +2687,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
 		}
 	}
 
-	dma_unmap_sg(kdev, obj->mm.pages->sgl, obj->mm.pages->nents,
-		     PCI_DMA_BIDIRECTIONAL);
+	dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
 }
 
 static void i915_gtt_color_adjust(struct drm_mm_node *node,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index bd93fb8f99d2..c34945f21323 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -629,8 +629,10 @@ void i915_check_and_clear_faults(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_gtt_mappings(struct drm_device *dev);
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
 
-int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
-void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
+int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
+					    struct sg_table *pages);
+void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
+			       struct sg_table *pages);
 
 /* Flags used by pin/bind&friends. */
 #define PIN_NONBLOCK		BIT(0)
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index 24eb347f9e0a..b0c56b311248 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -38,7 +38,8 @@ static void internal_free_pages(struct sg_table *st)
 	kfree(st);
 }
 
-static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
+static struct sg_table *
+i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 {
 	const unsigned int npages = obj->base.size / PAGE_SIZE;
 	struct sg_table *st;
@@ -49,11 +50,11 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (!st)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	if (sg_alloc_table(st, npages, GFP_KERNEL)) {
 		kfree(st);
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	sg = st->sgl;
@@ -95,12 +96,9 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 	if (!swiotlb_nr_tbl())
 #endif
 		sg_mark_end(sg);
-	obj->mm.pages = st;
 
-	if (i915_gem_gtt_prepare_object(obj)) {
-		obj->mm.pages = NULL;
+	if (i915_gem_gtt_prepare_pages(obj, st))
 		goto err;
-	}
 
 	/* Mark the pages as dontneed whilst they are still pinned. As soon
 	 * as they are unpinned they are allowed to be reaped by the shrinker,
@@ -108,17 +106,19 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 	 * object are only valid whilst active and pinned.
 	 */
 	obj->mm.madv = I915_MADV_DONTNEED;
-	return 0;
+	return st;
 
 err:
 	sg_mark_end(sg);
 	internal_free_pages(st);
-	return -ENOMEM;
+	return ERR_PTR(-ENOMEM);
 }
 
-static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj)
+static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
+					       struct sg_table *pages)
 {
-	internal_free_pages(obj->mm.pages);
+	i915_gem_gtt_finish_pages(obj, pages);
+	internal_free_pages(pages);
 
 	obj->mm.dirty = false;
 	obj->mm.madv = I915_MADV_WILLNEED;
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index e0e6d68fe470..a8c3d37b95b9 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -91,6 +91,13 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
 }
 
+static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
+{
+	if (i915_gem_object_unbind(obj) == 0)
+		__i915_gem_object_put_pages(obj);
+	return !obj->mm.pages;
+}
+
 /**
  * i915_gem_shrink - Shrink buffer object caches
  * @dev_priv: i915 device
@@ -191,9 +198,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 
 			i915_gem_object_get(obj);
 
-			/* For the unbound phase, this should be a no-op! */
-			i915_gem_object_unbind(obj);
-			if (__i915_gem_object_put_pages(obj) == 0)
+			if (unsafe_drop_pages(obj))
 				count += obj->base.size >> PAGE_SHIFT;
 
 			i915_gem_object_put(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 6d139d6b4609..4166eafa25dd 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -545,17 +545,20 @@ i915_pages_create_for_stolen(struct drm_device *dev,
 	return st;
 }
 
-static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
+static struct sg_table *
+i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
 {
-	BUG();
-	return -EINVAL;
+	return i915_pages_create_for_stolen(obj->base.dev,
+					    obj->stolen->start,
+					    obj->stolen->size);
 }
 
-static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj)
+static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
+					     struct sg_table *pages)
 {
 	/* Should only be called during free */
-	sg_free_table(obj->mm.pages);
-	kfree(obj->mm.pages);
+	sg_free_table(pages);
+	kfree(pages);
 }
 
 static void
@@ -590,21 +593,13 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
 	drm_gem_private_object_init(dev, &obj->base, stolen->size);
 	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
 
-	obj->mm.pages = i915_pages_create_for_stolen(dev,
-						     stolen->start,
-						     stolen->size);
-	if (!obj->mm.pages)
-		goto cleanup;
-
-	obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
-	obj->mm.get_page.sg_idx = 0;
-
-	__i915_gem_object_pin_pages(obj);
 	obj->stolen = stolen;
-
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
 	obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE;
 
+	if (i915_gem_object_pin_pages(obj))
+		goto cleanup;
+
 	return obj;
 
 cleanup:
@@ -699,10 +694,14 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 	if (gtt_offset == I915_GTT_OFFSET_NONE)
 		return obj;
 
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err;
+
 	vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
-		goto err;
+		goto err_pages;
 	}
 
 	/* To simplify the initialisation sequence between KMS and GTT,
@@ -716,20 +715,20 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 	ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
 	if (ret) {
 		DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
-		goto err;
+		goto err_pages;
 	}
 
 	vma->pages = obj->mm.pages;
 	vma->flags |= I915_VMA_GLOBAL_BIND;
 	__i915_vma_set_map_and_fenceable(vma);
 	list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
+	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
 	obj->bind_count++;
 
-	list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
-	__i915_gem_object_pin_pages(obj);
-
 	return obj;
 
+err_pages:
+	i915_gem_object_unpin_pages(obj);
 err:
 	i915_gem_object_put(obj);
 	return NULL;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 0fdd1d6723d1..e97c7b589439 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -73,11 +73,14 @@ static void cancel_userptr(struct work_struct *work)
 	/* Cancel any active worker and force us to re-evaluate gup */
 	obj->userptr.work = NULL;
 
-	if (obj->mm.pages) {
-		/* We are inside a kthread context and can't be interrupted */
-		WARN_ON(i915_gem_object_unbind(obj));
-		WARN_ON(__i915_gem_object_put_pages(obj));
-	}
+	/* We are inside a kthread context and can't be interrupted */
+	if (i915_gem_object_unbind(obj) == 0)
+		__i915_gem_object_put_pages(obj);
+	WARN_ONCE(obj->mm.pages,
+		  "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_display=%d\n",
+		  obj->bind_count,
+		  obj->mm.pages_pin_count,
+		  obj->pin_display);
 
 	i915_gem_object_put(obj);
 	mutex_unlock(&dev->struct_mutex);
@@ -426,24 +429,25 @@ err:
 	return ret;
 }
 
-static int
+static struct sg_table *
 __i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj,
 			     struct page **pvec, int num_pages)
 {
+	struct sg_table *pages;
 	int ret;
 
-	ret = st_set_pages(&obj->mm.pages, pvec, num_pages);
+	ret = st_set_pages(&pages, pvec, num_pages);
 	if (ret)
-		return ret;
+		return ERR_PTR(ret);
 
-	ret = i915_gem_gtt_prepare_object(obj);
+	ret = i915_gem_gtt_prepare_pages(obj, pages);
 	if (ret) {
-		sg_free_table(obj->mm.pages);
-		kfree(obj->mm.pages);
-		obj->mm.pages = NULL;
+		sg_free_table(pages);
+		kfree(pages);
+		return ERR_PTR(ret);
 	}
 
-	return ret;
+	return pages;
 }
 
 static int
@@ -521,20 +525,20 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 
 	mutex_lock(&dev->struct_mutex);
 	if (obj->userptr.work == &work->work) {
+		struct sg_table *pages = ERR_PTR(ret);
+
 		if (pinned == npages) {
-			ret = __i915_gem_userptr_set_pages(obj, pvec, npages);
-			if (ret == 0) {
-				list_add_tail(&obj->global_list,
-					      &to_i915(dev)->mm.unbound_list);
-				obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
-				obj->mm.get_page.sg_idx = 0;
+			pages = __i915_gem_userptr_set_pages(obj, pvec, npages);
+			if (!IS_ERR(pages)) {
+				__i915_gem_object_set_pages(obj, pages);
 				pinned = 0;
+				pages = NULL;
 			}
 		}
-		obj->userptr.work = ERR_PTR(ret);
+
+		obj->userptr.work = ERR_CAST(pages);
 	}
 
-	obj->userptr.workers--;
 	i915_gem_object_put(obj);
 	mutex_unlock(&dev->struct_mutex);
 
@@ -545,7 +549,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 	kfree(work);
 }
 
-static int
+static struct sg_table *
 __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
 				      bool *active)
 {
@@ -570,15 +574,11 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
 	 * that error back to this function through
 	 * obj->userptr.work = ERR_PTR.
 	 */
-	if (obj->userptr.workers >= I915_GEM_USERPTR_MAX_WORKERS)
-		return -EAGAIN;
-
 	work = kmalloc(sizeof(*work), GFP_KERNEL);
 	if (work == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	obj->userptr.work = &work->work;
-	obj->userptr.workers++;
 
 	work->obj = i915_gem_object_get(obj);
 
@@ -589,14 +589,15 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
 	schedule_work(&work->work);
 
 	*active = true;
-	return -EAGAIN;
+	return ERR_PTR(-EAGAIN);
 }
 
-static int
+static struct sg_table *
 i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 {
 	const int num_pages = obj->base.size >> PAGE_SHIFT;
 	struct page **pvec;
+	struct sg_table *pages;
 	int pinned, ret;
 	bool active;
 
@@ -620,15 +621,15 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 	if (obj->userptr.work) {
 		/* active flag should still be held for the pending work */
 		if (IS_ERR(obj->userptr.work))
-			return PTR_ERR(obj->userptr.work);
+			return ERR_CAST(obj->userptr.work);
 		else
-			return -EAGAIN;
+			return ERR_PTR(-EAGAIN);
 	}
 
 	/* Let the mmu-notifier know that we have begun and need cancellation */
 	ret = __i915_gem_userptr_set_active(obj, true);
 	if (ret)
-		return ret;
+		return ERR_PTR(ret);
 
 	pvec = NULL;
 	pinned = 0;
@@ -637,7 +638,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 				      GFP_TEMPORARY);
 		if (pvec == NULL) {
 			__i915_gem_userptr_set_active(obj, false);
-			return -ENOMEM;
+			return ERR_PTR(-ENOMEM);
 		}
 
 		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
@@ -646,21 +647,22 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 
 	active = false;
 	if (pinned < 0)
-		ret = pinned, pinned = 0;
+		pages = ERR_PTR(pinned), pinned = 0;
 	else if (pinned < num_pages)
-		ret = __i915_gem_userptr_get_pages_schedule(obj, &active);
+		pages = __i915_gem_userptr_get_pages_schedule(obj, &active);
 	else
-		ret = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
-	if (ret) {
+		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
+	if (IS_ERR(pages)) {
 		__i915_gem_userptr_set_active(obj, active);
 		release_pages(pvec, pinned, 0);
 	}
 	drm_free_large(pvec);
-	return ret;
+	return pages;
 }
 
 static void
-i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
+			   struct sg_table *pages)
 {
 	struct sgt_iter sgt_iter;
 	struct page *page;
@@ -671,9 +673,9 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
 	if (obj->mm.madv != I915_MADV_WILLNEED)
 		obj->mm.dirty = false;
 
-	i915_gem_gtt_finish_object(obj);
+	i915_gem_gtt_finish_pages(obj, pages);
 
-	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
+	for_each_sgt_page(page, sgt_iter, pages) {
 		if (obj->mm.dirty)
 			set_page_dirty(page);
 
@@ -682,8 +684,8 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
 	}
 	obj->mm.dirty = false;
 
-	sg_free_table(obj->mm.pages);
-	kfree(obj->mm.pages);
+	sg_free_table(pages);
+	kfree(pages);
 }
 
 static void
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 18/42] drm/i915: Move object backing storage manipulation to its own locking
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (16 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 17/42] drm/i915: Pass around sg_table to get_pages/put_pages backend Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-13 12:46   ` Joonas Lahtinen
  2016-10-07  9:46 ` [PATCH 19/42] drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex Chris Wilson
                   ` (26 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Break the allocation of the backing storage away from struct_mutex into
a per-object lock. This allows parallel page allocation wherever we can
operate outside of struct_mutex (set-domain-ioctl, pwrite, GTT fault),
i.e. before execbuf! The increased cost of the atomic counters is hidden
behind i915_vma_pin() for the typical case of execbuf: as the object is
usually already bound between execbufs, the pages_pin_count is static.
The cost will be felt around set-domain and pwrite, but is offset by the
improvement from reduced struct_mutex contention.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
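Reviewer note, not part of the patch: a condensed sketch of how the pin
fast/slow path reads after this change. The uncontended case is a single
atomic_inc_not_zero(); only the first pin, or a race against the final
unpin/shrinker, takes the new obj->mm.lock. The example_pin_pages name
is illustrative only; the helpers it calls are the ones introduced
earlier in the series.

static inline int example_pin_pages(struct drm_i915_gem_object *obj)
{
	int err;

	/* Fast path: pages already allocated and pinned at least once. */
	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
		return 0;

	/* Slow path: serialise allocation against other pinners and the
	 * shrinker dropping the pages.
	 */
	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		return err;

	if (obj->mm.pages) {
		/* Beaten to the allocation whilst waiting for the lock. */
		atomic_inc(&obj->mm.pages_pin_count);
	} else {
		err = ____i915_gem_object_get_pages(obj);
		if (!err)
			atomic_set_release(&obj->mm.pages_pin_count, 1);
	}

	mutex_unlock(&obj->mm.lock);
	return err;
}
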
 drivers/gpu/drm/i915/i915_drv.h          | 26 ++++------
 drivers/gpu/drm/i915/i915_gem.c          | 89 +++++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 51 ++++++++++++------
 drivers/gpu/drm/i915/i915_gem_tiling.c   |  2 +
 drivers/gpu/drm/i915/i915_gem_userptr.c  | 10 ++--
 5 files changed, 110 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 271e63c8f037..7e9df190c891 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2266,7 +2266,8 @@ struct drm_i915_gem_object {
 	unsigned int pin_display;
 
 	struct {
-		unsigned int pages_pin_count;
+		struct mutex lock;
+		atomic_t pages_pin_count;
 
 		struct sg_table *pages;
 		void *mapping;
@@ -3206,8 +3207,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
 static inline int __must_check
 i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-	if (obj->mm.pages_pin_count++)
+	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
 		return 0;
 
 	return __i915_gem_object_get_pages(obj);
@@ -3216,29 +3216,26 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 static inline void
 __i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
 {
-	lockdep_assert_held_unless(&obj->base.dev->struct_mutex,
-				   i915_gem_object_is_dead(obj));
 	GEM_BUG_ON(!obj->mm.pages);
-	obj->mm.pages_pin_count++;
+	atomic_inc(&obj->mm.pages_pin_count);
 }
 
 static inline bool
 i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
 {
-	return obj->mm.pages_pin_count;
+	return atomic_read(&obj->mm.pages_pin_count);
 }
 
 static inline void
 __i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
-	lockdep_assert_held_unless(&obj->base.dev->struct_mutex,
-				   i915_gem_object_is_dead(obj));
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 	GEM_BUG_ON(!obj->mm.pages);
-	obj->mm.pages_pin_count--;
+	atomic_dec(&obj->mm.pages_pin_count);
 }
 
-static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+static inline void
+i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 {
 	__i915_gem_object_unpin_pages(obj);
 }
@@ -3261,8 +3258,8 @@ enum i915_map_type {
  * the kernel address space. Based on the @type of mapping, the PTE will be
  * set to either WriteBack or WriteCombine (via pgprot_t).
  *
- * The caller must hold the struct_mutex, and is responsible for calling
- * i915_gem_object_unpin_map() when the mapping is no longer required.
+ * The caller is responsible for calling i915_gem_object_unpin_map() when the
+ * mapping is no longer required.
  *
  * Returns the pointer through which to access the mapped object, or an
  * ERR_PTR() on error.
@@ -3278,12 +3275,9 @@ void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
  * with your access, call i915_gem_object_unpin_map() to release the pin
  * upon the mapping. Once the pin count reaches zero, that mapping may be
  * removed.
- *
- * The caller must hold the struct_mutex.
  */
 static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
 {
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 620fe8ac6477..f4a773ca9333 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2231,6 +2231,9 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
 {
 	struct address_space *mapping;
 
+	lockdep_assert_held(&obj->mm.lock);
+	GEM_BUG_ON(obj->mm.pages);
+
 	switch (obj->mm.madv) {
 	case I915_MADV_DONTNEED:
 		i915_gem_object_truncate(obj);
@@ -2287,12 +2290,17 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 {
 	struct sg_table *pages;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	if (i915_gem_object_has_pinned_pages(obj))
 		return;
 
 	GEM_BUG_ON(obj->bind_count);
+	if (!READ_ONCE(obj->mm.pages))
+		return;
+
+	/* May be called by shrinker from within get_pages() (on another bo) */
+	mutex_lock_nested(&obj->mm.lock, SINGLE_DEPTH_NESTING);
+	if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
+		goto unlock;
 
 	/* ->put_pages might need to allocate memory for the bit17 swizzle
 	 * array, hence protect them from being reaped by removing them from gtt
@@ -2315,6 +2323,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
 	__i915_gem_object_reset_page_iter(obj);
 
 	obj->ops->put_pages(obj, pages);
+unlock:
+	mutex_unlock(&obj->mm.lock);
 }
 
 static struct sg_table *
@@ -2443,7 +2453,7 @@ err_pages:
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 				 struct sg_table *pages)
 {
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	lockdep_assert_held(&obj->mm.lock);
 
 	obj->mm.get_page.sg_pos = pages->sgl;
 	obj->mm.get_page.sg_idx = 0;
@@ -2469,9 +2479,9 @@ static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 }
 
 /* Ensure that the associated pages are gathered from the backing storage
- * and pinned into our object. i915_gem_object_get_pages() may be called
+ * and pinned into our object. i915_gem_object_pin_pages() may be called
  * multiple times before they are released by a single call to
- * i915_gem_object_put_pages() - once the pages are no longer referenced
+ * i915_gem_object_unpin_pages() - once the pages are no longer referenced
  * either as a result of memory pressure (reaping pages under the shrinker)
  * or as the object is itself released.
  */
@@ -2479,15 +2489,23 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 {
 	int err;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		return err;
 
-	if (obj->mm.pages)
-		return 0;
+	if (likely(obj->mm.pages)) {
+		__i915_gem_object_pin_pages(obj);
+		goto unlock;
+	}
+
+	GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
 
 	err = ____i915_gem_object_get_pages(obj);
-	if (err)
-		__i915_gem_object_unpin_pages(obj);
+	if (!err)
+		atomic_set_release(&obj->mm.pages_pin_count, 1);
 
+unlock:
+	mutex_unlock(&obj->mm.lock);
 	return err;
 }
 
@@ -2547,20 +2565,29 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	void *ptr;
 	int ret;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
 	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
 
-	ret = i915_gem_object_pin_pages(obj);
+	ret = mutex_lock_interruptible(&obj->mm.lock);
 	if (ret)
 		return ERR_PTR(ret);
 
-	pinned = obj->mm.pages_pin_count > 1;
+	pinned = true;
+	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
+		ret = ____i915_gem_object_get_pages(obj);
+		if (ret)
+			goto err_unlock;
+
+		GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count));
+		atomic_set_release(&obj->mm.pages_pin_count, 1);
+		pinned = false;
+	}
+	GEM_BUG_ON(!obj->mm.pages);
 
 	ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
 	if (ptr && has_type != type) {
 		if (pinned) {
 			ret = -EBUSY;
-			goto err;
+			goto err_unpin;
 		}
 
 		if (is_vmalloc_addr(ptr))
@@ -2575,17 +2602,21 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 		ptr = i915_gem_object_map(obj, type);
 		if (!ptr) {
 			ret = -ENOMEM;
-			goto err;
+			goto err_unpin;
 		}
 
 		obj->mm.mapping = ptr_pack_bits(ptr, type);
 	}
 
+out_unlock:
+	mutex_unlock(&obj->mm.lock);
 	return ptr;
 
-err:
-	i915_gem_object_unpin_pages(obj);
-	return ERR_PTR(ret);
+err_unpin:
+	atomic_dec(&obj->mm.pages_pin_count);
+err_unlock:
+	ptr = ERR_PTR(ret);
+	goto out_unlock;
 }
 
 static void
@@ -4184,15 +4215,13 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	    return -EINVAL;
 	}
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
 	obj = i915_gem_object_lookup(file_priv, args->handle);
-	if (!obj) {
-		ret = -ENOENT;
-		goto unlock;
-	}
+	if (!obj)
+		return -ENOENT;
+
+	ret = mutex_lock_interruptible(&obj->mm.lock);
+	if (ret)
+		goto err;
 
 	if (obj->mm.pages &&
 	    i915_gem_object_is_tiled(obj) &&
@@ -4211,10 +4240,10 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 		i915_gem_object_truncate(obj);
 
 	args->retained = obj->mm.madv != __I915_MADV_PURGED;
+	mutex_unlock(&obj->mm.lock);
 
+err:
 	i915_gem_object_put(obj);
-unlock:
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
 
@@ -4223,6 +4252,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 {
 	int i;
 
+	mutex_init(&obj->mm.lock);
+
 	INIT_LIST_HEAD(&obj->global_list);
 	for (i = 0; i < I915_NUM_ENGINES; i++)
 		init_request_active(&obj->last_read[i],
@@ -4369,7 +4400,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 		obj->ops->release(obj);
 
 	if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
-		obj->mm.pages_pin_count = 0;
+		atomic_set(&obj->mm.pages_pin_count, 0);
 	if (discard_backing_storage(obj))
 		obj->mm.madv = I915_MADV_DONTNEED;
 	__i915_gem_object_put_pages(obj);
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index a8c3d37b95b9..868da0bae751 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -48,6 +48,20 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
 #endif
 }
 
+static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
+{
+	if (!mutex_trylock(&dev->struct_mutex)) {
+		if (!mutex_is_locked_by(&dev->struct_mutex, current))
+			return false;
+
+		*unlock = false;
+	} else {
+		*unlock = true;
+	}
+
+	return true;
+}
+
 static bool any_vma_pinned(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
@@ -66,6 +80,9 @@ static bool swap_available(void)
 
 static bool can_release_pages(struct drm_i915_gem_object *obj)
 {
+	if (!obj->mm.pages)
+		return false;
+
 	/* Only shmemfs objects are backed by swap */
 	if (!obj->base.filp)
 		return false;
@@ -78,7 +95,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	 * to the GPU, simply unbinding from the GPU is not going to succeed
 	 * in releasing our pin count on the pages themselves.
 	 */
-	if (obj->mm.pages_pin_count > obj->bind_count)
+	if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count)
 		return false;
 
 	if (any_vma_pinned(obj))
@@ -95,7 +112,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
 {
 	if (i915_gem_object_unbind(obj) == 0)
 		__i915_gem_object_put_pages(obj);
-	return !obj->mm.pages;
+	return !READ_ONCE(obj->mm.pages);
 }
 
 /**
@@ -135,6 +152,10 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 		{ NULL, 0 },
 	}, *phase;
 	unsigned long count = 0;
+	bool unlock;
+
+	if (!i915_gem_shrinker_lock(&dev_priv->drm, &unlock))
+		return 0;
 
 	trace_i915_gem_shrink(dev_priv, target, flags);
 	i915_gem_retire_requests(dev_priv);
@@ -198,8 +219,14 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 
 			i915_gem_object_get(obj);
 
-			if (unsafe_drop_pages(obj))
-				count += obj->base.size >> PAGE_SHIFT;
+			if (unsafe_drop_pages(obj)) {
+				mutex_lock(&obj->mm.lock);
+				if (!obj->mm.pages) {
+					__i915_gem_object_invalidate(obj);
+					count += obj->base.size >> PAGE_SHIFT;
+				}
+				mutex_unlock(&obj->mm.lock);
+			}
 
 			i915_gem_object_put(obj);
 		}
@@ -210,6 +237,9 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 		intel_runtime_pm_put(dev_priv);
 
 	i915_gem_retire_requests(dev_priv);
+	if (unlock)
+		mutex_unlock(&dev_priv->drm.struct_mutex);
+
 	/* expedite the RCU grace period to free some request slabs */
 	synchronize_rcu_expedited();
 
@@ -243,19 +273,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 	return freed;
 }
 
-static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
-{
-	if (!mutex_trylock(&dev->struct_mutex)) {
-		if (!mutex_is_locked_by(&dev->struct_mutex, current))
-			return false;
-
-		*unlock = false;
-	} else
-		*unlock = true;
-
-	return true;
-}
-
 static unsigned long
 i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 7286de7bd25e..11e2e0f57ac1 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -260,6 +260,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		if (!err) {
 			struct i915_vma *vma;
 
+			mutex_lock(&obj->mm.lock);
 			if (obj->mm.pages &&
 			    obj->mm.madv == I915_MADV_WILLNEED &&
 			    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
@@ -268,6 +269,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 				if (!i915_gem_object_is_tiled(obj))
 					__i915_gem_object_pin_pages(obj);
 			}
+			mutex_unlock(&obj->mm.lock);
 
 			list_for_each_entry(vma, &obj->vma_list, obj_link) {
 				if (!vma->fence)
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index e97c7b589439..136c493b15b2 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -79,7 +79,7 @@ static void cancel_userptr(struct work_struct *work)
 	WARN_ONCE(obj->mm.pages,
 		  "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_display=%d\n",
 		  obj->bind_count,
-		  obj->mm.pages_pin_count,
+		  atomic_read(&obj->mm.pages_pin_count),
 		  obj->pin_display);
 
 	i915_gem_object_put(obj);
@@ -491,7 +491,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 {
 	struct get_pages_work *work = container_of(_work, typeof(*work), work);
 	struct drm_i915_gem_object *obj = work->obj;
-	struct drm_device *dev = obj->base.dev;
 	const int npages = obj->base.size >> PAGE_SHIFT;
 	struct page **pvec;
 	int pinned, ret;
@@ -523,7 +522,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 		}
 	}
 
-	mutex_lock(&dev->struct_mutex);
+	mutex_lock(&obj->mm.lock);
 	if (obj->userptr.work == &work->work) {
 		struct sg_table *pages = ERR_PTR(ret);
 
@@ -538,13 +537,12 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 
 		obj->userptr.work = ERR_CAST(pages);
 	}
-
-	i915_gem_object_put(obj);
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&obj->mm.lock);
 
 	release_pages(pvec, pinned, 0);
 	drm_free_large(pvec);
 
+	i915_gem_object_put_unlocked(obj);
 	put_task_struct(work->task);
 	kfree(work);
 }
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 19/42] drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (17 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 18/42] drm/i915: Move object backing storage manipulation to its own locking Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-13 11:54   ` Joonas Lahtinen
  2016-10-07  9:46 ` [PATCH 20/42] drm/i915: Implement pread without struct-mutex Chris Wilson
                   ` (25 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Use the per-object mm.lock to allocate the backing storage (and hold a
reference to it across the dmabuf access) without resorting to
struct_mutex.
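
In rough outline the exporter's map_dma_buf callback ends up with the
shape below: the page pin (serialised by the new obj->mm.lock inside
i915_gem_object_pin_pages) is what keeps the backing storage alive across
the import, and struct_mutex drops out entirely. This is only a sketch of
the locking, not the code in the diff; copy_and_map_sg() is a hypothetical
stand-in for the sg_table copy and dma_map_sg() done by the real function.

static struct sg_table *copy_and_map_sg(struct drm_i915_gem_object *obj,
					struct dma_buf_attachment *attach,
					enum dma_data_direction dir); /* hypothetical */

static struct sg_table *
example_map_dma_buf(struct dma_buf_attachment *attach,
		    enum dma_data_direction dir)
{
	struct drm_i915_gem_object *obj = dma_buf_to_obj(attach->dmabuf);
	struct sg_table *st;
	int ret;

	/* Pin the backing storage without struct_mutex; the pages cannot
	 * be reaped by the shrinker while the importer holds the mapping.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ERR_PTR(ret);

	st = copy_and_map_sg(obj, attach, dir);
	if (IS_ERR(st))
		i915_gem_object_unpin_pages(obj); /* drop the pin on failure */

	return st;
}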

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_dmabuf.c | 51 ++++++++++++++--------------------
 1 file changed, 21 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 2abd524aba14..9023110cf1ca 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -44,19 +44,15 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
 	struct scatterlist *src, *dst;
 	int ret, i;
 
-	ret = i915_mutex_lock_interruptible(obj->base.dev);
-	if (ret)
-		goto err;
-
 	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
-		goto err_unlock;
+		goto err;
 
 	/* Copy sg so that we make an independent mapping */
 	st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
 	if (st == NULL) {
 		ret = -ENOMEM;
-		goto err_unpin;
+		goto err_put_pages;
 	}
 
 	ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
@@ -72,21 +68,18 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
 	}
 
 	if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
-		ret =-ENOMEM;
+		ret = -ENOMEM;
 		goto err_free_sg;
 	}
 
-	mutex_unlock(&obj->base.dev->struct_mutex);
 	return st;
 
 err_free_sg:
 	sg_free_table(st);
 err_free:
 	kfree(st);
-err_unpin:
+err_put_pages:
 	i915_gem_object_unpin_pages(obj);
-err_unlock:
-	mutex_unlock(&obj->base.dev->struct_mutex);
 err:
 	return ERR_PTR(ret);
 }
@@ -101,36 +94,21 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
 	sg_free_table(sg);
 	kfree(sg);
 
-	mutex_lock(&obj->base.dev->struct_mutex);
 	i915_gem_object_unpin_pages(obj);
-	mutex_unlock(&obj->base.dev->struct_mutex);
 }
 
 static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct drm_device *dev = obj->base.dev;
-	void *addr;
-	int ret;
 
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ERR_PTR(ret);
-
-	addr = i915_gem_object_pin_map(obj, I915_MAP_WB);
-	mutex_unlock(&dev->struct_mutex);
-
-	return addr;
+	return i915_gem_object_pin_map(obj, I915_MAP_WB);
 }
 
 static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct drm_device *dev = obj->base.dev;
 
-	mutex_lock(&dev->struct_mutex);
 	i915_gem_object_unpin_map(obj);
-	mutex_unlock(&dev->struct_mutex);
 }
 
 static void *i915_gem_dmabuf_kmap_atomic(struct dma_buf *dma_buf, unsigned long page_num)
@@ -177,15 +155,22 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
-	int ret;
 	bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
+	int ret;
 
-	ret = i915_mutex_lock_interruptible(dev);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		return ret;
 
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto err;
+
 	ret = i915_gem_object_set_to_cpu_domain(obj, write);
 	mutex_unlock(&dev->struct_mutex);
+
+err:
+	i915_gem_object_unpin_pages(obj);
 	return ret;
 }
 
@@ -195,13 +180,19 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
 	struct drm_device *dev = obj->base.dev;
 	int ret;
 
-	ret = i915_mutex_lock_interruptible(dev);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
 		return ret;
 
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto err;
+
 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
 	mutex_unlock(&dev->struct_mutex);
 
+err:
+	i915_gem_object_unpin_pages(obj);
 	return ret;
 }
 
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 20/42] drm/i915: Implement pread without struct-mutex
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (18 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 19/42] drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-12 12:53   ` Joonas Lahtinen
  2016-10-07  9:46 ` [PATCH 21/42] drm/i915: Implement pwrite " Chris Wilson
                   ` (24 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

We only need struct_mutex within pread for a brief window where we need
to serialise with rendering and control our cache domains. Elsewhere we
can rely on the backing storage being pinned, and forgive userspace any
races against us.
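
The per-page copy that replaces the old fast/slow split is, in essence,
"try the atomic path, fall back to the sleeping path on a fault". A
simplified sketch of that skeleton (swizzling and clflush handling elided,
so this is not the shmem_pread() added below, just the pattern it follows):

#include <linux/highmem.h>
#include <linux/uaccess.h>

static int pread_one_page(struct page *page, int offset, int length,
			  char __user *user_data)
{
	char *vaddr;
	unsigned long ret;

	/* Fast path: atomic kmap plus a no-fault copy to userspace. */
	vaddr = kmap_atomic(page);
	ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
	kunmap_atomic(vaddr);
	if (ret == 0)
		return 0;

	/* Slow path: the copy may fault in user pages, so switch to the
	 * sleeping kmap and an ordinary copy.
	 */
	vaddr = kmap(page);
	ret = __copy_to_user(user_data, vaddr + offset, length);
	kunmap(page);

	return ret ? -EFAULT : 0;
}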

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 364 +++++++++++++++++-----------------------
 1 file changed, 158 insertions(+), 206 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f4a773ca9333..27f91714e82e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -63,13 +63,13 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 }
 
 static int
-insert_mappable_node(struct drm_i915_private *i915,
+insert_mappable_node(struct i915_ggtt *ggtt,
                      struct drm_mm_node *node, u32 size)
 {
 	memset(node, 0, sizeof(*node));
-	return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node,
-						   size, 0, 0, 0,
-						   i915->ggtt.mappable_end,
+	return drm_mm_insert_node_in_range_generic(&ggtt->base.mm, node,
+						   size, 0, -1,
+						   0, ggtt->mappable_end,
 						   DRM_MM_SEARCH_DEFAULT,
 						   DRM_MM_CREATE_DEFAULT);
 }
@@ -820,32 +820,6 @@ err_unpin:
 	return ret;
 }
 
-/* Per-page copy function for the shmem pread fastpath.
- * Flushes invalid cachelines before reading the target if
- * needs_clflush is set. */
-static int
-shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
-		 char __user *user_data,
-		 bool page_do_bit17_swizzling, bool needs_clflush)
-{
-	char *vaddr;
-	int ret;
-
-	if (unlikely(page_do_bit17_swizzling))
-		return -EINVAL;
-
-	vaddr = kmap_atomic(page);
-	if (needs_clflush)
-		drm_clflush_virt_range(vaddr + shmem_page_offset,
-				       page_length);
-	ret = __copy_to_user_inatomic(user_data,
-				      vaddr + shmem_page_offset,
-				      page_length);
-	kunmap_atomic(vaddr);
-
-	return ret ? -EFAULT : 0;
-}
-
 static void
 shmem_clflush_swizzled_range(char *addr, unsigned long length,
 			     bool swizzled)
@@ -871,7 +845,7 @@ shmem_clflush_swizzled_range(char *addr, unsigned long length,
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pread_slow(struct page *page, int offset, int length,
 		 char __user *user_data,
 		 bool page_do_bit17_swizzling, bool needs_clflush)
 {
@@ -880,60 +854,132 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
 
 	vaddr = kmap(page);
 	if (needs_clflush)
-		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
-					     page_length,
+		shmem_clflush_swizzled_range(vaddr + offset, length,
 					     page_do_bit17_swizzling);
 
 	if (page_do_bit17_swizzling)
-		ret = __copy_to_user_swizzled(user_data,
-					      vaddr, shmem_page_offset,
-					      page_length);
+		ret = __copy_to_user_swizzled(user_data, vaddr, offset, length);
 	else
-		ret = __copy_to_user(user_data,
-				     vaddr + shmem_page_offset,
-				     page_length);
+		ret = __copy_to_user(user_data, vaddr + offset, length);
 	kunmap(page);
 
 	return ret ? - EFAULT : 0;
 }
 
-static inline unsigned long
-slow_user_access(struct io_mapping *mapping,
-		 uint64_t page_base, int page_offset,
-		 char __user *user_data,
-		 unsigned long length, bool pwrite)
+static int
+shmem_pread(struct page *page, int offset, int length, char __user *user_data,
+	    bool page_do_bit17_swizzling, bool needs_clflush)
 {
-	void __iomem *ioaddr;
+	int ret;
+
+	ret = -ENODEV;
+	if (!page_do_bit17_swizzling) {
+		char *vaddr = kmap_atomic(page);
+
+		if (needs_clflush)
+			drm_clflush_virt_range(vaddr + offset, length);
+		ret = __copy_to_user_inatomic(user_data, vaddr + offset, length);
+		kunmap_atomic(vaddr);
+	}
+	if (ret == 0)
+		return 0;
+
+	return shmem_pread_slow(page, offset, length, user_data,
+				page_do_bit17_swizzling, needs_clflush);
+}
+
+static int
+i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
+		     struct drm_i915_gem_pread *args)
+{
+	char __user *user_data;
+	u64 remain;
+	unsigned int obj_do_bit17_swizzling;
+	unsigned int needs_clflush;
+	unsigned int idx, offset;
+	int ret;
+
+	obj_do_bit17_swizzling = 0;
+	if (i915_gem_object_needs_bit17_swizzle(obj))
+		obj_do_bit17_swizzling = 1 << 17;
+
+	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	remain = args->size;
+	user_data = u64_to_user_ptr(args->data_ptr);
+	offset = offset_in_page(args->offset);
+	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
+		struct page *page = i915_gem_object_get_page(obj, idx);
+		int length;
+
+		length = remain;
+		if (offset + length > PAGE_SIZE)
+			length = PAGE_SIZE - offset;
+
+		ret = shmem_pread(page, offset, length, user_data,
+				  page_to_phys(page) & obj_do_bit17_swizzling,
+				  needs_clflush);
+		if (ret)
+			break;
+
+		remain -= length;
+		user_data += length;
+		offset = 0;
+	}
+
+	i915_gem_obj_finish_shmem_access(obj);
+	return ret;
+}
+
+static inline int
+gtt_user_read(struct io_mapping *mapping,
+	      loff_t page_base, int page_offset,
+	      char __user *user_data,
+	      int length)
+{
+	void __iomem *vaddr_atomic;
 	void *vaddr;
-	uint64_t unwritten;
+	unsigned long unwritten;
 
-	ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
+	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
 	/* We can use the cpu mem copy function because this is X86. */
-	vaddr = (void __force *)ioaddr + page_offset;
-	if (pwrite)
-		unwritten = __copy_from_user(vaddr, user_data, length);
-	else
-		unwritten = __copy_to_user(user_data, vaddr, length);
-
-	io_mapping_unmap(ioaddr);
+	vaddr = (void __force *)vaddr_atomic + page_offset;
+	unwritten = __copy_to_user_inatomic(user_data, vaddr, length);
+	io_mapping_unmap_atomic(vaddr_atomic);
+	if (unwritten) {
+		vaddr_atomic = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
+		/* We can use the cpu mem copy function because this is X86. */
+		vaddr = (void __force *)vaddr_atomic + page_offset;
+		unwritten = copy_to_user(user_data, vaddr, length);
+		io_mapping_unmap(vaddr_atomic);
+	}
 	return unwritten;
 }
 
 static int
-i915_gem_gtt_pread(struct drm_device *dev,
-		   struct drm_i915_gem_object *obj, uint64_t size,
-		   uint64_t data_offset, uint64_t data_ptr)
+i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
+		   const struct drm_i915_gem_pread *args)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	struct i915_vma *vma;
+	struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
 	struct drm_mm_node node;
-	char __user *user_data;
-	uint64_t remain;
-	uint64_t offset;
+	struct i915_vma *vma;
+	void __user *user_data;
+	u64 remain, offset;
 	int ret;
 
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+				       PIN_MAPPABLE | PIN_NONBLOCK);
 	if (!IS_ERR(vma)) {
 		node.start = i915_ggtt_offset(vma);
 		node.allocated = false;
@@ -944,33 +990,21 @@ i915_gem_gtt_pread(struct drm_device *dev,
 		}
 	}
 	if (IS_ERR(vma)) {
-		ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE);
+		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
-			goto out;
-
-		ret = i915_gem_object_pin_pages(obj);
-		if (ret) {
-			remove_mappable_node(&node);
-			goto out;
-		}
+			goto out_unlock;
+		GEM_BUG_ON(!node.allocated);
 	}
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, false);
 	if (ret)
 		goto out_unpin;
 
-	user_data = u64_to_user_ptr(data_ptr);
-	remain = size;
-	offset = data_offset;
+	mutex_unlock(&obj->base.dev->struct_mutex);
 
-	mutex_unlock(&dev->struct_mutex);
-	if (likely(!i915.prefault_disable)) {
-		ret = fault_in_multipages_writeable(user_data, remain);
-		if (ret) {
-			mutex_lock(&dev->struct_mutex);
-			goto out_unpin;
-		}
-	}
+	user_data = u64_to_user_ptr(args->data_ptr);
+	remain = args->size;
+	offset = args->offset;
 
 	while (remain > 0) {
 		/* Operation in this page
@@ -987,19 +1021,14 @@ i915_gem_gtt_pread(struct drm_device *dev,
 			wmb();
 			ggtt->base.insert_page(&ggtt->base,
 					       i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
-					       node.start,
-					       I915_CACHE_NONE, 0);
+					       node.start, I915_CACHE_NONE, 0);
 			wmb();
 		} else {
 			page_base += offset & PAGE_MASK;
 		}
-		/* This is a slow read/write as it tries to read from
-		 * and write to user memory which may result into page
-		 * faults, and so we cannot perform this under struct_mutex.
-		 */
-		if (slow_user_access(&ggtt->mappable, page_base,
-				     page_offset, user_data,
-				     page_length, false)) {
+
+		if (gtt_user_read(&ggtt->mappable, page_base, page_offset,
+				  user_data, page_length)) {
 			ret = -EFAULT;
 			break;
 		}
@@ -1009,111 +1038,19 @@ i915_gem_gtt_pread(struct drm_device *dev,
 		offset += page_length;
 	}
 
-	mutex_lock(&dev->struct_mutex);
-	if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
-		/* The user has modified the object whilst we tried
-		 * reading from it, and we now have no idea what domain
-		 * the pages should be in. As we have just been touching
-		 * them directly, flush everything back to the GTT
-		 * domain.
-		 */
-		ret = i915_gem_object_set_to_gtt_domain(obj, false);
-	}
-
+	mutex_lock(&obj->base.dev->struct_mutex);
 out_unpin:
 	if (node.allocated) {
 		wmb();
 		ggtt->base.clear_range(&ggtt->base,
 				       node.start, node.size,
 				       true);
-		i915_gem_object_unpin_pages(obj);
 		remove_mappable_node(&node);
 	} else {
 		i915_vma_unpin(vma);
 	}
-out:
-	return ret;
-}
-
-static int
-i915_gem_shmem_pread(struct drm_device *dev,
-		     struct drm_i915_gem_object *obj,
-		     struct drm_i915_gem_pread *args,
-		     struct drm_file *file)
-{
-	char __user *user_data;
-	ssize_t remain;
-	loff_t offset;
-	int shmem_page_offset, page_length, ret = 0;
-	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
-	int prefaulted = 0;
-	int needs_clflush = 0;
-	struct sg_page_iter sg_iter;
-
-	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
-	if (ret)
-		return ret;
-
-	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-	user_data = u64_to_user_ptr(args->data_ptr);
-	offset = args->offset;
-	remain = args->size;
-
-	for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents,
-			 offset >> PAGE_SHIFT) {
-		struct page *page = sg_page_iter_page(&sg_iter);
-
-		if (remain <= 0)
-			break;
-
-		/* Operation in this page
-		 *
-		 * shmem_page_offset = offset within page in shmem file
-		 * page_length = bytes to copy for this page
-		 */
-		shmem_page_offset = offset_in_page(offset);
-		page_length = remain;
-		if ((shmem_page_offset + page_length) > PAGE_SIZE)
-			page_length = PAGE_SIZE - shmem_page_offset;
-
-		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
-			(page_to_phys(page) & (1 << 17)) != 0;
-
-		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
-				       user_data, page_do_bit17_swizzling,
-				       needs_clflush);
-		if (ret == 0)
-			goto next_page;
-
-		mutex_unlock(&dev->struct_mutex);
-
-		if (likely(!i915.prefault_disable) && !prefaulted) {
-			ret = fault_in_multipages_writeable(user_data, remain);
-			/* Userspace is tricking us, but we've already clobbered
-			 * its pages with the prefault and promised to write the
-			 * data up to the first fault. Hence ignore any errors
-			 * and just continue. */
-			(void)ret;
-			prefaulted = 1;
-		}
-
-		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
-				       user_data, page_do_bit17_swizzling,
-				       needs_clflush);
-
-		mutex_lock(&dev->struct_mutex);
-
-		if (ret)
-			goto out;
-
-next_page:
-		remain -= page_length;
-		user_data += page_length;
-		offset += page_length;
-	}
-
-out:
-	i915_gem_obj_finish_shmem_access(obj);
+out_unlock:
+	mutex_unlock(&obj->base.dev->struct_mutex);
 
 	return ret;
 }
@@ -1132,7 +1069,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_pread *args = data;
 	struct drm_i915_gem_object *obj;
-	int ret = 0;
+	int ret;
 
 	if (args->size == 0)
 		return 0;
@@ -1150,7 +1087,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	if (args->offset > obj->base.size ||
 	    args->size > obj->base.size - args->offset) {
 		ret = -EINVAL;
-		goto err;
+		goto out;
 	}
 
 	trace_i915_gem_object_pread(obj, args->offset, args->size);
@@ -1160,28 +1097,21 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 				   MAX_SCHEDULE_TIMEOUT,
 				   to_rps_client(file));
 	if (ret)
-		goto err;
+		goto out;
 
-	ret = i915_mutex_lock_interruptible(dev);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
-		goto err;
-
-	ret = i915_gem_shmem_pread(dev, obj, args, file);
+		goto out;
 
-	/* pread for non shmem backed objects */
+	ret = i915_gem_shmem_pread(obj, args);
 	if (ret == -EFAULT || ret == -ENODEV) {
 		intel_runtime_pm_get(to_i915(dev));
-		ret = i915_gem_gtt_pread(dev, obj, args->size,
-					args->offset, args->data_ptr);
+		ret = i915_gem_gtt_pread(obj, args);
 		intel_runtime_pm_put(to_i915(dev));
 	}
 
-	i915_gem_object_put(obj);
-	mutex_unlock(&dev->struct_mutex);
-
-	return ret;
-
-err:
+	i915_gem_object_unpin_pages(obj);
+out:
 	i915_gem_object_put_unlocked(obj);
 	return ret;
 }
@@ -1209,6 +1139,28 @@ fast_user_write(struct io_mapping *mapping,
 	return unwritten;
 }
 
+static inline unsigned long
+slow_user_access(struct io_mapping *mapping,
+		 unsigned long page_base, int page_offset,
+		 char __user *user_data,
+		 unsigned long length, bool pwrite)
+{
+	void __iomem *ioaddr;
+	void *vaddr;
+	unsigned long unwritten;
+
+	ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
+	/* We can use the cpu mem copy function because this is X86. */
+	vaddr = (void __force *)ioaddr + page_offset;
+	if (pwrite)
+		unwritten = __copy_from_user(vaddr, user_data, length);
+	else
+		unwritten = __copy_to_user(user_data, vaddr, length);
+
+	io_mapping_unmap(ioaddr);
+	return unwritten;
+}
+
 /**
  * This is the fast pwrite path, where we copy the data directly from the
  * user into the GTT, uncached.
@@ -1247,7 +1199,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 		}
 	}
 	if (IS_ERR(vma)) {
-		ret = insert_mappable_node(i915, &node, PAGE_SIZE);
+		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
 			goto out;
 
@@ -1276,8 +1228,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 		 * page_length = bytes to copy for this page
 		 */
 		u32 page_base = node.start;
-		unsigned page_offset = offset_in_page(offset);
-		unsigned page_length = PAGE_SIZE - page_offset;
+		unsigned int page_offset = offset_in_page(offset);
+		unsigned int page_length = PAGE_SIZE - page_offset;
 		page_length = remain < page_length ? remain : page_length;
 		if (node.allocated) {
 			wmb(); /* flush the write before we modify the GGTT */
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 21/42] drm/i915: Implement pwrite without struct-mutex
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (19 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 20/42] drm/i915: Implement pread without struct-mutex Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-13 11:17   ` Joonas Lahtinen
  2016-10-07  9:46 ` [PATCH 22/42] drm/i915: Acquire the backing storage outside of struct_mutex in set-domain Chris Wilson
                   ` (23 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

We only need struct_mutex within pwrite for a brief window where we need
to serialise with rendering and control our cache domains. Elsewhere we
can rely on the backing storage being pinned, and forgive userspace any
races against us.
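
One detail in the rework below that is easy to miss: whether a write needs
a clflush of partially covered cachelines before copying is folded into a
single mask test, (offset | length) & (clflush_size - 1). A small
illustration of the arithmetic (the helper name here is mine, not from the
patch, and 64 bytes is assumed as a typical boot_cpu_data.x86_clflush_size):

#include <linux/types.h>

/* True if a write of @length bytes at page offset @offset touches a
 * cacheline it does not completely overwrite, i.e. a pre-write clflush
 * is needed to avoid mixing stale and fresh data in that line.
 */
static bool partial_cacheline_write(unsigned int offset, unsigned int length,
				    unsigned int clflush_size)
{
	unsigned int mask = clflush_size - 1; /* e.g. 64 - 1 = 63 */

	/* Zero only when both the start offset and the length are
	 * cacheline aligned:
	 *   offset = 0,  length = 4096: (0 | 4096) & 63 == 0  -> no flush
	 *   offset = 8,  length = 64:   (8 | 64)   & 63 == 8  -> flush
	 *   offset = 64, length = 32:   (64 | 32)  & 63 == 32 -> flush
	 */
	return (offset | length) & mask;
}

In the patch the mask itself is forced to zero when CLFLUSH_BEFORE is not
required at all, so the common case costs nothing.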

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 346 ++++++++++++++--------------------------
 1 file changed, 120 insertions(+), 226 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 27f91714e82e..81f88103e6f5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1121,10 +1121,10 @@ out:
  */
 
 static inline int
-fast_user_write(struct io_mapping *mapping,
-		loff_t page_base, int page_offset,
-		char __user *user_data,
-		int length)
+ggtt_write(struct io_mapping *mapping,
+	   loff_t page_base, int page_offset,
+	   char __user *user_data,
+	   int length)
 {
 	void __iomem *vaddr_atomic;
 	void *vaddr;
@@ -1132,60 +1132,42 @@ fast_user_write(struct io_mapping *mapping,
 
 	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
 	/* We can use the cpu mem copy function because this is X86. */
-	vaddr = (void __force*)vaddr_atomic + page_offset;
+	vaddr = (void __force *)vaddr_atomic + page_offset;
 	unwritten = __copy_from_user_inatomic_nocache(vaddr,
 						      user_data, length);
 	io_mapping_unmap_atomic(vaddr_atomic);
-	return unwritten;
-}
 
-static inline unsigned long
-slow_user_access(struct io_mapping *mapping,
-		 unsigned long page_base, int page_offset,
-		 char __user *user_data,
-		 unsigned long length, bool pwrite)
-{
-	void __iomem *ioaddr;
-	void *vaddr;
-	unsigned long unwritten;
-
-	ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
-	/* We can use the cpu mem copy function because this is X86. */
-	vaddr = (void __force *)ioaddr + page_offset;
-	if (pwrite)
-		unwritten = __copy_from_user(vaddr, user_data, length);
-	else
-		unwritten = __copy_to_user(user_data, vaddr, length);
+	if (unwritten) {
+		vaddr_atomic = io_mapping_map_wc(mapping, page_base, PAGE_SIZE);
+		/* We can use the cpu mem copy function because this is X86. */
+		vaddr = (void __force *)vaddr_atomic + page_offset;
+		unwritten = copy_from_user(vaddr, user_data, length);
+		io_mapping_unmap(vaddr_atomic);
+	}
 
-	io_mapping_unmap(ioaddr);
 	return unwritten;
 }
 
 /**
  * This is the fast pwrite path, where we copy the data directly from the
  * user into the GTT, uncached.
- * @i915: i915 device private data
- * @obj: i915 gem object
+ * @obj: i915 GEM object
  * @args: pwrite arguments structure
- * @file: drm file pointer
  */
 static int
-i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
-			 struct drm_i915_gem_object *obj,
-			 struct drm_i915_gem_pwrite *args,
-			 struct drm_file *file)
+i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
+			 const struct drm_i915_gem_pwrite *args)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
-	struct drm_device *dev = obj->base.dev;
-	struct i915_vma *vma;
+	struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
 	struct drm_mm_node node;
-	uint64_t remain, offset;
-	char __user *user_data;
+	struct i915_vma *vma;
+	u64 remain, offset;
+	void __user *user_data;
 	int ret;
-	bool hit_slow_path = false;
 
-	if (i915_gem_object_is_tiled(obj))
-		return -EFAULT;
+	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
+	if (ret)
+		return ret;
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE | PIN_NONBLOCK);
@@ -1201,21 +1183,17 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 	if (IS_ERR(vma)) {
 		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
 		if (ret)
-			goto out;
-
-		ret = i915_gem_object_pin_pages(obj);
-		if (ret) {
-			remove_mappable_node(&node);
-			goto out;
-		}
+			goto out_unlock;
+		GEM_BUG_ON(!node.allocated);
 	}
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
 		goto out_unpin;
 
+	mutex_unlock(&obj->base.dev->struct_mutex);
+
 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-	obj->mm.dirty = true;
 
 	user_data = u64_to_user_ptr(args->data_ptr);
 	offset = args->offset;
@@ -1246,92 +1224,36 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 		 * If the object is non-shmem backed, we retry again with the
 		 * path that handles page fault.
 		 */
-		if (fast_user_write(&ggtt->mappable, page_base,
-				    page_offset, user_data, page_length)) {
-			hit_slow_path = true;
-			mutex_unlock(&dev->struct_mutex);
-			if (slow_user_access(&ggtt->mappable,
-					     page_base,
-					     page_offset, user_data,
-					     page_length, true)) {
-				ret = -EFAULT;
-				mutex_lock(&dev->struct_mutex);
-				goto out_flush;
-			}
-
-			mutex_lock(&dev->struct_mutex);
+		if (ggtt_write(&ggtt->mappable, page_base, page_offset,
+			       user_data, page_length)) {
+			ret = -EFAULT;
+			break;
 		}
 
 		remain -= page_length;
 		user_data += page_length;
 		offset += page_length;
 	}
-
-out_flush:
-	if (hit_slow_path) {
-		if (ret == 0 &&
-		    (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) {
-			/* The user has modified the object whilst we tried
-			 * reading from it, and we now have no idea what domain
-			 * the pages should be in. As we have just been touching
-			 * them directly, flush everything back to the GTT
-			 * domain.
-			 */
-			ret = i915_gem_object_set_to_gtt_domain(obj, false);
-		}
-	}
-
 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+
+	mutex_lock(&obj->base.dev->struct_mutex);
 out_unpin:
 	if (node.allocated) {
 		wmb();
 		ggtt->base.clear_range(&ggtt->base,
 				       node.start, node.size,
 				       true);
-		i915_gem_object_unpin_pages(obj);
 		remove_mappable_node(&node);
 	} else {
 		i915_vma_unpin(vma);
 	}
-out:
+out_unlock:
+	mutex_unlock(&obj->base.dev->struct_mutex);
 	return ret;
 }
 
-/* Per-page copy function for the shmem pwrite fastpath.
- * Flushes invalid cachelines before writing to the target if
- * needs_clflush_before is set and flushes out any written cachelines after
- * writing if needs_clflush is set. */
-static int
-shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
-		  char __user *user_data,
-		  bool page_do_bit17_swizzling,
-		  bool needs_clflush_before,
-		  bool needs_clflush_after)
-{
-	char *vaddr;
-	int ret;
-
-	if (unlikely(page_do_bit17_swizzling))
-		return -EINVAL;
-
-	vaddr = kmap_atomic(page);
-	if (needs_clflush_before)
-		drm_clflush_virt_range(vaddr + shmem_page_offset,
-				       page_length);
-	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
-					user_data, page_length);
-	if (needs_clflush_after)
-		drm_clflush_virt_range(vaddr + shmem_page_offset,
-				       page_length);
-	kunmap_atomic(vaddr);
-
-	return ret ? -EFAULT : 0;
-}
-
-/* Only difference to the fast-path function is that this can handle bit17
- * and uses non-atomic copy and kmap functions. */
 static int
-shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
+shmem_pwrite_slow(struct page *page, int offset, int length,
 		  char __user *user_data,
 		  bool page_do_bit17_swizzling,
 		  bool needs_clflush_before,
@@ -1342,124 +1264,113 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
 
 	vaddr = kmap(page);
 	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
-		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
-					     page_length,
+		shmem_clflush_swizzled_range(vaddr + offset, length,
 					     page_do_bit17_swizzling);
 	if (page_do_bit17_swizzling)
-		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
-						user_data,
-						page_length);
+		ret = __copy_from_user_swizzled(vaddr, offset, user_data,
+						length);
 	else
-		ret = __copy_from_user(vaddr + shmem_page_offset,
-				       user_data,
-				       page_length);
+		ret = __copy_from_user(vaddr + offset, user_data, length);
 	if (needs_clflush_after)
-		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
-					     page_length,
+		shmem_clflush_swizzled_range(vaddr + offset, length,
 					     page_do_bit17_swizzling);
 	kunmap(page);
 
 	return ret ? -EFAULT : 0;
 }
 
+/* Per-page copy function for the shmem pwrite fastpath.
+ * Flushes invalid cachelines before writing to the target if
+ * needs_clflush_before is set and flushes out any written cachelines after
+ * writing if needs_clflush is set.
+ */
 static int
-i915_gem_shmem_pwrite(struct drm_device *dev,
-		      struct drm_i915_gem_object *obj,
-		      struct drm_i915_gem_pwrite *args,
-		      struct drm_file *file)
+shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
+	     bool page_do_bit17_swizzling,
+	     bool needs_clflush_before,
+	     bool needs_clflush_after)
 {
-	ssize_t remain;
-	loff_t offset;
-	char __user *user_data;
-	int shmem_page_offset, page_length, ret = 0;
-	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
-	int hit_slowpath = 0;
+	int ret;
+
+	ret = -ENODEV;
+	if (!page_do_bit17_swizzling) {
+		char *vaddr = kmap_atomic(page);
+
+		if (needs_clflush_before)
+			drm_clflush_virt_range(vaddr + offset, len);
+		ret = __copy_from_user_inatomic(vaddr + offset, user_data, len);
+		if (needs_clflush_after)
+			drm_clflush_virt_range(vaddr + offset, len);
+
+		kunmap_atomic(vaddr);
+	}
+	if (ret == 0)
+		return ret;
+
+	return shmem_pwrite_slow(page, offset, len, user_data,
+				 page_do_bit17_swizzling,
+				 needs_clflush_before,
+				 needs_clflush_after);
+}
+
+static int
+i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
+		      const struct drm_i915_gem_pwrite *args)
+{
+	void __user *user_data;
+	u64 remain;
+	unsigned int obj_do_bit17_swizzling;
+	unsigned int partial_cacheline_write;
 	unsigned int needs_clflush;
-	struct sg_page_iter sg_iter;
+	unsigned int offset, idx;
+	int ret;
 
-	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
 	if (ret)
 		return ret;
 
-	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
-	user_data = u64_to_user_ptr(args->data_ptr);
-	offset = args->offset;
-	remain = args->size;
+	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+	mutex_unlock(&obj->base.dev->struct_mutex);
+	if (ret)
+		return ret;
 
-	for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents,
-			 offset >> PAGE_SHIFT) {
-		struct page *page = sg_page_iter_page(&sg_iter);
-		int partial_cacheline_write;
+	obj_do_bit17_swizzling = 0;
+	if (i915_gem_object_needs_bit17_swizzle(obj))
+		obj_do_bit17_swizzling = 1 << 17;
 
-		if (remain <= 0)
-			break;
+	/* If we don't overwrite a cacheline completely we need to be
+	 * careful to have up-to-date data by first clflushing. Don't
+	 * overcomplicate things and flush the entire patch.
+	 */
+	partial_cacheline_write = 0;
+	if (needs_clflush & CLFLUSH_BEFORE)
+		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
 
-		/* Operation in this page
-		 *
-		 * shmem_page_offset = offset within page in shmem file
-		 * page_length = bytes to copy for this page
-		 */
-		shmem_page_offset = offset_in_page(offset);
-
-		page_length = remain;
-		if ((shmem_page_offset + page_length) > PAGE_SIZE)
-			page_length = PAGE_SIZE - shmem_page_offset;
-
-		/* If we don't overwrite a cacheline completely we need to be
-		 * careful to have up-to-date data by first clflushing. Don't
-		 * overcomplicate things and flush the entire patch. */
-		partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE &&
-			((shmem_page_offset | page_length)
-				& (boot_cpu_data.x86_clflush_size - 1));
-
-		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
-			(page_to_phys(page) & (1 << 17)) != 0;
-
-		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
-					user_data, page_do_bit17_swizzling,
-					partial_cacheline_write,
-					needs_clflush & CLFLUSH_AFTER);
-		if (ret == 0)
-			goto next_page;
-
-		hit_slowpath = 1;
-		mutex_unlock(&dev->struct_mutex);
-		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
-					user_data, page_do_bit17_swizzling,
-					partial_cacheline_write,
-					needs_clflush & CLFLUSH_AFTER);
+	user_data = u64_to_user_ptr(args->data_ptr);
+	remain = args->size;
+	offset = offset_in_page(args->offset);
+	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
+		struct page *page = i915_gem_object_get_page(obj, idx);
+		int length;
 
-		mutex_lock(&dev->struct_mutex);
+		length = remain;
+		if (offset + length > PAGE_SIZE)
+			length = PAGE_SIZE - offset;
 
+		ret = shmem_pwrite(page, offset, length, user_data,
+				   page_to_phys(page) & obj_do_bit17_swizzling,
+				   (offset | length) & partial_cacheline_write,
+				   needs_clflush & CLFLUSH_AFTER);
 		if (ret)
-			goto out;
-
-next_page:
-		remain -= page_length;
-		user_data += page_length;
-		offset += page_length;
-	}
-
-out:
-	i915_gem_obj_finish_shmem_access(obj);
+			break;
 
-	if (hit_slowpath) {
-		/*
-		 * Fixup: Flush cpu caches in case we didn't flush the dirty
-		 * cachelines in-line while writing and the object moved
-		 * out of the cpu write domain while we've dropped the lock.
-		 */
-		if (!(needs_clflush & CLFLUSH_AFTER) &&
-		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
-			if (i915_gem_clflush_object(obj, obj->pin_display))
-				needs_clflush |= CLFLUSH_AFTER;
-		}
+		remain -= length;
+		user_data += length;
+		offset = 0;
 	}
 
-	if (needs_clflush & CLFLUSH_AFTER)
-		i915_gem_chipset_flush(to_i915(dev));
-
 	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+	i915_gem_obj_finish_shmem_access(obj);
 	return ret;
 }
 
@@ -1475,7 +1386,6 @@ int
 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_pwrite *args = data;
 	struct drm_i915_gem_object *obj;
 	int ret;
@@ -1488,13 +1398,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		       args->size))
 		return -EFAULT;
 
-	if (likely(!i915.prefault_disable)) {
-		ret = fault_in_multipages_readable(u64_to_user_ptr(args->data_ptr),
-						   args->size);
-		if (ret)
-			return -EFAULT;
-	}
-
 	obj = i915_gem_object_lookup(file, args->handle);
 	if (!obj)
 		return -ENOENT;
@@ -1516,11 +1419,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto err;
 
-	intel_runtime_pm_get(dev_priv);
-
-	ret = i915_mutex_lock_interruptible(dev);
+	ret = i915_gem_object_pin_pages(obj);
 	if (ret)
-		goto err_rpm;
+		goto err;
 
 	ret = -EFAULT;
 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
@@ -1531,7 +1432,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 */
 	if (!i915_gem_object_has_struct_page(obj) ||
 	    cpu_write_needs_clflush(obj)) {
-		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
+		ret = i915_gem_gtt_pwrite_fast(obj, args);
 		/* Note that the gtt paths might fail with non-page-backed user
 		 * pointers (e.g. gtt mappings when moving data between
 		 * textures). Fallback to the shmem path in that case. */
@@ -1541,17 +1442,10 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		if (obj->phys_handle)
 			ret = i915_gem_phys_pwrite(obj, args, file);
 		else
-			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+			ret = i915_gem_shmem_pwrite(obj, args);
 	}
 
-	i915_gem_object_put(obj);
-	mutex_unlock(&dev->struct_mutex);
-	intel_runtime_pm_put(dev_priv);
-
-	return ret;
-
-err_rpm:
-	intel_runtime_pm_put(dev_priv);
+	i915_gem_object_unpin_pages(obj);
 err:
 	i915_gem_object_put_unlocked(obj);
 	return ret;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 22/42] drm/i915: Acquire the backing storage outside of struct_mutex in set-domain
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (20 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 21/42] drm/i915: Implement pwrite " Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-13 11:47   ` Joonas Lahtinen
  2016-10-07  9:46 ` [PATCH 23/42] drm/i915: Move object release to a freelist + worker Chris Wilson
                   ` (22 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

As we can locklessly (well, struct_mutex-lessly) acquire the backing
storage, do so in set-domain-ioctl to reduce the contention on the
struct_mutex.
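
The ordering that matters in the ioctl below: pin the backing storage
first - that is the step that may sleep, allocate and swap pages back in
from shmemfs, and it now only needs obj->mm.lock - and take struct_mutex
just for the domain transition itself. A compressed sketch of that flow
(error paths trimmed; the helpers are the ones used in the diff):

	/* May sleep, allocate and fault pages in; no struct_mutex held. */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlocked;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto err_pages;

	/* struct_mutex covers only the cache-domain bookkeeping. */
	ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
	mutex_unlock(&dev->struct_mutex);

err_pages:
	i915_gem_object_unpin_pages(obj);
err_unlocked:
	i915_gem_object_put_unlocked(obj);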


Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 83 ++++++++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 81f88103e6f5..80069f0fa9ac 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1458,6 +1458,30 @@ write_origin(struct drm_i915_gem_object *obj, unsigned domain)
 		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
 }
 
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915;
+	struct list_head *list;
+	struct i915_vma *vma;
+
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (!i915_vma_is_ggtt(vma))
+			continue;
+
+		if (i915_vma_is_active(vma))
+			continue;
+
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
+	}
+
+	i915 = to_i915(obj->base.dev);
+	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
+	list_move_tail(&obj->global_list, list);
+}
+
 /**
  * Called when user space prepares to use an object with the CPU, either
  * through the mmap ioctl's mapping or a GTT mapping.
@@ -1499,25 +1523,40 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 				   MAX_SCHEDULE_TIMEOUT,
 				   to_rps_client(file));
 	if (ret)
-		goto err;
+		goto err_unlocked;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err_unlocked;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
-		goto err;
+		goto err_pages;
 
 	if (read_domains & I915_GEM_DOMAIN_GTT)
 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
 	else
 		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
 
-	if (write_domain != 0)
-		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
+	/* And bump the LRU for this access */
+	i915_gem_object_bump_inactive_ggtt(obj);
 
-	i915_gem_object_put(obj);
 	mutex_unlock(&dev->struct_mutex);
-	return ret;
 
-err:
+	if (write_domain != 0)
+		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
+
+err_pages:
+	i915_gem_object_unpin_pages(obj);
+err_unlocked:
 	i915_gem_object_put_unlocked(obj);
 	return ret;
 }
@@ -1740,6 +1779,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 	if (ret)
 		goto err;
 
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err;
+
 	intel_runtime_pm_get(dev_priv);
 
 	ret = i915_mutex_lock_interruptible(dev);
@@ -1821,6 +1864,7 @@ err_unlock:
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
 	intel_runtime_pm_put(dev_priv);
+	i915_gem_object_unpin_pages(obj);
 err:
 	switch (ret) {
 	case -EIO:
@@ -3214,24 +3258,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 					    I915_GEM_DOMAIN_CPU);
 }
 
-static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
-
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
-		if (!i915_vma_is_ggtt(vma))
-			continue;
-
-		if (i915_vma_is_active(vma))
-			continue;
-
-		if (!drm_mm_node_allocated(&vma->node))
-			continue;
-
-		list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
-	}
-}
-
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  * @obj: object to act on
@@ -3287,7 +3313,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	/* It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
 	 */
-	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
+	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
 	if (write) {
 		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
@@ -3299,10 +3325,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 					    old_read_domains,
 					    old_write_domain);
 
-	/* And bump the LRU for this access */
-	i915_gem_object_bump_inactive_ggtt(obj);
 	i915_gem_object_unpin_pages(obj);
-
 	return 0;
 }
 
@@ -3666,7 +3689,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	/* It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
 	 */
-	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
 
 	/* If we're writing through the CPU, then the GPU read domains will
 	 * need to be invalidated at next use.
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 23/42] drm/i915: Move object release to a freelist + worker
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (21 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 22/42] drm/i915: Acquire the backing storage outside of struct_mutex in set-domain Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-11  9:52   ` John Harrison
  2016-10-07  9:46 ` [PATCH 24/42] drm/i915: Treat a framebuffer reference as an active reference whilst shrinking Chris Wilson
                   ` (21 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

We want to hide the latency of releasing objects and their backing
storage from the submission, so we move the actual free to a worker.
This allows us to switch to struct_mutex-free freeing of the object in
the next patch.

Furthermore, if we know that the object we are dereferencing remains valid
for the duration of our access, we can forgo the usual synchronisation
barriers and atomic reference counting. To ensure this we defer freeing
an object until after an RCU grace period, such that any lookup of the
object within an RCU read critical section will remain valid until
after we exit that critical section. We also employ this delay for
rate-limiting the serialisation on reallocation - we have to slow down
object creation in order to prevent resource starvation (in particular,
files).
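
Stripped of the i915 specifics, the deferred-free machinery described
above follows the pattern sketched below: the final reference drop only
queues an RCU callback, that callback (which may run in softirq context
and so must not block) pushes the object onto a lockless list, and a
worker drains the list to do the blocking teardown; lookups stay safe
because they run under rcu_read_lock() and only succeed if
kref_get_unless_zero() still finds a live reference. This is a generic
sketch, not the diff - the names are invented and free_one() stands in
for the real unbind/put-pages/kfree work.

#include <linux/idr.h>
#include <linux/kref.h>
#include <linux/llist.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>

struct obj {
	struct kref ref;
	union {		/* reuse the same space for both stages of the free */
		struct rcu_head rcu;	 /* defer past an RCU grace period */
		struct llist_node freed; /* then queue for the free worker */
	};
};

static void free_one(struct obj *o);	/* hypothetical blocking teardown */

static LLIST_HEAD(free_list);

static void free_worker(struct work_struct *work)
{
	struct llist_node *freed;
	struct obj *o, *next;

	/* Drain everything queued since the last run; the blocking work
	 * (unbinding, releasing pages, kfree) happens here in process
	 * context, never in the RCU callback.
	 */
	while ((freed = llist_del_all(&free_list))) {
		llist_for_each_entry_safe(o, next, freed, freed)
			free_one(o);
	}
}
static DECLARE_WORK(free_work, free_worker);

static void free_rcu(struct rcu_head *head)
{
	struct obj *o = container_of(head, struct obj, rcu);

	/* call_rcu() may invoke us from softirq context, so hand off. */
	if (llist_add(&o->freed, &free_list))
		schedule_work(&free_work);
}

static void obj_release(struct kref *ref)
{
	struct obj *o = container_of(ref, struct obj, ref);

	/* Keep the memory valid for RCU read-side lookups still in flight. */
	call_rcu(&o->rcu, free_rcu);
}

/* RCU lookup: only valid if we can still grab a reference. */
static struct obj *obj_lookup(struct idr *idr, u32 handle)
{
	struct obj *o;

	rcu_read_lock();
	o = idr_find(idr, handle);
	if (o && !kref_get_unless_zero(&o->ref))
		o = NULL;
	rcu_read_unlock();

	return o;
}

The same reuse of the rcu_head/llist_node union and the
llist_add()/schedule_work() handoff appear in the driver as
__i915_gem_free_object_rcu() and __i915_gem_free_work() below.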

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  15 ++-
 drivers/gpu/drm/i915/i915_drv.c          |   3 +
 drivers/gpu/drm/i915/i915_drv.h          |  44 +++++++-
 drivers/gpu/drm/i915/i915_gem.c          | 166 +++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  14 ++-
 drivers/gpu/drm/i915/i915_gem_tiling.c   |  21 ++--
 6 files changed, 193 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b807ddf65e04..144013875513 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4872,10 +4872,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
 #define DROP_BOUND 0x2
 #define DROP_RETIRE 0x4
 #define DROP_ACTIVE 0x8
-#define DROP_ALL (DROP_UNBOUND | \
-		  DROP_BOUND | \
-		  DROP_RETIRE | \
-		  DROP_ACTIVE)
+#define DROP_FREED 0x10
+#define DROP_ALL (DROP_UNBOUND	| \
+		  DROP_BOUND	| \
+		  DROP_RETIRE	| \
+		  DROP_ACTIVE	| \
+		  DROP_FREED)
 static int
 i915_drop_caches_get(void *data, u64 *val)
 {
@@ -4919,6 +4921,11 @@ i915_drop_caches_set(void *data, u64 val)
 unlock:
 	mutex_unlock(&dev->struct_mutex);
 
+	if (val & DROP_FREED) {
+		synchronize_rcu();
+		flush_work(&dev_priv->mm.free_work);
+	}
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 89d322215c84..f7d48f97993d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -563,6 +563,9 @@ static void i915_gem_fini(struct drm_device *dev)
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
 
+	synchronize_rcu();
+	flush_work(&dev_priv->mm.free_work);
+
 	WARN_ON(!list_empty(&to_i915(dev)->context_list));
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7e9df190c891..e79a5cb78b5d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1354,11 +1354,17 @@ struct i915_gem_mm {
 	struct list_head bound_list;
 	/**
 	 * List of objects which are not bound to the GTT (thus
-	 * are idle and not used by the GPU) but still have
-	 * (presumably uncached) pages still attached.
+	 * are idle and not used by the GPU). These objects may or may
+	 * not actually have any pages attached.
 	 */
 	struct list_head unbound_list;
 
+	/**
+	 * List of objects which are pending destruction.
+	 */
+	struct llist_head free_list;
+	struct work_struct free_work;
+
 	/** Usable portion of the GTT for GEM */
 	unsigned long stolen_base; /* limited to low memory (32-bit) */
 
@@ -2214,6 +2220,10 @@ struct drm_i915_gem_object {
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
+	union {
+		struct rcu_head rcu;
+		struct llist_node freed;
+	};
 
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
@@ -2333,10 +2343,38 @@ to_intel_bo(struct drm_gem_object *gem)
 	return container_of(gem, struct drm_i915_gem_object, base);
 }
 
+/**
+ * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
+ * @filp: DRM file private date
+ * @handle: userspace handle
+ *
+ * Returns:
+ *
+ * A pointer to the object named by the handle if such exists on @filp, NULL
+ * otherwise. This object is only valid whilst under the RCU read lock, and
+ * note carefully the object may be in the process of being destroyed.
+ */
+static inline struct drm_i915_gem_object *
+i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
+{
+#ifdef CONFIG_LOCKDEP
+	WARN_ON(!lock_is_held(&rcu_lock_map));
+#endif
+	return idr_find(&file->object_idr, handle);
+}
+
 static inline struct drm_i915_gem_object *
 i915_gem_object_lookup(struct drm_file *file, u32 handle)
 {
-	return to_intel_bo(drm_gem_object_lookup(file, handle));
+	struct drm_i915_gem_object *obj;
+
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, handle);
+	if (obj && !kref_get_unless_zero(&obj->base.refcount))
+		obj = NULL;
+	rcu_read_unlock();
+
+	return obj;
 }
 
 __deprecated
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 80069f0fa9ac..7cd49dd1d3f8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -42,6 +42,7 @@
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
 
+static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
 
@@ -647,6 +648,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_create *args = data;
 
+	i915_gem_flush_free_objects(to_i915(dev));
+
 	return i915_gem_create(file, dev,
 			       args->size, &args->handle);
 }
@@ -3469,10 +3472,14 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_caching *args = data;
 	struct drm_i915_gem_object *obj;
+	int err = 0;
 
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (!obj) {
+		err = -ENOENT;
+		goto out;
+	}
 
 	switch (obj->cache_level) {
 	case I915_CACHE_LLC:
@@ -3488,9 +3495,9 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
 		args->caching = I915_CACHING_NONE;
 		break;
 	}
-
-	i915_gem_object_put_unlocked(obj);
-	return 0;
+out:
+	rcu_read_unlock();
+	return err;
 }
 
 int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
@@ -4001,10 +4008,14 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
 	unsigned long active;
+	int err;
 
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (!obj) {
+		err = -ENOENT;
+		goto out;
+	}
 
 	args->busy = 0;
 	active = __I915_BO_ACTIVE(obj);
@@ -4034,7 +4045,6 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 		 * are busy is not completely reliable - we only guarantee
 		 * that the object was busy.
 		 */
-		rcu_read_lock();
 
 		for_each_active(active, idx)
 			args->busy |= busy_check_reader(&obj->last_read[idx]);
@@ -4052,12 +4062,11 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 		 * the result.
 		 */
 		args->busy |= busy_check_writer(&obj->last_write);
-
-		rcu_read_unlock();
 	}
 
-	i915_gem_object_put_unlocked(obj);
-	return 0;
+out:
+	rcu_read_unlock();
+	return err;
 }
 
 int
@@ -4204,7 +4213,6 @@ struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
 
 fail:
 	i915_gem_object_free(obj);
-
 	return ERR_PTR(ret);
 }
 
@@ -4232,16 +4240,69 @@ static bool discard_backing_storage(struct drm_i915_gem_object *obj)
 	return atomic_long_read(&obj->base.filp->f_count) == 1;
 }
 
-void i915_gem_free_object(struct drm_gem_object *gem_obj)
+static void __i915_gem_free_objects(struct drm_i915_private *i915,
+				    struct llist_node *freed)
 {
-	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_vma *vma, *next;
+	struct drm_i915_gem_object *obj, *on;
 
-	intel_runtime_pm_get(dev_priv);
+	mutex_lock(&i915->drm.struct_mutex);
+	intel_runtime_pm_get(i915);
+	llist_for_each_entry(obj, freed, freed) {
+		struct i915_vma *vma, *vn;
+
+		trace_i915_gem_object_destroy(obj);
+
+		GEM_BUG_ON(i915_gem_object_is_active(obj));
+		list_for_each_entry_safe(vma, vn,
+					 &obj->vma_list, obj_link) {
+			GEM_BUG_ON(!i915_vma_is_ggtt(vma));
+			GEM_BUG_ON(i915_vma_is_active(vma));
+			vma->flags &= ~I915_VMA_PIN_MASK;
+			i915_vma_close(vma);
+		}
+
+		list_del(&obj->global_list);
+	}
+	intel_runtime_pm_put(i915);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	llist_for_each_entry_safe(obj, on, freed, freed) {
+		GEM_BUG_ON(obj->bind_count);
+		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
+
+		if (obj->ops->release)
+			obj->ops->release(obj);
+
+		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
+			atomic_set(&obj->mm.pages_pin_count, 0);
+		__i915_gem_object_put_pages(obj);
+		GEM_BUG_ON(obj->mm.pages);
+
+		if (obj->base.import_attach)
+			drm_prime_gem_destroy(&obj->base, NULL);
+
+		drm_gem_object_release(&obj->base);
+		i915_gem_info_remove_obj(i915, obj->base.size);
+
+		kfree(obj->bit_17);
+		i915_gem_object_free(obj);
+	}
+}
+
+static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
+{
+	struct llist_node *freed;
 
-	trace_i915_gem_object_destroy(obj);
+	freed = llist_del_all(&i915->mm.free_list);
+	if (unlikely(freed))
+		__i915_gem_free_objects(i915, freed);
+}
+
+static void __i915_gem_free_work(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, struct drm_i915_private, mm.free_work);
+	struct llist_node *freed;
 
 	/* All file-owned VMA should have been released by this point through
 	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
@@ -4250,42 +4311,44 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	 * the GTT either for the user or for scanout). Those VMA still need to
 	 * unbound now.
 	 */
-	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
-		GEM_BUG_ON(!i915_vma_is_ggtt(vma));
-		GEM_BUG_ON(i915_vma_is_active(vma));
-		vma->flags &= ~I915_VMA_PIN_MASK;
-		i915_vma_close(vma);
-	}
-	GEM_BUG_ON(obj->bind_count);
 
-	WARN_ON(atomic_read(&obj->frontbuffer_bits));
+	while ((freed = llist_del_all(&i915->mm.free_list)))
+		__i915_gem_free_objects(i915, freed);
+}
 
-	if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED &&
-	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
-	    i915_gem_object_is_tiled(obj))
-		__i915_gem_object_unpin_pages(obj);
+static void __i915_gem_free_object_rcu(struct rcu_head *head)
+{
+	struct drm_i915_gem_object *obj =
+		container_of(head, typeof(*obj), rcu);
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+	/* We can't simply use call_rcu() from i915_gem_free_object()
+	 * as we need to block whilst unbinding, and the call_rcu
+	 * task may be called from softirq context. So we take a
+	 * detour through a worker.
+	 */
+	if (llist_add(&obj->freed, &i915->mm.free_list))
+		schedule_work(&i915->mm.free_work);
+}
 
-	if (obj->ops->release)
-		obj->ops->release(obj);
+void i915_gem_free_object(struct drm_gem_object *gem_obj)
+{
+	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
 
-	if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
-		atomic_set(&obj->mm.pages_pin_count, 0);
 	if (discard_backing_storage(obj))
 		obj->mm.madv = I915_MADV_DONTNEED;
-	__i915_gem_object_put_pages(obj);
 
-	GEM_BUG_ON(obj->mm.pages);
-
-	if (obj->base.import_attach)
-		drm_prime_gem_destroy(&obj->base, NULL);
-
-	drm_gem_object_release(&obj->base);
-	i915_gem_info_remove_obj(dev_priv, obj->base.size);
-
-	kfree(obj->bit_17);
-	i915_gem_object_free(obj);
+	if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED &&
+	    to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
+	    i915_gem_object_is_tiled(obj))
+		__i915_gem_object_unpin_pages(obj);
 
-	intel_runtime_pm_put(dev_priv);
+	/* Before we free the object, make sure any pure RCU-only
+	 * read-side critical sections are complete, e.g.
+	 * i915_gem_busy_ioctl(). For the corresponding synchronized
+	 * lookup see i915_gem_object_lookup_rcu().
+	 */
+	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
 }
 
 void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
@@ -4334,6 +4397,7 @@ int i915_gem_suspend(struct drm_device *dev)
 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
 	flush_delayed_work(&dev_priv->gt.idle_work);
+	flush_work(&dev_priv->mm.free_work);
 
 	/* Assert that we sucessfully flushed all the work and
 	 * reset the GPU back to its idle, low power state.
@@ -4630,6 +4694,8 @@ i915_gem_load_init(struct drm_device *dev)
 				  NULL);
 
 	INIT_LIST_HEAD(&dev_priv->context_list);
+	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
+	init_llist_head(&dev_priv->mm.free_list);
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 868da0bae751..fa72473dc7f5 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -201,6 +201,10 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 						       typeof(*obj),
 						       global_list))) {
 			list_move_tail(&obj->global_list, &still_in_list);
+			if (!obj->mm.pages) {
+				list_del_init(&obj->global_list);
+				continue;
+			}
 
 			if (flags & I915_SHRINK_PURGEABLE &&
 			    obj->mm.madv != I915_MADV_DONTNEED)
@@ -217,8 +221,6 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 			if (!can_release_pages(obj))
 				continue;
 
-			i915_gem_object_get(obj);
-
 			if (unsafe_drop_pages(obj)) {
 				mutex_lock(&obj->mm.lock);
 				if (!obj->mm.pages) {
@@ -227,8 +229,6 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 				}
 				mutex_unlock(&obj->mm.lock);
 			}
-
-			i915_gem_object_put(obj);
 		}
 		list_splice(&still_in_list, phase->list);
 	}
@@ -395,12 +395,18 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 	 */
 	unbound = bound = unevictable = 0;
 	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
+		if (!obj->mm.pages)
+			continue;
+
 		if (!can_release_pages(obj))
 			unevictable += obj->base.size >> PAGE_SHIFT;
 		else
 			unbound += obj->base.size >> PAGE_SHIFT;
 	}
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		if (!obj->mm.pages)
+			continue;
+
 		if (!can_release_pages(obj))
 			unevictable += obj->base.size >> PAGE_SHIFT;
 		else
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 11e2e0f57ac1..ec48e403adfe 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -328,12 +328,17 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
 	struct drm_i915_gem_get_tiling *args = data;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_object *obj;
+	int err = -ENOENT;
+
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (obj) {
+		args->tiling_mode =
+			READ_ONCE(obj->tiling_and_stride) & TILING_MASK;
+		err = 0;
+	}
+	rcu_read_unlock();
 
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	args->tiling_mode = READ_ONCE(obj->tiling_and_stride) & TILING_MASK;
 	switch (args->tiling_mode) {
 	case I915_TILING_X:
 		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
@@ -341,11 +346,10 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
 	case I915_TILING_Y:
 		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
 		break;
+	default:
 	case I915_TILING_NONE:
 		args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
 		break;
-	default:
-		DRM_ERROR("unknown tiling mode\n");
 	}
 
 	/* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
@@ -358,6 +362,5 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
 	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
 		args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
 
-	i915_gem_object_put_unlocked(obj);
-	return 0;
+	return err;
 }
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 24/42] drm/i915: Treat a framebuffer reference as an active reference whilst shrinking
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (22 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 23/42] drm/i915: Move object release to a freelist + worker Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-11  9:54   ` John Harrison
  2016-10-07  9:46 ` [PATCH 25/42] drm/i915: Use lockless object free Chris Wilson
                   ` (20 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Treat a framebuffer reference with the same priority as an active
reference whilst shrinking. Framebuffers are likely to be reused and
typically cost more to migrate to and from GPU memory (on LLC
architectures we need to clflush), so defer the temptation to purge them
during a kswapd run until we have run out of cheap buffers.
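
The deferral works because the shrinker only escalates to
I915_SHRINK_ACTIVE as a last resort. A rough sketch of that escalation
(illustrative only; the actual call sites live in i915_gem_shrinker.c
and are not touched by this patch):

	/* Ordinary kswapd pressure: active and framebuffer-backed
	 * objects are now skipped by the per-object check below.
	 */
	i915_gem_shrink(dev_priv, target,
			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND);

	/* Only once everything cheaper is exhausted (shrink_all / OOM)
	 * do we pay the migration cost for active and framebuffer
	 * objects as well.
	 */
	i915_gem_shrink(dev_priv, -1UL,
			I915_SHRINK_BOUND |
			I915_SHRINK_UNBOUND |
			I915_SHRINK_ACTIVE);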

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index fa72473dc7f5..0241658af16b 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -214,8 +214,9 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 			    !is_vmalloc_addr(obj->mm.mapping))
 				continue;
 
-			if ((flags & I915_SHRINK_ACTIVE) == 0 &&
-			    i915_gem_object_is_active(obj))
+			if (!(flags & I915_SHRINK_ACTIVE) &&
+			    (i915_gem_object_is_active(obj) ||
+			     obj->framebuffer_references))
 				continue;
 
 			if (!can_release_pages(obj))
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 25/42] drm/i915: Use lockless object free
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (23 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 24/42] drm/i915: Treat a framebuffer reference as an active reference whilst shrinking Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-11  9:56   ` John Harrison
  2016-10-07  9:46 ` [PATCH 26/42] drm/i915: Move GEM activity tracking into a common struct reservation_object Chris Wilson
                   ` (19 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c         |  2 +-
 drivers/gpu/drm/i915/i915_drv.h         | 10 +---------
 drivers/gpu/drm/i915/i915_gem.c         | 20 ++++++++++----------
 drivers/gpu/drm/i915/i915_gem_tiling.c  |  2 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |  4 ++--
 drivers/gpu/drm/i915/intel_display.c    |  6 +++---
 drivers/gpu/drm/i915/intel_overlay.c    |  4 ++--
 drivers/gpu/drm/i915/intel_pm.c         |  2 +-
 8 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f7d48f97993d..2e29eedd21b2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2606,7 +2606,7 @@ static struct drm_driver driver = {
 	.set_busid = drm_pci_set_busid,
 
 	.gem_close_object = i915_gem_close_object,
-	.gem_free_object = i915_gem_free_object,
+	.gem_free_object_unlocked = i915_gem_free_object,
 	.gem_vm_ops = &i915_gem_vm_ops,
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e79a5cb78b5d..89d3b5a16826 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2396,19 +2396,12 @@ __attribute__((nonnull))
 static inline void
 i915_gem_object_put(struct drm_i915_gem_object *obj)
 {
-	drm_gem_object_unreference(&obj->base);
+	__drm_gem_object_unreference(&obj->base);
 }
 
 __deprecated
 extern void drm_gem_object_unreference(struct drm_gem_object *);
 
-__attribute__((nonnull))
-static inline void
-i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj)
-{
-	drm_gem_object_unreference_unlocked(&obj->base);
-}
-
 __deprecated
 extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
 
@@ -2510,7 +2503,6 @@ static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
 
 static inline void i915_vma_put(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->dev->struct_mutex);
 	i915_gem_object_put(vma->obj);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7cd49dd1d3f8..28e1064baad5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -616,7 +616,7 @@ i915_gem_create(struct drm_file *file,
 
 	ret = drm_gem_handle_create(file, &obj->base, &handle);
 	/* drop reference from allocate - handle holds it now */
-	i915_gem_object_put_unlocked(obj);
+	i915_gem_object_put(obj);
 	if (ret)
 		return ret;
 
@@ -1115,7 +1115,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 
 	i915_gem_object_unpin_pages(obj);
 out:
-	i915_gem_object_put_unlocked(obj);
+	i915_gem_object_put(obj);
 	return ret;
 }
 
@@ -1450,7 +1450,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 
 	i915_gem_object_unpin_pages(obj);
 err:
-	i915_gem_object_put_unlocked(obj);
+	i915_gem_object_put(obj);
 	return ret;
 }
 
@@ -1560,7 +1560,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 err_pages:
 	i915_gem_object_unpin_pages(obj);
 err_unlocked:
-	i915_gem_object_put_unlocked(obj);
+	i915_gem_object_put(obj);
 	return ret;
 }
 
@@ -1591,7 +1591,7 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 		}
 	}
 
-	i915_gem_object_put_unlocked(obj);
+	i915_gem_object_put(obj);
 	return err;
 }
 
@@ -1637,7 +1637,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 	 * pages from.
 	 */
 	if (!obj->base.filp) {
-		i915_gem_object_put_unlocked(obj);
+		i915_gem_object_put(obj);
 		return -EINVAL;
 	}
 
@@ -1649,7 +1649,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 		struct vm_area_struct *vma;
 
 		if (down_write_killable(&mm->mmap_sem)) {
-			i915_gem_object_put_unlocked(obj);
+			i915_gem_object_put(obj);
 			return -EINTR;
 		}
 		vma = find_vma(mm, addr);
@@ -1663,7 +1663,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 		/* This may race, but that's ok, it only gets set */
 		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
 	}
-	i915_gem_object_put_unlocked(obj);
+	i915_gem_object_put(obj);
 	if (IS_ERR((void *)addr))
 		return addr;
 
@@ -2073,7 +2073,7 @@ i915_gem_mmap_gtt(struct drm_file *file,
 	if (ret == 0)
 		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
 
-	i915_gem_object_put_unlocked(obj);
+	i915_gem_object_put(obj);
 	return ret;
 }
 
@@ -2881,7 +2881,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 			args->timeout_ns = 0;
 	}
 
-	i915_gem_object_put_unlocked(obj);
+	i915_gem_object_put(obj);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index ec48e403adfe..d8fc4782b8db 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -200,7 +200,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 
 	if (!i915_tiling_ok(dev,
 			    args->stride, obj->base.size, args->tiling_mode)) {
-		i915_gem_object_put_unlocked(obj);
+		i915_gem_object_put(obj);
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 136c493b15b2..6f7ac0e783ec 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -542,7 +542,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 	release_pages(pvec, pinned, 0);
 	drm_free_large(pvec);
 
-	i915_gem_object_put_unlocked(obj);
+	i915_gem_object_put(obj);
 	put_task_struct(work->task);
 	kfree(work);
 }
@@ -802,7 +802,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
 		ret = drm_gem_handle_create(file, &obj->base, &handle);
 
 	/* drop reference from allocate - handle holds it now */
-	i915_gem_object_put_unlocked(obj);
+	i915_gem_object_put(obj);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 5ee7bab6e560..8beec0b651cd 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11026,7 +11026,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev,
 
 	fb = intel_framebuffer_create(dev, &mode_cmd, obj);
 	if (IS_ERR(fb))
-		i915_gem_object_put_unlocked(obj);
+		i915_gem_object_put(obj);
 
 	return fb;
 }
@@ -12330,7 +12330,7 @@ cleanup:
 	crtc->primary->fb = old_fb;
 	update_state_fb(crtc->primary);
 
-	i915_gem_object_put_unlocked(obj);
+	i915_gem_object_put(obj);
 	drm_framebuffer_unreference(work->old_fb);
 
 	spin_lock_irq(&dev->event_lock);
@@ -15837,7 +15837,7 @@ intel_user_framebuffer_create(struct drm_device *dev,
 
 	fb = intel_framebuffer_create(dev, &mode_cmd, obj);
 	if (IS_ERR(fb))
-		i915_gem_object_put_unlocked(obj);
+		i915_gem_object_put(obj);
 
 	return fb;
 }
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 7c392547711f..58d5c5eed487 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -1222,7 +1222,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
 out_unlock:
 	mutex_unlock(&dev->struct_mutex);
 	drm_modeset_unlock_all(dev);
-	i915_gem_object_put_unlocked(new_bo);
+	i915_gem_object_put(new_bo);
 out_free:
 	kfree(params);
 
@@ -1466,7 +1466,7 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
 	 * hardware should be off already */
 	WARN_ON(dev_priv->overlay->active);
 
-	i915_gem_object_put_unlocked(dev_priv->overlay->reg_bo);
+	i915_gem_object_put(dev_priv->overlay->reg_bo);
 	kfree(dev_priv->overlay);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 7f1748a1e614..27fc4c9c8cd0 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5864,7 +5864,7 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
 	if (WARN_ON(!dev_priv->vlv_pctx))
 		return;
 
-	i915_gem_object_put_unlocked(dev_priv->vlv_pctx);
+	i915_gem_object_put(dev_priv->vlv_pctx);
 	dev_priv->vlv_pctx = NULL;
 }
 
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 26/42] drm/i915: Move GEM activity tracking into a common struct reservation_object
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (24 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 25/42] drm/i915: Use lockless object free Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07 10:10   ` Joonas Lahtinen
  2016-10-07  9:46 ` [PATCH 27/42] drm: Add reference counting to drm_atomic_state Chris Wilson
                   ` (18 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

In preparation for supporting many distinct timelines, we need to expand the
activity tracking on the GEM object to handle more than just a request
per engine. We already use the struct reservation_object on the dma-buf
to handle many fence contexts, so integrating that into the GEM object
itself is the preferred solution. (For example, we can now share the same
reservation_object between every consumer/producer using this buffer and
skip the manual import/export via dma-buf.)
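
For reference, the model being adopted: a reservation_object carries one
exclusive (write) fence plus an array of shared (read) fences, and every
producer/consumer attaches to or inspects the same object. A minimal
sketch using the existing reservation API (illustrative only, not code
from this patch):

	struct reservation_object *resv = obj->resv;

	/* Producer: publish the request's fence under the ww_mutex. */
	ww_mutex_lock(&resv->lock, NULL);
	if (write)
		reservation_object_add_excl_fence(resv, &rq->fence);
	else if (reservation_object_reserve_shared(resv) == 0)
		reservation_object_add_shared_fence(resv, &rq->fence);
	ww_mutex_unlock(&resv->lock);

	/* Consumer (i915, or a foreign driver via the shared dma-buf
	 * resv): test or wait on the writer and all readers without
	 * taking struct_mutex.
	 */
	if (!reservation_object_test_signaled_rcu(resv, true))
		reservation_object_wait_timeout_rcu(resv, true, false,
						    MAX_SCHEDULE_TIMEOUT);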

v2: Reimplement busy-ioctl (by walking the reservation object), postpone
the ABI change for another day. Similarly use the reservation object to
find the last_write request (if active and from i915) for choosing
display CS flips.

Caveats:

 * busy-ioctl: busy-ioctl only reports on native fences, so it will not
warn of stalls (in set-domain-ioctl, pread/pwrite, etc.) if the object is
being rendered to by external fences. It also will not report the same
busy state as wait-ioctl (or polling on the dma-buf) in the same
circumstances. On the plus side, it does retain reporting of which
*i915* engines are engaged with this object.

 * non-blocking atomic modesets take a step backwards, as the wait for
render completion now blocks the ioctl. This is fixed in a subsequent
patch that awaits the rendering via a fence instead; see
"drm/i915: Restore nonblocking awaits for modesetting".

 * dynamic array manipulation for shared-fences in reservation is slower
than the previous lockless static assignment (e.g. gem_exec_lut_handle
runtime on ivb goes from 42s to 66s), mainly due to atomic operations.

 * loss of object-level retirement callbacks, emulated by VMA retirement
tracking.

 * minor loss of object-level last-activity information from debugfs;
this could be replaced with per-vma information if desired
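
To make the busy-ioctl trade-off above concrete, the two possible
reporting strategies look roughly like this (a sketch; the per-engine
walk is what this patch implements, the one-liner is the alternative
noted in the new code comment):

	/* Overall busyness, foreign fences included - this is what
	 * wait-ioctl and poll() on the dma-buf observe:
	 */
	args->busy = !reservation_object_test_signaled_rcu(obj->resv, true);

	/* busy-ioctl instead walks the exclusive and shared fences
	 * under RCU and translates each unsignaled *i915* fence into
	 * its engine's read/write flag, so an object carrying only a
	 * foreign fence is reported as idle here.
	 */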

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  15 +-
 drivers/gpu/drm/i915/i915_drv.h            |  62 +++----
 drivers/gpu/drm/i915/i915_gem.c            | 266 ++++++++---------------------
 drivers/gpu/drm/i915/i915_gem_batch_pool.c |   3 +-
 drivers/gpu/drm/i915/i915_gem_dmabuf.c     |  53 +-----
 drivers/gpu/drm/i915/i915_gem_dmabuf.h     |  45 -----
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  82 +++------
 drivers/gpu/drm/i915/i915_gem_gtt.c        |  32 ++++
 drivers/gpu/drm/i915/i915_gem_gtt.h        |   1 +
 drivers/gpu/drm/i915/i915_gem_request.c    |  48 +++---
 drivers/gpu/drm/i915/i915_gem_request.h    |  37 +---
 drivers/gpu/drm/i915/i915_gpu_error.c      |   6 +-
 drivers/gpu/drm/i915/intel_atomic_plane.c  |   2 -
 drivers/gpu/drm/i915/intel_display.c       | 114 +++----------
 drivers/gpu/drm/i915/intel_drv.h           |   3 -
 15 files changed, 215 insertions(+), 554 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_dmabuf.h

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 144013875513..b24a027f5040 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -136,11 +136,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	struct i915_vma *vma;
 	unsigned int frontbuffer_bits;
 	int pin_count = 0;
-	enum intel_engine_id id;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x [ ",
+	seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s",
 		   &obj->base,
 		   get_active_flag(obj),
 		   get_pin_flag(obj),
@@ -149,14 +148,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   get_pin_mapped_flag(obj),
 		   obj->base.size / 1024,
 		   obj->base.read_domains,
-		   obj->base.write_domain);
-	for_each_engine_id(engine, dev_priv, id)
-		seq_printf(m, "%x ",
-			   i915_gem_active_get_seqno(&obj->last_read[id],
-						     &obj->base.dev->struct_mutex));
-	seq_printf(m, "] %x %s%s%s",
-		   i915_gem_active_get_seqno(&obj->last_write,
-					     &obj->base.dev->struct_mutex),
+		   obj->base.write_domain,
 		   i915_cache_level_str(dev_priv, obj->cache_level),
 		   obj->mm.dirty ? " dirty" : "",
 		   obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -196,8 +188,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		seq_printf(m, " (%s mappable)", s);
 	}
 
-	engine = i915_gem_active_get_engine(&obj->last_write,
-					    &dev_priv->drm.struct_mutex);
+	engine = i915_gem_object_last_write_engine(obj);
 	if (engine)
 		seq_printf(m, " (%s)", engine->name);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 89d3b5a16826..7756f5c7f1ab 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -41,6 +41,7 @@
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include <linux/reservation.h>
 #include <linux/shmem_fs.h>
 
 #include <drm/drmP.h>
@@ -2231,21 +2232,12 @@ struct drm_i915_gem_object {
 	struct list_head batch_pool_link;
 
 	unsigned long flags;
-	/**
-	 * This is set if the object is on the active lists (has pending
-	 * rendering and so a non-zero seqno), and is not set if it i s on
-	 * inactive (ready to be unbound) list.
-	 */
-#define I915_BO_ACTIVE_SHIFT 0
-#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
-#define __I915_BO_ACTIVE(bo) \
-	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
 
 	/**
 	 * Have we taken a reference for the object for incomplete GPU
 	 * activity?
 	 */
-#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
+#define I915_BO_ACTIVE_REF 0
 
 	/**
 	 * Whether the current gtt mapping needs to be mappable (and isn't just
@@ -2273,6 +2265,7 @@ struct drm_i915_gem_object {
 
 	/** Count of VMA actually bound by this object */
 	unsigned int bind_count;
+	unsigned int active_count;
 	unsigned int pin_display;
 
 	struct {
@@ -2312,8 +2305,7 @@ struct drm_i915_gem_object {
 	 * read request. This allows for the CPU to read from an active
 	 * buffer by only waiting for the write to complete.
 	 */
-	struct i915_gem_active last_read[I915_NUM_ENGINES];
-	struct i915_gem_active last_write;
+	struct reservation_object *resv;
 
 	/** References from framebuffers, locks out tiling changes. */
 	unsigned long framebuffer_references;
@@ -2332,6 +2324,8 @@ struct drm_i915_gem_object {
 
 	/** for phys allocated objects */
 	struct drm_dma_handle *phys_handle;
+
+	struct reservation_object __builtin_resv;
 };
 
 static inline struct drm_i915_gem_object *
@@ -2424,35 +2418,10 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
 	return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
 }
 
-static inline unsigned long
-i915_gem_object_get_active(const struct drm_i915_gem_object *obj)
-{
-	return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
-}
-
 static inline bool
 i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
 {
-	return i915_gem_object_get_active(obj);
-}
-
-static inline void
-i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
-{
-	obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT);
-}
-
-static inline void
-i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine)
-{
-	obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT);
-}
-
-static inline bool
-i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
-				  int engine)
-{
-	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
+	return obj->active_count;
 }
 
 static inline bool
@@ -2495,6 +2464,23 @@ i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
 	return obj->tiling_and_stride & STRIDE_MASK;
 }
 
+static inline struct intel_engine_cs *
+i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
+{
+	struct intel_engine_cs *engine = NULL;
+	struct fence *fence;
+
+	rcu_read_lock();
+	fence = reservation_object_get_excl_rcu(obj->resv);
+	rcu_read_unlock();
+
+	if (fence && fence_is_i915(fence) && !fence_is_signaled(fence))
+		engine = to_request(fence)->engine;
+	fence_put(fence);
+
+	return engine;
+}
+
 static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
 {
 	i915_gem_object_get(vma->obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 28e1064baad5..943ec3fe66ad 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -29,7 +29,6 @@
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
-#include "i915_gem_dmabuf.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
@@ -447,11 +446,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
 		     long timeout,
 		     struct intel_rps_client *rps)
 {
-	struct reservation_object *resv;
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
-
 	might_sleep();
 #if IS_ENABLED(CONFIG_LOCKDEP)
 	GEM_BUG_ON(!!lockdep_is_held(&obj->base.dev->struct_mutex) !=
@@ -459,33 +453,9 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
 #endif
 	GEM_BUG_ON(timeout < 0);
 
-	if (flags & I915_WAIT_ALL) {
-		active = obj->last_read;
-		active_mask = i915_gem_object_get_active(obj);
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
-	}
-
-	for_each_active(active_mask, idx) {
-		struct drm_i915_gem_request *request;
-
-		request = i915_gem_active_get_unlocked(&active[idx]);
-		if (request) {
-			timeout = i915_gem_object_wait_fence(&request->fence,
-							     flags, timeout,
-							     rps);
-			i915_gem_request_put(request);
-		}
-		if (timeout < 0)
-			return timeout;
-	}
-
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv)
-		timeout = i915_gem_object_wait_reservation(resv,
-							   flags, timeout,
-							   rps);
+	timeout = i915_gem_object_wait_reservation(obj->resv,
+						   flags, timeout,
+						   rps);
 	return timeout < 0 ? timeout : 0;
 }
 
@@ -2512,44 +2482,6 @@ err_unlock:
 	goto out_unlock;
 }
 
-static void
-i915_gem_object_retire__write(struct i915_gem_active *active,
-			      struct drm_i915_gem_request *request)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(active, struct drm_i915_gem_object, last_write);
-
-	intel_fb_obj_flush(obj, true, ORIGIN_CS);
-}
-
-static void
-i915_gem_object_retire__read(struct i915_gem_active *active,
-			     struct drm_i915_gem_request *request)
-{
-	int idx = request->engine->id;
-	struct drm_i915_gem_object *obj =
-		container_of(active, struct drm_i915_gem_object, last_read[idx]);
-
-	GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
-
-	i915_gem_object_clear_active(obj, idx);
-	if (i915_gem_object_is_active(obj))
-		return;
-
-	/* Bump our place on the bound list to keep it roughly in LRU order
-	 * so that we don't steal from recently used but inactive objects
-	 * (unless we are forced to ofc!)
-	 */
-	if (obj->bind_count)
-		list_move_tail(&obj->global_list,
-			       &request->i915->mm.bound_list);
-
-	if (i915_gem_object_has_active_reference(obj)) {
-		i915_gem_object_clear_active_reference(obj);
-		i915_gem_object_put(obj);
-	}
-}
-
 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
 {
 	unsigned long elapsed;
@@ -2915,6 +2847,13 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 * In order to prevent it from being recursively closed,
 		 * take a pin on the vma so that the second unbind is
 		 * aborted.
+		 *
+		 * Even more scary is that the retire callback may free
+		 * the object (last active vma). To prevent the explosion
+		 * we defer the actual object free to a worker that can
+		 * only proceed once it acquires the struct_mutex (which
+		 * we currently hold, therefore it cannot free this object
+		 * before we are finished).
 		 */
 		__i915_vma_pin(vma);
 
@@ -3922,83 +3861,42 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
 }
 
 static __always_inline unsigned int
-__busy_set_if_active(const struct i915_gem_active *active,
+__busy_set_if_active(const struct fence *fence,
 		     unsigned int (*flag)(unsigned int id))
 {
-	struct drm_i915_gem_request *request;
-
-	request = rcu_dereference(active->request);
-	if (!request || i915_gem_request_completed(request))
-		return 0;
+	struct drm_i915_gem_request *rq;
 
-	/* This is racy. See __i915_gem_active_get_rcu() for an in detail
-	 * discussion of how to handle the race correctly, but for reporting
-	 * the busy state we err on the side of potentially reporting the
-	 * wrong engine as being busy (but we guarantee that the result
-	 * is at least self-consistent).
-	 *
-	 * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
-	 * whilst we are inspecting it, even under the RCU read lock as we are.
-	 * This means that there is a small window for the engine and/or the
-	 * seqno to have been overwritten. The seqno will always be in the
-	 * future compared to the intended, and so we know that if that
-	 * seqno is idle (on whatever engine) our request is idle and the
-	 * return 0 above is correct.
-	 *
-	 * The issue is that if the engine is switched, it is just as likely
-	 * to report that it is busy (but since the switch happened, we know
-	 * the request should be idle). So there is a small chance that a busy
-	 * result is actually the wrong engine.
-	 *
-	 * So why don't we care?
+	/* We have to check the current hw status of the fence as the uABI
+	 * guarantees forward progress. We could rely on the idle worker
+	 * to eventually flush us, but to minimise latency just ask the
+	 * hardware.
 	 *
-	 * For starters, the busy ioctl is a heuristic that is by definition
-	 * racy. Even with perfect serialisation in the driver, the hardware
-	 * state is constantly advancing - the state we report to the user
-	 * is stale.
-	 *
-	 * The critical information for the busy-ioctl is whether the object
-	 * is idle as userspace relies on that to detect whether its next
-	 * access will stall, or if it has missed submitting commands to
-	 * the hardware allowing the GPU to stall. We never generate a
-	 * false-positive for idleness, thus busy-ioctl is reliable at the
-	 * most fundamental level, and we maintain the guarantee that a
-	 * busy object left to itself will eventually become idle (and stay
-	 * idle!).
-	 *
-	 * We allow ourselves the leeway of potentially misreporting the busy
-	 * state because that is an optimisation heuristic that is constantly
-	 * in flux. Being quickly able to detect the busy/idle state is much
-	 * more important than accurate logging of exactly which engines were
-	 * busy.
-	 *
-	 * For accuracy in reporting the engine, we could use
-	 *
-	 *	result = 0;
-	 *	request = __i915_gem_active_get_rcu(active);
-	 *	if (request) {
-	 *		if (!i915_gem_request_completed(request))
-	 *			result = flag(request->engine->exec_id);
-	 *		i915_gem_request_put(request);
-	 *	}
-	 *
-	 * but that still remains susceptible to both hardware and userspace
-	 * races. So we accept making the result of that race slightly worse,
-	 * given the rarity of the race and its low impact on the result.
+	 * Note we only report on the status of native fences.
 	 */
-	return flag(READ_ONCE(request->engine->exec_id));
+	if (!fence_is_i915(fence))
+		return 0;
+
+	/* opencode to_request() in order to avoid const warnings */
+	rq = container_of(fence, struct drm_i915_gem_request, fence);
+	if (i915_gem_request_completed(rq))
+		return 0;
+
+	return flag(rq->engine->exec_id);
 }
 
 static __always_inline unsigned int
-busy_check_reader(const struct i915_gem_active *active)
+busy_check_reader(const struct fence *fence)
 {
-	return __busy_set_if_active(active, __busy_read_flag);
+	return __busy_set_if_active(fence, __busy_read_flag);
 }
 
 static __always_inline unsigned int
-busy_check_writer(const struct i915_gem_active *active)
+busy_check_writer(const struct fence *fence)
 {
-	return __busy_set_if_active(active, __busy_write_id);
+	if (!fence)
+		return 0;
+
+	return __busy_set_if_active(fence, __busy_write_id);
 }
 
 int
@@ -4007,63 +3905,52 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	unsigned long active;
+	struct reservation_object_list *list;
+	unsigned int seq;
 	int err;
 
+	err = -ENOENT;
 	rcu_read_lock();
 	obj = i915_gem_object_lookup_rcu(file, args->handle);
-	if (!obj) {
-		err = -ENOENT;
+	if (!obj)
 		goto out;
-	}
 
-	args->busy = 0;
-	active = __I915_BO_ACTIVE(obj);
-	if (active) {
-		int idx;
-
-		/* Yes, the lookups are intentionally racy.
-		 *
-		 * First, we cannot simply rely on __I915_BO_ACTIVE. We have
-		 * to regard the value as stale and as our ABI guarantees
-		 * forward progress, we confirm the status of each active
-		 * request with the hardware.
-		 *
-		 * Even though we guard the pointer lookup by RCU, that only
-		 * guarantees that the pointer and its contents remain
-		 * dereferencable and does *not* mean that the request we
-		 * have is the same as the one being tracked by the object.
-		 *
-		 * Consider that we lookup the request just as it is being
-		 * retired and freed. We take a local copy of the pointer,
-		 * but before we add its engine into the busy set, the other
-		 * thread reallocates it and assigns it to a task on another
-		 * engine with a fresh and incomplete seqno. Guarding against
-		 * that requires careful serialisation and reference counting,
-		 * i.e. using __i915_gem_active_get_request_rcu(). We don't,
-		 * instead we expect that if the result is busy, which engines
-		 * are busy is not completely reliable - we only guarantee
-		 * that the object was busy.
-		 */
-
-		for_each_active(active, idx)
-			args->busy |= busy_check_reader(&obj->last_read[idx]);
-
-		/* For ABI sanity, we only care that the write engine is in
-		 * the set of read engines. This should be ensured by the
-		 * ordering of setting last_read/last_write in
-		 * i915_vma_move_to_active(), and then in reverse in retire.
-		 * However, for good measure, we always report the last_write
-		 * request as a busy read as well as being a busy write.
-		 *
-		 * We don't care that the set of active read/write engines
-		 * may change during construction of the result, as it is
-		 * equally liable to change before userspace can inspect
-		 * the result.
-		 */
-		args->busy |= busy_check_writer(&obj->last_write);
+	/* A discrepancy here is that we do not report the status of
+	 * non-i915 fences, i.e. even though we may report the object as idle,
+	 * a call to set-domain may still stall waiting for foreign rendering.
+	 * This also means that wait-ioctl may report an object as busy,
+	 * where busy-ioctl considers it idle.
+	 *
+	 * We trade the ability to warn of foreign fences to report on which
+	 * i915 engines are active for the object.
+	 *
+	 * Alternatively, we can trade that extra information on read/write
+	 * activity with
+	 *	args->busy =
+	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
+	 * to report the overall busyness. This is what the wait-ioctl does.
+	 *
+	 */
+retry:
+	seq = raw_read_seqcount(&obj->resv->seq);
+
+	/* Translate the exclusive fence to the READ *and* WRITE engine */
+	args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
+
+	/* Translate shared fences to READ set of engines */
+	list = rcu_dereference(obj->resv->fence);
+	if (list) {
+		unsigned int shared_count = list->shared_count, i;
+		for (i = 0; i < shared_count; ++i) {
+			struct fence *fence = rcu_dereference(list->shared[i]);
+			args->busy |= busy_check_reader(fence);
+		}
 	}
 
+	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
+		goto retry;
+
+	err = 0;
 out:
 	rcu_read_unlock();
 	return err;
@@ -4128,22 +4015,18 @@ err:
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
-	int i;
-
 	mutex_init(&obj->mm.lock);
 
 	INIT_LIST_HEAD(&obj->global_list);
-	for (i = 0; i < I915_NUM_ENGINES; i++)
-		init_request_active(&obj->last_read[i],
-				    i915_gem_object_retire__read);
-	init_request_active(&obj->last_write,
-			    i915_gem_object_retire__write);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
 
+	reservation_object_init(&obj->__builtin_resv);
+	obj->resv = &obj->__builtin_resv;
+
 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
 
 	obj->mm.madv = I915_MADV_WILLNEED;
@@ -4281,6 +4164,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		if (obj->base.import_attach)
 			drm_prime_gem_destroy(&obj->base, NULL);
 
+		reservation_object_fini(&obj->__builtin_resv);
 		drm_gem_object_release(&obj->base);
 		i915_gem_info_remove_obj(i915, obj->base.size);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 6b656822bb3a..159a868c7cd2 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -114,8 +114,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 
 	list_for_each_entry(tmp, list, batch_pool_link) {
 		/* The batches are strictly LRU ordered */
-		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
-					     &tmp->base.dev->struct_mutex))
+		if (i915_gem_object_is_active(tmp))
 			break;
 
 		if (tmp->base.size >= size) {
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 9023110cf1ca..96141a658611 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -211,60 +211,17 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
 	.end_cpu_access = i915_gem_end_cpu_access,
 };
 
-static void export_fences(struct drm_i915_gem_object *obj,
-			  struct dma_buf *dma_buf)
-{
-	struct reservation_object *resv = dma_buf->resv;
-	struct drm_i915_gem_request *req;
-	unsigned long active;
-	int idx;
-
-	active = __I915_BO_ACTIVE(obj);
-	if (!active)
-		return;
-
-	/* Serialise with execbuf to prevent concurrent fence-loops */
-	mutex_lock(&obj->base.dev->struct_mutex);
-
-	/* Mark the object for future fences before racily adding old fences */
-	obj->base.dma_buf = dma_buf;
-
-	ww_mutex_lock(&resv->lock, NULL);
-
-	for_each_active(active, idx) {
-		req = i915_gem_active_get(&obj->last_read[idx],
-					  &obj->base.dev->struct_mutex);
-		if (!req)
-			continue;
-
-		if (reservation_object_reserve_shared(resv) == 0)
-			reservation_object_add_shared_fence(resv, &req->fence);
-
-		i915_gem_request_put(req);
-	}
-
-	req = i915_gem_active_get(&obj->last_write,
-				  &obj->base.dev->struct_mutex);
-	if (req) {
-		reservation_object_add_excl_fence(resv, &req->fence);
-		i915_gem_request_put(req);
-	}
-
-	ww_mutex_unlock(&resv->lock);
-	mutex_unlock(&obj->base.dev->struct_mutex);
-}
-
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 				      struct drm_gem_object *gem_obj, int flags)
 {
 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
-	struct dma_buf *dma_buf;
 
 	exp_info.ops = &i915_dmabuf_ops;
 	exp_info.size = gem_obj->size;
 	exp_info.flags = flags;
 	exp_info.priv = gem_obj;
+	exp_info.resv = obj->resv;
 
 	if (obj->ops->dmabuf_export) {
 		int ret = obj->ops->dmabuf_export(obj);
@@ -272,12 +229,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 			return ERR_PTR(ret);
 	}
 
-	dma_buf = drm_gem_dmabuf_export(dev, &exp_info);
-	if (IS_ERR(dma_buf))
-		return dma_buf;
-
-	export_fences(obj, dma_buf);
-	return dma_buf;
+	return drm_gem_dmabuf_export(dev, &exp_info);
 }
 
 static struct sg_table *
@@ -335,6 +287,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 	drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
 	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
 	obj->base.import_attach = attach;
+	obj->resv = dma_buf->resv;
 
 	/* We use GTT as shorthand for a coherent domain, one that is
 	 * neither in the GPU cache nor in the CPU cache, where all
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.h b/drivers/gpu/drm/i915/i915_gem_dmabuf.h
deleted file mode 100644
index 91315557e421..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef _I915_GEM_DMABUF_H_
-#define _I915_GEM_DMABUF_H_
-
-#include <linux/dma-buf.h>
-
-static inline struct reservation_object *
-i915_gem_object_get_dmabuf_resv(struct drm_i915_gem_object *obj)
-{
-	struct dma_buf *dma_buf;
-
-	if (obj->base.dma_buf)
-		dma_buf = obj->base.dma_buf;
-	else if (obj->base.import_attach)
-		dma_buf = obj->base.import_attach->dmabuf;
-	else
-		return NULL;
-
-	return dma_buf->resv;
-}
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 062e098b0909..b4865bcc8a3e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,7 +34,6 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
-#include "i915_gem_dmabuf.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
@@ -391,6 +390,11 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 		unsigned int flushes;
 		int ret;
 
+		/* We can't wait for rendering with pagefaults disabled */
+		if (pagefault_disabled() &&
+		    !reservation_object_test_signaled_rcu(obj->resv, true))
+			return ERR_PTR(-EFAULT);
+
 		ret = i915_gem_obj_prepare_shmem_write(obj, &flushes);
 		if (ret)
 			return ERR_PTR(ret);
@@ -437,6 +441,11 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (use_cpu_reloc(obj))
 			return NULL;
 
+		/* We can't wait for rendering with pagefaults disabled */
+		if (pagefault_disabled() &&
+		    !reservation_object_test_signaled_rcu(obj->resv, true))
+			return ERR_PTR(-EFAULT);
+
 		ret = i915_gem_object_set_to_gtt_domain(obj, true);
 		if (ret)
 			return ERR_PTR(ret);
@@ -552,20 +561,6 @@ repeat:
 	return 0;
 }
 
-static bool object_is_idle(struct drm_i915_gem_object *obj)
-{
-	unsigned long active = i915_gem_object_get_active(obj);
-	int idx;
-
-	for_each_active(active, idx) {
-		if (!i915_gem_active_is_idle(&obj->last_read[idx],
-					     &obj->base.dev->struct_mutex))
-			return false;
-	}
-
-	return true;
-}
-
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 				   struct eb_vmas *eb,
@@ -649,10 +644,6 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		return -EINVAL;
 	}
 
-	/* We can't wait for rendering with pagefaults disabled */
-	if (pagefault_disabled() && !object_is_idle(obj))
-		return -EFAULT;
-
 	ret = relocate_entry(obj, reloc, cache, target_offset);
 	if (ret)
 		return ret;
@@ -1111,44 +1102,20 @@ err:
 	return ret;
 }
 
-static unsigned int eb_other_engines(struct drm_i915_gem_request *req)
-{
-	unsigned int mask;
-
-	mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK;
-	mask <<= I915_BO_ACTIVE_SHIFT;
-
-	return mask;
-}
-
 static int
 i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 				struct list_head *vmas)
 {
-	const unsigned int other_rings = eb_other_engines(req);
 	struct i915_vma *vma;
 	int ret;
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		struct reservation_object *resv;
 
-		if (obj->flags & other_rings) {
-			ret = i915_gem_request_await_object
-				(req, obj, obj->base.pending_write_domain);
-			if (ret)
-				return ret;
-		}
-
-		resv = i915_gem_object_get_dmabuf_resv(obj);
-		if (resv) {
-			ret = i915_sw_fence_await_reservation
-				(&req->submit, resv, &i915_fence_ops,
-				 obj->base.pending_write_domain, 10*HZ,
-				 GFP_KERNEL | __GFP_NOWARN);
-			if (ret < 0)
-				return ret;
-		}
+		ret = i915_gem_request_await_object
+			(req, obj, obj->base.pending_write_domain);
+		if (ret)
+			return ret;
 
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			i915_gem_clflush_object(obj, false);
@@ -1290,8 +1257,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-	obj->mm.dirty = true; /* be paranoid  */
-
 	/* Add a reference if we're newly entering the active list.
 	 * The order in which we add operations to the retirement queue is
 	 * vital here: mark_active adds to the start of the callback list,
@@ -1299,11 +1264,14 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	i915_gem_object_set_active(obj, idx);
-	i915_gem_active_set(&obj->last_read[idx], req);
+	if (!i915_vma_is_active(vma))
+		obj->active_count++;
+	i915_vma_set_active(vma, idx);
+	i915_gem_active_set(&vma->last_read[idx], req);
+	list_move_tail(&vma->vm_link, &vma->vm->active_list);
 
 	if (flags & EXEC_OBJECT_WRITE) {
-		i915_gem_active_set(&obj->last_write, req);
+		i915_gem_active_set(&vma->last_write, req);
 
 		intel_fb_obj_invalidate(obj, ORIGIN_CS);
 
@@ -1313,21 +1281,13 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 
 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
 		i915_gem_active_set(&vma->last_fence, req);
-
-	i915_vma_set_active(vma, idx);
-	i915_gem_active_set(&vma->last_read[idx], req);
-	list_move_tail(&vma->vm_link, &vma->vm->active_list);
 }
 
 static void eb_export_fence(struct drm_i915_gem_object *obj,
 			    struct drm_i915_gem_request *req,
 			    unsigned int flags)
 {
-	struct reservation_object *resv;
-
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (!resv)
-		return;
+	struct reservation_object *resv = obj->resv;
 
 	/* Ignore errors from failing to allocate the new fence, we can't
 	 * handle an error right now. Worst case should be missed
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 578026584f42..80669719b44b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -31,6 +31,7 @@
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "intel_frontbuffer.h"
 
 #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
 
@@ -3321,6 +3322,7 @@ i915_vma_retire(struct i915_gem_active *active,
 	const unsigned int idx = rq->engine->id;
 	struct i915_vma *vma =
 		container_of(active, struct i915_vma, last_read[idx]);
+	struct drm_i915_gem_object *obj = vma->obj;
 
 	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
 
@@ -3331,6 +3333,34 @@ i915_vma_retire(struct i915_gem_active *active,
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 	if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
 		WARN_ON(i915_vma_unbind(vma));
+
+	GEM_BUG_ON(!i915_gem_object_is_active(obj));
+	if (--obj->active_count)
+		return;
+
+	/* Bump our place on the bound list to keep it roughly in LRU order
+	 * so that we don't steal from recently used but inactive objects
+	 * (unless we are forced to ofc!)
+	 */
+	if (obj->bind_count)
+		list_move_tail(&obj->global_list, &rq->i915->mm.bound_list);
+
+	obj->mm.dirty = true; /* be paranoid  */
+
+	if (i915_gem_object_has_active_reference(obj)) {
+		i915_gem_object_clear_active_reference(obj);
+		i915_gem_object_put(obj);
+	}
+}
+
+static void
+i915_ggtt_retire__write(struct i915_gem_active *active,
+			struct drm_i915_gem_request *request)
+{
+	struct i915_vma *vma =
+		container_of(active, struct i915_vma, last_write);
+
+	intel_fb_obj_flush(vma->obj, true, ORIGIN_CS);
 }
 
 void i915_vma_destroy(struct i915_vma *vma)
@@ -3374,6 +3404,8 @@ __i915_vma_create(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&vma->exec_list);
 	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
 		init_request_active(&vma->last_read[i], i915_vma_retire);
+	init_request_active(&vma->last_write,
+			    i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL);
 	init_request_active(&vma->last_fence, NULL);
 	list_add(&vma->vm_link, &vm->unbound_list);
 	vma->vm = vm;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c34945f21323..737b8d8f21b4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -211,6 +211,7 @@ struct i915_vma {
 
 	unsigned int active;
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
+	struct i915_gem_active last_write;
 	struct i915_gem_active last_fence;
 
 	/**
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 5ad990add14d..e8278b05074b 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -196,6 +196,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	}
 
 	i915_gem_context_put(request->ctx);
+
+	fence_signal(&request->fence);
 	i915_gem_request_put(request);
 }
 
@@ -544,33 +546,41 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to,
 			      struct drm_i915_gem_object *obj,
 			      bool write)
 {
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
+	struct fence *excl;
+	int ret = 0;
 
 	if (write) {
-		active_mask = i915_gem_object_get_active(obj);
-		active = obj->last_read;
+		struct fence **shared;
+		unsigned int count, i;
+
+		ret = reservation_object_get_fences_rcu(obj->resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			ret = i915_gem_request_await_fence(to, shared[i]);
+			if (ret)
+				break;
+
+			fence_put(shared[i]);
+		}
+
+		for (; i < count; i++)
+			fence_put(shared[i]);
+		kfree(shared);
 	} else {
-		active_mask = 1;
-		active = &obj->last_write;
+		excl = reservation_object_get_excl_rcu(obj->resv);
 	}
 
-	for_each_active(active_mask, idx) {
-		struct drm_i915_gem_request *request;
-		int ret;
-
-		request = i915_gem_active_peek(&active[idx],
-					       &obj->base.dev->struct_mutex);
-		if (!request)
-			continue;
+	if (excl) {
+		if (ret == 0)
+			ret = i915_gem_request_await_fence(to, excl);
 
-		ret = i915_gem_request_await_request(to, request);
-		if (ret)
-			return ret;
+		fence_put(excl);
 	}
 
-	return 0;
+	return ret;
 }
 
 static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 45998eedda2c..b8feff5857a0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -147,7 +147,7 @@ struct drm_i915_gem_request {
 
 extern const struct fence_ops i915_fence_ops;
 
-static inline bool fence_is_i915(struct fence *fence)
+static inline bool fence_is_i915(const struct fence *fence)
 {
 	return fence->ops == &i915_fence_ops;
 }
@@ -554,22 +554,7 @@ i915_gem_active_isset(const struct i915_gem_active *active)
 }
 
 /**
- * i915_gem_active_is_idle - report whether the active tracker is idle
- * @active - the active tracker
- *
- * i915_gem_active_is_idle() returns true if the active tracker is currently
- * unassigned or if the request is complete (but not yet retired). Requires
- * the caller to hold struct_mutex (but that can be relaxed if desired).
- */
-static inline bool
-i915_gem_active_is_idle(const struct i915_gem_active *active,
-			struct mutex *mutex)
-{
-	return !i915_gem_active_peek(active, mutex);
-}
-
-/**
- * i915_gem_active_wait- waits until the request is completed
+ * i915_gem_active_wait - waits until the request is completed
  * @active - the active request on which to wait
  * @flags - how to wait
  * @timeout - how long to wait at most
@@ -639,24 +624,6 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	return 0;
 }
 
-/* Convenience functions for peeking at state inside active's request whilst
- * guarded by the struct_mutex.
- */
-
-static inline uint32_t
-i915_gem_active_get_seqno(const struct i915_gem_active *active,
-			  struct mutex *mutex)
-{
-	return i915_gem_request_get_seqno(i915_gem_active_peek(active, mutex));
-}
-
-static inline struct intel_engine_cs *
-i915_gem_active_get_engine(const struct i915_gem_active *active,
-			   struct mutex *mutex)
-{
-	return i915_gem_request_get_engine(i915_gem_active_peek(active, mutex));
-}
-
 #define for_each_active(mask, idx) \
 	for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 628d5cdf9200..f4ad4a3088df 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -872,9 +872,9 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->name = obj->base.name;
 
 	for (i = 0; i < I915_NUM_ENGINES; i++)
-		err->rseqno[i] = __active_get_seqno(&obj->last_read[i]);
-	err->wseqno = __active_get_seqno(&obj->last_write);
-	err->engine = __active_get_engine_id(&obj->last_write);
+		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
+	err->wseqno = __active_get_seqno(&vma->last_write);
+	err->engine = __active_get_engine_id(&vma->last_write);
 
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index b82de3072d4f..a8927929c740 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -84,7 +84,6 @@ intel_plane_duplicate_state(struct drm_plane *plane)
 	state = &intel_state->base;
 
 	__drm_atomic_helper_plane_duplicate_state(plane, state);
-	intel_state->wait_req = NULL;
 
 	return state;
 }
@@ -101,7 +100,6 @@ void
 intel_plane_destroy_state(struct drm_plane *plane,
 			  struct drm_plane_state *state)
 {
-	WARN_ON(state && to_intel_plane_state(state)->wait_req);
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 8beec0b651cd..95f972b158ea 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -37,7 +37,6 @@
 #include "intel_frontbuffer.h"
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
-#include "i915_gem_dmabuf.h"
 #include "intel_dsi.h"
 #include "i915_trace.h"
 #include <drm/drm_atomic.h>
@@ -11937,8 +11936,6 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 static bool use_mmio_flip(struct intel_engine_cs *engine,
 			  struct drm_i915_gem_object *obj)
 {
-	struct reservation_object *resv;
-
 	/*
 	 * This is not being used for older platforms, because
 	 * non-availability of flip done interrupt forces us to use
@@ -11960,12 +11957,7 @@ static bool use_mmio_flip(struct intel_engine_cs *engine,
 	else if (i915.enable_execlists)
 		return true;
 
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv && !reservation_object_test_signaled_rcu(resv, false))
-		return true;
-
-	return engine != i915_gem_active_get_engine(&obj->last_write,
-						    &obj->base.dev->struct_mutex);
+	return engine != i915_gem_object_last_write_engine(obj);
 }
 
 static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
@@ -12038,17 +12030,8 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
 	struct intel_framebuffer *intel_fb =
 		to_intel_framebuffer(crtc->base.primary->fb);
 	struct drm_i915_gem_object *obj = intel_fb->obj;
-	struct reservation_object *resv;
 
-	if (work->flip_queued_req)
-		WARN_ON(i915_wait_request(work->flip_queued_req,
-					  0, MAX_SCHEDULE_TIMEOUT) < 0);
-
-	/* For framebuffer backed by dmabuf, wait for fence */
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv)
-		WARN_ON(reservation_object_wait_timeout_rcu(resv, false, false,
-							    MAX_SCHEDULE_TIMEOUT) < 0);
+	WARN_ON(i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT, NULL) < 0);
 
 	intel_pipe_update_start(crtc);
 
@@ -12249,8 +12232,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
 		engine = &dev_priv->engine[BCS];
 	} else if (INTEL_INFO(dev)->gen >= 7) {
-		engine = i915_gem_active_get_engine(&obj->last_write,
-						    &obj->base.dev->struct_mutex);
+		engine = i915_gem_object_last_write_engine(obj);
 		if (engine == NULL || engine->id != RCS)
 			engine = &dev_priv->engine[BCS];
 	} else {
@@ -12282,9 +12264,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 
 	if (mmio_flip) {
 		INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
-
-		work->flip_queued_req = i915_gem_active_get(&obj->last_write,
-							    &obj->base.dev->struct_mutex);
 		queue_work(system_unbound_wq, &work->mmio_work);
 	} else {
 		request = i915_gem_request_alloc(engine, engine->last_context);
@@ -14056,13 +14035,10 @@ static int intel_atomic_check(struct drm_device *dev,
 }
 
 static int intel_atomic_prepare_commit(struct drm_device *dev,
-				       struct drm_atomic_state *state,
-				       bool nonblock)
+				       struct drm_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_plane_state *plane_state;
 	struct drm_crtc_state *crtc_state;
-	struct drm_plane *plane;
 	struct drm_crtc *crtc;
 	int i, ret;
 
@@ -14085,30 +14061,6 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
 	ret = drm_atomic_helper_prepare_planes(dev, state);
 	mutex_unlock(&dev->struct_mutex);
 
-	if (!ret && !nonblock) {
-		for_each_plane_in_state(state, plane, plane_state, i) {
-			struct intel_plane_state *intel_plane_state =
-				to_intel_plane_state(plane_state);
-			long timeout;
-
-			if (!intel_plane_state->wait_req)
-				continue;
-
-			timeout = i915_wait_request(intel_plane_state->wait_req,
-						    I915_WAIT_INTERRUPTIBLE,
-						    MAX_SCHEDULE_TIMEOUT);
-			if (timeout < 0) {
-				/* Any hang should be swallowed by the wait */
-				WARN_ON(timeout == -EIO);
-				mutex_lock(&dev->struct_mutex);
-				drm_atomic_helper_cleanup_planes(dev, state);
-				mutex_unlock(&dev->struct_mutex);
-				ret = timeout;
-				break;
-			}
-		}
-	}
-
 	return ret;
 }
 
@@ -14300,26 +14252,11 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 	struct drm_crtc_state *old_crtc_state;
 	struct drm_crtc *crtc;
 	struct intel_crtc_state *intel_cstate;
-	struct drm_plane *plane;
-	struct drm_plane_state *plane_state;
 	bool hw_check = intel_state->modeset;
 	unsigned long put_domains[I915_MAX_PIPES] = {};
 	unsigned crtc_vblank_mask = 0;
 	int i;
 
-	for_each_plane_in_state(state, plane, plane_state, i) {
-		struct intel_plane_state *intel_plane_state =
-			to_intel_plane_state(plane_state);
-
-		if (!intel_plane_state->wait_req)
-			continue;
-
-		/* EIO should be eaten, and we can't get interrupted in the
-		 * worker, and blocking commits have waited already. */
-		WARN_ON(i915_wait_request(intel_plane_state->wait_req,
-					  0, MAX_SCHEDULE_TIMEOUT) < 0);
-	}
-
 	drm_atomic_helper_wait_for_dependencies(state);
 
 	if (intel_state->modeset) {
@@ -14526,7 +14463,7 @@ static int intel_atomic_commit(struct drm_device *dev,
 
 	INIT_WORK(&state->commit_work, intel_atomic_commit_work);
 
-	ret = intel_atomic_prepare_commit(dev, state, nonblock);
+	ret = intel_atomic_prepare_commit(dev, state);
 	if (ret) {
 		DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
 		return ret;
@@ -14658,7 +14595,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 	struct drm_framebuffer *fb = new_state->fb;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
-	struct reservation_object *resv;
+	long lret;
 	int ret = 0;
 
 	if (!obj && !old_obj)
@@ -14696,39 +14633,34 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		return 0;
 
 	/* For framebuffer backed by dmabuf, wait for fence */
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv) {
-		long lret;
+	lret = i915_gem_object_wait(obj,
+				    I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				    MAX_SCHEDULE_TIMEOUT,
+				    NULL);
+	if (lret == -ERESTARTSYS)
+		return lret;
 
-		lret = reservation_object_wait_timeout_rcu(resv, false, true,
-							   MAX_SCHEDULE_TIMEOUT);
-		if (lret == -ERESTARTSYS)
-			return lret;
-
-		WARN(lret < 0, "waiting returns %li\n", lret);
-	}
+	WARN(lret < 0, "waiting returns %li\n", lret);
 
 	if (plane->type == DRM_PLANE_TYPE_CURSOR &&
 	    INTEL_INFO(dev)->cursor_needs_physical) {
 		int align = IS_I830(dev) ? 16 * 1024 : 256;
 		ret = i915_gem_object_attach_phys(obj, align);
-		if (ret)
+		if (ret) {
 			DRM_DEBUG_KMS("failed to attach phys object\n");
+			return ret;
+		}
 	} else {
 		struct i915_vma *vma;
 
 		vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
-		if (IS_ERR(vma))
-			ret = PTR_ERR(vma);
-	}
-
-	if (ret == 0) {
-		to_intel_plane_state(new_state)->wait_req =
-			i915_gem_active_get(&obj->last_write,
-					    &obj->base.dev->struct_mutex);
+		if (IS_ERR(vma)) {
+			DRM_DEBUG_KMS("failed to pin object\n");
+			return PTR_ERR(vma);
+		}
 	}
 
-	return ret;
+	return 0;
 }
 
 /**
@@ -14746,7 +14678,6 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
 {
 	struct drm_device *dev = plane->dev;
 	struct intel_plane_state *old_intel_state;
-	struct intel_plane_state *intel_state = to_intel_plane_state(plane->state);
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
 	struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
 
@@ -14758,9 +14689,6 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
 	if (old_obj && (plane->type != DRM_PLANE_TYPE_CURSOR ||
 	    !INTEL_INFO(dev)->cursor_needs_physical))
 		intel_unpin_fb_obj(old_state->fb, old_state->rotation);
-
-	i915_gem_request_assign(&intel_state->wait_req, NULL);
-	i915_gem_request_assign(&old_intel_state->wait_req, NULL);
 }
 
 int
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index f48e79ae2ac6..78210d14b660 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -401,9 +401,6 @@ struct intel_plane_state {
 	int scaler_id;
 
 	struct drm_intel_sprite_colorkey ckey;
-
-	/* async flip related structures */
-	struct drm_i915_gem_request *wait_req;
 };
 
 struct intel_initial_plane_config {
-- 
2.9.3


* [PATCH 27/42] drm: Add reference counting to drm_atomic_state
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (25 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 26/42] drm/i915: Move GEM activity tracking into a common struct reservation_object Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 28/42] drm/i915: Restore nonblocking awaits for modesetting Chris Wilson
                   ` (17 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter, dri-devel

drm_atomic_state has a complicated single-owner model that tracks its
single reference from allocation through to destruction, either on another
thread or on a local error path. We can simplify this tracking by using
reference counting (at the cost of a few more atomic operations). This
becomes even more beneficial when the lifetime of the state is more
convoluted than simply being passed to a single worker thread for the commit.

v2: Double check !intel atomic_commit functions for missing gets
v3: Update kerneldocs
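
Not part of the patch, just a minimal sketch of the intended usage pattern
for a driver's commit path (the foo_* names are hypothetical;
drm_atomic_state_get()/drm_atomic_state_put() are the helpers added here,
the rest is existing infrastructure): take an extra reference before
handing the state to the asynchronous worker, and drop it once the commit
tail has run.

static void foo_commit_work(struct work_struct *work)
{
	struct drm_atomic_state *state =
		container_of(work, struct drm_atomic_state, commit_work);

	/* ... program the hardware, wait for vblanks, clean up planes ... */

	/* Drop the reference taken before the work was queued; if it was
	 * the last one, the state is freed here.
	 */
	drm_atomic_state_put(state);
}

static int foo_atomic_commit(struct drm_device *dev,
			     struct drm_atomic_state *state, bool nonblock)
{
	int ret;

	ret = drm_atomic_helper_prepare_planes(dev, state);
	if (ret)
		return ret;

	INIT_WORK(&state->commit_work, foo_commit_work);
	drm_atomic_helper_swap_state(state, true);

	/* Keep the state alive for the worker; the reference held by the
	 * caller (e.g. the atomic ioctl) is dropped unconditionally once
	 * ->atomic_commit() returns.
	 */
	drm_atomic_state_get(state);
	if (nonblock)
		queue_work(system_unbound_wq, &state->commit_work);
	else
		foo_commit_work(&state->commit_work);

	return 0;
}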

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Eric Engestrom <eric.engestrom@imgtec.com>
---
 drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c |  3 +-
 drivers/gpu/drm/drm_atomic.c                 | 25 +++----
 drivers/gpu/drm/drm_atomic_helper.c          | 98 +++++++---------------------
 drivers/gpu/drm/drm_fb_helper.c              |  9 +--
 drivers/gpu/drm/exynos/exynos_drm_drv.c      |  3 +-
 drivers/gpu/drm/i915/i915_debugfs.c          |  5 +-
 drivers/gpu/drm/i915/intel_display.c         | 31 +++++----
 drivers/gpu/drm/i915/intel_sprite.c          |  4 +-
 drivers/gpu/drm/mediatek/mtk_drm_drv.c       |  3 +-
 drivers/gpu/drm/msm/msm_atomic.c             |  3 +-
 drivers/gpu/drm/omapdrm/omap_drv.c           |  3 +-
 drivers/gpu/drm/rcar-du/rcar_du_kms.c        |  3 +-
 drivers/gpu/drm/sti/sti_drv.c                |  3 +-
 drivers/gpu/drm/tegra/drm.c                  |  3 +-
 drivers/gpu/drm/tilcdc/tilcdc_drv.c          |  2 -
 drivers/gpu/drm/vc4/vc4_kms.c                |  3 +-
 include/drm/drm_atomic.h                     | 31 ++++++++-
 include/drm/drm_plane.h                      |  1 -
 18 files changed, 102 insertions(+), 131 deletions(-)

diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
index 5f484310bee9..9f6222895212 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
@@ -464,7 +464,7 @@ atmel_hlcdc_dc_atomic_complete(struct atmel_hlcdc_dc_commit *commit)
 
 	drm_atomic_helper_cleanup_planes(dev, old_state);
 
-	drm_atomic_state_free(old_state);
+	drm_atomic_state_put(old_state);
 
 	/* Complete the commit, wake up any waiter. */
 	spin_lock(&dc->commit.wait.lock);
@@ -521,6 +521,7 @@ static int atmel_hlcdc_dc_atomic_commit(struct drm_device *dev,
 	/* Swap the state, this is the point of no return. */
 	drm_atomic_helper_swap_state(state, true);
 
+	drm_atomic_state_get(state);
 	if (async)
 		queue_work(dc->wq, &commit->work);
 	else
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 23739609427d..5dd70540219c 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -74,6 +74,8 @@ EXPORT_SYMBOL(drm_atomic_state_default_release);
 int
 drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state)
 {
+	kref_init(&state->ref);
+
 	/* TODO legacy paths should maybe do a better job about
 	 * setting this appropriately?
 	 */
@@ -215,22 +217,16 @@ void drm_atomic_state_clear(struct drm_atomic_state *state)
 EXPORT_SYMBOL(drm_atomic_state_clear);
 
 /**
- * drm_atomic_state_free - free all memory for an atomic state
- * @state: atomic state to deallocate
+ * __drm_atomic_state_free - free all memory for an atomic state
+ * @ref: This atomic state to deallocate
  *
  * This frees all memory associated with an atomic state, including all the
  * per-object state for planes, crtcs and connectors.
  */
-void drm_atomic_state_free(struct drm_atomic_state *state)
+void __drm_atomic_state_free(struct kref *ref)
 {
-	struct drm_device *dev;
-	struct drm_mode_config *config;
-
-	if (!state)
-		return;
-
-	dev = state->dev;
-	config = &dev->mode_config;
+	struct drm_atomic_state *state = container_of(ref, typeof(*state), ref);
+	struct drm_mode_config *config = &state->dev->mode_config;
 
 	drm_atomic_state_clear(state);
 
@@ -243,7 +239,7 @@ void drm_atomic_state_free(struct drm_atomic_state *state)
 		kfree(state);
 	}
 }
-EXPORT_SYMBOL(drm_atomic_state_free);
+EXPORT_SYMBOL(__drm_atomic_state_free);
 
 /**
  * drm_atomic_get_crtc_state - get crtc state
@@ -1742,7 +1738,7 @@ retry:
 	if (arg->flags & DRM_MODE_ATOMIC_TEST_ONLY) {
 		/*
 		 * Unlike commit, check_only does not clean up state.
-		 * Below we call drm_atomic_state_free for it.
+		 * Below we call drm_atomic_state_put for it.
 		 */
 		ret = drm_atomic_check_only(state);
 	} else if (arg->flags & DRM_MODE_ATOMIC_NONBLOCK) {
@@ -1775,8 +1771,7 @@ out:
 		goto retry;
 	}
 
-	if (ret || arg->flags & DRM_MODE_ATOMIC_TEST_ONLY)
-		drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index c3f83476f996..df6e9b7c377c 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1207,7 +1207,7 @@ static void commit_tail(struct drm_atomic_state *state)
 
 	drm_atomic_helper_commit_cleanup_done(state);
 
-	drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 }
 
 static void commit_work(struct work_struct *work)
@@ -1289,6 +1289,7 @@ int drm_atomic_helper_commit(struct drm_device *dev,
 	 * make sure work items don't artifically stall on each another.
 	 */
 
+	drm_atomic_state_get(state);
 	if (nonblock)
 		queue_work(system_unbound_wq, &state->commit_work);
 	else
@@ -1590,7 +1591,7 @@ EXPORT_SYMBOL(drm_atomic_helper_commit_hw_done);
  *
  * This signals completion of the atomic update @state, including any cleanup
  * work. If used, it must be called right before calling
- * drm_atomic_state_free().
+ * drm_atomic_state_put().
  *
  * This is part of the atomic helper support for nonblocking commits, see
  * drm_atomic_helper_setup_commit() for an overview.
@@ -2113,18 +2114,13 @@ retry:
 		state->legacy_cursor_update = true;
 
 	ret = drm_atomic_commit(state);
-	if (ret != 0)
-		goto fail;
-
-	/* Driver takes ownership of state on successful commit. */
-	return 0;
 fail:
 	if (ret == -EDEADLK)
 		goto backoff;
 
-	drm_atomic_state_free(state);
-
+	drm_atomic_state_put(state);
 	return ret;
+
 backoff:
 	drm_atomic_state_clear(state);
 	drm_atomic_legacy_backoff(state);
@@ -2186,18 +2182,13 @@ retry:
 		goto fail;
 
 	ret = drm_atomic_commit(state);
-	if (ret != 0)
-		goto fail;
-
-	/* Driver takes ownership of state on successful commit. */
-	return 0;
 fail:
 	if (ret == -EDEADLK)
 		goto backoff;
 
-	drm_atomic_state_free(state);
-
+	drm_atomic_state_put(state);
 	return ret;
+
 backoff:
 	drm_atomic_state_clear(state);
 	drm_atomic_legacy_backoff(state);
@@ -2326,18 +2317,13 @@ retry:
 		goto fail;
 
 	ret = drm_atomic_commit(state);
-	if (ret != 0)
-		goto fail;
-
-	/* Driver takes ownership of state on successful commit. */
-	return 0;
 fail:
 	if (ret == -EDEADLK)
 		goto backoff;
 
-	drm_atomic_state_free(state);
-
+	drm_atomic_state_put(state);
 	return ret;
+
 backoff:
 	drm_atomic_state_clear(state);
 	drm_atomic_legacy_backoff(state);
@@ -2479,11 +2465,8 @@ int drm_atomic_helper_disable_all(struct drm_device *dev,
 	}
 
 	err = drm_atomic_commit(state);
-
 free:
-	if (err < 0)
-		drm_atomic_state_free(state);
-
+	drm_atomic_state_put(state);
 	return err;
 }
 EXPORT_SYMBOL(drm_atomic_helper_disable_all);
@@ -2534,7 +2517,7 @@ retry:
 
 	err = drm_atomic_helper_disable_all(dev, &ctx);
 	if (err < 0) {
-		drm_atomic_state_free(state);
+		drm_atomic_state_put(state);
 		state = ERR_PTR(err);
 		goto unlock;
 	}
@@ -2623,18 +2606,13 @@ retry:
 		goto fail;
 
 	ret = drm_atomic_commit(state);
-	if (ret != 0)
-		goto fail;
-
-	/* Driver takes ownership of state on successful commit. */
-	return 0;
 fail:
 	if (ret == -EDEADLK)
 		goto backoff;
 
-	drm_atomic_state_free(state);
-
+	drm_atomic_state_put(state);
 	return ret;
+
 backoff:
 	drm_atomic_state_clear(state);
 	drm_atomic_legacy_backoff(state);
@@ -2683,18 +2661,13 @@ retry:
 		goto fail;
 
 	ret = drm_atomic_commit(state);
-	if (ret != 0)
-		goto fail;
-
-	/* Driver takes ownership of state on successful commit. */
-	return 0;
 fail:
 	if (ret == -EDEADLK)
 		goto backoff;
 
-	drm_atomic_state_free(state);
-
+	drm_atomic_state_put(state);
 	return ret;
+
 backoff:
 	drm_atomic_state_clear(state);
 	drm_atomic_legacy_backoff(state);
@@ -2743,18 +2716,13 @@ retry:
 		goto fail;
 
 	ret = drm_atomic_commit(state);
-	if (ret != 0)
-		goto fail;
-
-	/* Driver takes ownership of state on successful commit. */
-	return 0;
 fail:
 	if (ret == -EDEADLK)
 		goto backoff;
 
-	drm_atomic_state_free(state);
-
+	drm_atomic_state_put(state);
 	return ret;
+
 backoff:
 	drm_atomic_state_clear(state);
 	drm_atomic_legacy_backoff(state);
@@ -2827,18 +2795,13 @@ retry:
 	}
 
 	ret = drm_atomic_nonblocking_commit(state);
-	if (ret != 0)
-		goto fail;
-
-	/* Driver takes ownership of state on successful commit. */
-	return 0;
 fail:
 	if (ret == -EDEADLK)
 		goto backoff;
 
-	drm_atomic_state_free(state);
-
+	drm_atomic_state_put(state);
 	return ret;
+
 backoff:
 	drm_atomic_state_clear(state);
 	drm_atomic_legacy_backoff(state);
@@ -2914,19 +2877,14 @@ retry:
 	crtc_state->active = active;
 
 	ret = drm_atomic_commit(state);
-	if (ret != 0)
-		goto fail;
-
-	/* Driver takes ownership of state on successful commit. */
-	return 0;
 fail:
 	if (ret == -EDEADLK)
 		goto backoff;
 
 	connector->dpms = old_mode;
-	drm_atomic_state_free(state);
-
+	drm_atomic_state_put(state);
 	return ret;
+
 backoff:
 	drm_atomic_state_clear(state);
 	drm_atomic_legacy_backoff(state);
@@ -3333,7 +3291,7 @@ drm_atomic_helper_duplicate_state(struct drm_device *dev,
 
 free:
 	if (err < 0) {
-		drm_atomic_state_free(state);
+		drm_atomic_state_put(state);
 		state = ERR_PTR(err);
 	}
 
@@ -3448,22 +3406,14 @@ retry:
 		goto fail;
 
 	ret = drm_atomic_commit(state);
-	if (ret)
-		goto fail;
-
-	/* Driver takes ownership of state on successful commit. */
-
-	drm_property_unreference_blob(blob);
-
-	return 0;
 fail:
 	if (ret == -EDEADLK)
 		goto backoff;
 
-	drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 	drm_property_unreference_blob(blob);
-
 	return ret;
+
 backoff:
 	drm_atomic_state_clear(state);
 	drm_atomic_legacy_backoff(state);
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 03414bde1f15..22d4f0e22101 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -367,9 +367,7 @@ fail:
 	if (ret == -EDEADLK)
 		goto backoff;
 
-	if (ret != 0)
-		drm_atomic_state_free(state);
-
+	drm_atomic_state_put(state);
 	return ret;
 
 backoff:
@@ -1361,16 +1359,13 @@ retry:
 	info->var.xoffset = var->xoffset;
 	info->var.yoffset = var->yoffset;
 
-
 fail:
 	drm_atomic_clean_old_fb(dev, plane_mask, ret);
 
 	if (ret == -EDEADLK)
 		goto backoff;
 
-	if (ret != 0)
-		drm_atomic_state_free(state);
-
+	drm_atomic_state_put(state);
 	return ret;
 
 backoff:
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index def78c8c1780..4a21a745c373 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -69,7 +69,7 @@ static void exynos_atomic_commit_complete(struct exynos_atomic_commit *commit)
 
 	drm_atomic_helper_cleanup_planes(dev, state);
 
-	drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 
 	spin_lock(&priv->lock);
 	priv->pending &= ~commit->crtcs;
@@ -254,6 +254,7 @@ int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state,
 
 	drm_atomic_helper_swap_state(state, true);
 
+	drm_atomic_state_get(state);
 	if (nonblock)
 		schedule_work(&commit->work);
 	else
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b24a027f5040..5d7191393cb6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4026,10 +4026,9 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv,
 
 	ret = drm_atomic_commit(state);
 out:
-	drm_modeset_unlock_all(dev);
 	WARN(ret, "Toggling workaround to %i returns %i\n", enable, ret);
-	if (ret)
-		drm_atomic_state_free(state);
+	drm_modeset_unlock_all(dev);
+	drm_atomic_state_put(state);
 }
 
 static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 95f972b158ea..435754ed7e77 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3583,7 +3583,7 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv)
 	return;
 
 err:
-	drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 }
 
 void intel_finish_reset(struct drm_i915_private *dev_priv)
@@ -3642,6 +3642,8 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
 		intel_hpd_init(dev_priv);
 	}
 
+	if (state)
+		drm_atomic_state_put(state);
 	drm_modeset_drop_locks(ctx);
 	drm_modeset_acquire_fini(ctx);
 	mutex_unlock(&dev->mode_config.mutex);
@@ -6881,7 +6883,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
 
 	dev_priv->display.crtc_disable(crtc_state, state);
 
-	drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 
 	DRM_DEBUG_KMS("[CRTC:%d:%s] hw state adjusted, was enabled, now disabled\n",
 		      crtc->base.id, crtc->name);
@@ -11266,8 +11268,8 @@ found:
 	return true;
 
 fail:
-	drm_atomic_state_free(state);
-	drm_atomic_state_free(restore_state);
+	drm_atomic_state_put(state);
+	drm_atomic_state_put(restore_state);
 	restore_state = state = NULL;
 
 	if (ret == -EDEADLK) {
@@ -11296,10 +11298,9 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 		return;
 
 	ret = drm_atomic_commit(state);
-	if (ret) {
+	if (ret)
 		DRM_DEBUG_KMS("Couldn't release load detect pipe: %i\n", ret);
-		drm_atomic_state_free(state);
-	}
+	drm_atomic_state_put(state);
 }
 
 static int i9xx_pll_refclk(struct drm_device *dev,
@@ -12347,8 +12348,7 @@ retry:
 			goto retry;
 		}
 
-		if (ret)
-			drm_atomic_state_free(state);
+		drm_atomic_state_put(state);
 
 		if (ret == 0 && event) {
 			spin_lock_irq(&dev->event_lock);
@@ -14392,7 +14392,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 
 	drm_atomic_helper_commit_cleanup_done(state);
 
-	drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 
 	/* As one of the primary mmio accessors, KMS has a high likelihood
 	 * of triggering bugs in unclaimed access. After we finish
@@ -14475,6 +14475,7 @@ static int intel_atomic_commit(struct drm_device *dev,
 	intel_shared_dpll_commit(state);
 	intel_atomic_track_fbs(state);
 
+	drm_atomic_state_get(state);
 	if (nonblock)
 		queue_work(system_unbound_wq, &state->commit_work);
 	else
@@ -14516,9 +14517,8 @@ retry:
 		goto retry;
 	}
 
-	if (ret)
 out:
-		drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 }
 
 /*
@@ -16249,8 +16249,8 @@ retry:
 		dev_priv->display.optimize_watermarks(cs);
 	}
 
-	drm_atomic_state_free(state);
 fail:
+	drm_atomic_state_put(state);
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
 }
@@ -16887,10 +16887,9 @@ void intel_display_resume(struct drm_device *dev)
 	drm_modeset_acquire_fini(&ctx);
 	mutex_unlock(&dev->mode_config.mutex);
 
-	if (ret) {
+	if (ret)
 		DRM_ERROR("Restoring old state failed with %i\n", ret);
-		drm_atomic_state_free(state);
-	}
+	drm_atomic_state_put(state);
 }
 
 void intel_modeset_gem_init(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 73a521fdf1bd..be3e04623e2a 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -987,9 +987,7 @@ int intel_sprite_set_colorkey(struct drm_device *dev, void *data,
 		drm_modeset_backoff(&ctx);
 	}
 
-	if (ret)
-		drm_atomic_state_free(state);
-
+	drm_atomic_state_put(state);
 out:
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index cf83f6507ec8..db61aa5f32ef 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -83,7 +83,7 @@ static void mtk_atomic_complete(struct mtk_drm_private *private,
 	drm_atomic_helper_wait_for_vblanks(drm, state);
 
 	drm_atomic_helper_cleanup_planes(drm, state);
-	drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 }
 
 static void mtk_atomic_work(struct work_struct *work)
@@ -110,6 +110,7 @@ static int mtk_atomic_commit(struct drm_device *drm,
 
 	drm_atomic_helper_swap_state(state, true);
 
+	drm_atomic_state_get(state);
 	if (async)
 		mtk_atomic_schedule(private, state);
 	else
diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c
index 73bae382eac3..db193f835298 100644
--- a/drivers/gpu/drm/msm/msm_atomic.c
+++ b/drivers/gpu/drm/msm/msm_atomic.c
@@ -141,7 +141,7 @@ static void complete_commit(struct msm_commit *c, bool async)
 
 	kms->funcs->complete_commit(kms, state);
 
-	drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 
 	commit_destroy(c);
 }
@@ -256,6 +256,7 @@ int msm_atomic_commit(struct drm_device *dev,
 	 * current layout.
 	 */
 
+	drm_atomic_state_get(state);
 	if (nonblock) {
 		queue_work(priv->atomic_wq, &c->work);
 		return 0;
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index e1cfba51cff6..1735c7accf72 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c
@@ -105,7 +105,7 @@ static void omap_atomic_complete(struct omap_atomic_state_commit *commit)
 
 	dispc_runtime_put();
 
-	drm_atomic_state_free(old_state);
+	drm_atomic_state_put(old_state);
 
 	/* Complete the commit, wake up any waiter. */
 	spin_lock(&priv->commit.lock);
@@ -176,6 +176,7 @@ static int omap_atomic_commit(struct drm_device *dev,
 	/* Swap the state, this is the point of no return. */
 	drm_atomic_helper_swap_state(state, true);
 
+	drm_atomic_state_get(state);
 	if (nonblock)
 		schedule_work(&commit->work);
 	else
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index bd9c3bb9252c..c76ed9ee6a01 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -264,7 +264,7 @@ static void rcar_du_atomic_complete(struct rcar_du_commit *commit)
 
 	drm_atomic_helper_cleanup_planes(dev, old_state);
 
-	drm_atomic_state_free(old_state);
+	drm_atomic_state_put(old_state);
 
 	/* Complete the commit, wake up any waiter. */
 	spin_lock(&rcdu->commit.wait.lock);
@@ -330,6 +330,7 @@ static int rcar_du_atomic_commit(struct drm_device *dev,
 	/* Swap the state, this is the point of no return. */
 	drm_atomic_helper_swap_state(state, true);
 
+	drm_atomic_state_get(state);
 	if (nonblock)
 		schedule_work(&commit->work);
 	else
diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c
index 2784919a7366..7087499969bc 100644
--- a/drivers/gpu/drm/sti/sti_drv.c
+++ b/drivers/gpu/drm/sti/sti_drv.c
@@ -184,7 +184,7 @@ static void sti_atomic_complete(struct sti_private *private,
 	drm_atomic_helper_wait_for_vblanks(drm, state);
 
 	drm_atomic_helper_cleanup_planes(drm, state);
-	drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 }
 
 static void sti_atomic_work(struct work_struct *work)
@@ -217,6 +217,7 @@ static int sti_atomic_commit(struct drm_device *drm,
 
 	drm_atomic_helper_swap_state(state, true);
 
+	drm_atomic_state_get(state);
 	if (nonblock)
 		sti_atomic_schedule(private, state);
 	else
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 8ab47b502d83..a9630c2d6cb3 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -63,7 +63,7 @@ static void tegra_atomic_complete(struct tegra_drm *tegra,
 	drm_atomic_helper_wait_for_vblanks(drm, state);
 
 	drm_atomic_helper_cleanup_planes(drm, state);
-	drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 }
 
 static void tegra_atomic_work(struct work_struct *work)
@@ -96,6 +96,7 @@ static int tegra_atomic_commit(struct drm_device *drm,
 
 	drm_atomic_helper_swap_state(state, true);
 
+	drm_atomic_state_get(state);
 	if (nonblock)
 		tegra_atomic_schedule(tegra, state);
 	else
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index a694977c32f4..147fb28287ae 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -143,8 +143,6 @@ static int tilcdc_commit(struct drm_device *dev,
 
 	drm_atomic_helper_cleanup_planes(dev, state);
 
-	drm_atomic_state_free(state);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index c1f65c6c8e60..f31f72af8551 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -61,7 +61,7 @@ vc4_atomic_complete_commit(struct vc4_commit *c)
 
 	drm_atomic_helper_cleanup_planes(dev, state);
 
-	drm_atomic_state_free(state);
+	drm_atomic_state_put(state);
 
 	up(&vc4->async_modeset);
 
@@ -173,6 +173,7 @@ static int vc4_atomic_commit(struct drm_device *dev,
 	 * current layout.
 	 */
 
+	drm_atomic_state_get(state);
 	if (nonblock) {
 		vc4_queue_seqno_cb(dev, &c->cb, wait_seqno,
 				   vc4_atomic_complete_commit_seqno_cb);
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index 9701f2dfb784..01e1116f0084 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -153,6 +153,7 @@ struct __drm_connnectors_state {
 
 /**
  * struct drm_atomic_state - the global state object for atomic updates
+ * @ref: count of all references to this state (will not be freed until zero)
  * @dev: parent DRM device
  * @allow_modeset: allow full modeset
  * @legacy_cursor_update: hint to enforce legacy cursor IOCTL semantics
@@ -164,6 +165,8 @@ struct __drm_connnectors_state {
  * @acquire_ctx: acquire context for this atomic modeset state update
  */
 struct drm_atomic_state {
+	struct kref ref;
+
 	struct drm_device *dev;
 	bool allow_modeset : 1;
 	bool legacy_cursor_update : 1;
@@ -193,7 +196,33 @@ static inline void drm_crtc_commit_get(struct drm_crtc_commit *commit)
 struct drm_atomic_state * __must_check
 drm_atomic_state_alloc(struct drm_device *dev);
 void drm_atomic_state_clear(struct drm_atomic_state *state);
-void drm_atomic_state_free(struct drm_atomic_state *state);
+
+/**
+ * drm_atomic_state_get - acquire a reference to the atomic state
+ * @state: The atomic state
+ *
+ * Returns a new reference to the @state
+ */
+static inline struct drm_atomic_state *
+drm_atomic_state_get(struct drm_atomic_state *state)
+{
+	kref_get(&state->ref);
+	return state;
+}
+
+void __drm_atomic_state_free(struct kref *ref);
+
+/**
+ * drm_atomic_state_put - release a reference to the atomic state
+ * @state: The atomic state
+ *
+ * This releases a reference to @state which is freed after removing the
+ * final reference. No locking required and callable from any context.
+ */
+static inline void drm_atomic_state_put(struct drm_atomic_state *state)
+{
+	kref_put(&state->ref, __drm_atomic_state_free);
+}
 
 int  __must_check
 drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state);
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 43cf193e54d6..02353904cdba 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -88,7 +88,6 @@ struct drm_plane_state {
 	struct drm_atomic_state *state;
 };
 
-
 /**
  * struct drm_plane_funcs - driver plane control functions
  */
-- 
2.9.3


* [PATCH 28/42] drm/i915: Restore nonblocking awaits for modesetting
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (26 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 27/42] drm: Add reference counting to drm_atomic_state Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 29/42] drm/i915: Combine seqno + tracking into a global timeline struct Chris Wilson
                   ` (16 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

After combining the dma-buf reservation object and the GEM reservation
object, we lost the ability to do a nonblocking wait on the i915 request
(as we blocked upon the reservation object during prepare_fb). We can
instead convert the reservation object into a fence upon which we can
asynchronously wait (including a forced timeout in case the DMA fence is
never signaled).
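
Purely illustrative, a condensed view of the flow this restores, simplified
from the prepare_fb and commit hunks below; the meaning of the 'false' flag
and the 10 second timeout is inferred from their use in this patch:

/* prepare_fb: instead of blocking on the framebuffer's reservation
 * object, queue its fences onto the commit's software fence.  The 10s
 * timeout ensures a never-signaled foreign fence cannot stall the
 * modeset forever.
 */
ret = i915_sw_fence_await_reservation(&intel_state->commit_ready,
				      obj->resv, NULL, false,
				      10 * HZ, GFP_KERNEL);
if (ret < 0)
	return ret;

/* Later, once every awaited fence has signaled, commit_ready fires and
 * its notify callback queues the commit work (nonblocking commits), or
 * the blocking path waits on it and runs intel_atomic_commit_tail()
 * directly.
 */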

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 82 +++++++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_drv.h     |  2 +
 2 files changed, 55 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 435754ed7e77..5b7db062c776 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14410,12 +14410,33 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 
 static void intel_atomic_commit_work(struct work_struct *work)
 {
-	struct drm_atomic_state *state = container_of(work,
-						      struct drm_atomic_state,
-						      commit_work);
+	struct drm_atomic_state *state =
+		container_of(work, struct drm_atomic_state, commit_work);
+
 	intel_atomic_commit_tail(state);
 }
 
+static int __i915_sw_fence_call
+intel_atomic_commit_ready(struct i915_sw_fence *fence,
+			  enum i915_sw_fence_notify notify)
+{
+	struct intel_atomic_state *state =
+		container_of(fence, struct intel_atomic_state, commit_ready);
+
+	switch (notify) {
+	case FENCE_COMPLETE:
+		if (state->base.commit_work.func)
+			queue_work(system_unbound_wq, &state->base.commit_work);
+		break;
+
+	case FENCE_FREE:
+		drm_atomic_state_put(&state->base);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
 static void intel_atomic_track_fbs(struct drm_atomic_state *state)
 {
 	struct drm_plane_state *old_plane_state;
@@ -14461,11 +14482,14 @@ static int intel_atomic_commit(struct drm_device *dev,
 	if (ret)
 		return ret;
 
-	INIT_WORK(&state->commit_work, intel_atomic_commit_work);
+	drm_atomic_state_get(state);
+	i915_sw_fence_init(&intel_state->commit_ready,
+			   intel_atomic_commit_ready);
 
 	ret = intel_atomic_prepare_commit(dev, state);
 	if (ret) {
 		DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
+		i915_sw_fence_commit(&intel_state->commit_ready);
 		return ret;
 	}
 
@@ -14476,10 +14500,14 @@ static int intel_atomic_commit(struct drm_device *dev,
 	intel_atomic_track_fbs(state);
 
 	drm_atomic_state_get(state);
-	if (nonblock)
-		queue_work(system_unbound_wq, &state->commit_work);
-	else
+	INIT_WORK(&state->commit_work,
+		  nonblock ? intel_atomic_commit_work : NULL);
+
+	i915_sw_fence_commit(&intel_state->commit_ready);
+	if (!nonblock) {
+		i915_sw_fence_wait(&intel_state->commit_ready);
 		intel_atomic_commit_tail(state);
+	}
 
 	return 0;
 }
@@ -14591,19 +14619,21 @@ int
 intel_prepare_plane_fb(struct drm_plane *plane,
 		       struct drm_plane_state *new_state)
 {
+	struct intel_atomic_state *intel_state =
+		to_intel_atomic_state(new_state->state);
 	struct drm_device *dev = plane->dev;
 	struct drm_framebuffer *fb = new_state->fb;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
-	long lret;
-	int ret = 0;
+	int ret;
 
 	if (!obj && !old_obj)
 		return 0;
 
 	if (old_obj) {
 		struct drm_crtc_state *crtc_state =
-			drm_atomic_get_existing_crtc_state(new_state->state, plane->state->crtc);
+			drm_atomic_get_existing_crtc_state(new_state->state,
+							   plane->state->crtc);
 
 		/* Big Hammer, we also need to ensure that any pending
 		 * MI_WAIT_FOR_EVENT inside a user batch buffer on the
@@ -14616,31 +14646,25 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		 * This should only fail upon a hung GPU, in which case we
 		 * can safely continue.
 		 */
-		if (needs_modeset(crtc_state))
-			ret = i915_gem_object_wait(old_obj,
-						   I915_WAIT_INTERRUPTIBLE |
-						   I915_WAIT_LOCKED,
-						   MAX_SCHEDULE_TIMEOUT,
-						   NULL);
-		if (ret) {
-			/* GPU hangs should have been swallowed by the wait */
-			WARN_ON(ret == -EIO);
-			return ret;
+		if (needs_modeset(crtc_state)) {
+			ret = i915_sw_fence_await_reservation(&intel_state->commit_ready,
+							      old_obj->resv, NULL,
+							      false, 0,
+							      GFP_KERNEL);
+			if (ret < 0)
+				return ret;
 		}
 	}
 
 	if (!obj)
 		return 0;
 
-	/* For framebuffer backed by dmabuf, wait for fence */
-	lret = i915_gem_object_wait(obj,
-				    I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
-				    MAX_SCHEDULE_TIMEOUT,
-				    NULL);
-	if (lret == -ERESTARTSYS)
-		return lret;
-
-	WARN(lret < 0, "waiting returns %li\n", lret);
+	ret = i915_sw_fence_await_reservation(&intel_state->commit_ready,
+					      obj->resv, NULL,
+					      false, 10*HZ,
+					      GFP_KERNEL);
+	if (ret < 0)
+		return ret;
 
 	if (plane->type == DRM_PLANE_TYPE_CURSOR &&
 	    INTEL_INFO(dev)->cursor_needs_physical) {
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 78210d14b660..d1b3101f3aee 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -365,6 +365,8 @@ struct intel_atomic_state {
 
 	/* Gen9+ only */
 	struct skl_wm_values wm_results;
+
+	struct i915_sw_fence commit_ready;
 };
 
 struct intel_plane_state {
-- 
2.9.3


* [PATCH 29/42] drm/i915: Combine seqno + tracking into a global timeline struct
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (27 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 28/42] drm/i915: Restore nonblocking awaits for modesetting Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 30/42] drm/i915: Queue the idling context switch after all other timelines Chris Wilson
                   ` (15 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Our timelines are more than just a seqno. They also provide an ordered
list of requests to be executed. Due to the restriction of handling
individual address spaces, we are limited to one timeline per address
space, but we use a fence context per engine within it.

Our first step towards introducing independent timelines per context (i.e. to
allow each context to have a queue of requests to execute that have a
defined set of dependencies on other requests) is to provide a timeline
abstraction for the global execution queue.
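
As a rough sketch only, the shape of that abstraction as inferred from the
call sites in this patch (the authoritative definitions live in the new
i915_gem_timeline.h and may differ in detail):

struct intel_timeline {
	u64 fence_context;		/* one fence context per engine */
	u32 last_submitted_seqno;
	struct list_head requests;	/* ordered queue of requests */
	struct i915_gem_active last_request;
	struct i915_gem_timeline *common; /* the owning timeline */
};

struct i915_gem_timeline {
	struct list_head link;		/* on dev_priv->gt.timelines */
	u32 next_seqno;
	struct drm_i915_private *i915;
	const char *name;		/* e.g. "[execution]" */
	struct intel_timeline engine[I915_NUM_ENGINES];
};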

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile              |  1 +
 drivers/gpu/drm/i915/i915_debugfs.c        | 29 ++++-------
 drivers/gpu/drm/i915/i915_drv.c            |  6 ++-
 drivers/gpu/drm/i915/i915_drv.h            |  9 ++--
 drivers/gpu/drm/i915/i915_gem.c            | 80 +++++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem.h            |  2 +
 drivers/gpu/drm/i915/i915_gem_request.c    | 81 ++++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_request.h    |  1 +
 drivers/gpu/drm/i915/i915_gem_timeline.c   | 64 +++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_timeline.h   | 70 ++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gpu_error.c      |  6 +--
 drivers/gpu/drm/i915/i915_guc_submission.c |  3 +-
 drivers/gpu/drm/i915/i915_irq.c            |  2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c     | 15 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.h    | 36 ++-----------
 15 files changed, 284 insertions(+), 121 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.c
 create mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b94a90f34d2d..bf07b9de078d 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -42,6 +42,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem_shrinker.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_timeline.o \
 	  i915_gem_userptr.o \
 	  i915_gpu_error.o \
 	  i915_trace_points.o \
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5d7191393cb6..4e452da96c39 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -560,7 +560,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 				seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n",
 					   engine->name,
 					   i915_gem_request_get_seqno(work->flip_queued_req),
-					   dev_priv->next_seqno,
+					   dev_priv->gt.global_timeline.next_seqno,
 					   intel_engine_get_seqno(engine),
 					   i915_gem_request_completed(work->flip_queued_req));
 			} else
@@ -668,13 +668,13 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 		int count;
 
 		count = 0;
-		list_for_each_entry(req, &engine->request_list, link)
+		list_for_each_entry(req, &engine->timeline->requests, link)
 			count++;
 		if (count == 0)
 			continue;
 
 		seq_printf(m, "%s requests: %d\n", engine->name, count);
-		list_for_each_entry(req, &engine->request_list, link)
+		list_for_each_entry(req, &engine->timeline->requests, link)
 			print_request(m, req, "    ");
 
 		any++;
@@ -1041,15 +1041,8 @@ static int
 i915_next_seqno_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;
-	int ret;
-
-	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
-	if (ret)
-		return ret;
-
-	*val = dev_priv->next_seqno;
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	*val = READ_ONCE(dev_priv->gt.global_timeline.next_seqno);
 	return 0;
 }
 
@@ -1064,7 +1057,7 @@ i915_next_seqno_set(void *data, u64 val)
 	if (ret)
 		return ret;
 
-	ret = i915_gem_set_seqno(dev, val);
+	ret = i915_gem_set_global_seqno(dev, val);
 	mutex_unlock(&dev->struct_mutex);
 
 	return ret;
@@ -1353,7 +1346,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
 			   engine->hangcheck.seqno,
 			   seqno[id],
-			   engine->last_submitted_seqno);
+			   engine->timeline->last_submitted_seqno);
 		seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
 			   yesno(intel_engine_has_waiter(engine)),
 			   yesno(test_bit(engine->id,
@@ -3093,7 +3086,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 		seq_printf(m, "%s\n", engine->name);
 		seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [score %d]\n",
 			   intel_engine_get_seqno(engine),
-			   engine->last_submitted_seqno,
+			   engine->timeline->last_submitted_seqno,
 			   engine->hangcheck.seqno,
 			   engine->hangcheck.score);
 
@@ -3101,14 +3094,14 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 
 		seq_printf(m, "\tRequests:\n");
 
-		rq = list_first_entry(&engine->request_list,
+		rq = list_first_entry(&engine->timeline->requests,
 				struct drm_i915_gem_request, link);
-		if (&rq->link != &engine->request_list)
+		if (&rq->link != &engine->timeline->requests)
 			print_request(m, rq, "\t\tfirst  ");
 
-		rq = list_last_entry(&engine->request_list,
+		rq = list_last_entry(&engine->timeline->requests,
 				struct drm_i915_gem_request, link);
-		if (&rq->link != &engine->request_list)
+		if (&rq->link != &engine->timeline->requests)
 			print_request(m, rq, "\t\tlast   ");
 
 		rq = i915_gem_find_active_request(engine);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 2e29eedd21b2..cd00b021bdfb 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -848,7 +848,9 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	intel_init_display_hooks(dev_priv);
 	intel_init_clock_gating_hooks(dev_priv);
 	intel_init_audio_hooks(dev_priv);
-	i915_gem_load_init(&dev_priv->drm);
+	ret = i915_gem_load_init(&dev_priv->drm);
+	if (ret < 0)
+		goto err_gvt;
 
 	intel_display_crc_init(dev_priv);
 
@@ -858,6 +860,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 
 	return 0;
 
+err_gvt:
+	intel_gvt_cleanup(dev_priv);
 err_workqueues:
 	i915_workqueues_cleanup(dev_priv);
 	return ret;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7756f5c7f1ab..582996ebebe5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -63,6 +63,7 @@
 #include "i915_gem_gtt.h"
 #include "i915_gem_render_state.h"
 #include "i915_gem_request.h"
+#include "i915_gem_timeline.h"
 
 #include "intel_gvt.h"
 
@@ -1816,7 +1817,6 @@ struct drm_i915_private {
 	struct i915_gem_context *kernel_context;
 	struct intel_engine_cs engine[I915_NUM_ENGINES];
 	struct i915_vma *semaphore;
-	u32 next_seqno;
 
 	struct drm_dma_handle *status_page_dmah;
 	struct resource mch_res;
@@ -2074,6 +2074,9 @@ struct drm_i915_private {
 		void (*resume)(struct drm_i915_private *);
 		void (*cleanup_engine)(struct intel_engine_cs *engine);
 
+		struct list_head timelines;
+		struct i915_gem_timeline global_timeline;
+
 		/**
 		 * Is the GPU currently considered idle, or busy executing
 		 * userspace requests? Whilst idle, we allow runtime power
@@ -3160,7 +3163,7 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
-void i915_gem_load_init(struct drm_device *dev);
+int i915_gem_load_init(struct drm_device *dev);
 void i915_gem_load_cleanup(struct drm_device *dev);
 void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
 int i915_gem_freeze(struct drm_i915_private *dev_priv);
@@ -3326,7 +3329,7 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 		       struct drm_i915_gem_object *new,
 		       unsigned frontbuffer_bits);
 
-int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
+int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
 
 struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 943ec3fe66ad..798a087cb5d4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -371,7 +371,7 @@ out:
 	if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
 		i915_gem_request_retire_upto(rq);
 
-	if (rps && rq->fence.seqno == rq->engine->last_submitted_seqno) {
+	if (rps && rq->fence.seqno == rq->timeline->last_submitted_seqno) {
 		/* The GPU is now idle and this client has stalled.
 		 * Since no other client has submitted a request in the
 		 * meantime, assume that this client is the only one
@@ -2526,7 +2526,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	 * extra delay for a recent interrupt is pointless. Hence, we do
 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
 	 */
-	list_for_each_entry(request, &engine->request_list, link) {
+	list_for_each_entry(request, &engine->timeline->requests, link) {
 		if (i915_gem_request_completed(request))
 			continue;
 
@@ -2595,7 +2595,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 	if (i915_gem_context_is_default(incomplete_ctx))
 		return;
 
-	list_for_each_entry_continue(request, &engine->request_list, link)
+	list_for_each_entry_continue(request, &engine->timeline->requests, link)
 		if (request->ctx == incomplete_ctx)
 			reset_request(request);
 }
@@ -2633,7 +2633,8 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
 	 * (lockless) lookup doesn't try and wait upon the request as we
 	 * reset it.
 	 */
-	intel_engine_init_seqno(engine, engine->last_submitted_seqno);
+	intel_engine_init_global_seqno(engine,
+				       engine->timeline->last_submitted_seqno);
 
 	/*
 	 * Clear the execlists queue up before freeing the requests, as those
@@ -2928,17 +2929,26 @@ destroy:
 	return 0;
 }
 
-int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
-			   unsigned int flags)
+static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
 {
-	struct intel_engine_cs *engine;
-	int ret;
+	int ret, i;
 
-	for_each_engine(engine, dev_priv) {
-		if (engine->last_context == NULL)
-			continue;
+	for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
+		ret = i915_gem_active_wait(&tl->engine[i].last_request, flags);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
 
-		ret = intel_engine_idle(engine, flags);
+int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
+{
+	struct i915_gem_timeline *tl;
+	int ret;
+
+	list_for_each_entry(tl, &i915->gt.timelines, link) {
+		ret = wait_for_timeline(tl, flags);
 		if (ret)
 			return ret;
 	}
@@ -4515,12 +4525,6 @@ i915_gem_cleanup_engines(struct drm_device *dev)
 		dev_priv->gt.cleanup_engine(engine);
 }
 
-static void
-init_engine_lists(struct intel_engine_cs *engine)
-{
-	INIT_LIST_HEAD(&engine->request_list);
-}
-
 void
 i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 {
@@ -4553,22 +4557,32 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
 	i915_gem_detect_bit_6_swizzle(dev);
 }
 
-void
+int
 i915_gem_load_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int i;
+	int err;
 
 	dev_priv->objects =
 		kmem_cache_create("i915_gem_object",
 				  sizeof(struct drm_i915_gem_object), 0,
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
+	if (!dev_priv->objects) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
 	dev_priv->vmas =
 		kmem_cache_create("i915_gem_vma",
 				  sizeof(struct i915_vma), 0,
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
+	if (!dev_priv->vmas) {
+		err = -ENOMEM;
+		goto err_objects;
+	}
+
 	dev_priv->requests =
 		kmem_cache_create("i915_gem_request",
 				  sizeof(struct drm_i915_gem_request), 0,
@@ -4576,6 +4590,19 @@ i915_gem_load_init(struct drm_device *dev)
 				  SLAB_RECLAIM_ACCOUNT |
 				  SLAB_DESTROY_BY_RCU,
 				  NULL);
+	if (!dev_priv->requests) {
+		err = -ENOMEM;
+		goto err_vmas;
+	}
+
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	INIT_LIST_HEAD(&dev_priv->gt.timelines);
+	err = i915_gem_timeline_init(dev_priv,
+				     &dev_priv->gt.global_timeline,
+				     "[execution]");
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+	if (err)
+		goto err_requests;
 
 	INIT_LIST_HEAD(&dev_priv->context_list);
 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
@@ -4583,8 +4610,6 @@ i915_gem_load_init(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
-	for (i = 0; i < I915_NUM_ENGINES; i++)
-		init_engine_lists(&dev_priv->engine[i]);
 	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
 			  i915_gem_retire_work_handler);
 	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
@@ -4601,6 +4626,17 @@ i915_gem_load_init(struct drm_device *dev)
 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
 	spin_lock_init(&dev_priv->fb_tracking.lock);
+
+	return 0;
+
+err_requests:
+	kmem_cache_destroy(dev_priv->requests);
+err_vmas:
+	kmem_cache_destroy(dev_priv->vmas);
+err_objects:
+	kmem_cache_destroy(dev_priv->objects);
+err_out:
+	return err;
 }
 
 void i915_gem_load_cleanup(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 8292e797d9b5..735580d72eb1 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -31,4 +31,6 @@
 #define GEM_BUG_ON(expr)
 #endif
 
+#define I915_NUM_ENGINES 5
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e8278b05074b..a360729b2301 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -40,7 +40,7 @@ static const char *i915_fence_get_timeline_name(struct fence *fence)
 	 * multiple execution contexts (fence contexts) as we allow
 	 * engines within a single timeline to execute in parallel.
 	 */
-	return "global";
+	return to_request(fence)->timeline->common->name;
 }
 
 static bool i915_fence_signaled(struct fence *fence)
@@ -211,7 +211,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
 		return;
 
 	do {
-		tmp = list_first_entry(&engine->request_list,
+		tmp = list_first_entry(&engine->timeline->requests,
 				       typeof(*tmp), link);
 
 		i915_gem_request_retire(tmp);
@@ -238,36 +238,38 @@ static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno)
+static int i915_gem_init_global_seqno(struct drm_i915_private *dev_priv,
+				      u32 seqno)
 {
+	struct i915_gem_timeline *timeline = &dev_priv->gt.global_timeline;
 	struct intel_engine_cs *engine;
 	int ret;
 
 	/* Carefully retire all requests without writing to the rings */
-	for_each_engine(engine, dev_priv) {
-		ret = intel_engine_idle(engine,
-					I915_WAIT_INTERRUPTIBLE |
-					I915_WAIT_LOCKED);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_wait_for_idle(dev_priv,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED);
+	if (ret)
+		return ret;
+
 	i915_gem_retire_requests(dev_priv);
 
 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-	if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) {
+	if (!i915_seqno_passed(seqno, timeline->next_seqno)) {
 		while (intel_kick_waiters(dev_priv) ||
 		       intel_kick_signalers(dev_priv))
 			yield();
+		yield();
 	}
 
 	/* Finally reset hw state */
 	for_each_engine(engine, dev_priv)
-		intel_engine_init_seqno(engine, seqno);
+		intel_engine_init_global_seqno(engine, seqno);
 
 	return 0;
 }
 
-int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
+int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int ret;
@@ -280,28 +282,31 @@ int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
 	/* HWS page needs to be set less than what we
 	 * will inject to ring
 	 */
-	ret = i915_gem_init_seqno(dev_priv, seqno - 1);
+	ret = i915_gem_init_global_seqno(dev_priv, seqno - 1);
 	if (ret)
 		return ret;
 
-	dev_priv->next_seqno = seqno;
+	dev_priv->gt.global_timeline.next_seqno = seqno;
 	return 0;
 }
 
-static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
+static int i915_gem_get_global_seqno(struct drm_i915_private *dev_priv,
+				     u32 *seqno)
 {
+	struct i915_gem_timeline *tl = &dev_priv->gt.global_timeline;
+
 	/* reserve 0 for non-seqno */
-	if (unlikely(dev_priv->next_seqno == 0)) {
+	if (unlikely(tl->next_seqno == 0)) {
 		int ret;
 
-		ret = i915_gem_init_seqno(dev_priv, 0);
+		ret = i915_gem_init_global_seqno(dev_priv, 0);
 		if (ret)
 			return ret;
 
-		dev_priv->next_seqno = 1;
+		tl->next_seqno = 1;
 	}
 
-	*seqno = dev_priv->next_seqno++;
+	*seqno = tl->next_seqno++;
 	return 0;
 }
 
@@ -310,13 +315,14 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 {
 	struct drm_i915_gem_request *request =
 		container_of(fence, typeof(*request), submit);
+	struct intel_engine_cs *engine = request->engine;
 
 	/* Will be called from irq-context when using foreign DMA fences */
 
 	switch (state) {
 	case FENCE_COMPLETE:
-		request->engine->last_submitted_seqno = request->fence.seqno;
-		request->engine->submit_request(request);
+		engine->timeline->last_submitted_seqno = request->fence.seqno;
+		engine->submit_request(request);
 		break;
 
 	case FENCE_FREE:
@@ -356,7 +362,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		return ERR_PTR(ret);
 
 	/* Move the oldest request to the slab-cache (if not in use!) */
-	req = list_first_entry_or_null(&engine->request_list,
+	req = list_first_entry_or_null(&engine->timeline->requests,
 				       typeof(*req), link);
 	if (req && i915_gem_request_completed(req))
 		i915_gem_request_retire(req);
@@ -393,15 +399,17 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (!req)
 		return ERR_PTR(-ENOMEM);
 
-	ret = i915_gem_get_seqno(dev_priv, &seqno);
+	ret = i915_gem_get_global_seqno(dev_priv, &seqno);
 	if (ret)
 		goto err;
 
+	req->timeline = engine->timeline;
+
 	spin_lock_init(&req->lock);
 	fence_init(&req->fence,
 		   &i915_fence_ops,
 		   &req->lock,
-		   engine->fence_context,
+		   req->timeline->fence_context,
 		   seqno);
 
 	i915_sw_fence_init(&req->submit, submit_notify);
@@ -456,9 +464,16 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 
 	GEM_BUG_ON(to == from);
 
-	if (to->engine == from->engine)
+	if (to->timeline == from->timeline)
 		return 0;
 
+	if (to->engine == from->engine) {
+		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
+						       &from->submit,
+						       GFP_KERNEL);
+		return ret < 0 ? ret : 0;
+	}
+
 	idx = intel_engine_sync_index(from->engine, to->engine);
 	if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
 		return 0;
@@ -613,6 +628,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_ring *ring = request->ring;
+	struct intel_timeline *timeline = request->timeline;
 	struct drm_i915_gem_request *prev;
 	u32 request_start;
 	u32 reserved_tail;
@@ -670,17 +686,17 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * see a more recent value in the hws than we are tracking.
 	 */
 
-	prev = i915_gem_active_raw(&engine->last_request,
+	prev = i915_gem_active_raw(&timeline->last_request,
 				   &request->i915->drm.struct_mutex);
 	if (prev)
 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 					     &request->submitq);
 
 	request->emitted_jiffies = jiffies;
-	request->previous_seqno = engine->last_pending_seqno;
-	engine->last_pending_seqno = request->fence.seqno;
-	i915_gem_active_set(&engine->last_request, request);
-	list_add_tail(&request->link, &engine->request_list);
+	request->previous_seqno = timeline->last_pending_seqno;
+	timeline->last_pending_seqno = request->fence.seqno;
+	i915_gem_active_set(&timeline->last_request, request);
+	list_add_tail(&request->link, &timeline->requests);
 	list_add_tail(&request->ring_link, &ring->request_list);
 
 	i915_gem_mark_busy(engine);
@@ -887,7 +903,8 @@ static bool engine_retire_requests(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request, *next;
 
-	list_for_each_entry_safe(request, next, &engine->request_list, link) {
+	list_for_each_entry_safe(request, next,
+				 &engine->timeline->requests, link) {
 		if (!i915_gem_request_completed(request))
 			return false;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index b8feff5857a0..378f523844bb 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -81,6 +81,7 @@ struct drm_i915_gem_request {
 	struct i915_gem_context *ctx;
 	struct intel_engine_cs *engine;
 	struct intel_ring *ring;
+	struct intel_timeline *timeline;
 	struct intel_signal_node signaling;
 
 	struct i915_sw_fence submit;
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
new file mode 100644
index 000000000000..a4579c109066
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+
+int i915_gem_timeline_init(struct drm_i915_private *i915,
+			   struct i915_gem_timeline *timeline,
+			   const char *name)
+{
+	unsigned int i;
+	u64 fences;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	timeline->i915 = i915;
+	timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL);
+	if (!timeline->name)
+		return -ENOMEM;
+
+	list_add(&timeline->link, &i915->gt.timelines);
+
+	/* Called during early_init before we know how many engines there are */
+	fences = fence_context_alloc(ARRAY_SIZE(timeline->engine));
+	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
+		struct intel_timeline *tl = &timeline->engine[i];
+
+		tl->fence_context = fences++;
+		tl->common = timeline;
+
+		init_request_active(&tl->last_request, NULL);
+		INIT_LIST_HEAD(&tl->requests);
+	}
+
+	return 0;
+}
+
+void i915_gem_timeline_fini(struct i915_gem_timeline *tl)
+{
+	lockdep_assert_held(&tl->i915->drm.struct_mutex);
+
+	list_del(&tl->link);
+	kfree(tl->name);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
new file mode 100644
index 000000000000..bfdf0331cc50
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef I915_GEM_TIMELINE_H
+#define I915_GEM_TIMELINE_H
+
+#include <linux/list.h>
+
+#include "i915_gem_request.h"
+
+struct i915_gem_timeline;
+
+struct intel_timeline {
+	u64 fence_context;
+	u32 last_submitted_seqno;
+	u32 last_pending_seqno;
+
+	/**
+	 * List of breadcrumbs associated with GPU requests currently
+	 * outstanding.
+	 */
+	struct list_head requests;
+
+	/* Contains an RCU guarded pointer to the last request. No reference is
+	 * held to the request, users must carefully acquire a reference to
+	 * the request using i915_gem_active_get_request_rcu(), or hold the
+	 * struct_mutex.
+	 */
+	struct i915_gem_active last_request;
+
+	struct i915_gem_timeline *common;
+};
+
+struct i915_gem_timeline {
+	struct list_head link;
+	u32 next_seqno;
+
+	struct drm_i915_private *i915;
+	const char *name;
+
+	struct intel_timeline engine[I915_NUM_ENGINES];
+};
+
+int i915_gem_timeline_init(struct drm_i915_private *i915,
+			   struct i915_gem_timeline *tl,
+			   const char *name);
+void i915_gem_timeline_fini(struct i915_gem_timeline *tl);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f4ad4a3088df..d5d38c8361c2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1095,7 +1095,7 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
 	ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
 	ee->acthd = intel_engine_get_active_head(engine);
 	ee->seqno = intel_engine_get_seqno(engine);
-	ee->last_seqno = engine->last_submitted_seqno;
+	ee->last_seqno = engine->timeline->last_submitted_seqno;
 	ee->start = I915_READ_START(engine);
 	ee->head = I915_READ_HEAD(engine);
 	ee->tail = I915_READ_TAIL(engine);
@@ -1166,7 +1166,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->request_list, link)
+	list_for_each_entry_from(request, &engine->timeline->requests, link)
 		count++;
 	if (!count)
 		return;
@@ -1179,7 +1179,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->request_list, link) {
+	list_for_each_entry_from(request, &engine->timeline->requests, link) {
 		struct drm_i915_error_request *erq;
 
 		if (count >= ee->num_requests) {
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 3106dcc06fe9..0a9a35d9401d 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -1037,7 +1037,8 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 		engine->submit_request = i915_guc_submit;
 
 		/* Replay the current set of previously submitted requests */
-		list_for_each_entry(request, &engine->request_list, link) {
+		list_for_each_entry(request,
+				    &engine->timeline->requests, link) {
 			client->wq_rsvd += sizeof(struct guc_wq_item);
 			if (i915_sw_fence_done(&request->submit))
 				i915_guc_submit(request);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index bd6c8b0eeaef..b0d1aa6fac27 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3140,7 +3140,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
 		acthd = intel_engine_get_active_head(engine);
 		seqno = intel_engine_get_seqno(engine);
-		submit = READ_ONCE(engine->last_submitted_seqno);
+		submit = READ_ONCE(engine->timeline->last_submitted_seqno);
 
 		if (engine->hangcheck.seqno == seqno) {
 			if (i915_seqno_passed(seqno, submit)) {
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index b48c51964cb3..276f3f38ab41 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -161,7 +161,7 @@ cleanup:
 	return ret;
 }
 
-void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
+void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
@@ -197,7 +197,9 @@ void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
 	if (engine->irq_seqno_barrier)
 		engine->irq_seqno_barrier(engine);
-	engine->last_submitted_seqno = seqno;
+
+	GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
+	engine->timeline->last_submitted_seqno = seqno;
 
 	engine->hangcheck.seqno = seqno;
 
@@ -212,10 +214,9 @@ void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
 	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
 }
 
-static void intel_engine_init_requests(struct intel_engine_cs *engine)
+static void intel_engine_init_timeline(struct intel_engine_cs *engine)
 {
-	init_request_active(&engine->last_request, NULL);
-	INIT_LIST_HEAD(&engine->request_list);
+	engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id];
 }
 
 /**
@@ -232,9 +233,7 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
 	INIT_LIST_HEAD(&engine->execlist_queue);
 	spin_lock_init(&engine->execlist_lock);
 
-	engine->fence_context = fence_context_alloc(1);
-
-	intel_engine_init_requests(engine);
+	intel_engine_init_timeline(engine);
 	intel_engine_init_hangcheck(engine);
 	i915_gem_batch_pool_init(engine, &engine->batch_pool);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4289fcda4b3f..7703044d23db 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -4,6 +4,7 @@
 #include <linux/hashtable.h>
 #include "i915_gem_batch_pool.h"
 #include "i915_gem_request.h"
+#include "i915_gem_timeline.h"
 
 #define I915_CMD_HASH_ORDER 9
 
@@ -169,7 +170,6 @@ struct intel_engine_cs {
 		VCS2,	/* Keep instances of the same type engine together. */
 		VECS
 	} id;
-#define I915_NUM_ENGINES 5
 #define _VCS(n) (VCS + (n))
 	unsigned int exec_id;
 	enum intel_engine_hw_id {
@@ -180,10 +180,10 @@ struct intel_engine_cs {
 		VCS2_HW
 	} hw_id;
 	enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */
-	u64 fence_context;
 	u32		mmio_base;
 	unsigned int irq_shift;
 	struct intel_ring *buffer;
+	struct intel_timeline *timeline;
 
 	struct intel_render_state *render_state;
 
@@ -346,27 +346,6 @@ struct intel_engine_cs {
 	bool preempt_wa;
 	u32 ctx_desc_template;
 
-	/**
-	 * List of breadcrumbs associated with GPU requests currently
-	 * outstanding.
-	 */
-	struct list_head request_list;
-
-	/**
-	 * Seqno of request most recently submitted to request_list.
-	 * Used exclusively by hang checker to avoid grabbing lock while
-	 * inspecting request list.
-	 */
-	u32 last_submitted_seqno;
-	u32 last_pending_seqno;
-
-	/* An RCU guarded pointer to the last request. No reference is
-	 * held to the request, users must carefully acquire a reference to
-	 * the request using i915_gem_active_get_rcu(), or hold the
-	 * struct_mutex.
-	 */
-	struct i915_gem_active last_request;
-
 	struct i915_gem_context *last_context;
 
 	struct intel_engine_hangcheck hangcheck;
@@ -522,20 +501,13 @@ static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value)
 int __intel_ring_space(int head, int tail, int size);
 void intel_ring_update_space(struct intel_ring *ring);
 
-void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
+void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
 void intel_engine_setup_common(struct intel_engine_cs *engine);
 int intel_engine_init_common(struct intel_engine_cs *engine);
 int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
-static inline int intel_engine_idle(struct intel_engine_cs *engine,
-				    unsigned int flags)
-{
-	/* Wait upon the last request to be completed */
-	return i915_gem_active_wait(&engine->last_request, flags);
-}
-
 int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine);
@@ -622,7 +594,7 @@ unsigned int intel_kick_signalers(struct drm_i915_private *i915);
 
 static inline bool intel_engine_is_active(struct intel_engine_cs *engine)
 {
-	return i915_gem_active_isset(&engine->last_request);
+	return i915_gem_active_isset(&engine->timeline->last_request);
 }
 
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 30/42] drm/i915: Queue the idling context switch after all other timelines
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (28 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 29/42] drm/i915: Combine seqno + tracking into a global timeline struct Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 31/42] drm/i915: Wait first for submission, before waiting for request completion Chris Wilson
                   ` (14 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Before suspend, we wait for the switch to the kernel context. In order
for all the other context images to be complete upon suspend, that
switch must be the last operation by the GPU (i.e. this idling request
must not overtake any pending requests). To make this request execute last,
we make it depend on every other inflight request.
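
A toy userspace model of that dependency rule (not driver code; the actual
change is in the hunk below) is just a pending count that only releases the
request for submission once every awaited request has signalled:

#include <stdio.h>

/* Toy fence: the request is only released for submission once its
 * pending count, one per awaited request, drops to zero.
 */
struct toy_fence {
	int pending;
	const char *name;
};

static void toy_await(struct toy_fence *f)
{
	f->pending++;
}

static void toy_signal(struct toy_fence *f)
{
	if (--f->pending == 0)
		printf("%s: submitted after all dependencies\n", f->name);
}

int main(void)
{
	struct toy_fence idle = { 0, "kernel-context switch" };
	int inflight = 3;	/* pretend three requests are still in flight */
	int i;

	for (i = 0; i < inflight; i++)
		toy_await(&idle);	/* depend on each in-flight request */

	for (i = 0; i < inflight; i++)
		toy_signal(&idle);	/* they may complete in any order */

	return 0;
}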

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c         |  9 +++++++++
 drivers/gpu/drm/i915/i915_gem_context.c | 23 +++++++++++++++++------
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 798a087cb5d4..2887a318b9c0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4256,6 +4256,14 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
 		i915_gem_object_put(obj);
 }
 
+static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+
+	for_each_engine(engine, dev_priv)
+		GEM_BUG_ON(engine->last_context != dev_priv->kernel_context);
+}
+
 int i915_gem_suspend(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -4285,6 +4293,7 @@ int i915_gem_suspend(struct drm_device *dev)
 
 	i915_gem_retire_requests(dev_priv);
 
+	assert_kernel_context_is_current(dev_priv);
 	i915_gem_context_lost(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 1d2ab73a8f43..449cf45d94eb 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -929,21 +929,32 @@ int i915_switch_context(struct drm_i915_gem_request *req)
 int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
+	struct i915_gem_timeline *timeline;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
 	for_each_engine(engine, dev_priv) {
 		struct drm_i915_gem_request *req;
 		int ret;
 
-		if (engine->last_context == NULL)
-			continue;
-
-		if (engine->last_context == dev_priv->kernel_context)
-			continue;
-
 		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
 		if (IS_ERR(req))
 			return PTR_ERR(req);
 
+		/* Queue this switch after all other activity */
+		list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
+			struct drm_i915_gem_request *prev;
+			struct intel_timeline *tl;
+
+			tl = &timeline->engine[engine->id];
+			prev = i915_gem_active_raw(&tl->last_request,
+						   &dev_priv->drm.struct_mutex);
+			if (prev)
+				i915_sw_fence_await_sw_fence_gfp(&req->submit,
+								 &prev->submit,
+								 GFP_KERNEL);
+		}
+
 		ret = i915_switch_context(req);
 		i915_add_request_no_flush(req);
 		if (ret)
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 31/42] drm/i915: Wait first for submission, before waiting for request completion
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (29 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 30/42] drm/i915: Queue the idling context switch after all other timelines Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 32/42] drm/i915: Introduce a global_seqno for each request Chris Wilson
                   ` (13 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

In future patches, we will no longer be able to wait on a static global
seqno and instead have to break our wait up into phases. First we wait
for the global seqno assignment (upon submission to hardware), and once
submitted we wait for the hardware to complete.
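
Purely as an illustration of that structure (a userspace toy, not the driver
code; the actual implementation is in the patch below), the wait becomes two
ordered phases:

/* build with: cc -pthread two_phase.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy model of the split: a waiter first blocks until the request is
 * submitted (and so has a seqno to watch), then blocks until complete.
 */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool submitted, completed;

static void *backend(void *arg)
{
	(void)arg;

	pthread_mutex_lock(&lock);
	submitted = true;		/* global seqno assigned here */
	pthread_cond_broadcast(&cond);
	completed = true;		/* hardware breadcrumb written */
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, backend, NULL);

	pthread_mutex_lock(&lock);
	while (!submitted)		/* phase 1: wait for submission */
		pthread_cond_wait(&cond, &lock);
	while (!completed)		/* phase 2: wait for completion */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	printf("waited for submit, then for completion\n");
	return 0;
}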

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 51 +++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a360729b2301..95748e0a27af 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -780,6 +780,49 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 	return false;
 }
 
+static long
+__i915_request_wait_for_submit(struct drm_i915_gem_request *request,
+			       unsigned int flags,
+			       long timeout)
+{
+	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
+		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+	wait_queue_head_t *q = &request->i915->gpu_error.wait_queue;
+	DEFINE_WAIT(reset);
+	DEFINE_WAIT(wait);
+
+	if (flags & I915_WAIT_LOCKED)
+		add_wait_queue(q, &reset);
+
+	do {
+		prepare_to_wait(&request->submit.wait, &wait, state);
+
+		if (i915_sw_fence_done(&request->submit))
+			break;
+
+		if (flags & I915_WAIT_LOCKED &&
+		    i915_reset_in_progress(&request->i915->gpu_error)) {
+			__set_current_state(TASK_RUNNING);
+			i915_reset(request->i915);
+			reset_wait_queue(q, &reset);
+			continue;
+		}
+
+		if (signal_pending_state(state, current)) {
+			timeout = -ERESTARTSYS;
+			break;
+		}
+
+		timeout = io_schedule_timeout(timeout);
+	} while (timeout);
+	finish_wait(&request->submit.wait, &wait);
+
+	if (flags & I915_WAIT_LOCKED)
+		remove_wait_queue(q, &reset);
+
+	return timeout;
+}
+
 /**
  * i915_wait_request - wait until execution of request has finished
  * @req: duh!
@@ -821,6 +864,14 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 	trace_i915_gem_request_wait_begin(req);
 
+	if (!i915_sw_fence_done(&req->submit)) {
+		timeout = __i915_request_wait_for_submit(req, flags, timeout);
+		if (timeout < 0)
+			goto complete;
+
+		GEM_BUG_ON(!i915_sw_fence_done(&req->submit));
+	}
+
 	/* Optimistic short spin before touching IRQs */
 	if (i915_spin_request(req, state, 5))
 		goto complete;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 32/42] drm/i915: Introduce a global_seqno for each request
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (30 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 31/42] drm/i915: Wait first for submission, before waiting for request completion Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 33/42] drm/i915: Rename ->emit_request to ->emit_breadcrumb Chris Wilson
                   ` (12 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Though we will have multiple timelines, we still have a single timeline
of execution. This we can use to provide an execution and retirement order
of requests. This keeps tracking the execution of requests simple, and is
vital for preserving a single waiter (i.e. so that we can order the waiters
such that only the earliest to wake up need be woken). To accomplish this we
distinguish the seqno used to order requests per-context (external) and
that used internally for execution.
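
For illustration only, here is a small userspace sketch of the wraparound-safe
comparison used for seqnos, and of treating a zero global seqno as "not yet
submitted"; the constants are made up and the driver code is in the hunks
below:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Wraparound-safe "seq1 is at or after seq2" (mirrors i915_seqno_passed). */
static bool seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}

/* With a separate execution seqno, 0 is reserved to mean "no global
 * seqno assigned yet", so such a request can never report completion.
 */
static bool request_completed(uint32_t hw_seqno, uint32_t global_seqno)
{
	if (!global_seqno)
		return false;
	return seqno_passed(hw_seqno, global_seqno);
}

int main(void)
{
	uint32_t hw_seqno = 2;			/* last breadcrumb the GPU wrote */

	printf("%d\n", request_completed(hw_seqno, 0));	/* 0: unsubmitted */
	printf("%d\n", request_completed(hw_seqno, 1));	/* 1: completed */
	printf("%d\n", request_completed(hw_seqno, 3));	/* 0: still queued */
	printf("%d\n", seqno_passed(1, 0xfffffffeu));	/* 1: survives wrap */
	return 0;
}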

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  2 +-
 drivers/gpu/drm/i915/i915_drv.h            |  4 ++--
 drivers/gpu/drm/i915/i915_gem.c            |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.c    | 19 +++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_request.h    | 32 +++++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gpu_error.c      |  2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |  4 ++--
 drivers/gpu/drm/i915/i915_trace.h          |  8 ++++----
 drivers/gpu/drm/i915/intel_breadcrumbs.c   |  8 +++++---
 drivers/gpu/drm/i915/intel_lrc.c           |  4 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 14 ++++++-------
 11 files changed, 66 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4e452da96c39..77491eb3340f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -644,7 +644,7 @@ static void print_request(struct seq_file *m,
 	rcu_read_lock();
 	task = pid ? pid_task(pid, PIDTYPE_PID) : NULL;
 	seq_printf(m, "%s%x [%x:%x] @ %d: %s [%d]\n", prefix,
-		   rq->fence.seqno, rq->ctx->hw_id, rq->fence.seqno,
+		   rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno,
 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
 		   task ? task->comm : "<unknown>",
 		   task ? task->pid : -1);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 582996ebebe5..78b54fb4b56b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3991,7 +3991,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
 	/* Before we do the heavier coherent read of the seqno,
 	 * check the value (hopefully) in the CPU cacheline.
 	 */
-	if (i915_gem_request_completed(req))
+	if (__i915_gem_request_completed(req))
 		return true;
 
 	/* Ensure our read of the seqno is coherent so that we
@@ -4042,7 +4042,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req)
 			wake_up_process(tsk);
 		rcu_read_unlock();
 
-		if (i915_gem_request_completed(req))
+		if (__i915_gem_request_completed(req))
 			return true;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2887a318b9c0..f9a70971cb24 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2578,7 +2578,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 		return;
 
 	DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
-			 engine->name, request->fence.seqno);
+			 engine->name, request->global_seqno);
 
 	/* Setup the CS to resume from the breadcrumb of the hung request */
 	engine->reset_hw(engine, request);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 95748e0a27af..b29dddc35d34 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -375,7 +375,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * of being read by __i915_gem_active_get_rcu(). As such,
 	 * we have to be very careful when overwriting the contents. During
 	 * the RCU lookup, we change chase the request->engine pointer,
-	 * read the request->fence.seqno and increment the reference count.
+	 * read the request->global_seqno and increment the reference count.
 	 *
 	 * The reference count is incremented atomically. If it is zero,
 	 * the lookup knows the request is unallocated and complete. Otherwise,
@@ -417,6 +417,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	INIT_LIST_HEAD(&req->active_list);
 	req->i915 = dev_priv;
 	req->engine = engine;
+	req->global_seqno = seqno;
 	req->ctx = i915_gem_context_get(ctx);
 
 	/* No zalloc, must clear what we need by hand */
@@ -474,8 +475,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 		return ret < 0 ? ret : 0;
 	}
 
+	if (!from->global_seqno) {
+		ret = i915_sw_fence_await_dma_fence(&to->submit,
+						    &from->fence, 0,
+						    GFP_KERNEL);
+		return ret < 0 ? ret : 0;
+	}
+
 	idx = intel_engine_sync_index(from->engine, to->engine);
-	if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
+	if (from->global_seqno <= from->engine->semaphore.sync_seqno[idx])
 		return 0;
 
 	trace_i915_gem_ring_sync_to(to, from);
@@ -493,7 +501,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 			return ret;
 	}
 
-	from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
+	from->engine->semaphore.sync_seqno[idx] = from->global_seqno;
 	return 0;
 }
 
@@ -765,7 +773,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 
 	timeout_us += local_clock_us(&cpu);
 	do {
-		if (i915_gem_request_completed(req))
+		if (__i915_gem_request_completed(req))
 			return true;
 
 		if (signal_pending_state(state, current))
@@ -871,6 +879,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 
 		GEM_BUG_ON(!i915_sw_fence_done(&req->submit));
 	}
+	GEM_BUG_ON(!req->global_seqno);
 
 	/* Optimistic short spin before touching IRQs */
 	if (i915_spin_request(req, state, 5))
@@ -880,7 +889,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
 	if (flags & I915_WAIT_LOCKED)
 		add_wait_queue(&req->i915->gpu_error.wait_queue, &reset);
 
-	intel_wait_init(&wait, req->fence.seqno);
+	intel_wait_init(&wait, req->global_seqno);
 	if (intel_engine_add_wait(req->engine, &wait))
 		/* In order to check that we haven't missed the interrupt
 		 * as we enabled it, we need to kick ourselves to do a
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 378f523844bb..d1439f39420d 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -87,6 +87,8 @@ struct drm_i915_gem_request {
 	struct i915_sw_fence submit;
 	wait_queue_t submitq;
 
+	u32 global_seqno;
+
 	/** GEM sequence number associated with the previous request,
 	 * when the HWS breadcrumb is equal to this the GPU is processing
 	 * this request.
@@ -163,7 +165,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req);
 static inline u32
 i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
 {
-	return req ? req->fence.seqno : 0;
+	return req ? req->global_seqno : 0;
 }
 
 static inline struct intel_engine_cs *
@@ -248,17 +250,37 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
 }
 
 static inline bool
-i915_gem_request_started(const struct drm_i915_gem_request *req)
+__i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
+	GEM_BUG_ON(!req->global_seqno);
 	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
 				 req->previous_seqno);
 }
 
 static inline bool
-i915_gem_request_completed(const struct drm_i915_gem_request *req)
+i915_gem_request_started(const struct drm_i915_gem_request *req)
 {
+	if (!req->global_seqno)
+		return false;
+
+	return __i915_gem_request_started(req);
+}
+
+static inline bool
+__i915_gem_request_completed(const struct drm_i915_gem_request *req)
+{
+	GEM_BUG_ON(!req->global_seqno);
 	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
-				 req->fence.seqno);
+				 req->global_seqno);
+}
+
+static inline bool
+i915_gem_request_completed(const struct drm_i915_gem_request *req)
+{
+	if (!req->global_seqno)
+		return false;
+
+	return __i915_gem_request_completed(req);
 }
 
 bool __i915_spin_request(const struct drm_i915_gem_request *request,
@@ -266,7 +288,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *request,
 static inline bool i915_spin_request(const struct drm_i915_gem_request *request,
 				     int state, unsigned long timeout_us)
 {
-	return (i915_gem_request_started(request) &&
+	return (__i915_gem_request_started(request) &&
 		__i915_spin_request(request, state, timeout_us));
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index d5d38c8361c2..b6bf6fafd849 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1202,7 +1202,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 		}
 
 		erq = &ee->requests[count++];
-		erq->seqno = request->fence.seqno;
+		erq->seqno = request->global_seqno;
 		erq->jiffies = request->emitted_jiffies;
 		erq->head = request->head;
 		erq->tail = request->tail;
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 0a9a35d9401d..bc9ed266f4f1 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -521,7 +521,7 @@ static void guc_wq_item_append(struct i915_guc_client *gc,
 	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
 
 	wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
-	wqi->fence_id = rq->fence.seqno;
+	wqi->fence_id = rq->global_seqno;
 
 	kunmap_atomic(base);
 }
@@ -616,7 +616,7 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
 		client->b_fail += 1;
 
 	guc->submissions[engine_id] += 1;
-	guc->last_seqno[engine_id] = rq->fence.seqno;
+	guc->last_seqno[engine_id] = rq->global_seqno;
 	spin_unlock(&client->wq_lock);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 178798002a73..4c46f7c00323 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -466,7 +466,7 @@ TRACE_EVENT(i915_gem_ring_sync_to,
 			   __entry->dev = from->i915->drm.primary->index;
 			   __entry->sync_from = from->engine->id;
 			   __entry->sync_to = to->engine->id;
-			   __entry->seqno = from->fence.seqno;
+			   __entry->seqno = from->global_seqno;
 			   ),
 
 	    TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
@@ -489,7 +489,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 	    TP_fast_assign(
 			   __entry->dev = req->i915->drm.primary->index;
 			   __entry->ring = req->engine->id;
-			   __entry->seqno = req->fence.seqno;
+			   __entry->seqno = req->global_seqno;
 			   __entry->flags = flags;
 			   fence_enable_sw_signaling(&req->fence);
 			   ),
@@ -534,7 +534,7 @@ DECLARE_EVENT_CLASS(i915_gem_request,
 	    TP_fast_assign(
 			   __entry->dev = req->i915->drm.primary->index;
 			   __entry->ring = req->engine->id;
-			   __entry->seqno = req->fence.seqno;
+			   __entry->seqno = req->global_seqno;
 			   ),
 
 	    TP_printk("dev=%u, ring=%u, seqno=%u",
@@ -596,7 +596,7 @@ TRACE_EVENT(i915_gem_request_wait_begin,
 	    TP_fast_assign(
 			   __entry->dev = req->i915->drm.primary->index;
 			   __entry->ring = req->engine->id;
-			   __entry->seqno = req->fence.seqno;
+			   __entry->seqno = req->global_seqno;
 			   __entry->blocking =
 				     mutex_is_locked(&req->i915->drm.struct_mutex);
 			   ),
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 495611b7068d..5fd82b17a706 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -504,9 +504,11 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 
 	/* locked by fence_enable_sw_signaling() */
 	assert_spin_locked(&request->lock);
+	if (!request->global_seqno)
+		return;
 
 	request->signaling.wait.tsk = b->signaler;
-	request->signaling.wait.seqno = request->fence.seqno;
+	request->signaling.wait.seqno = request->global_seqno;
 	i915_gem_request_get(request);
 
 	spin_lock(&b->lock);
@@ -530,8 +532,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	p = &b->signals.rb_node;
 	while (*p) {
 		parent = *p;
-		if (i915_seqno_passed(request->fence.seqno,
-				      to_signaler(parent)->fence.seqno)) {
+		if (i915_seqno_passed(request->global_seqno,
+				      to_signaler(parent)->global_seqno)) {
 			p = &parent->rb_right;
 			first = false;
 		} else {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6acecfc41f5e..fb0e020fdfeb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1584,7 +1584,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
 			intel_hws_seqno_address(request->engine) |
 			MI_FLUSH_DW_USE_GTT);
 	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, request->fence.seqno);
+	intel_ring_emit(ring, request->global_seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	intel_ring_emit(ring, MI_NOOP);
 	return intel_logical_ring_advance(request);
@@ -1613,7 +1613,7 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request)
 			 PIPE_CONTROL_QW_WRITE));
 	intel_ring_emit(ring, intel_hws_seqno_address(request->engine));
 	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, i915_gem_request_get_seqno(request));
+	intel_ring_emit(ring, request->global_seqno);
 	/* We're thrashing one dword of HWS. */
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ec06d75a42aa..437eba266794 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1240,7 +1240,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *req)
 				PIPE_CONTROL_CS_STALL);
 		intel_ring_emit(ring, lower_32_bits(gtt_offset));
 		intel_ring_emit(ring, upper_32_bits(gtt_offset));
-		intel_ring_emit(ring, req->fence.seqno);
+		intel_ring_emit(ring, req->global_seqno);
 		intel_ring_emit(ring, 0);
 		intel_ring_emit(ring,
 				MI_SEMAPHORE_SIGNAL |
@@ -1276,7 +1276,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *req)
 				lower_32_bits(gtt_offset) |
 				MI_FLUSH_DW_USE_GTT);
 		intel_ring_emit(ring, upper_32_bits(gtt_offset));
-		intel_ring_emit(ring, req->fence.seqno);
+		intel_ring_emit(ring, req->global_seqno);
 		intel_ring_emit(ring,
 				MI_SEMAPHORE_SIGNAL |
 				MI_SEMAPHORE_TARGET(waiter->hw_id));
@@ -1309,7 +1309,7 @@ static int gen6_signal(struct drm_i915_gem_request *req)
 		if (i915_mmio_reg_valid(mbox_reg)) {
 			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 			intel_ring_emit_reg(ring, mbox_reg);
-			intel_ring_emit(ring, req->fence.seqno);
+			intel_ring_emit(ring, req->global_seqno);
 		}
 	}
 
@@ -1340,7 +1340,7 @@ static int i9xx_emit_request(struct drm_i915_gem_request *req)
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, req->fence.seqno);
+	intel_ring_emit(ring, req->global_seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	intel_ring_advance(ring);
 
@@ -1390,7 +1390,7 @@ static int gen8_render_emit_request(struct drm_i915_gem_request *req)
 			       PIPE_CONTROL_QW_WRITE));
 	intel_ring_emit(ring, intel_hws_seqno_address(engine));
 	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
+	intel_ring_emit(ring, req->global_seqno);
 	/* We're thrashing one dword of HWS. */
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
@@ -1428,7 +1428,7 @@ gen8_ring_sync_to(struct drm_i915_gem_request *req,
 			MI_SEMAPHORE_WAIT |
 			MI_SEMAPHORE_GLOBAL_GTT |
 			MI_SEMAPHORE_SAD_GTE_SDD);
-	intel_ring_emit(ring, signal->fence.seqno);
+	intel_ring_emit(ring, signal->global_seqno);
 	intel_ring_emit(ring, lower_32_bits(offset));
 	intel_ring_emit(ring, upper_32_bits(offset));
 	intel_ring_advance(ring);
@@ -1466,7 +1466,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req,
 	 * seqno is >= the last seqno executed. However for hardware the
 	 * comparison is strictly greater than.
 	 */
-	intel_ring_emit(ring, signal->fence.seqno - 1);
+	intel_ring_emit(ring, signal->global_seqno - 1);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 33/42] drm/i915: Rename ->emit_request to ->emit_breadcrumb
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (31 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 32/42] drm/i915: Introduce a global_seqno for each request Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 34/42] drm/i915: Record space required for breadcrumb emission Chris Wilson
                   ` (11 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Now that the emission of the request tail and its submission to hardware
are two separate steps, engine->emit_request() is confusing.
engine->emit_request() is called to emit the breadcrumb commands for the
request into the ring, so name it accordingly (engine->emit_breadcrumb).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_request.c |  4 ++--
 drivers/gpu/drm/i915/intel_lrc.c        | 10 +++++-----
 drivers/gpu/drm/i915/intel_ringbuffer.c | 16 ++++++++--------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index b29dddc35d34..bb8349209f61 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -676,8 +676,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	request->postfix = ring->tail;
 
 	/* Not allowed to fail! */
-	ret = engine->emit_request(request);
-	WARN(ret, "(%s)->emit_request failed: %d!\n", engine->name, ret);
+	ret = engine->emit_breadcrumb(request);
+	WARN(ret, "(%s)->emit_breadcrumb failed: %d!\n", engine->name, ret);
 
 	/* Sanity check that the reserved size was large enough. */
 	ret = ring->tail - request_start;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fb0e020fdfeb..2ec7569ea887 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -440,7 +440,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	if (last)
 		/* WaIdleLiteRestore:bdw,skl
 		 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
-		 * as we resubmit the request. See gen8_emit_request()
+		 * as we resubmit the request. See gen8_emit_breadcrumb()
 		 * for where we prepare the padding after the end of the
 		 * request.
 		 */
@@ -1567,7 +1567,7 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine)
  * restore with HEAD==TAIL (WaIdleLiteRestore).
  */
 
-static int gen8_emit_request(struct drm_i915_gem_request *request)
+static int gen8_emit_breadcrumb(struct drm_i915_gem_request *request)
 {
 	struct intel_ring *ring = request->ring;
 	int ret;
@@ -1590,7 +1590,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request)
 	return intel_logical_ring_advance(request);
 }
 
-static int gen8_emit_request_render(struct drm_i915_gem_request *request)
+static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request)
 {
 	struct intel_ring *ring = request->ring;
 	int ret;
@@ -1694,7 +1694,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->init_hw = gen8_init_common_ring;
 	engine->reset_hw = reset_common_ring;
 	engine->emit_flush = gen8_emit_flush;
-	engine->emit_request = gen8_emit_request;
+	engine->emit_breadcrumb = gen8_emit_breadcrumb;
 	engine->submit_request = execlists_submit_request;
 
 	engine->irq_enable = gen8_logical_ring_enable_irq;
@@ -1816,7 +1816,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 		engine->init_hw = gen8_init_render_ring;
 	engine->init_context = gen8_init_rcs_context;
 	engine->emit_flush = gen8_emit_flush_render;
-	engine->emit_request = gen8_emit_request_render;
+	engine->emit_breadcrumb = gen8_emit_breadcrumb_render;
 
 	ret = intel_engine_create_scratch(engine, 4096);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 437eba266794..e0376e07171f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1329,7 +1329,7 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 			intel_ring_offset(request->ring, request->tail));
 }
 
-static int i9xx_emit_request(struct drm_i915_gem_request *req)
+static int i9xx_emit_breadcrumb(struct drm_i915_gem_request *req)
 {
 	struct intel_ring *ring = req->ring;
 	int ret;
@@ -1350,14 +1350,14 @@ static int i9xx_emit_request(struct drm_i915_gem_request *req)
 }
 
 /**
- * gen6_sema_emit_request - Update the semaphore mailbox registers
+ * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers
  *
  * @request - request to write to the ring
  *
  * Update the mailbox registers in the *other* rings with the current seqno.
  * This acts like a signal in the canonical semaphore.
  */
-static int gen6_sema_emit_request(struct drm_i915_gem_request *req)
+static int gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req)
 {
 	int ret;
 
@@ -1365,10 +1365,10 @@ static int gen6_sema_emit_request(struct drm_i915_gem_request *req)
 	if (ret)
 		return ret;
 
-	return i9xx_emit_request(req);
+	return i9xx_emit_breadcrumb(req);
 }
 
-static int gen8_render_emit_request(struct drm_i915_gem_request *req)
+static int gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req)
 {
 	struct intel_engine_cs *engine = req->engine;
 	struct intel_ring *ring = req->ring;
@@ -2638,9 +2638,9 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	engine->init_hw = init_ring_common;
 	engine->reset_hw = reset_ring_common;
 
-	engine->emit_request = i9xx_emit_request;
+	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
 	if (i915.semaphores)
-		engine->emit_request = gen6_sema_emit_request;
+		engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
 	engine->submit_request = i9xx_submit_request;
 
 	if (INTEL_GEN(dev_priv) >= 8)
@@ -2667,7 +2667,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
 
 	if (INTEL_GEN(dev_priv) >= 8) {
 		engine->init_context = intel_rcs_ctx_init;
-		engine->emit_request = gen8_render_emit_request;
+		engine->emit_breadcrumb = gen8_render_emit_breadcrumb;
 		engine->emit_flush = gen8_render_ring_flush;
 		if (i915.semaphores)
 			engine->semaphore.signal = gen8_rcs_signal;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 7703044d23db..15e9b1e51dd0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -255,7 +255,7 @@ struct intel_engine_cs {
 #define I915_DISPATCH_SECURE BIT(0)
 #define I915_DISPATCH_PINNED BIT(1)
 #define I915_DISPATCH_RS     BIT(2)
-	int		(*emit_request)(struct drm_i915_gem_request *req);
+	int		(*emit_breadcrumb)(struct drm_i915_gem_request *req);
 
 	/* Pass the request to the hardware queue (e.g. directly into
 	 * the legacy ringbuffer or to the end of an execlist).
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 34/42] drm/i915: Record space required for breadcrumb emission
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (32 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 33/42] drm/i915: Rename ->emit_request to ->emit_breadcrumb Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 35/42] drm/i915: Defer " Chris Wilson
                   ` (10 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

In the next patch, we will use deferred breadcrumb emission. That requires
reserving sufficient space in the ringbuffer to emit the breadcrumb, which
first requires us to know how large the breadcrumb is.
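
As a worked example of the accounting this introduces (a standalone sketch
using the per-ring dword counts from the patch below; the engine count is
made up):

#include <stdio.h>

int main(void)
{
	int num_rings = 5 - 1;	/* semaphore signals go to every other engine */
	int sz;

	/* Base breadcrumb of 4 dwords, plus the per-ring semaphore
	 * signalling commands when semaphores are enabled.
	 */
	sz = 4;					/* i9xx_emit_breadcrumb_sz */
	sz += num_rings * 6;			/* gen8 signal: 6 dwords/ring */
	printf("gen8 xcs breadcrumb: %d dwords\n", sz);

	sz = 4;
	sz += num_rings * 3;			/* gen6 mailbox: 3 dwords/ring */
	if (num_rings & 1)
		sz++;				/* extra dword when odd */
	printf("gen6 breadcrumb: %d dwords\n", sz);

	/* gen8 render: base of 8 dwords plus 6 per ring to signal. */
	sz = 8 + num_rings * 6;
	printf("gen8 rcs breadcrumb: %d dwords\n", sz);
	return 0;
}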

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c |  1 +
 drivers/gpu/drm/i915/intel_lrc.c        |  6 ++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 29 +++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
 4 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index bb8349209f61..d7f27fb450a0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -433,6 +433,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * away, e.g. because a GPU scheduler has deferred it.
 	 */
 	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
+	GEM_BUG_ON(req->reserved_space < engine->emit_request_sz);
 
 	if (i915.enable_execlists)
 		ret = intel_logical_ring_alloc_request_extras(req);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2ec7569ea887..2529fb39aa14 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1590,6 +1590,8 @@ static int gen8_emit_breadcrumb(struct drm_i915_gem_request *request)
 	return intel_logical_ring_advance(request);
 }
 
+static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
+
 static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request)
 {
 	struct intel_ring *ring = request->ring;
@@ -1621,6 +1623,8 @@ static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request)
 	return intel_logical_ring_advance(request);
 }
 
+static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
+
 static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
 {
 	int ret;
@@ -1695,6 +1699,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->reset_hw = reset_common_ring;
 	engine->emit_flush = gen8_emit_flush;
 	engine->emit_breadcrumb = gen8_emit_breadcrumb;
+	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
 	engine->submit_request = execlists_submit_request;
 
 	engine->irq_enable = gen8_logical_ring_enable_irq;
@@ -1817,6 +1822,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	engine->init_context = gen8_init_rcs_context;
 	engine->emit_flush = gen8_emit_flush_render;
 	engine->emit_breadcrumb = gen8_emit_breadcrumb_render;
+	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_render_sz;
 
 	ret = intel_engine_create_scratch(engine, 4096);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e0376e07171f..9d5c1f4656fd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1349,6 +1349,8 @@ static int i9xx_emit_breadcrumb(struct drm_i915_gem_request *req)
 	return 0;
 }
 
+static const int i9xx_emit_breadcrumb_sz = 4;
+
 /**
  * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers
  *
@@ -1402,6 +1404,8 @@ static int gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req)
 	return 0;
 }
 
+static const int gen8_render_emit_breadcrumb_sz = 8;
+
 /**
  * intel_ring_sync - sync the waiter to the signaller on seqno
  *
@@ -2639,8 +2643,21 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	engine->reset_hw = reset_ring_common;
 
 	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
-	if (i915.semaphores)
+	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
+	if (i915.semaphores) {
+		int num_rings;
+
 		engine->emit_breadcrumb = gen6_sema_emit_breadcrumb;
+
+		num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1;
+		if (INTEL_GEN(dev_priv) >= 8) {
+			engine->emit_breadcrumb_sz += num_rings * 6;
+		} else {
+			engine->emit_breadcrumb_sz += num_rings * 3;
+			if (num_rings & 1)
+				engine->emit_breadcrumb_sz++;
+		}
+	}
 	engine->submit_request = i9xx_submit_request;
 
 	if (INTEL_GEN(dev_priv) >= 8)
@@ -2668,9 +2685,17 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
 	if (INTEL_GEN(dev_priv) >= 8) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_breadcrumb = gen8_render_emit_breadcrumb;
+		engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz;
 		engine->emit_flush = gen8_render_ring_flush;
-		if (i915.semaphores)
+		if (i915.semaphores) {
+			int num_rings;
+
 			engine->semaphore.signal = gen8_rcs_signal;
+
+			num_rings =
+				hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1;
+			engine->emit_breadcrumb_sz += num_rings * 6;
+		}
 	} else if (INTEL_GEN(dev_priv) >= 6) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen7_render_ring_flush;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 15e9b1e51dd0..704cd184ae1b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -256,6 +256,7 @@ struct intel_engine_cs {
 #define I915_DISPATCH_PINNED BIT(1)
 #define I915_DISPATCH_RS     BIT(2)
 	int		(*emit_breadcrumb)(struct drm_i915_gem_request *req);
+	int		emit_breadcrumb_sz;
 
 	/* Pass the request to the hardware queue (e.g. directly into
 	 * the legacy ringbuffer or to the end of an execlist).
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 35/42] drm/i915: Defer breadcrumb emission
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (33 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 34/42] drm/i915: Record space required for breadcrumb emission Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 36/42] drm/i915: Move the global sync optimisation to the timeline Chris Wilson
                   ` (9 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Move the actual emission of the breadcrumb for closing the request from
i915_add_request() to the submit callback. (It can be moved later when
required.) This allows us to defer the allocation of the global_seqno
from request construction to actual submission, allowing us to emit the
requests out of order (with respect to the order of their construction;
they will still only be executed once all of their dependencies are
resolved, including that all earlier requests on their timeline have been
submitted). We have to specialise how we then emit the request in order
to write into the preallocated space, rather than at the tail of the
ringbuffer (which will have been advanced by the addition of new
requests).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
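As a standalone illustration of the flow (a userspace sketch, not the
driver code; the opcodes, sizes and names below are invented): space for
the breadcrumb is reserved while the request is constructed, and at
submission the emitter writes its dwords through a plain pointer into
that reserved space, deriving the request tail from the final write
pointer rather than from the current ring tail.

#include <stdint.h>
#include <stdio.h>

#define RING_SIZE       4096u   /* bytes, power of two */
#define BREADCRUMB_SZ   4u      /* dwords reserved per request */

struct fake_ring {
        uint32_t vaddr[RING_SIZE / 4];
        uint32_t tail;                  /* byte offset */
};

struct fake_request {
        struct fake_ring *ring;
        uint32_t postfix;               /* start of the reserved space */
        uint32_t seqno;
        uint32_t tail;
};

/* like intel_ring_offset(): byte offset of a write pointer, wrapped */
static uint32_t ring_offset(const struct fake_ring *ring, const uint32_t *addr)
{
        uint32_t offset = (const char *)addr - (const char *)ring->vaddr;

        return offset & (RING_SIZE - 1);
}

/* the emitter writes through a caller-supplied pointer instead of
 * advancing the ring tail itself, so it can fill space reserved earlier */
static void emit_breadcrumb(struct fake_request *rq, uint32_t *out)
{
        *out++ = 0x10800000;            /* pretend MI_STORE_DWORD_INDEX */
        *out++ = 0x80;                  /* pretend HWS offset */
        *out++ = rq->seqno;
        *out++ = 0x01000000;            /* pretend MI_USER_INTERRUPT */

        rq->tail = ring_offset(rq->ring, out);
}

int main(void)
{
        struct fake_ring ring = { .tail = 256 };
        struct fake_request rq = { .ring = &ring, .seqno = 42 };

        /* at add_request: reserve the dwords, the ring tail moves on */
        rq.postfix = ring.tail;
        ring.tail += BREADCRUMB_SZ * sizeof(uint32_t);

        /* at submission: fill in the reserved space */
        emit_breadcrumb(&rq, ring.vaddr + rq.postfix / 4);

        printf("postfix=%u, request tail=%u, ring tail=%u\n",
               rq.postfix, rq.tail, ring.tail);
        return 0;
}
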
 drivers/gpu/drm/i915/i915_gem_request.c |  43 +++-----
 drivers/gpu/drm/i915/intel_lrc.c        | 120 ++++++++---------------
 drivers/gpu/drm/i915/intel_ringbuffer.c | 169 +++++++++++---------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  10 +-
 4 files changed, 119 insertions(+), 223 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index d7f27fb450a0..0b8a26803036 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -317,17 +317,16 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 		container_of(fence, typeof(*request), submit);
 	struct intel_engine_cs *engine = request->engine;
 
+	if (state != FENCE_COMPLETE)
+		return NOTIFY_DONE;
+
 	/* Will be called from irq-context when using foreign DMA fences */
 
-	switch (state) {
-	case FENCE_COMPLETE:
-		engine->timeline->last_submitted_seqno = request->fence.seqno;
-		engine->submit_request(request);
-		break;
+	engine->timeline->last_submitted_seqno = request->fence.seqno;
 
-	case FENCE_FREE:
-		break;
-	}
+	engine->emit_breadcrumb(request,
+				request->ring->vaddr + request->postfix);
+	engine->submit_request(request);
 
 	return NOTIFY_DONE;
 }
@@ -433,7 +432,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * away, e.g. because a GPU scheduler has deferred it.
 	 */
 	req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
-	GEM_BUG_ON(req->reserved_space < engine->emit_request_sz);
+	GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz);
 
 	if (i915.enable_execlists)
 		ret = intel_logical_ring_alloc_request_extras(req);
@@ -639,9 +638,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	struct intel_ring *ring = request->ring;
 	struct intel_timeline *timeline = request->timeline;
 	struct drm_i915_gem_request *prev;
-	u32 request_start;
-	u32 reserved_tail;
-	int ret;
+	int err;
 
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
 	trace_i915_gem_request_add(request);
@@ -651,8 +648,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * should already have been reserved in the ring buffer. Let the ring
 	 * know that it is time to use that space up.
 	 */
-	request_start = ring->tail;
-	reserved_tail = request->reserved_space;
 	request->reserved_space = 0;
 
 	/*
@@ -663,10 +658,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * what.
 	 */
 	if (flush_caches) {
-		ret = engine->emit_flush(request, EMIT_FLUSH);
+		err = engine->emit_flush(request, EMIT_FLUSH);
 
 		/* Not allowed to fail! */
-		WARN(ret, "engine->emit_flush() failed: %d!\n", ret);
+		WARN(err, "engine->emit_flush() failed: %d!\n", err);
 	}
 
 	/* Record the position of the start of the breadcrumb so that
@@ -674,20 +669,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	 * GPU processing the request, we never over-estimate the
 	 * position of the ring's HEAD.
 	 */
+	err = intel_ring_begin(request, engine->emit_breadcrumb_sz);
+	GEM_BUG_ON(err);
 	request->postfix = ring->tail;
-
-	/* Not allowed to fail! */
-	ret = engine->emit_breadcrumb(request);
-	WARN(ret, "(%s)->emit_breadcrumb failed: %d!\n", engine->name, ret);
-
-	/* Sanity check that the reserved size was large enough. */
-	ret = ring->tail - request_start;
-	if (ret < 0)
-		ret += ring->size;
-	WARN_ONCE(ret > reserved_tail,
-		  "Not enough space reserved (%d bytes) "
-		  "for adding the request (%d bytes)\n",
-		  reserved_tail, ret);
+	ring->tail += engine->emit_breadcrumb_sz * sizeof(u32);
 
 	/* Seal the request and mark it as pending execution. Note that
 	 * we may inspect this state, without holding any locks, during
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 2529fb39aa14..ce2ae0ecf88d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -365,7 +365,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
-	reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail);
+	reg_state[CTX_RING_TAIL+1] = rq->tail;
 
 	/* True 32b PPGTT with dynamic page allocation: update PDP
 	 * registers and point the unallocated PDPs to scratch page.
@@ -599,6 +599,15 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 
 	spin_lock_irqsave(&engine->execlist_lock, flags);
 
+	/* We keep the previous context alive until we retire the following
+	 * request. This ensures that any the context object is still pinned
+	 * for any residual writes the HW makes into it on the context switch
+	 * into the next object following the breadcrumb. Otherwise, we may
+	 * retire the context too early.
+	 */
+	request->previous_context = engine->last_context;
+	engine->last_context = request->ctx;
+
 	list_add_tail(&request->execlist_link, &engine->execlist_queue);
 	if (execlists_elsp_idle(engine))
 		tasklet_hi_schedule(&engine->irq_tasklet);
@@ -671,46 +680,6 @@ err_unpin:
 	return ret;
 }
 
-/*
- * intel_logical_ring_advance() - advance the tail and prepare for submission
- * @request: Request to advance the logical ringbuffer of.
- *
- * The tail is updated in our logical ringbuffer struct, not in the actual context. What
- * really happens during submission is that the context and current tail will be placed
- * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
- * point, the tail *inside* the context is updated and the ELSP written to.
- */
-static int
-intel_logical_ring_advance(struct drm_i915_gem_request *request)
-{
-	struct intel_ring *ring = request->ring;
-	struct intel_engine_cs *engine = request->engine;
-
-	intel_ring_advance(ring);
-	request->tail = ring->tail;
-
-	/*
-	 * Here we add two extra NOOPs as padding to avoid
-	 * lite restore of a context with HEAD==TAIL.
-	 *
-	 * Caller must reserve WA_TAIL_DWORDS for us!
-	 */
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
-	request->wa_tail = ring->tail;
-
-	/* We keep the previous context alive until we retire the following
-	 * request. This ensures that any the context object is still pinned
-	 * for any residual writes the HW makes into it on the context switch
-	 * into the next object following the breadcrumb. Otherwise, we may
-	 * retire the context too early.
-	 */
-	request->previous_context = engine->last_context;
-	engine->last_context = request->ctx;
-	return 0;
-}
-
 static int intel_lr_context_pin(struct i915_gem_context *ctx,
 				struct intel_engine_cs *engine)
 {
@@ -1566,41 +1535,35 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine)
  * used as a workaround for not being allowed to do lite
  * restore with HEAD==TAIL (WaIdleLiteRestore).
  */
-
-static int gen8_emit_breadcrumb(struct drm_i915_gem_request *request)
+static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *out)
 {
-	struct intel_ring *ring = request->ring;
-	int ret;
-
-	ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS);
-	if (ret)
-		return ret;
+	*out++ = MI_NOOP;
+	*out++ = MI_NOOP;
+	request->wa_tail = intel_ring_offset(request->ring, out);
+}
 
+static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request,
+				 u32 *out)
+{
 	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
 	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
 
-	intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
-	intel_ring_emit(ring,
-			intel_hws_seqno_address(request->engine) |
-			MI_FLUSH_DW_USE_GTT);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, request->global_seqno);
-	intel_ring_emit(ring, MI_USER_INTERRUPT);
-	intel_ring_emit(ring, MI_NOOP);
-	return intel_logical_ring_advance(request);
+	*out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+	*out++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT;
+	*out++ = 0;
+	*out++ = request->global_seqno;
+	*out++ = MI_USER_INTERRUPT;
+	*out++ = MI_NOOP;
+	request->tail = intel_ring_offset(request->ring, out);
+
+	gen8_emit_wa_tail(request, out);
 }
 
 static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS;
 
-static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request)
+static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
+					u32 *out)
 {
-	struct intel_ring *ring = request->ring;
-	int ret;
-
-	ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS);
-	if (ret)
-		return ret;
-
 	/* We're using qword write, seqno should be aligned to 8 bytes. */
 	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
 
@@ -1608,19 +1571,20 @@ static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request)
 	 * need a prior CS_STALL, which is emitted by the flush
 	 * following the batch.
 	 */
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
-	intel_ring_emit(ring,
-			(PIPE_CONTROL_GLOBAL_GTT_IVB |
-			 PIPE_CONTROL_CS_STALL |
-			 PIPE_CONTROL_QW_WRITE));
-	intel_ring_emit(ring, intel_hws_seqno_address(request->engine));
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, request->global_seqno);
+	*out++ = GFX_OP_PIPE_CONTROL(6);
+	*out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB |
+		  PIPE_CONTROL_CS_STALL |
+		  PIPE_CONTROL_QW_WRITE);
+	*out++ = intel_hws_seqno_address(request->engine);
+	*out++ = 0;
+	*out++ = request->global_seqno;
 	/* We're thrashing one dword of HWS. */
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_USER_INTERRUPT);
-	intel_ring_emit(ring, MI_NOOP);
-	return intel_logical_ring_advance(request);
+	*out++ = 0;
+	*out++ = MI_USER_INTERRUPT;
+	*out++ = MI_NOOP;
+	request->tail = intel_ring_offset(request->ring, out);
+
+	gen8_emit_wa_tail(request, out);
 }
 
 static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9d5c1f4656fd..921614b43d79 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1215,89 +1215,61 @@ static void render_ring_cleanup(struct intel_engine_cs *engine)
 	i915_vma_unpin_and_release(&dev_priv->semaphore);
 }
 
-static int gen8_rcs_signal(struct drm_i915_gem_request *req)
+static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out)
 {
-	struct intel_ring *ring = req->ring;
 	struct drm_i915_private *dev_priv = req->i915;
 	struct intel_engine_cs *waiter;
 	enum intel_engine_id id;
-	int ret, num_rings;
-
-	num_rings = INTEL_INFO(dev_priv)->num_rings;
-	ret = intel_ring_begin(req, (num_rings-1) * 8);
-	if (ret)
-		return ret;
 
 	for_each_engine_id(waiter, dev_priv, id) {
 		u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
 		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
 			continue;
 
-		intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
-		intel_ring_emit(ring,
-				PIPE_CONTROL_GLOBAL_GTT_IVB |
-				PIPE_CONTROL_QW_WRITE |
-				PIPE_CONTROL_CS_STALL);
-		intel_ring_emit(ring, lower_32_bits(gtt_offset));
-		intel_ring_emit(ring, upper_32_bits(gtt_offset));
-		intel_ring_emit(ring, req->global_seqno);
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring,
-				MI_SEMAPHORE_SIGNAL |
-				MI_SEMAPHORE_TARGET(waiter->hw_id));
-		intel_ring_emit(ring, 0);
+		*out++ = GFX_OP_PIPE_CONTROL(6);
+		*out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB |
+			  PIPE_CONTROL_QW_WRITE |
+			  PIPE_CONTROL_CS_STALL);
+		*out++ = lower_32_bits(gtt_offset);
+		*out++ = upper_32_bits(gtt_offset);
+		*out++ = req->global_seqno;
+		*out++ = 0;
+		*out++ = (MI_SEMAPHORE_SIGNAL |
+			  MI_SEMAPHORE_TARGET(waiter->hw_id));
+		*out++ = 0;
 	}
-	intel_ring_advance(ring);
 
-	return 0;
+	return out;
 }
 
-static int gen8_xcs_signal(struct drm_i915_gem_request *req)
+static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out)
 {
-	struct intel_ring *ring = req->ring;
 	struct drm_i915_private *dev_priv = req->i915;
 	struct intel_engine_cs *waiter;
 	enum intel_engine_id id;
-	int ret, num_rings;
-
-	num_rings = INTEL_INFO(dev_priv)->num_rings;
-	ret = intel_ring_begin(req, (num_rings-1) * 6);
-	if (ret)
-		return ret;
 
 	for_each_engine_id(waiter, dev_priv, id) {
 		u64 gtt_offset = req->engine->semaphore.signal_ggtt[id];
 		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
 			continue;
 
-		intel_ring_emit(ring,
-				(MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
-		intel_ring_emit(ring,
-				lower_32_bits(gtt_offset) |
-				MI_FLUSH_DW_USE_GTT);
-		intel_ring_emit(ring, upper_32_bits(gtt_offset));
-		intel_ring_emit(ring, req->global_seqno);
-		intel_ring_emit(ring,
-				MI_SEMAPHORE_SIGNAL |
-				MI_SEMAPHORE_TARGET(waiter->hw_id));
-		intel_ring_emit(ring, 0);
+		*out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+		*out++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT;
+		*out++ = upper_32_bits(gtt_offset);
+		*out++ = req->global_seqno;
+		*out++ = (MI_SEMAPHORE_SIGNAL |
+			  MI_SEMAPHORE_TARGET(waiter->hw_id));
+		*out++ = 0;
 	}
-	intel_ring_advance(ring);
 
-	return 0;
+	return out;
 }
 
-static int gen6_signal(struct drm_i915_gem_request *req)
+static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out)
 {
-	struct intel_ring *ring = req->ring;
 	struct drm_i915_private *dev_priv = req->i915;
 	struct intel_engine_cs *engine;
-	int ret, num_rings;
-
-	num_rings = INTEL_INFO(dev_priv)->num_rings;
-	ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2));
-	if (ret)
-		return ret;
+	int num_rings = 0;
 
 	for_each_engine(engine, dev_priv) {
 		i915_reg_t mbox_reg;
@@ -1307,46 +1279,34 @@ static int gen6_signal(struct drm_i915_gem_request *req)
 
 		mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id];
 		if (i915_mmio_reg_valid(mbox_reg)) {
-			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-			intel_ring_emit_reg(ring, mbox_reg);
-			intel_ring_emit(ring, req->global_seqno);
+			*out++ = MI_LOAD_REGISTER_IMM(1);
+			*out++ = i915_mmio_reg_offset(mbox_reg);
+			*out++ = req->global_seqno;
+			num_rings++;
 		}
 	}
+	if (num_rings & 1)
+		*out++ = MI_NOOP;
 
-	/* If num_dwords was rounded, make sure the tail pointer is correct */
-	if (num_rings % 2 == 0)
-		intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
-
-	return 0;
+	return out;
 }
 
 static void i9xx_submit_request(struct drm_i915_gem_request *request)
 {
 	struct drm_i915_private *dev_priv = request->i915;
 
-	I915_WRITE_TAIL(request->engine,
-			intel_ring_offset(request->ring, request->tail));
+	I915_WRITE_TAIL(request->engine, request->tail);
 }
 
-static int i9xx_emit_breadcrumb(struct drm_i915_gem_request *req)
+static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req,
+				 u32 *out)
 {
-	struct intel_ring *ring = req->ring;
-	int ret;
-
-	ret = intel_ring_begin(req, 4);
-	if (ret)
-		return ret;
-
-	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
-	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, req->global_seqno);
-	intel_ring_emit(ring, MI_USER_INTERRUPT);
-	intel_ring_advance(ring);
-
-	req->tail = ring->tail;
+	*out++ = MI_STORE_DWORD_INDEX;
+	*out++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
+	*out++ = req->global_seqno;
+	*out++ = MI_USER_INTERRUPT;
 
-	return 0;
+	req->tail = intel_ring_offset(req->ring, out);
 }
 
 static const int i9xx_emit_breadcrumb_sz = 4;
@@ -1359,49 +1319,34 @@ static const int i9xx_emit_breadcrumb_sz = 4;
  * Update the mailbox registers in the *other* rings with the current seqno.
  * This acts like a signal in the canonical semaphore.
  */
-static int gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req)
+static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req,
+				      u32 *out)
 {
-	int ret;
-
-	ret = req->engine->semaphore.signal(req);
-	if (ret)
-		return ret;
-
-	return i9xx_emit_breadcrumb(req);
+	return i9xx_emit_breadcrumb(req,
+				    req->engine->semaphore.signal(req, out));
 }
 
-static int gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req)
+static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
+					u32 *out)
 {
 	struct intel_engine_cs *engine = req->engine;
-	struct intel_ring *ring = req->ring;
-	int ret;
 
-	if (engine->semaphore.signal) {
-		ret = engine->semaphore.signal(req);
-		if (ret)
-			return ret;
-	}
-
-	ret = intel_ring_begin(req, 8);
-	if (ret)
-		return ret;
+	if (engine->semaphore.signal)
+		out = engine->semaphore.signal(req, out);
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
-	intel_ring_emit(ring, (PIPE_CONTROL_GLOBAL_GTT_IVB |
+	*out++ = GFX_OP_PIPE_CONTROL(6);
+	*out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB |
 			       PIPE_CONTROL_CS_STALL |
-			       PIPE_CONTROL_QW_WRITE));
-	intel_ring_emit(ring, intel_hws_seqno_address(engine));
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, req->global_seqno);
+			       PIPE_CONTROL_QW_WRITE);
+	*out++ = intel_hws_seqno_address(engine);
+	*out++ = 0;
+	*out++ = req->global_seqno;
 	/* We're thrashing one dword of HWS. */
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_USER_INTERRUPT);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
-
-	req->tail = ring->tail;
+	*out++ = 0;
+	*out++ = MI_USER_INTERRUPT;
+	*out++ = MI_NOOP;
 
-	return 0;
+	req->tail = intel_ring_offset(req->ring, out);
 }
 
 static const int gen8_render_emit_breadcrumb_sz = 8;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 704cd184ae1b..871bddd85e4a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -255,7 +255,8 @@ struct intel_engine_cs {
 #define I915_DISPATCH_SECURE BIT(0)
 #define I915_DISPATCH_PINNED BIT(1)
 #define I915_DISPATCH_RS     BIT(2)
-	int		(*emit_breadcrumb)(struct drm_i915_gem_request *req);
+	void		(*emit_breadcrumb)(struct drm_i915_gem_request *req,
+					   u32 *out);
 	int		emit_breadcrumb_sz;
 
 	/* Pass the request to the hardware queue (e.g. directly into
@@ -331,7 +332,7 @@ struct intel_engine_cs {
 		/* AKA wait() */
 		int	(*sync_to)(struct drm_i915_gem_request *req,
 				   struct drm_i915_gem_request *signal);
-		int	(*signal)(struct drm_i915_gem_request *req);
+		u32	*(*signal)(struct drm_i915_gem_request *req, u32 *out);
 	} semaphore;
 
 	/* Execlists */
@@ -493,10 +494,11 @@ static inline void intel_ring_advance(struct intel_ring *ring)
 	 */
 }
 
-static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value)
+static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr)
 {
 	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
-	return value & (ring->size - 1);
+	u32 offset = addr - ring->vaddr;
+	return offset & (ring->size - 1);
 }
 
 int __intel_ring_space(int head, int tail, int size);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 36/42] drm/i915: Move the global sync optimisation to the timeline
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (34 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 35/42] drm/i915: Defer " Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 37/42] drm/i915: Create a unique name for the context Chris Wilson
                   ` (8 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Currently we try to reduce the number of synchronisations (now the
number of requests we need to wait upon) by noting that if we have
already waited upon a request, all subsequent requests in the timeline
will be after that wait. This optimisation only applies to requests
within the same timeline, as other timelines will not be ordered by
that waiter, so move the bookkeeping from the engine onto the timeline
itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
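The bookkeeping itself is tiny; roughly, as a toy userspace model
(types and names invented), the per-timeline array replaces the old
per-engine sync_seqno[]:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_ENGINES     5

struct timeline {
        uint32_t sync_seqno[NUM_ENGINES]; /* last seqno awaited, per engine */
};

struct request {
        unsigned int engine_id;
        uint32_t global_seqno;
};

/* wrap-safe "a has passed b" */
static bool seqno_passed(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) >= 0;
}

/* returns true if the caller still has to emit a wait/semaphore */
static bool await_needed(struct timeline *to, const struct request *from)
{
        if (seqno_passed(to->sync_seqno[from->engine_id], from->global_seqno))
                return false;   /* covered by an earlier, later-or-equal wait */

        to->sync_seqno[from->engine_id] = from->global_seqno;
        return true;
}

int main(void)
{
        struct timeline tl = {{ 0 }};
        struct request a = { .engine_id = 1, .global_seqno = 10 };
        struct request b = { .engine_id = 1, .global_seqno = 8 };

        printf("%d\n", await_needed(&tl, &a)); /* 1: first wait on engine 1 */
        printf("%d\n", await_needed(&tl, &b)); /* 0: already ordered after a */
        return 0;
}
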
 drivers/gpu/drm/i915/i915_debugfs.c      |  9 ------
 drivers/gpu/drm/i915/i915_drv.h          |  1 -
 drivers/gpu/drm/i915/i915_gem_request.c  | 30 ++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_timeline.h |  1 +
 drivers/gpu/drm/i915/i915_gpu_error.c    | 48 +++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_engine_cs.c   |  2 --
 drivers/gpu/drm/i915/intel_ringbuffer.c  |  3 --
 drivers/gpu/drm/i915/intel_ringbuffer.h  | 23 ---------------
 8 files changed, 48 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 77491eb3340f..8640d2d805bd 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3257,15 +3257,6 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
 		seq_putc(m, '\n');
 	}
 
-	seq_puts(m, "\nSync seqno:\n");
-	for_each_engine(engine, dev_priv) {
-		for (j = 0; j < num_rings; j++)
-			seq_printf(m, "  0x%08x ",
-				   engine->semaphore.sync_seqno[j]);
-		seq_putc(m, '\n');
-	}
-	seq_putc(m, '\n');
-
 	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 78b54fb4b56b..1620703f4a64 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -798,7 +798,6 @@ struct drm_i915_error_state {
 		u32 cpu_ring_tail;
 
 		u32 last_seqno;
-		u32 semaphore_seqno[I915_NUM_ENGINES - 1];
 
 		/* Register state */
 		u32 start;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 0b8a26803036..e0f627a94fde 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -238,34 +238,41 @@ static int i915_gem_check_wedge(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-static int i915_gem_init_global_seqno(struct drm_i915_private *dev_priv,
-				      u32 seqno)
+static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
 {
-	struct i915_gem_timeline *timeline = &dev_priv->gt.global_timeline;
+	struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
 	struct intel_engine_cs *engine;
 	int ret;
 
 	/* Carefully retire all requests without writing to the rings */
-	ret = i915_gem_wait_for_idle(dev_priv,
+	ret = i915_gem_wait_for_idle(i915,
 				     I915_WAIT_INTERRUPTIBLE |
 				     I915_WAIT_LOCKED);
 	if (ret)
 		return ret;
 
-	i915_gem_retire_requests(dev_priv);
+	i915_gem_retire_requests(i915);
 
 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
 	if (!i915_seqno_passed(seqno, timeline->next_seqno)) {
-		while (intel_kick_waiters(dev_priv) ||
-		       intel_kick_signalers(dev_priv))
+		while (intel_kick_waiters(i915) || intel_kick_signalers(i915))
 			yield();
 		yield();
 	}
 
 	/* Finally reset hw state */
-	for_each_engine(engine, dev_priv)
+	for_each_engine(engine, i915)
 		intel_engine_init_global_seqno(engine, seqno);
 
+	list_for_each_entry(timeline, &i915->gt.timelines, link) {
+		for_each_engine(engine, i915) {
+			struct intel_timeline *tl =
+				&timeline->engine[engine->id];
+
+			memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno));
+		}
+	}
+
 	return 0;
 }
 
@@ -461,7 +468,7 @@ static int
 i915_gem_request_await_request(struct drm_i915_gem_request *to,
 			       struct drm_i915_gem_request *from)
 {
-	int idx, ret;
+	int ret;
 
 	GEM_BUG_ON(to == from);
 
@@ -482,8 +489,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 		return ret < 0 ? ret : 0;
 	}
 
-	idx = intel_engine_sync_index(from->engine, to->engine);
-	if (from->global_seqno <= from->engine->semaphore.sync_seqno[idx])
+	if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
 		return 0;
 
 	trace_i915_gem_ring_sync_to(to, from);
@@ -501,7 +507,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 			return ret;
 	}
 
-	from->engine->semaphore.sync_seqno[idx] = from->global_seqno;
+	to->timeline->sync_seqno[from->engine->id] = from->global_seqno;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index bfdf0331cc50..767b23914ec5 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -48,6 +48,7 @@ struct intel_timeline {
 	 * struct_mutex.
 	 */
 	struct i915_gem_active last_request;
+	u32 sync_seqno[I915_NUM_ENGINES];
 
 	struct i915_gem_timeline *common;
 };
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b6bf6fafd849..6bbe84e732b3 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -403,17 +403,13 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 	if (INTEL_GEN(m->i915) >= 6) {
 		err_printf(m, "  RC PSMI: 0x%08x\n", ee->rc_psmi);
 		err_printf(m, "  FAULT_REG: 0x%08x\n", ee->fault_reg);
-		err_printf(m, "  SYNC_0: 0x%08x [last synced 0x%08x]\n",
-			   ee->semaphore_mboxes[0],
-			   ee->semaphore_seqno[0]);
-		err_printf(m, "  SYNC_1: 0x%08x [last synced 0x%08x]\n",
-			   ee->semaphore_mboxes[1],
-			   ee->semaphore_seqno[1]);
-		if (HAS_VEBOX(m->i915)) {
-			err_printf(m, "  SYNC_2: 0x%08x [last synced 0x%08x]\n",
-				   ee->semaphore_mboxes[2],
-				   ee->semaphore_seqno[2]);
-		}
+		err_printf(m, "  SYNC_0: 0x%08x\n",
+			   ee->semaphore_mboxes[0]);
+		err_printf(m, "  SYNC_1: 0x%08x\n",
+			   ee->semaphore_mboxes[1]);
+		if (HAS_VEBOX(m->i915))
+			err_printf(m, "  SYNC_2: 0x%08x\n",
+				   ee->semaphore_mboxes[2]);
 	}
 	if (USES_PPGTT(m->i915)) {
 		err_printf(m, "  GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode);
@@ -957,6 +953,26 @@ static void i915_gem_record_fences(struct drm_i915_private *dev_priv,
 	}
 }
 
+static inline u32
+gen8_engine_sync_index(struct intel_engine_cs *engine,
+		       struct intel_engine_cs *other)
+{
+	int idx;
+
+	/*
+	 * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
+	 * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
+	 * bcs -> 0 = vecs, 1 = vcs2. 2 = rcs, 3 = vcs;
+	 * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
+	 * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
+	 */
+
+	idx = (other - engine) - 1;
+	if (idx < 0)
+		idx += I915_NUM_ENGINES;
+
+	return idx;
+}
 
 static void gen8_record_semaphore_state(struct drm_i915_error_state *error,
 					struct intel_engine_cs *engine,
@@ -980,10 +996,9 @@ static void gen8_record_semaphore_state(struct drm_i915_error_state *error,
 		signal_offset =
 			(GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4;
 		tmp = error->semaphore->pages[0];
-		idx = intel_engine_sync_index(engine, to);
+		idx = gen8_engine_sync_index(engine, to);
 
 		ee->semaphore_mboxes[idx] = tmp[signal_offset];
-		ee->semaphore_seqno[idx] = engine->semaphore.sync_seqno[idx];
 	}
 }
 
@@ -994,14 +1009,9 @@ static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
 
 	ee->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base));
 	ee->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base));
-	ee->semaphore_seqno[0] = engine->semaphore.sync_seqno[0];
-	ee->semaphore_seqno[1] = engine->semaphore.sync_seqno[1];
-
-	if (HAS_VEBOX(dev_priv)) {
+	if (HAS_VEBOX(dev_priv))
 		ee->semaphore_mboxes[2] =
 			I915_READ(RING_SYNC_2(engine->mmio_base));
-		ee->semaphore_seqno[2] = engine->semaphore.sync_seqno[2];
-	}
 }
 
 static void error_record_engine_waiters(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 276f3f38ab41..755f1a8b76d8 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -191,8 +191,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 				       I915_NUM_ENGINES * gen8_semaphore_seqno_size);
 		kunmap(page);
 	}
-	memset(engine->semaphore.sync_seqno, 0,
-	       sizeof(engine->semaphore.sync_seqno));
 
 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
 	if (engine->irq_seqno_barrier)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 921614b43d79..3abfbe3cfed9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2004,9 +2004,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 
 	intel_engine_setup_common(engine);
 
-	memset(engine->semaphore.sync_seqno, 0,
-	       sizeof(engine->semaphore.sync_seqno));
-
 	ret = intel_engine_init_common(engine);
 	if (ret)
 		goto error;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 871bddd85e4a..652e37c9e0c9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -314,8 +314,6 @@ struct intel_engine_cs {
 	 *  ie. transpose of f(x, y)
 	 */
 	struct {
-		u32	sync_seqno[I915_NUM_ENGINES-1];
-
 		union {
 #define GEN6_SEMAPHORE_LAST	VECS_HW
 #define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
@@ -391,27 +389,6 @@ intel_engine_flag(const struct intel_engine_cs *engine)
 	return 1 << engine->id;
 }
 
-static inline u32
-intel_engine_sync_index(struct intel_engine_cs *engine,
-			struct intel_engine_cs *other)
-{
-	int idx;
-
-	/*
-	 * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
-	 * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
-	 * bcs -> 0 = vecs, 1 = vcs2. 2 = rcs, 3 = vcs;
-	 * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
-	 * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
-	 */
-
-	idx = (other - engine) - 1;
-	if (idx < 0)
-		idx += I915_NUM_ENGINES;
-
-	return idx;
-}
-
 static inline void
 intel_flush_status_page(struct intel_engine_cs *engine, int reg)
 {
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 37/42] drm/i915: Create a unique name for the context
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (35 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 36/42] drm/i915: Move the global sync optimisation to the timeline Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 38/42] drm/i915: Reserve space in the global seqno during request allocation Chris Wilson
                   ` (7 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

This will be used for communicating issues with this context to
userspace, so we want to identify the parent process and the individual
context. Note that the name isn't quite unique; it presumes that there
is only a single device fd per process.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
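The format itself is the "%s[%d]/%x" used by the kasprintf() below; a
trivial userspace sketch of the resulting name, with invented example
values:

#include <stdio.h>

int main(void)
{
        char name[64];
        const char *comm = "gnome-shell";       /* task comm */
        int pid = 1234;                         /* pid_nr(ctx->pid) */
        unsigned int user_handle = 3;           /* per-fd context handle */

        snprintf(name, sizeof(name), "%s[%d]/%x", comm, pid, user_handle);
        printf("%s\n", name);                   /* gnome-shell[1234]/3 */
        return 0;
}
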
 drivers/gpu/drm/i915/i915_debugfs.c     | 11 ++---------
 drivers/gpu/drm/i915/i915_drv.h         |  1 +
 drivers/gpu/drm/i915/i915_gem_context.c | 23 ++++++++++++++++++-----
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8640d2d805bd..1188bc879e9e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -638,17 +638,10 @@ static void print_request(struct seq_file *m,
 			  struct drm_i915_gem_request *rq,
 			  const char *prefix)
 {
-	struct pid *pid = rq->ctx->pid;
-	struct task_struct *task;
-
-	rcu_read_lock();
-	task = pid ? pid_task(pid, PIDTYPE_PID) : NULL;
-	seq_printf(m, "%s%x [%x:%x] @ %d: %s [%d]\n", prefix,
+	seq_printf(m, "%s%x [%x:%x] @ %d: %s\n", prefix,
 		   rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno,
 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
-		   task ? task->comm : "<unknown>",
-		   task ? task->pid : -1);
-	rcu_read_unlock();
+		   rq->timeline->common->name);
 }
 
 static int i915_gem_request_info(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1620703f4a64..774e3fe025bb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -932,6 +932,7 @@ struct i915_gem_context {
 	struct drm_i915_file_private *file_priv;
 	struct i915_hw_ppgtt *ppgtt;
 	struct pid *pid;
+	const char *name;
 
 	struct i915_ctx_hang_stats hang_stats;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 449cf45d94eb..3496e589cdba 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -158,6 +158,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
 		__i915_gem_object_release_unless_active(ce->state->obj);
 	}
 
+	kfree(ctx->name);
 	put_pid(ctx->pid);
 	list_del(&ctx->link);
 
@@ -303,19 +304,28 @@ __create_hw_context(struct drm_device *dev,
 	}
 
 	/* Default context will never have a file_priv */
-	if (file_priv != NULL) {
+	ret = DEFAULT_CONTEXT_HANDLE;
+	if (file_priv) {
 		ret = idr_alloc(&file_priv->context_idr, ctx,
 				DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
 		if (ret < 0)
 			goto err_out;
-	} else
-		ret = DEFAULT_CONTEXT_HANDLE;
+	}
+	ctx->user_handle = ret;
 
 	ctx->file_priv = file_priv;
-	if (file_priv)
+	if (file_priv) {
 		ctx->pid = get_task_pid(current, PIDTYPE_PID);
+		ctx->name = kasprintf(GFP_KERNEL, "%s[%d]/%x",
+				      current->comm,
+				      pid_nr(ctx->pid),
+				      ctx->user_handle);
+		if (!ctx->name) {
+			ret = -ENOMEM;
+			goto err_pid;
+		}
+	}
 
-	ctx->user_handle = ret;
 	/* NB: Mark all slices as needing a remap so that when the context first
 	 * loads it will restore whatever remap state already exists. If there
 	 * is no remap info, it will be a NOP. */
@@ -329,6 +339,9 @@ __create_hw_context(struct drm_device *dev,
 
 	return ctx;
 
+err_pid:
+	put_pid(ctx->pid);
+	idr_remove(&file_priv->context_idr, ctx->user_handle);
 err_out:
 	context_close(ctx);
 	return ERR_PTR(ret);
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 38/42] drm/i915: Reserve space in the global seqno during request allocation
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (36 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 37/42] drm/i915: Create a unique name for the context Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 39/42] drm/i915: Defer setting of global seqno on request to submission Chris Wilson
                   ` (6 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

A restriction on our global seqnos is that they cannot wrap and that we
cannot use the value 0. This allows us to detect when a request has not
yet been submitted (its global seqno is still 0), and ensures that
hardware semaphores are monotonic, as required by older hardware. To
meet these restrictions when we defer the assignment of the global
seqno, we must check that we have an available slot in the global seqno
space during request construction. If that test fails, we wait for all
requests to be completed and reset the hardware back to 0.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
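The reservation test boils down to an unsigned overflow check; a minimal
userspace sketch of just that check (names invented; on failure the real
code idles the GPU and resets the seqno space via
i915_gem_init_global_seqno()):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* true if one more request still fits in the remaining seqno space */
static bool reserve_seqno(uint32_t *active_requests, uint32_t next_seqno)
{
        uint32_t active = ++*active_requests;

        /* unsigned wrap => seqno 0 would be reused before all complete */
        if (next_seqno + active > next_seqno)
                return true;

        --*active_requests;
        return false;
}

int main(void)
{
        uint32_t active = 0;

        printf("%d\n", reserve_seqno(&active, 100));            /* 1 */
        printf("%d\n", reserve_seqno(&active, 0xfffffffe));     /* 0 */
        return 0;
}
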
 drivers/gpu/drm/i915/i915_debugfs.c      | 10 ++--
 drivers/gpu/drm/i915/i915_drv.h          |  2 +-
 drivers/gpu/drm/i915/i915_gem.c          |  7 ++-
 drivers/gpu/drm/i915/i915_gem_request.c  | 85 +++++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_timeline.h |  2 +-
 5 files changed, 54 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1188bc879e9e..cd6b9efd94a1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -560,7 +560,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
 				seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n",
 					   engine->name,
 					   i915_gem_request_get_seqno(work->flip_queued_req),
-					   dev_priv->gt.global_timeline.next_seqno,
+					   atomic_read(&dev_priv->gt.global_timeline.next_seqno),
 					   intel_engine_get_seqno(engine),
 					   i915_gem_request_completed(work->flip_queued_req));
 			} else
@@ -1035,7 +1035,7 @@ i915_next_seqno_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;
 
-	*val = READ_ONCE(dev_priv->gt.global_timeline.next_seqno);
+	*val = atomic_read(&dev_priv->gt.global_timeline.next_seqno);
 	return 0;
 }
 
@@ -2269,8 +2269,8 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	struct drm_file *file;
 
 	seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled);
-	seq_printf(m, "GPU busy? %s [%x]\n",
-		   yesno(dev_priv->gt.awake), dev_priv->gt.active_engines);
+	seq_printf(m, "GPU busy? %s [%d requests]\n",
+		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
 	seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
 	seq_printf(m, "Frequency requested %d\n",
 		   intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq));
@@ -2305,7 +2305,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 
 	if (INTEL_GEN(dev_priv) >= 6 &&
 	    dev_priv->rps.enabled &&
-	    dev_priv->gt.active_engines) {
+	    dev_priv->gt.active_requests) {
 		u32 rpup, rpupei;
 		u32 rpdown, rpdownei;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 774e3fe025bb..2711753e7c5a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2076,6 +2076,7 @@ struct drm_i915_private {
 
 		struct list_head timelines;
 		struct i915_gem_timeline global_timeline;
+		u32 active_requests;
 
 		/**
 		 * Is the GPU currently considered idle, or busy executing
@@ -2084,7 +2085,6 @@ struct drm_i915_private {
 		 * In order to reduce the effect on performance, there
 		 * is a slight delay before we do so.
 		 */
-		unsigned int active_engines;
 		bool awake;
 
 		/**
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f9a70971cb24..fa6a8ec6fcd7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2650,8 +2650,6 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine)
 		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
 		spin_unlock(&engine->execlist_lock);
 	}
-
-	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
@@ -2706,7 +2704,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	if (!READ_ONCE(dev_priv->gt.awake))
 		return;
 
-	if (READ_ONCE(dev_priv->gt.active_engines))
+	if (READ_ONCE(dev_priv->gt.active_requests))
 		return;
 
 	rearm_hangcheck =
@@ -2720,7 +2718,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 		goto out_rearm;
 	}
 
-	if (dev_priv->gt.active_engines)
+	if (dev_priv->gt.active_requests)
 		goto out_unlock;
 
 	for_each_engine(engine, dev_priv)
@@ -4292,6 +4290,7 @@ int i915_gem_suspend(struct drm_device *dev)
 		goto err;
 
 	i915_gem_retire_requests(dev_priv);
+	GEM_BUG_ON(dev_priv->gt.active_requests);
 
 	assert_kernel_context_is_current(dev_priv);
 	i915_gem_context_lost(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e0f627a94fde..38e32fa2e97b 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -159,6 +159,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	 */
 	list_del(&request->ring_link);
 	request->ring->last_retired_head = request->postfix;
+	request->i915->gt.active_requests--;
 
 	/* Walk through the active list, calling retire on each. This allows
 	 * objects to track their GPU activity and mark themselves as idle
@@ -252,13 +253,15 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
 		return ret;
 
 	i915_gem_retire_requests(i915);
+	GEM_BUG_ON(i915->gt.active_requests > 1);
 
 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
-	if (!i915_seqno_passed(seqno, timeline->next_seqno)) {
+	if (!i915_seqno_passed(seqno, atomic_read(&timeline->next_seqno))) {
 		while (intel_kick_waiters(i915) || intel_kick_signalers(i915))
 			yield();
 		yield();
 	}
+	atomic_set(&timeline->next_seqno, seqno);
 
 	/* Finally reset hw state */
 	for_each_engine(engine, i915)
@@ -279,7 +282,6 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
 int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
@@ -289,34 +291,33 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
 	/* HWS page needs to be set less than what we
 	 * will inject to ring
 	 */
-	ret = i915_gem_init_global_seqno(dev_priv, seqno - 1);
-	if (ret)
-		return ret;
-
-	dev_priv->gt.global_timeline.next_seqno = seqno;
-	return 0;
+	return i915_gem_init_global_seqno(dev_priv, seqno - 1);
 }
 
-static int i915_gem_get_global_seqno(struct drm_i915_private *dev_priv,
-				     u32 *seqno)
+static int reserve_global_seqno(struct drm_i915_private *i915)
 {
-	struct i915_gem_timeline *tl = &dev_priv->gt.global_timeline;
-
-	/* reserve 0 for non-seqno */
-	if (unlikely(tl->next_seqno == 0)) {
-		int ret;
+	u32 active_requests = ++i915->gt.active_requests;
+	u32 next_seqno = atomic_read(&i915->gt.global_timeline.next_seqno);
+	int ret;
 
-		ret = i915_gem_init_global_seqno(dev_priv, 0);
-		if (ret)
-			return ret;
+	/* Reservation is fine until we need to wrap around */
+	if (likely(next_seqno + active_requests > next_seqno))
+		return 0;
 
-		tl->next_seqno = 1;
+	ret = i915_gem_init_global_seqno(i915, 0);
+	if (ret) {
+		i915->gt.active_requests--;
+		return ret;
 	}
 
-	*seqno = tl->next_seqno++;
 	return 0;
 }
 
+static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
+{
+	return atomic_inc_return(&tl->next_seqno);
+}
+
 static int __i915_sw_fence_call
 submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 {
@@ -356,9 +357,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct drm_i915_gem_request *req;
-	u32 seqno;
 	int ret;
 
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
 	/* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
 	 * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex
 	 * and restart.
@@ -367,6 +369,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (ret)
 		return ERR_PTR(ret);
 
+	ret = reserve_global_seqno(dev_priv);
+	if (ret)
+		return ERR_PTR(ret);
+
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->timeline->requests,
 				       typeof(*req), link);
@@ -402,12 +408,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * Do not use kmem_cache_zalloc() here!
 	 */
 	req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
-	if (!req)
-		return ERR_PTR(-ENOMEM);
-
-	ret = i915_gem_get_global_seqno(dev_priv, &seqno);
-	if (ret)
-		goto err;
+	if (!req) {
+		ret = -ENOMEM;
+		goto err_unreserve;
+	}
 
 	req->timeline = engine->timeline;
 
@@ -416,14 +420,14 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		   &i915_fence_ops,
 		   &req->lock,
 		   req->timeline->fence_context,
-		   seqno);
+		   timeline_get_seqno(req->timeline->common));
 
 	i915_sw_fence_init(&req->submit, submit_notify);
 
 	INIT_LIST_HEAD(&req->active_list);
 	req->i915 = dev_priv;
 	req->engine = engine;
-	req->global_seqno = seqno;
+	req->global_seqno = req->fence.seqno;
 	req->ctx = i915_gem_context_get(ctx);
 
 	/* No zalloc, must clear what we need by hand */
@@ -459,8 +463,9 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 
 err_ctx:
 	i915_gem_context_put(ctx);
-err:
 	kmem_cache_free(dev_priv->requests, req);
+err_unreserve:
+	dev_priv->gt.active_requests--;
 	return ERR_PTR(ret);
 }
 
@@ -616,7 +621,6 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 
-	dev_priv->gt.active_engines |= intel_engine_flag(engine);
 	if (dev_priv->gt.awake)
 		return;
 
@@ -692,6 +696,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 					     &request->submitq);
 
+	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
+				     request->fence.seqno));
+
 	request->emitted_jiffies = jiffies;
 	request->previous_seqno = timeline->last_pending_seqno;
 	timeline->last_pending_seqno = request->fence.seqno;
@@ -951,38 +958,34 @@ complete:
 	return timeout;
 }
 
-static bool engine_retire_requests(struct intel_engine_cs *engine)
+static void engine_retire_requests(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request, *next;
 
 	list_for_each_entry_safe(request, next,
 				 &engine->timeline->requests, link) {
 		if (!i915_gem_request_completed(request))
-			return false;
+			return;
 
 		i915_gem_request_retire(request);
 	}
-
-	return true;
 }
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
-	unsigned int tmp;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	if (dev_priv->gt.active_engines == 0)
+	if (!dev_priv->gt.active_requests)
 		return;
 
 	GEM_BUG_ON(!dev_priv->gt.awake);
 
-	for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines, tmp)
-		if (engine_retire_requests(engine))
-			dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
+	for_each_engine(engine, dev_priv)
+		engine_retire_requests(engine);
 
-	if (dev_priv->gt.active_engines == 0)
+	if (!dev_priv->gt.active_requests)
 		queue_delayed_work(dev_priv->wq,
 				   &dev_priv->gt.idle_work,
 				   msecs_to_jiffies(100));
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index 767b23914ec5..18e603980dd9 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -55,7 +55,7 @@ struct intel_timeline {
 
 struct i915_gem_timeline {
 	struct list_head link;
-	u32 next_seqno;
+	atomic_t next_seqno;
 
 	struct drm_i915_private *i915;
 	const char *name;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 39/42] drm/i915: Defer setting of global seqno on request to submission
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (37 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 38/42] drm/i915: Reserve space in the global seqno during request allocation Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07 10:25   ` Joonas Lahtinen
  2016-10-07 10:27   ` Joonas Lahtinen
  2016-10-07  9:46 ` [PATCH 40/42] drm/i915: Enable multiple timelines Chris Wilson
                   ` (5 subsequent siblings)
  44 siblings, 2 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Defer the assignment of the global seqno on a request to its submission.
In the next patch, we will only allocate the global seqno at that time;
here we are just enabling the wait-for-submission before wait-for-seqno
paths.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
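The invariant being set up (and completed by the following patch) is
that global_seqno == 0 means "not yet submitted"; a rough userspace
sketch of the resulting two-stage completion check, with all names
invented:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct request {
        uint32_t global_seqno;  /* 0 until submission */
};

static bool seqno_passed(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) >= 0;
}

static bool request_completed(const struct request *rq, uint32_t hws_seqno)
{
        if (!rq->global_seqno) /* not submitted, so cannot have completed */
                return false;

        return seqno_passed(hws_seqno, rq->global_seqno);
}

static void submit(struct request *rq, uint32_t *last_submitted)
{
        rq->global_seqno = ++*last_submitted;   /* assigned only at submit */
}

int main(void)
{
        struct request rq = { 0 };
        uint32_t last_submitted = 41, hws_seqno = 42;

        printf("%d\n", request_completed(&rq, hws_seqno));      /* 0 */
        submit(&rq, &last_submitted);
        printf("%d\n", request_completed(&rq, hws_seqno));      /* 1 */
        return 0;
}
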
 drivers/gpu/drm/i915/i915_gem_request.c  | 29 ++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 12 ++++++++----
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 38e32fa2e97b..eb7e572ca3d4 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -324,14 +324,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 	struct drm_i915_gem_request *request =
 		container_of(fence, typeof(*request), submit);
 	struct intel_engine_cs *engine = request->engine;
+	struct intel_timeline *timeline;
+	u32 seqno;
 
 	if (state != FENCE_COMPLETE)
 		return NOTIFY_DONE;
 
 	/* Will be called from irq-context when using foreign DMA fences */
 
-	engine->timeline->last_submitted_seqno = request->fence.seqno;
+	timeline = request->timeline;
 
+	seqno = request->fence.seqno;
+	GEM_BUG_ON(!seqno);
+	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
+
+	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
+	request->previous_seqno = timeline->last_submitted_seqno;
+	timeline->last_submitted_seqno = seqno;
+
+	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+	request->global_seqno = seqno;
+	if (test_bit(FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
+		intel_engine_enable_signaling(request);
+	spin_unlock(&request->lock);
+
+	GEM_BUG_ON(!request->global_seqno);
 	engine->emit_breadcrumb(request,
 				request->ring->vaddr + request->postfix);
 	engine->submit_request(request);
@@ -427,10 +444,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	INIT_LIST_HEAD(&req->active_list);
 	req->i915 = dev_priv;
 	req->engine = engine;
-	req->global_seqno = req->fence.seqno;
 	req->ctx = i915_gem_context_get(ctx);
 
 	/* No zalloc, must clear what we need by hand */
+	req->global_seqno = 0;
 	req->previous_context = NULL;
 	req->file_priv = NULL;
 	req->batch = NULL;
@@ -696,15 +713,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 					     &request->submitq);
 
-	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
-				     request->fence.seqno));
+	list_add_tail(&request->link, &timeline->requests);
 
-	request->emitted_jiffies = jiffies;
-	request->previous_seqno = timeline->last_pending_seqno;
 	timeline->last_pending_seqno = request->fence.seqno;
 	i915_gem_active_set(&timeline->last_request, request);
-	list_add_tail(&request->link, &timeline->requests);
+
 	list_add_tail(&request->ring_link, &ring->request_list);
+	request->emitted_jiffies = jiffies;
 
 	i915_gem_mark_busy(engine);
 
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 5fd82b17a706..fa68b4e4b6fe 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -77,22 +77,26 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
 
 static void irq_enable(struct intel_engine_cs *engine)
 {
+	unsigned long flags;
+
 	/* Enabling the IRQ may miss the generation of the interrupt, but
 	 * we still need to force the barrier before reading the seqno,
 	 * just in case.
 	 */
 	engine->breadcrumbs.irq_posted = true;
 
-	spin_lock_irq(&engine->i915->irq_lock);
+	spin_lock_irqsave(&engine->i915->irq_lock, flags);
 	engine->irq_enable(engine);
-	spin_unlock_irq(&engine->i915->irq_lock);
+	spin_unlock_irqrestore(&engine->i915->irq_lock, flags);
 }
 
 static void irq_disable(struct intel_engine_cs *engine)
 {
-	spin_lock_irq(&engine->i915->irq_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->i915->irq_lock, flags);
 	engine->irq_disable(engine);
-	spin_unlock_irq(&engine->i915->irq_lock);
+	spin_unlock_irqrestore(&engine->i915->irq_lock, flags);
 
 	engine->breadcrumbs.irq_posted = false;
 }
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 40/42] drm/i915: Enable multiple timelines
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (38 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 39/42] drm/i915: Defer setting of global seqno on request to submission Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07 10:29   ` Joonas Lahtinen
  2016-10-07  9:46 ` [PATCH 41/42] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
                   ` (4 subsequent siblings)
  44 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

With the infrastructure converted over to tracking multiple timelines in
the GEM API whilst preserving the efficiency of using a single execution
timeline internally, we can now assign a separate timeline to every
context with full-ppgtt.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
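The lookup added below just follows the context's address space to its
timelines; a toy userspace model of that structure (types invented),
showing how a context without full-ppgtt falls back to the shared GGTT
timelines:

#include <stdio.h>

#define NUM_ENGINES     5

struct timeline { int unused; };

struct address_space {
        struct timeline engine[NUM_ENGINES];    /* one timeline per engine */
};

struct context {
        struct address_space *ppgtt;            /* NULL without full-ppgtt */
};

static struct address_space ggtt;               /* shared fallback */

static struct timeline *context_timeline(struct context *ctx, int engine_id)
{
        struct address_space *vm = ctx->ppgtt ? ctx->ppgtt : &ggtt;

        return &vm->engine[engine_id];
}

int main(void)
{
        struct address_space vm_a;
        struct context a = { .ppgtt = &vm_a };
        struct context b = { .ppgtt = NULL };

        printf("separate timelines? %d\n",
               context_timeline(&a, 0) != context_timeline(&b, 0)); /* 1 */
        return 0;
}
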
 drivers/gpu/drm/i915/i915_drv.h          | 10 ++++++
 drivers/gpu/drm/i915/i915_gem.c          | 10 +++---
 drivers/gpu/drm/i915/i915_gem_context.c  |  4 +--
 drivers/gpu/drm/i915/i915_gem_evict.c    | 11 +++---
 drivers/gpu/drm/i915/i915_gem_gtt.c      | 19 ++++++----
 drivers/gpu/drm/i915/i915_gem_gtt.h      |  4 ++-
 drivers/gpu/drm/i915/i915_gem_request.c  | 59 +++++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_timeline.c |  1 +
 drivers/gpu/drm/i915/i915_gem_timeline.h |  3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  5 ---
 10 files changed, 76 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2711753e7c5a..33dadda90fe1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3526,6 +3526,16 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx)
 	kref_put(&ctx->ref, i915_gem_context_free);
 }
 
+static inline struct intel_timeline *
+i915_gem_context_lookup_timeline(struct i915_gem_context *ctx,
+				 struct intel_engine_cs *engine)
+{
+	struct i915_address_space *vm;
+
+	vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base;
+	return &vm->timeline.engine[engine->id];
+}
+
 static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
 {
 	return c->user_handle == DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fa6a8ec6fcd7..00e6613e3b01 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2527,12 +2527,9 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
 	 */
 	list_for_each_entry(request, &engine->timeline->requests, link) {
-		if (i915_gem_request_completed(request))
+		if (__i915_gem_request_completed(request))
 			continue;
 
-		if (!i915_sw_fence_done(&request->submit))
-			break;
-
 		return request;
 	}
 
@@ -2560,6 +2557,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request;
 	struct i915_gem_context *incomplete_ctx;
+	struct intel_timeline *timeline;
 	bool ring_hung;
 
 	if (engine->irq_seqno_barrier)
@@ -2598,6 +2596,10 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 	list_for_each_entry_continue(request, &engine->timeline->requests, link)
 		if (request->ctx == incomplete_ctx)
 			reset_request(request);
+
+	timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
+	list_for_each_entry(request, &timeline->requests, link)
+		reset_request(request);
 }
 
 void i915_gem_reset(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3496e589cdba..a9acd4a71809 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -365,9 +365,9 @@ i915_gem_create_context(struct drm_device *dev,
 		return ctx;
 
 	if (USES_FULL_PPGTT(dev)) {
-		struct i915_hw_ppgtt *ppgtt =
-			i915_ppgtt_create(to_i915(dev), file_priv);
+		struct i915_hw_ppgtt *ppgtt;
 
+		ppgtt = i915_ppgtt_create(to_i915(dev), file_priv, ctx->name);
 		if (IS_ERR(ppgtt)) {
 			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 					 PTR_ERR(ppgtt));
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 61f716c8768c..dcab3da29b04 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -33,13 +33,16 @@
 #include "intel_drv.h"
 #include "i915_trace.h"
 
-static bool
-gpu_is_idle(struct drm_i915_private *dev_priv)
+static bool ggtt_is_idle(struct drm_i915_private *dev_priv)
 {
+	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct intel_engine_cs *engine;
 
 	for_each_engine(engine, dev_priv) {
-		if (intel_engine_is_active(engine))
+		struct intel_timeline *tl;
+
+		tl = &ggtt->base.timeline.engine[engine->id];
+		if (i915_gem_active_isset(&tl->last_request))
 			return false;
 	}
 
@@ -153,7 +156,7 @@ search_again:
 	if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK)
 		return -ENOSPC;
 
-	if (gpu_is_idle(dev_priv)) {
+	if (ggtt_is_idle(dev_priv)) {
 		/* If we still have pending pageflip completions, drop
 		 * back to userspace to give our workqueues time to
 		 * acquire our locks and unpin the old scanouts.
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 80669719b44b..6493e23053cc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2112,8 +2112,10 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
 }
 
 static void i915_address_space_init(struct i915_address_space *vm,
-				    struct drm_i915_private *dev_priv)
+				    struct drm_i915_private *dev_priv,
+				    const char *name)
 {
+	i915_gem_timeline_init(dev_priv, &vm->timeline, name);
 	drm_mm_init(&vm->mm, vm->start, vm->total);
 	INIT_LIST_HEAD(&vm->active_list);
 	INIT_LIST_HEAD(&vm->inactive_list);
@@ -2142,14 +2144,15 @@ static void gtt_write_workarounds(struct drm_device *dev)
 
 static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
 			   struct drm_i915_private *dev_priv,
-			   struct drm_i915_file_private *file_priv)
+			   struct drm_i915_file_private *file_priv,
+			   const char *name)
 {
 	int ret;
 
 	ret = __hw_ppgtt_init(ppgtt, dev_priv);
 	if (ret == 0) {
 		kref_init(&ppgtt->ref);
-		i915_address_space_init(&ppgtt->base, dev_priv);
+		i915_address_space_init(&ppgtt->base, dev_priv, name);
 		ppgtt->base.file = file_priv;
 	}
 
@@ -2183,7 +2186,8 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
 
 struct i915_hw_ppgtt *
 i915_ppgtt_create(struct drm_i915_private *dev_priv,
-		  struct drm_i915_file_private *fpriv)
+		  struct drm_i915_file_private *fpriv,
+		  const char *name)
 {
 	struct i915_hw_ppgtt *ppgtt;
 	int ret;
@@ -2192,7 +2196,7 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv,
 	if (!ppgtt)
 		return ERR_PTR(-ENOMEM);
 
-	ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv);
+	ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name);
 	if (ret) {
 		kfree(ppgtt);
 		return ERR_PTR(ret);
@@ -2215,6 +2219,7 @@ void  i915_ppgtt_release(struct kref *kref)
 	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
 	WARN_ON(!list_empty(&ppgtt->base.unbound_list));
 
+	i915_gem_timeline_fini(&ppgtt->base.timeline);
 	list_del(&ppgtt->base.global_link);
 	drm_mm_takedown(&ppgtt->base.mm);
 
@@ -3209,11 +3214,13 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
 	/* Subtract the guard page before address space initialization to
 	 * shrink the range used by drm_mm.
 	 */
+	mutex_lock(&dev_priv->drm.struct_mutex);
 	ggtt->base.total -= PAGE_SIZE;
-	i915_address_space_init(&ggtt->base, dev_priv);
+	i915_address_space_init(&ggtt->base, dev_priv, "[global]");
 	ggtt->base.total += PAGE_SIZE;
 	if (!HAS_LLC(dev_priv))
 		ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
 				dev_priv->ggtt.mappable_base,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 737b8d8f21b4..57207ef799ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -342,6 +342,7 @@ struct i915_pml4 {
 
 struct i915_address_space {
 	struct drm_mm mm;
+	struct i915_gem_timeline timeline;
 	struct drm_device *dev;
 	/* Every address space belongs to a struct file - except for the global
 	 * GTT that is owned by the driver (and so @file is set to NULL). In
@@ -614,7 +615,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv);
 int i915_ppgtt_init_hw(struct drm_device *dev);
 void i915_ppgtt_release(struct kref *kref);
 struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
-					struct drm_i915_file_private *fpriv);
+					struct drm_i915_file_private *fpriv,
+					const char *name);
 static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
 {
 	if (ppgtt)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index eb7e572ca3d4..d9a8c637c9ba 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -34,12 +34,6 @@ static const char *i915_fence_get_driver_name(struct fence *fence)
 
 static const char *i915_fence_get_timeline_name(struct fence *fence)
 {
-	/* Timelines are bound by eviction to a VM. However, since
-	 * we only have a global seqno at the moment, we only have
-	 * a single timeline. Note that each timeline will have
-	 * multiple execution contexts (fence contexts) as we allow
-	 * engines within a single timeline to execute in parallel.
-	 */
 	return to_request(fence)->timeline->common->name;
 }
 
@@ -64,18 +58,6 @@ static signed long i915_fence_wait(struct fence *fence,
 	return i915_wait_request(to_request(fence), interruptible, timeout);
 }
 
-static void i915_fence_value_str(struct fence *fence, char *str, int size)
-{
-	snprintf(str, size, "%u", fence->seqno);
-}
-
-static void i915_fence_timeline_value_str(struct fence *fence, char *str,
-					  int size)
-{
-	snprintf(str, size, "%u",
-		 intel_engine_get_seqno(to_request(fence)->engine));
-}
-
 static void i915_fence_release(struct fence *fence)
 {
 	struct drm_i915_gem_request *req = to_request(fence);
@@ -90,8 +72,6 @@ const struct fence_ops i915_fence_ops = {
 	.signaled = i915_fence_signaled,
 	.wait = i915_fence_wait,
 	.release = i915_fence_release,
-	.fence_value_str = i915_fence_value_str,
-	.timeline_value_str = i915_fence_timeline_value_str,
 };
 
 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
@@ -147,7 +127,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	GEM_BUG_ON(!i915_gem_request_completed(request));
 
 	trace_i915_gem_request_retire(request);
+
+	spin_lock_irq(&request->engine->timeline->lock);
 	list_del_init(&request->link);
+	spin_unlock_irq(&request->engine->timeline->lock);
 
 	/* We know the GPU must have read the request to have
 	 * sent us the seqno + interrupt, so use the position
@@ -313,6 +296,12 @@ static int reserve_global_seqno(struct drm_i915_private *i915)
 	return 0;
 }
 
+static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
+{
+	/* next_seqno only incremented under a mutex */
+	return ++tl->next_seqno.counter;
+}
+
 static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
 {
 	return atomic_inc_return(&tl->next_seqno);
@@ -325,6 +314,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 		container_of(fence, typeof(*request), submit);
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_timeline *timeline;
+	unsigned long flags;
 	u32 seqno;
 
 	if (state != FENCE_COMPLETE)
@@ -332,9 +322,12 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 
 	/* Will be called from irq-context when using foreign DMA fences */
 
-	timeline = request->timeline;
+	timeline = engine->timeline;
+	GEM_BUG_ON(timeline == request->timeline);
 
-	seqno = request->fence.seqno;
+	spin_lock_irqsave(&timeline->lock, flags);
+
+	seqno = timeline_get_seqno(timeline->common);
 	GEM_BUG_ON(!seqno);
 	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
 
@@ -353,6 +346,12 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 				request->ring->vaddr + request->postfix);
 	engine->submit_request(request);
 
+	spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING);
+	list_move_tail(&request->link, &timeline->requests);
+	spin_unlock(&request->timeline->lock);
+
+	spin_unlock_irqrestore(&timeline->lock, flags);
+
 	return NOTIFY_DONE;
 }
 
@@ -393,7 +392,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->timeline->requests,
 				       typeof(*req), link);
-	if (req && i915_gem_request_completed(req))
+	if (req && __i915_gem_request_completed(req))
 		i915_gem_request_retire(req);
 
 	/* Beware: Dragons be flying overhead.
@@ -430,14 +429,15 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		goto err_unreserve;
 	}
 
-	req->timeline = engine->timeline;
+	req->timeline = i915_gem_context_lookup_timeline(ctx, engine);
+	GEM_BUG_ON(req->timeline == engine->timeline);
 
 	spin_lock_init(&req->lock);
 	fence_init(&req->fence,
 		   &i915_fence_ops,
 		   &req->lock,
 		   req->timeline->fence_context,
-		   timeline_get_seqno(req->timeline->common));
+		   __timeline_get_seqno(req->timeline->common));
 
 	i915_sw_fence_init(&req->submit, submit_notify);
 
@@ -713,9 +713,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 					     &request->submitq);
 
+	spin_lock_irq(&timeline->lock);
 	list_add_tail(&request->link, &timeline->requests);
+	spin_unlock_irq(&timeline->lock);
+
+	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
+				     request->fence.seqno));
 
-	timeline->last_pending_seqno = request->fence.seqno;
+	timeline->last_submitted_seqno = request->fence.seqno;
 	i915_gem_active_set(&timeline->last_request, request);
 
 	list_add_tail(&request->ring_link, &ring->request_list);
@@ -979,7 +984,7 @@ static void engine_retire_requests(struct intel_engine_cs *engine)
 
 	list_for_each_entry_safe(request, next,
 				 &engine->timeline->requests, link) {
-		if (!i915_gem_request_completed(request))
+		if (!__i915_gem_request_completed(request))
 			return;
 
 		i915_gem_request_retire(request);
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
index a4579c109066..40d9f009673f 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -48,6 +48,7 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
 		tl->fence_context = fences++;
 		tl->common = timeline;
 
+		spin_lock_init(&tl->lock);
 		init_request_active(&tl->last_request, NULL);
 		INIT_LIST_HEAD(&tl->requests);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index 18e603980dd9..f2bf7b1d49a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -34,7 +34,8 @@ struct i915_gem_timeline;
 struct intel_timeline {
 	u64 fence_context;
 	u32 last_submitted_seqno;
-	u32 last_pending_seqno;
+
+	spinlock_t lock;
 
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 652e37c9e0c9..a888f68d63d9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -572,9 +572,4 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 unsigned int intel_kick_waiters(struct drm_i915_private *i915);
 unsigned int intel_kick_signalers(struct drm_i915_private *i915);
 
-static inline bool intel_engine_is_active(struct intel_engine_cs *engine)
-{
-	return i915_gem_active_isset(&engine->timeline->last_request);
-}
-
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 41/42] drm/i915: Enable userspace to opt-out of implicit fencing
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (39 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 40/42] drm/i915: Enable multiple timelines Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07  9:46 ` [PATCH 42/42] drm/i915: Support explicit fencing for execbuf Chris Wilson
                   ` (3 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Userspace is faced with a dilemma. The kernel requires implicit fencing
to manage resource usage (we always must wait for the GPU to finish
before releasing its PTE) and for third parties. However, userspace may
wish to avoid this serialisation if it is either using explicit fencing
between parties and wants more fine-grained access to buffers (e.g. it
may partition the buffer between uses and track fences on ranges rather
than the implicit fences tracking the whole object). It follows that
userspace needs a mechanism to avoid the kernel's serialisation on its
implicit fences before execbuf execution.

The next question is whether this is an object, execbuf or context flag.
Hybrid users (such as using explicit EGL_ANDROID_native_sync fencing on
shared winsys buffers, but implicit fencing on internal surfaces)
require a per-object level flag. Given that this flag needs to be set
only once for the lifetime of the object, this reduces the convenience of
having an execbuf or context level flag (and avoids having multiple
pieces of uABI controlling the same feature).

Incorrect use of this flag will result in rendering corruption and GPU
hangs - but will not result in use-after-free or similar resource
tracking issues.

Serious caveat: write ordering is not strictly correct after setting
this flag on a render target on multiple engines. This affects all
subsequent GEM operations (execbuf, set-domain, pread) and shared
dma-buf operations. A fix is possible - but costly (both in terms of
further ABI changes and runtime overhead).
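
For illustration only (not part of this patch), a minimal userspace
sketch of opting a single object out of implicit synchronisation could
look like the following; bo_handle, batch_size and fd are assumed to
come from the usual execbuf setup:

	/* Sketch: skip the implicit-fence wait for this one object. */
	struct drm_i915_gem_exec_object2 obj = {
		.handle = bo_handle,		/* hypothetical batch handle */
		.flags  = EXEC_OBJECT_ASYNC,	/* no wait on implicit fences */
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr  = (__u64)(uintptr_t)&obj,
		.buffer_count = 1,
		.batch_len    = batch_size,	/* assumed batch length */
	};
	int ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);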

Testcase: igt/gem_exec_async
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c            |  1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3 +++
 include/uapi/drm/i915_drm.h                | 27 ++++++++++++++++++++++++++-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index cd00b021bdfb..bf61e3c4caa3 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -332,6 +332,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
 	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
 	case I915_PARAM_HAS_EXEC_SOFTPIN:
+	case I915_PARAM_HAS_EXEC_ASYNC:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b4865bcc8a3e..1fde95dc2e4b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1112,6 +1112,9 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
+		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
+			continue;
+
 		ret = i915_gem_request_await_object
 			(req, obj, obj->base.pending_write_domain);
 		if (ret)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 03725fe89859..a2fa511b46b3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -388,6 +388,10 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_POOLED_EU	 38
 #define I915_PARAM_MIN_EU_IN_POOL	 39
 #define I915_PARAM_MMAP_GTT_VERSION	 40
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
+ * synchronisation with implicit fencing on individual objects.
+ */
+#define I915_PARAM_HAS_EXEC_ASYNC	 41
 
 typedef struct drm_i915_getparam {
 	__s32 param;
@@ -729,8 +733,29 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
 #define EXEC_OBJECT_PINNED		 (1<<4)
 #define EXEC_OBJECT_PAD_TO_SIZE		 (1<<5)
+/* The kernel implicitly tracks GPU activity on all GEM objects, and
+ * synchronises operations with outstanding rendering. This includes
+ * rendering on other devices if exported via dma-buf. However, sometimes
+ * this tracking is too coarse and the user knows better. For example,
+ * if the object is split into non-overlapping ranges shared between different
+ * clients or engines (i.e. suballocating objects), the implicit tracking
+ * by kernel assumes that each operation affects the whole object rather
+ * than an individual range, causing needless synchronisation between clients.
+ * The kernel will also forgo any CPU cache flushes prior to rendering from
+ * the object as the client is expected to be also handling such domain
+ * tracking.
+ *
+ * The kernel maintains the implicit tracking in order to manage resources
+ * used by the GPU - this flag only disables the synchronisation prior to
+ * rendering with this object in this execbuf.
+ *
+ * Opting out of implicit synchronisation requires the user to do its own
+ * explicit tracking to avoid rendering corruption. See, for example,
+ * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously.
+ */
+#define EXEC_OBJECT_ASYNC		 (1<<6)
 /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1)
 	__u64 flags;
 
 	union {
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* [PATCH 42/42] drm/i915: Support explicit fencing for execbuf
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (40 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 41/42] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
@ 2016-10-07  9:46 ` Chris Wilson
  2016-10-07 10:19 ` ✗ Fi.CI.BAT: warning for series starting with [01/42] drm/i915: Allow disabling error capture Patchwork
                   ` (2 subsequent siblings)
  44 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-07  9:46 UTC (permalink / raw)
  To: intel-gfx

Now that the user can opt-out of implicit fencing, we need to give them
back control over the fencing. We employ sync_file to wrap our
drm_i915_gem_request and provide an fd that userspace can merge with
other sync_file fds and pass back to the kernel to wait upon before
future execution.
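
As a rough sketch of the intended userspace flow (flag and field names
as per the uAPI additions below; the rest of the execbuf setup is
assumed), the in-fence is passed in the low 32 bits of rsvd2 and the
out-fence fd is returned in the high 32 bits:

	/* Sketch: wait on in_fence_fd, receive a new sync_file fd back. */
	execbuf.flags |= I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT;
	execbuf.rsvd2  = (__u32)in_fence_fd;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &execbuf) == 0) {
		int out_fence_fd = execbuf.rsvd2 >> 32;
		/* merge with other sync_files or pass to KMS, then close() */
	}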

Testcase: igt/gem_exec_fence
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig               |  1 +
 drivers/gpu/drm/i915/i915_drv.c            |  3 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 53 +++++++++++++++++++++++++++---
 include/uapi/drm/i915_drm.h                | 36 +++++++++++++++++++-
 4 files changed, 86 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 92ecced1bc8f..ca23fd17c3ce 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -19,6 +19,7 @@ config DRM_I915
 	select INPUT if ACPI
 	select ACPI_VIDEO if ACPI
 	select ACPI_BUTTON if ACPI
+	select SYNC_FILE
 	help
 	  Choose this option if you have a system that has "Intel Graphics
 	  Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index bf61e3c4caa3..b6d6dbc50c4f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -333,6 +333,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
 	case I915_PARAM_HAS_EXEC_SOFTPIN:
 	case I915_PARAM_HAS_EXEC_ASYNC:
+	case I915_PARAM_HAS_EXEC_FENCE:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
@@ -2562,7 +2563,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1fde95dc2e4b..cdcdab2b0370 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -28,6 +28,7 @@
 
 #include <linux/dma_remapping.h>
 #include <linux/reservation.h>
+#include <linux/sync_file.h>
 #include <linux/uaccess.h>
 
 #include <drm/drmP.h>
@@ -1590,6 +1591,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct i915_execbuffer_params *params = &params_master;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
 	u32 dispatch_flags;
+	struct fence *in_fence = NULL;
+	struct sync_file *out_fence = NULL;
+	int out_fence_fd = -1;
 	int ret;
 	bool need_relocs;
 
@@ -1633,6 +1637,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		dispatch_flags |= I915_DISPATCH_RS;
 	}
 
+	if (args->flags & I915_EXEC_FENCE_IN) {
+		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!in_fence) {
+			ret = -EINVAL;
+			goto pre_mutex_err;
+		}
+	}
+
+	if (args->flags & I915_EXEC_FENCE_OUT) {
+		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+		if (out_fence_fd < 0) {
+			ret = out_fence_fd;
+			out_fence_fd = -1;
+			goto pre_mutex_err;
+		}
+	}
+
 	/* Take a local wakeref for preparing to dispatch the execbuf as
 	 * we expect to access the hardware fairly frequently in the
 	 * process. Upon first dispatch, we acquire another prolonged
@@ -1777,6 +1798,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto err_batch_unpin;
 	}
 
+	if (in_fence) {
+		ret = i915_gem_request_await_fence(params->request, in_fence);
+		if (ret < 0)
+			goto err_request;
+	}
+
+	if (out_fence_fd != -1) {
+		out_fence = sync_file_create(fence_get(&params->request->fence));
+		if (!out_fence) {
+			ret = -ENOMEM;
+			goto err_request;
+		}
+	}
+
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
 	 * request is retired will the the batch_obj be moved onto the
@@ -1804,6 +1839,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
 	__i915_add_request(params->request, ret == 0);
+	if (out_fence) {
+		if (ret == 0) {
+			fd_install(out_fence_fd, out_fence->file);
+			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
+			args->rsvd2 |= (u64)out_fence_fd << 32;
+			out_fence_fd = -1;
+		} else {
+			fput(out_fence->file);
+		}
+	}
 
 err_batch_unpin:
 	/*
@@ -1825,6 +1870,9 @@ pre_mutex_err:
 	/* intel_gpu_busy should also get a ref, so it will free when the device
 	 * is really idle. */
 	intel_runtime_pm_put(dev_priv);
+	if (out_fence_fd != -1)
+		put_unused_fd(out_fence_fd);
+	fence_put(in_fence);
 	return ret;
 }
 
@@ -1932,11 +1980,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	if (args->rsvd2 != 0) {
-		DRM_DEBUG("dirty rvsd2 field\n");
-		return -EINVAL;
-	}
-
 	exec2_list = drm_malloc_gfp(args->buffer_count,
 				    sizeof(*exec2_list),
 				    GFP_TEMPORARY);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a2fa511b46b3..a1d04d8bc80a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_GEM_EXECBUFFER2_WR	DRM_I915_GEM_EXECBUFFER2
 #define DRM_I915_GET_SPRITE_COLORKEY	0x2a
 #define DRM_I915_SET_SPRITE_COLORKEY	0x2b
 #define DRM_I915_GEM_WAIT	0x2c
@@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -393,6 +395,13 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_ASYNC	 41
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support -
+ * both being able to pass in a sync_file fd to wait upon before executing,
+ * and being able to return a new sync_file fd that is signaled when the
+ * current request is complete.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE	 42
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -845,7 +854,32 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_RESOURCE_STREAMER     (1<<15)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
+/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_IN		(1<<16)
+
+/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd
+ * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given
+ * to the caller, and it should be closed after use. (The fd is a regular
+ * file descriptor and will be cleaned up on process termination. It holds
+ * a reference to the request, but nothing else.)
+ *
+ * The sync_file fd can be combined with other sync_file fds and passed either
+ * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip
+ * will only occur after this request completes), or to other devices.
+ *
+ * Using I915_EXEC_FENCE_OUT requires use of
+ * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written
+ * back to userspace. Failure to do so will cause the out-fence to always
+ * be reported as zero, and the real fence fd to be leaked.
+ */
+#define I915_EXEC_FENCE_OUT		(1<<17)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request
  2016-10-07  9:45 ` [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request Chris Wilson
@ 2016-10-07  9:56   ` Joonas Lahtinen
  2016-10-07 15:51   ` Tvrtko Ursulin
  1 sibling, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-07  9:56 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:45 +0100, Chris Wilson wrote:
> +	if (!fence_is_array(fence)) {
> +		ret = i915_sw_fence_await_dma_fence(&req->submit,
> +						    fence, 10*HZ,
> +						    GFP_KERNEL);
> +		return ret < 0 ? ret : 0;
> +	}
> +

With the magic removed in two spots,

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 08/42] drm/i915: Rearrange i915_wait_request() accounting with callers
  2016-10-07  9:46 ` [PATCH 08/42] drm/i915: Rearrange i915_wait_request() accounting with callers Chris Wilson
@ 2016-10-07  9:58   ` Joonas Lahtinen
  0 siblings, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-07  9:58 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> Our low-level wait routine has evolved from our generic wait interface
> that handled unlocked, RPS boosting, waits with time tracking. If we
> push our GEM fence tracking to use reservation_objects (required for
> handling multiple timelines), we lose the ability to pass the required
> information down to i915_wait_request(). However, if we push the extra
> functionality from i915_wait_request() to the individual callsites
> (i915_gem_object_wait_rendering and i915_gem_wait_ioctl) that make use
> of those extras, we can both simplify our low level wait and prepare for
> extending the GEM interface for use of reservation_objects.

No changelog, no re-review.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 11/42] drm/i915: Introduce an internal allocator for disposable private objects
  2016-10-07  9:46 ` [PATCH 11/42] drm/i915: Introduce an internal allocator for disposable private objects Chris Wilson
@ 2016-10-07 10:01   ` Joonas Lahtinen
  2016-10-07 16:52   ` Tvrtko Ursulin
  1 sibling, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-07 10:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> Quite a few of our objects used for internal hardware programming do not
> benefit from being swappable or from being zero initialised. As such
> they do not benefit from using a shmemfs backing storage and since they
> are internal and never directly exposed to the user, we do not need to
> worry about providing a filp. For these we can use an
> drm_i915_gem_object wrapper around a sg_table of plain struct page. They
> are not swap backed and not automatically pinned. If they are reaped
> by the shrinker, the pages are released and the contents discarded. For
> the internal use case, this is fine as for example, ringbuffers are
> pinned from being written by a request to be read by the hardware. Once
> they are idle, they can be discarded entirely. As such they are a good
> match for execlist ringbuffers and a small variety of other internal
> objects.
> 
> In the first iteration, this is limited to the scratch batch buffers we
> use (for command parsing and state initialisation).
> 

No changelog; assuming it is otherwise intact with just the typo fixed;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 26/42] drm/i915: Move GEM activity tracking into a common struct reservation_object
  2016-10-07  9:46 ` [PATCH 26/42] drm/i915: Move GEM activity tracking into a common struct reservation_object Chris Wilson
@ 2016-10-07 10:10   ` Joonas Lahtinen
  0 siblings, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-07 10:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> In preparation to support many distinct timelines, we need to expand the
> activity tracking on the GEM object to handle more than just a request
> per engine. We already use the struct reservation_object on the dma-buf
> to handle many fence contexts, so integrating that into the GEM object
> itself is the preferred solution. (For example, we can now share the same
> reservation_object between every consumer/producer using this buffer and
> skip the manual import/export via dma-buf.)
> 
> v2: Reimplement busy-ioctl (by walking the reservation object), postpone
> the ABI change for another day. Similarly use the reservation object to
> find the last_write request (if active and from i915) for choosing
> display CS flips.
> 
> Caveats:
> 
>  * busy-ioctl: busy-ioctl only reports on the native fences, it will not
> warn of stalls (in set-domain-ioctl, pread/pwrite etc) if the object is
> being rendered to by external fences. It also will not report the same
> busy state as wait-ioctl (or polling on the dma-buf) in the same
> circumstances. On the plus side, it does retain reporting of which
> *i915* engines are engaged with this object.
> 
>  * non-blocking atomic modesets take a step backwards as the wait for
> render completion blocks the ioctl. This is fixed in a subsequent
> patch to use a fence instead for awaiting on the rendering, see
> "drm/i915: Restore nonblocking awaits for modesetting"
> 
>  * dynamic array manipulation for shared-fences in reservation is slower
> than the previous lockless static assignment (e.g. gem_exec_lut_handle
> runtime on ivb goes from 42s to 66s), mainly due to atomic operations.
> 
>  * loss of object-level retirement callbacks, emulated by VMA retirement
> tracking.
> 
>  * minor loss of object-level last activity information from debugfs,
> could be replaced with per-vma information if desired
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Code duplication is gone, busy-ioctl back, so keeping my;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 02/42] drm/i915: Stop the machine whilst capturing the GPU crash dump
  2016-10-07  9:45 ` [PATCH 02/42] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
@ 2016-10-07 10:11   ` Joonas Lahtinen
  0 siblings, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-07 10:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter

On pe, 2016-10-07 at 10:45 +0100, Chris Wilson wrote:
> The error state is purposefully racy as we expect it to be called at any
> time and so have avoided any locking whilst capturing the crash dump.
> However, with multi-engine GPUs and multiple CPUs, those races can
> manifest into OOPSes as we attempt to chase dangling pointers freed on
> other CPUs. Under discussion are lots of ways to slow down normal
> operation in order to protect the post-mortem error capture, but what if
> we take the opposite approach and freeze the machine whilst the error
> capture runs (note the GPU may still be running, but as long as we don't
> process any of the results the driver's bookkeeping will be static).
> 
> Note that by itself, this is not a complete fix. It also depends on
> the compiler barriers in list_add/list_del to prevent traversing the
> lists into the void. We also depend on only requiring state from
> carefully controlled sources - i.e. all the state we require for
> post-mortem debugging should be reachable from the request itself so
> that we only have to worry about retrieving the request carefully. Once
> we have the request, we know that all pointers from it are intact.
> 
> v2: Avoid drm_clflush_pages() inside stop_machine() as it may use
> stop_machine() itself for its wbinvd fallback.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

CC'ing Daniel to add A-b.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage
  2016-10-07  9:46 ` [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage Chris Wilson
@ 2016-10-07 10:12   ` Joonas Lahtinen
  2016-10-07 11:05     ` Chris Wilson
  2016-10-07 13:36   ` John Harrison
  2016-10-11  9:32   ` Tvrtko Ursulin
  2 siblings, 1 reply; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-07 10:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

Patches 14-25 to a separate series. Will continue their review there.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* ✗ Fi.CI.BAT: warning for series starting with [01/42] drm/i915: Allow disabling error capture
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (41 preceding siblings ...)
  2016-10-07  9:46 ` [PATCH 42/42] drm/i915: Support explicit fencing for execbuf Chris Wilson
@ 2016-10-07 10:19 ` Patchwork
  2016-10-10  7:23 ` Patchwork
  2016-10-10 15:31 ` ✗ Fi.CI.BAT: failure for series starting with [01/42] drm/i915: Allow disabling error capture (rev2) Patchwork
  44 siblings, 0 replies; 107+ messages in thread
From: Patchwork @ 2016-10-07 10:19 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/42] drm/i915: Allow disabling error capture
URL   : https://patchwork.freedesktop.org/series/13436/
State : warning

== Summary ==

Series 13436v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/13436/revisions/1/mbox/

Test drv_module_reload_basic:
                dmesg-warn -> PASS       (fi-ilk-650)
                pass       -> DMESG-WARN (fi-skl-6770hq)
Test kms_pipe_crc_basic:
        Subgroup hang-read-crc-pipe-b:
                pass       -> DMESG-WARN (fi-skl-6700k)
Test vgem_basic:
        Subgroup unload:
                skip       -> PASS       (fi-hsw-4770)
                pass       -> SKIP       (fi-skl-6770hq)
                skip       -> PASS       (fi-bsw-n3050)

fi-bdw-5557u     total:248  pass:231  dwarn:0   dfail:0   fail:0   skip:17 
fi-bsw-n3050     total:248  pass:205  dwarn:0   dfail:0   fail:0   skip:43 
fi-bxt-t5700     total:248  pass:217  dwarn:0   dfail:0   fail:0   skip:31 
fi-byt-j1900     total:248  pass:214  dwarn:1   dfail:0   fail:1   skip:32 
fi-hsw-4770      total:248  pass:225  dwarn:0   dfail:0   fail:0   skip:23 
fi-hsw-4770r     total:248  pass:224  dwarn:0   dfail:0   fail:0   skip:24 
fi-ilk-650       total:248  pass:185  dwarn:0   dfail:0   fail:2   skip:61 
fi-ivb-3520m     total:248  pass:221  dwarn:0   dfail:0   fail:0   skip:27 
fi-ivb-3770      total:248  pass:207  dwarn:0   dfail:0   fail:0   skip:41 
fi-skl-6260u     total:248  pass:232  dwarn:0   dfail:0   fail:0   skip:16 
fi-skl-6700hq    total:248  pass:224  dwarn:1   dfail:0   fail:0   skip:23 
fi-skl-6700k     total:248  pass:221  dwarn:2   dfail:0   fail:0   skip:25 
fi-skl-6770hq    total:248  pass:229  dwarn:2   dfail:0   fail:1   skip:16 
fi-snb-2520m     total:248  pass:211  dwarn:0   dfail:0   fail:0   skip:37 
fi-snb-2600      total:248  pass:209  dwarn:0   dfail:0   fail:0   skip:39 

Results at /archive/results/CI_IGT_test/Patchwork_2647/

81b22c4383bfe6290f7b9d821bf56768567c1718 drm-intel-nightly: 2016y-10m-07d-07h-29m-30s UTC integration manifest
33cf896 drm/i915: Support explicit fencing for execbuf
01f0787 drm/i915: Enable userspace to opt-out of implicit fencing
0815015 drm/i915: Enable multiple timelines
6409884 drm/i915: Defer setting of global seqno on request to submission
3f9e348 drm/i915: Reserve space in the global seqno during request allocation
d9ec193 drm/i915: Create a unique name for the context
b3bebb7 drm/i915: Move the global sync optimisation to the timeline
48703d4 drm/i915: Defer breadcrumb emission
d673086 drm/i915: Record space required for breadcrumb emission
2493555 drm/i915: Rename ->emit_request to ->emit_breadcrumb
46618d0 drm/i915: Introduce a global_seqno for each request
454114b drm/i915: Wait first for submission, before waiting for request completion
271653c drm/i915: Queue the idling context switch after all other timelines
a8990af drm/i915: Combine seqno + tracking into a global timeline struct
0c4f326 drm/i915: Restore nonblocking awaits for modesetting
609dbff drm: Add reference counting to drm_atomic_state
a5cd0fa drm/i915: Move GEM activity tracking into a common struct reservation_object
6dfa747 drm/i915: Use lockless object free
51be5fa drm/i915: Treat a framebuffer reference as an active reference whilst shrinking
89cefa9 drm/i915: Move object release to a freelist + worker
af3efa6 drm/i915: Acquire the backing storage outside of struct_mutex in set-domain
bdaf19a drm/i915: Implement pwrite without struct-mutex
8856275 drm/i915: Implement pread without struct-mutex
3bc7efe drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex
8824701 drm/i915: Move object backing storage manipulation to its own locking
b4f1045 drm/i915: Pass around sg_table to get_pages/put_pages backend
e9184f6 drm/i915: Refactor object page API
897bc5e drm/i915: Use radixtree to jump start intel_partial_pages()
2885c3f drm/i915: Use a radixtree for random access to the object's backing storage
ba563be drm/i915: Markup GEM API with lockdep asserts
2c12286 drm/i915: Reuse the active golden render state batch
7a26c4d0 drm/i915: Introduce an internal allocator for disposable private objects
5f0cb1d drm/i915: Defer active reference until required
5dafc01 drm/i915: Remove unused i915_gem_active_wait() in favour of _unlocked()
21447b1 drm/i915: Rearrange i915_wait_request() accounting with callers
983cd49 drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate
42b0316 drm/i915: Support asynchronous waits on struct fence from i915_gem_request
16d0a2a drm/i915: Compress GPU objects in error state
5b2d629 drm/i915: Consolidate error object printing
d5c6209f drm/i915: Always use the GTT for error capture
6220554 drm/i915: Stop the machine whilst capturing the GPU crash dump
a8d19e4 drm/i915: Allow disabling error capture

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 39/42] drm/i915: Defer setting of global seqno on request to submission
  2016-10-07  9:46 ` [PATCH 39/42] drm/i915: Defer setting of global seqno on request to submission Chris Wilson
@ 2016-10-07 10:25   ` Joonas Lahtinen
  2016-10-07 10:27   ` Joonas Lahtinen
  1 sibling, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-07 10:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> Defer the assignment of the global seqno on a request to its submission.
> In the next patch, we will only allocate the global seqno at that time,
> here we are just enabling the wait-for-submission before wait-for-seqno
> paths.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 39/42] drm/i915: Defer setting of global seqno on request to submission
  2016-10-07  9:46 ` [PATCH 39/42] drm/i915: Defer setting of global seqno on request to submission Chris Wilson
  2016-10-07 10:25   ` Joonas Lahtinen
@ 2016-10-07 10:27   ` Joonas Lahtinen
  2016-10-07 11:03     ` Chris Wilson
  1 sibling, 1 reply; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-07 10:27 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> @@ -324,14 +324,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
>  	struct drm_i915_gem_request *request =
>  		container_of(fence, typeof(*request), submit);
>  	struct intel_engine_cs *engine = request->engine;
> +	struct intel_timeline *timeline;

Bump the assignment to here for consistency.

Then add my R-b I already promised.

Regards, Joonas

PS. Mismanagin' my e-mail...
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 40/42] drm/i915: Enable multiple timelines
  2016-10-07  9:46 ` [PATCH 40/42] drm/i915: Enable multiple timelines Chris Wilson
@ 2016-10-07 10:29   ` Joonas Lahtinen
  2016-10-07 11:00     ` Chris Wilson
  0 siblings, 1 reply; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-07 10:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> With the infrastructure converted over to tracking multiple timelines in
> the GEM API whilst preserving the efficiency of using a single execution
> timeline internally, we can now assign a separate timeline to every
> context with full-ppgtt.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

No changelog or comments, no re-review.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 40/42] drm/i915: Enable multiple timelines
  2016-10-07 10:29   ` Joonas Lahtinen
@ 2016-10-07 11:00     ` Chris Wilson
  2016-10-07 11:07       ` Joonas Lahtinen
  0 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07 11:00 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Oct 07, 2016 at 01:29:09PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> > With the infrastructure converted over to tracking multiple timelines in
> > the GEM API whilst preserving the efficiency of using a single execution
> > timeline internally, we can now assign a separate timeline to every
> > context with full-ppgtt.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> No changelog or comments, no re-review.

Because it hasn't changed, it has been rebased, but the crux of the
enabling is the same.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 39/42] drm/i915: Defer setting of global seqno on request to submission
  2016-10-07 10:27   ` Joonas Lahtinen
@ 2016-10-07 11:03     ` Chris Wilson
  2016-10-07 11:10       ` Joonas Lahtinen
  0 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07 11:03 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Oct 07, 2016 at 01:27:12PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> > @@ -324,14 +324,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> >  	struct drm_i915_gem_request *request =
> >  		container_of(fence, typeof(*request), submit);
> >  	struct intel_engine_cs *engine = request->engine;
> > +	struct intel_timeline *timeline;
> 
> Bump the assignment to here for consistency.

The timeline is a bit more interesting due to how we are operating on
both the request->timeline and engine->timeline, so I wanted it separate
and keep it with a reminder of the split.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage
  2016-10-07 10:12   ` Joonas Lahtinen
@ 2016-10-07 11:05     ` Chris Wilson
  2016-10-07 11:33       ` Joonas Lahtinen
  0 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07 11:05 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Oct 07, 2016 at 01:12:58PM +0300, Joonas Lahtinen wrote:
> Patches 14-25 to a separate series. Will continue their review there.

Nope. For reasons as I explained when I pulled them in, they are
required to prevent a critical use-after-free when moving the active
reference handling onto the vma retirement.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 40/42] drm/i915: Enable multiple timelines
  2016-10-07 11:00     ` Chris Wilson
@ 2016-10-07 11:07       ` Joonas Lahtinen
  0 siblings, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-07 11:07 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On pe, 2016-10-07 at 12:00 +0100, Chris Wilson wrote:
> On Fri, Oct 07, 2016 at 01:29:09PM +0300, Joonas Lahtinen wrote:
> >
> > No changelog or comments, no re-review.
> 
> Because it hasn't changed, it has been rebased, but the crux of the
> enabling is the same.

At least I have not received a reply to the questions in Message-id
<1474300333.3344.63.camel@linux.intel.com>

Could be our e-mail acting up too. If you have replied, could you re-
send.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 39/42] drm/i915: Defer setting of global seqno on request to submission
  2016-10-07 11:03     ` Chris Wilson
@ 2016-10-07 11:10       ` Joonas Lahtinen
  0 siblings, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-07 11:10 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On pe, 2016-10-07 at 12:03 +0100, Chris Wilson wrote:
> On Fri, Oct 07, 2016 at 01:27:12PM +0300, Joonas Lahtinen wrote:
> > 
> > On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> > > 
> > > @@ -324,14 +324,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> > > > > >  	struct drm_i915_gem_request *request =
> > > > > >  		container_of(fence, typeof(*request), submit);
> > > > > >  	struct intel_engine_cs *engine = request->engine;
> > > > > > +	struct intel_timeline *timeline;
> > 
> > Bump the assignment to here for consistency.
> 
> The timeline is a bit more interesting due to how we are operating on
> both the request->timeline and engine->timeline, so I wanted it separate
> and keep it with a reminder of the split.

Maybe add a comment or two then? I see no value in providing the
"information" this way.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage
  2016-10-07 11:05     ` Chris Wilson
@ 2016-10-07 11:33       ` Joonas Lahtinen
  0 siblings, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-07 11:33 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, Nikkanen, Kimmo

On pe, 2016-10-07 at 12:05 +0100, Chris Wilson wrote:
> On Fri, Oct 07, 2016 at 01:12:58PM +0300, Joonas Lahtinen wrote:
> > 
> > Patches 14-25 to a separate series. Will continue their review there.
> 
> Nope. For reasons as I explained when I pulled them in, they are
> required to prevent a critical use-after-free when moving the active
> reference handling onto the vma retirement.

Ack.

As discussed in IRC, asked John to give them a look too.

Will proceed with the remaining ones on Monday.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage
  2016-10-07  9:46 ` [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage Chris Wilson
  2016-10-07 10:12   ` Joonas Lahtinen
@ 2016-10-07 13:36   ` John Harrison
  2016-10-11  9:32   ` Tvrtko Ursulin
  2 siblings, 0 replies; 107+ messages in thread
From: John Harrison @ 2016-10-07 13:36 UTC (permalink / raw)
  To: intel-gfx


On 07/10/2016 10:46, Chris Wilson wrote:
> A while ago we switched from a contiguous array of pages into an sglist,
> for that was both more convenient for mapping to hardware and avoided
> the requirement for a vmalloc array of pages on every object. However,
> certain GEM API calls (like pwrite, pread as well as performing
> relocations) do desire access to individual struct pages. A quick hack
> was to introduce a cache of the last access such that finding the
> following page was quick - this works so long as the caller desired
> sequential access. Walking backwards, or multiple callers, still hits a
> slow linear search for each page. One solution is to store each
> successful lookup in a radix tree.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         |  57 ++++--------
>   drivers/gpu/drm/i915/i915_gem.c         | 149 ++++++++++++++++++++++++++++----
>   drivers/gpu/drm/i915/i915_gem_stolen.c  |   4 +-
>   drivers/gpu/drm/i915/i915_gem_userptr.c |   4 +-
>   4 files changed, 154 insertions(+), 60 deletions(-)

Am curious to know if you have any performance data to show how much of 
a benefit this is? Are there any real world apps that would notice or 
just IGT benchmarks?

Reviewed-by: John Harrison <john.c.harrison@intel.com>



_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 15/42] drm/i915: Use radixtree to jump start intel_partial_pages()
  2016-10-07  9:46 ` [PATCH 15/42] drm/i915: Use radixtree to jump start intel_partial_pages() Chris Wilson
@ 2016-10-07 13:46   ` John Harrison
  0 siblings, 0 replies; 107+ messages in thread
From: John Harrison @ 2016-10-07 13:46 UTC (permalink / raw)
  To: intel-gfx



On 07/10/2016 10:46, Chris Wilson wrote:
> We can use the radixtree index of the obj->pages to find the start
> position of the desired partial range.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.c | 38 +++++++++++++++++++++++--------------
>   1 file changed, 24 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 78b692e5b998..7e0c98576e72 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -3562,35 +3562,45 @@ intel_partial_pages(const struct i915_ggtt_view *view,
>   		    struct drm_i915_gem_object *obj)
>   {
>   	struct sg_table *st;
> -	struct scatterlist *sg;
> -	struct sg_page_iter obj_sg_iter;
> +	struct scatterlist *sg, *iter;
> +	unsigned int count = view->params.partial.size;
> +	unsigned int offset;
>   	int ret = -ENOMEM;
>   
>   	st = kmalloc(sizeof(*st), GFP_KERNEL);
>   	if (!st)
>   		goto err_st_alloc;
>   
> -	ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
> +	ret = sg_alloc_table(st, count, GFP_KERNEL);
>   	if (ret)
>   		goto err_sg_alloc;
>   
> +	iter = i915_gem_object_get_sg(obj,
> +				      view->params.partial.offset,
> +				      &offset);
> +
>   	sg = st->sgl;
>   	st->nents = 0;
> -	for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
> -		view->params.partial.offset)
> -	{
> -		if (st->nents >= view->params.partial.size)
> -			break;
> +	do {
> +		unsigned int len =
> +			min(iter->length, (count - offset) << PAGE_SHIFT);
>   
> -		sg_set_page(sg, NULL, PAGE_SIZE, 0);
> -		sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
> -		sg_dma_len(sg) = PAGE_SIZE;
> +		sg_set_page(sg, NULL, len, 0);
> +		sg_dma_address(sg) =
> +			sg_dma_address(iter) + (offset << PAGE_SHIFT);
> +		sg_dma_len(sg) = len;
>   
> -		sg = sg_next(sg);
>   		st->nents++;
> -	}
> +		count -= len >> PAGE_SHIFT;
> +		if (count == 0) {
> +			sg_mark_end(sg);
> +			return st;
> +		}
>   
> -	return st;
> +		sg = __sg_next(sg);
> +		iter = __sg_next(iter);
> +		offset = 0;
> +	} while (1);
>   
>   err_sg_alloc:
>   	kfree(st);


Reviewed-by: John Harrison <john.c.harrison@intel.com>



_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request
  2016-10-07  9:45 ` [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request Chris Wilson
  2016-10-07  9:56   ` Joonas Lahtinen
@ 2016-10-07 15:51   ` Tvrtko Ursulin
  2016-10-07 16:12     ` Chris Wilson
  1 sibling, 1 reply; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-07 15:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/10/2016 10:45, Chris Wilson wrote:
> We will need to wait on DMA completion (as signaled via struct fence)
> before executing our i915_gem_request. Therefore we want to expose a
> method for adding the await on the fence itself to the request.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_request.c | 40 +++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
>   2 files changed, 42 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 8832f8ec1583..e1f7a32d4080 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -23,6 +23,7 @@
>    */
>   
>   #include <linux/prefetch.h>
> +#include <linux/fence-array.h>
>   
>   #include "i915_drv.h"
>   
> @@ -495,6 +496,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
>   	return 0;
>   }
>   
> +int
> +i915_gem_request_await_fence(struct drm_i915_gem_request *req,
> +			     struct fence *fence)
> +{
> +	struct fence_array *array;
> +	int ret;
> +	int i;
> +
> +	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> +		return 0;
> +
> +	if (fence_is_i915(fence))
> +		return i915_gem_request_await_request(req, to_request(fence));
> +
> +	if (!fence_is_array(fence)) {
> +		ret = i915_sw_fence_await_dma_fence(&req->submit,
> +						    fence, 10*HZ,
> +						    GFP_KERNEL);
> +		return ret < 0 ? ret : 0;
> +	}
> +
> +	array = to_fence_array(fence);
> +	for (i = 0; i < array->num_fences; i++) {
> +		struct fence *child = array->fences[i];

1. What is the advantage in manually waiting on array elements rather 
than just the array? Is the existence of signal_on_any a problem?

2. Can child be another array?

Regards,

Tvrtko

> +
> +		if (fence_is_i915(child))
> +			ret = i915_gem_request_await_request(req,
> +							     to_request(child));
> +		else
> +			ret = i915_sw_fence_await_dma_fence(&req->submit,
> +							    child, 10*HZ,
> +							    GFP_KERNEL);
> +		if (ret < 0)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
>   /**
>    * i915_gem_request_await_object - set this request to (async) wait upon a bo
>    *
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 974bd7bcc801..c85a3d82febf 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -214,6 +214,8 @@ int
>   i915_gem_request_await_object(struct drm_i915_gem_request *to,
>   			      struct drm_i915_gem_object *obj,
>   			      bool write);
> +int i915_gem_request_await_fence(struct drm_i915_gem_request *req,
> +				 struct fence *fence);
>   
>   void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
>   #define i915_add_request(req) \

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 07/42] drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate
  2016-10-07  9:46 ` [PATCH 07/42] drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate Chris Wilson
@ 2016-10-07 16:10   ` Tvrtko Ursulin
  2016-10-07 16:22     ` Chris Wilson
  0 siblings, 1 reply; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-07 16:10 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/10/2016 10:46, Chris Wilson wrote:
> In forthcoming patches, we want to be able to dynamically allocate the
> wait_queue_t used whilst awaiting. This is more convenient if we extend
> the i915_sw_fence_await_sw_fence() to perform the allocation for us if
> we pass in a gfp mask as an alternative to a preallocated struct.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_sw_fence.c | 40 ++++++++++++++++++++++++++++++++----
>   drivers/gpu/drm/i915/i915_sw_fence.h |  8 ++++++++
>   2 files changed, 44 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> index 1e5cbc585ca2..dfb59dced0ce 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -13,6 +13,8 @@
>   
>   #include "i915_sw_fence.h"
>   
> +#define I915_SW_FENCE_FLAG_ALLOC BIT(0)
> +

You collide with WQ_FLAG_EXCLUSIVE here, so I assume i915 will have 
complete control of the wq in question?

>   static DEFINE_SPINLOCK(i915_sw_fence_lock);
>   
>   static int __i915_sw_fence_notify(struct i915_sw_fence *fence,
> @@ -135,6 +137,8 @@ static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *
>   	list_del(&wq->task_list);
>   	__i915_sw_fence_complete(wq->private, key);
>   	i915_sw_fence_put(wq->private);
> +	if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
> +		kfree(wq);
>   	return 0;
>   }
>   
> @@ -192,9 +196,9 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
>   	return err;
>   }
>   
> -int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
> -				 struct i915_sw_fence *signaler,
> -				 wait_queue_t *wq)
> +static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
> +					  struct i915_sw_fence *signaler,
> +					  wait_queue_t *wq, gfp_t gfp)
>   {
>   	unsigned long flags;
>   	int pending;
> @@ -206,8 +210,22 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
>   	if (unlikely(i915_sw_fence_check_if_after(fence, signaler)))
>   		return -EINVAL;
>   
> +	pending = 0;
> +	if (!wq) {
> +		wq = kmalloc(sizeof(*wq), gfp);
> +		if (!wq) {
> +			if (!gfpflags_allow_blocking(gfp))
> +				return -ENOMEM;
> +
> +			i915_sw_fence_wait(signaler);

This looks like a strange API, unless I am missing something - normally it 
just sets things up for waiting, but if the allocation fails it actually 
does the wait?

> +			return 0;
> +		}
> +
> +		pending |= I915_SW_FENCE_FLAG_ALLOC;

Why is this stored in pending? Just because it is already around to be re-used?

> +	}
> +
>   	INIT_LIST_HEAD(&wq->task_list);
> -	wq->flags = 0;
> +	wq->flags = pending;
>   	wq->func = i915_sw_fence_wake;
>   	wq->private = i915_sw_fence_get(fence);
>   
> @@ -226,6 +244,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
>   	return pending;
>   }
>   
> +int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
> +				 struct i915_sw_fence *signaler,
> +				 wait_queue_t *wq)
> +{
> +	return __i915_sw_fence_await_sw_fence(fence, signaler, wq, 0);
> +}
> +
> +int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
> +				     struct i915_sw_fence *signaler,
> +				     gfp_t gfp)
> +{
> +	return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
> +}
> +
>   struct dma_fence_cb {
>   	struct fence_cb base;
>   	struct i915_sw_fence *fence;
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
> index 373141602ca4..5861c69f24d4 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.h
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.h
> @@ -46,6 +46,9 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence);
>   int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
>   				 struct i915_sw_fence *after,
>   				 wait_queue_t *wq);
> +int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
> +				     struct i915_sw_fence *after,
> +				     gfp_t gfp);
>   int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
>   				  struct fence *dma,
>   				  unsigned long timeout,
> @@ -62,4 +65,9 @@ static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence)
>   	return atomic_read(&fence->pending) < 0;
>   }
>   
> +static inline void i915_sw_fence_wait(struct i915_sw_fence *fence)
> +{
> +	wait_event(fence->wait, i915_sw_fence_done(fence));
> +}
> +
>   #endif /* _I915_SW_FENCE_H_ */

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request
  2016-10-07 15:51   ` Tvrtko Ursulin
@ 2016-10-07 16:12     ` Chris Wilson
  2016-10-07 16:16       ` Tvrtko Ursulin
  0 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07 16:12 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Oct 07, 2016 at 04:51:14PM +0100, Tvrtko Ursulin wrote:
> 
> On 07/10/2016 10:45, Chris Wilson wrote:
> >We will need to wait on DMA completion (as signaled via struct fence)
> >before executing our i915_gem_request. Therefore we want to expose a
> >method for adding the await on the fence itself to the request.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/i915_gem_request.c | 40 +++++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
> >  2 files changed, 42 insertions(+)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> >index 8832f8ec1583..e1f7a32d4080 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_request.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_request.c
> >@@ -23,6 +23,7 @@
> >   */
> >  #include <linux/prefetch.h>
> >+#include <linux/fence-array.h>
> >  #include "i915_drv.h"
> >@@ -495,6 +496,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
> >  	return 0;
> >  }
> >+int
> >+i915_gem_request_await_fence(struct drm_i915_gem_request *req,
> >+			     struct fence *fence)
> >+{
> >+	struct fence_array *array;
> >+	int ret;
> >+	int i;
> >+
> >+	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> >+		return 0;
> >+
> >+	if (fence_is_i915(fence))
> >+		return i915_gem_request_await_request(req, to_request(fence));
> >+
> >+	if (!fence_is_array(fence)) {
> >+		ret = i915_sw_fence_await_dma_fence(&req->submit,
> >+						    fence, 10*HZ,
> >+						    GFP_KERNEL);
> >+		return ret < 0 ? ret : 0;
> >+	}
> >+
> >+	array = to_fence_array(fence);
> >+	for (i = 0; i < array->num_fences; i++) {
> >+		struct fence *child = array->fences[i];
> 
> 1. What is the advantage in manually waiting on array elements
> rather than just the array? Is the existence of signal_on_any a
> problem?

We can wait on native fences much more efficiently. In the typical case,
we don't even need the wait at all since the ordering is imposed by the
timeline. Or we can hook into the submission fence to avoid the
interrupt. Failing that we have to respond to the interrupt. Also note
that foreign fences handle autosignaling differently to native fences,
as we cannot rely on third parties having a working hangcheck.
 
> 2. Can child be another array?

Yes, but we don't want to recurse (or at least need to bound the
recursion).
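
If nested arrays do show up on this path, bounding it could look something
like this (sketch only; the depth limit is arbitrary and the leaf handling
just mirrors the patch above):

	static int await_fence_bounded(struct drm_i915_gem_request *req,
				       struct fence *fence, int depth)
	{
		int ret, i;

		if (!fence_is_array(fence)) {
			if (fence_is_i915(fence))
				return i915_gem_request_await_request(req,
								      to_request(fence));

			ret = i915_sw_fence_await_dma_fence(&req->submit,
							    fence, 10*HZ,
							    GFP_KERNEL);
			return ret < 0 ? ret : 0;
		}

		if (depth == 0) /* refuse unbounded nesting */
			return -EINVAL;

		for (i = 0; i < to_fence_array(fence)->num_fences; i++) {
			ret = await_fence_bounded(req,
						  to_fence_array(fence)->fences[i],
						  depth - 1);
			if (ret < 0)
				return ret;
		}

		return 0;
	}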
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request
  2016-10-07 16:12     ` Chris Wilson
@ 2016-10-07 16:16       ` Tvrtko Ursulin
  2016-10-07 16:37         ` Chris Wilson
  0 siblings, 1 reply; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-07 16:16 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/10/2016 17:12, Chris Wilson wrote:
> On Fri, Oct 07, 2016 at 04:51:14PM +0100, Tvrtko Ursulin wrote:
>> On 07/10/2016 10:45, Chris Wilson wrote:
>>> We will need to wait on DMA completion (as signaled via struct fence)
>>> before executing our i915_gem_request. Therefore we want to expose a
>>> method for adding the await on the fence itself to the request.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>   drivers/gpu/drm/i915/i915_gem_request.c | 40 +++++++++++++++++++++++++++++++++
>>>   drivers/gpu/drm/i915/i915_gem_request.h |  2 ++
>>>   2 files changed, 42 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
>>> index 8832f8ec1583..e1f7a32d4080 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_request.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
>>> @@ -23,6 +23,7 @@
>>>    */
>>>   #include <linux/prefetch.h>
>>> +#include <linux/fence-array.h>
>>>   #include "i915_drv.h"
>>> @@ -495,6 +496,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
>>>   	return 0;
>>>   }
>>> +int
>>> +i915_gem_request_await_fence(struct drm_i915_gem_request *req,
>>> +			     struct fence *fence)
>>> +{
>>> +	struct fence_array *array;
>>> +	int ret;
>>> +	int i;
>>> +
>>> +	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
>>> +		return 0;
>>> +
>>> +	if (fence_is_i915(fence))
>>> +		return i915_gem_request_await_request(req, to_request(fence));
>>> +
>>> +	if (!fence_is_array(fence)) {
>>> +		ret = i915_sw_fence_await_dma_fence(&req->submit,
>>> +						    fence, 10*HZ,
>>> +						    GFP_KERNEL);
>>> +		return ret < 0 ? ret : 0;
>>> +	}
>>> +
>>> +	array = to_fence_array(fence);
>>> +	for (i = 0; i < array->num_fences; i++) {
>>> +		struct fence *child = array->fences[i];
>> 1. What is the advantage in manually waiting on array elements
>> rather than just the array? Is the existence of signal_on_any a
>> problem?
> We can wait on native fences much more efficiently. In the typical case,
> we don't even need the wait at all since the ordering is imposed by the
> timeline. Or we can hook into the submission fence to avoid the
> interrupt. Failing that we have to respond to the interrupt. Also note
> that foreign fences handle autosignaling differently to native fences,
> as we cannot rely on third parties having a working hangcheck.
>   
>> 2. Can child be another array?
> Yes, but we don't want to recurse (or at least need to bound the
> recursion).

In that case could the array have been created in the signal_on_any mode 
and how would we handle that?

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 07/42] drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate
  2016-10-07 16:10   ` Tvrtko Ursulin
@ 2016-10-07 16:22     ` Chris Wilson
  2016-10-08  8:21       ` Tvrtko Ursulin
  0 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07 16:22 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Oct 07, 2016 at 05:10:04PM +0100, Tvrtko Ursulin wrote:
> 
> On 07/10/2016 10:46, Chris Wilson wrote:
> >In forthcoming patches, we want to be able to dynamically allocate the
> >wait_queue_t used whilst awaiting. This is more convenient if we extend
> >the i915_sw_fence_await_sw_fence() to perform the allocation for us if
> >we pass in a gfp mask as an alternative to a preallocated struct.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >---
> >  drivers/gpu/drm/i915/i915_sw_fence.c | 40 ++++++++++++++++++++++++++++++++----
> >  drivers/gpu/drm/i915/i915_sw_fence.h |  8 ++++++++
> >  2 files changed, 44 insertions(+), 4 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> >index 1e5cbc585ca2..dfb59dced0ce 100644
> >--- a/drivers/gpu/drm/i915/i915_sw_fence.c
> >+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> >@@ -13,6 +13,8 @@
> >  #include "i915_sw_fence.h"
> >+#define I915_SW_FENCE_FLAG_ALLOC BIT(0)
> >+
> 
> You collide with WQ_FLAG_EXCLUSIVE here, so I assume i915 will have
> complete control of the wq in question?

Hmm, we should move the bit as well, but yes, the flag is atm private to the
wake-up function, for which we use a custom function.

> >+		wq = kmalloc(sizeof(*wq), gfp);
> >+		if (!wq) {
> >+			if (!gfpflags_allow_blocking(gfp))
> >+				return -ENOMEM;
> >+
> >+			i915_sw_fence_wait(signaler);
> 
> This looks like a strange API, unless I am missing something - normally
> it just sets things up for waiting, but if the allocation fails it
> actually does the wait?

Yes. If we blocked on the malloc and it failed, we block a little longer for
the completion. iirc, the idea came from the async task.
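
From the caller's side the intended contract is just (sketch, the req and
signaler names are illustrative):

	/* Queue the await asynchronously; with a blocking gfp the only
	 * error left is a dependency loop (-EINVAL), since an allocation
	 * failure degrades to waiting synchronously and still succeeds.
	 */
	ret = i915_sw_fence_await_sw_fence_gfp(&req->submit,
					       &signaler->submit,
					       GFP_KERNEL);
	if (ret < 0)
		return ret;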
> 
> >+			return 0;
> >+		}
> >+
> >+		pending |= I915_SW_FENCE_FLAG_ALLOC;
> 
> Why is this stored in pending? Just because it is already around to be re-used?

Urm, yes. s/pending/tmp/
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 10/42] drm/i915: Defer active reference until required
  2016-10-07  9:46 ` [PATCH 10/42] drm/i915: Defer active reference until required Chris Wilson
@ 2016-10-07 16:35   ` Tvrtko Ursulin
  2016-10-07 16:58     ` Chris Wilson
  0 siblings, 1 reply; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-07 16:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/10/2016 10:46, Chris Wilson wrote:
> We only need the active reference to keep the object alive after the
> handle has been deleted (so as to prevent a synchronous gem_close). Why
> then pay the price of a kref on every execbuf when we can insert that
> final active ref just in time for the handle deletion?

I really dislike this. Where there was elegance with obj/vma_put, it is 
now replaced with the out-of-place looking 
__i915_gem_object_release_unless_active. I don't see why higher 
level layers would have to concern themselves with calling something with 
such a low-level sounding name.

How much does this influence performance and in what cases? If 
significant, could we try to come up with something similar but more 
elegant?

Regards,

Tvrtko

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h              | 28 ++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem.c              | 22 +++++++++++++++++++++-
>   drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  2 +-
>   drivers/gpu/drm/i915/i915_gem_context.c      |  2 +-
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  2 --
>   drivers/gpu/drm/i915/i915_gem_gtt.c          |  7 ++++++-
>   drivers/gpu/drm/i915/i915_gem_render_state.c |  3 ++-
>   drivers/gpu/drm/i915/intel_ringbuffer.c      | 15 ++++++++++++---
>   8 files changed, 71 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ee25e265416f..fee5cc92e2f2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2232,6 +2232,12 @@ struct drm_i915_gem_object {
>   	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
>   
>   	/**
> +	 * Have we taken a reference for the object for incomplete GPU
> +	 * activity?
> +	 */
> +#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
> +
> +	/**
>   	 * This is set if the object has been written to since last bound
>   	 * to the GTT
>   	 */
> @@ -2399,6 +2405,28 @@ i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
>   	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
>   }
>   
> +static inline bool
> +i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
> +{
> +	return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
> +}
> +
> +static inline void
> +i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	__set_bit(I915_BO_ACTIVE_REF, &obj->flags);
> +}
> +
> +static inline void
> +i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	__clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
> +}
> +
> +void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
> +
>   static inline unsigned int
>   i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
>   {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7fa5cb764739..b560263bf446 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2618,7 +2618,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
>   		list_move_tail(&obj->global_list,
>   			       &request->i915->mm.bound_list);
>   
> -	i915_gem_object_put(obj);
> +	if (i915_gem_object_has_active_reference(obj)) {
> +		i915_gem_object_clear_active_reference(obj);
> +		i915_gem_object_put(obj);
> +	}
>   }
>   
>   static bool i915_context_is_banned(const struct i915_gem_context *ctx)
> @@ -2889,6 +2892,12 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
>   	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
>   		if (vma->vm->file == fpriv)
>   			i915_vma_close(vma);
> +
> +	if (i915_gem_object_is_active(obj) &&
> +	    !i915_gem_object_has_active_reference(obj)) {
> +		i915_gem_object_set_active_reference(obj);
> +		i915_gem_object_get(obj);
> +	}
>   	mutex_unlock(&obj->base.dev->struct_mutex);
>   }
>   
> @@ -4365,6 +4374,17 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>   	intel_runtime_pm_put(dev_priv);
>   }
>   
> +void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +
> +	GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
> +	if (i915_gem_object_is_active(obj))
> +		i915_gem_object_set_active_reference(obj);
> +	else
> +		i915_gem_object_put(obj);
> +}
> +
>   int i915_gem_suspend(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index ed989596d9a3..cb25cad3318c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -73,7 +73,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
>   		list_for_each_entry_safe(obj, next,
>   					 &pool->cache_list[n],
>   					 batch_pool_link)
> -			i915_gem_object_put(obj);
> +			__i915_gem_object_release_unless_active(obj);
>   
>   		INIT_LIST_HEAD(&pool->cache_list[n]);
>   	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index df10f4e95736..1d2ab73a8f43 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -155,7 +155,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
>   		if (ce->ring)
>   			intel_ring_free(ce->ring);
>   
> -		i915_vma_put(ce->state);
> +		__i915_gem_object_release_unless_active(ce->state->obj);
>   	}
>   
>   	put_pid(ctx->pid);
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 72c7c1855e70..0deecd4e3b6c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1299,8 +1299,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>   	 * add the active reference first and queue for it to be dropped
>   	 * *last*.
>   	 */
> -	if (!i915_gem_object_is_active(obj))
> -		i915_gem_object_get(obj);
>   	i915_gem_object_set_active(obj, idx);
>   	i915_gem_active_set(&obj->last_read[idx], req);
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 2d846aa39ca5..1c95da8424cb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -3712,11 +3712,16 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>   void i915_vma_unpin_and_release(struct i915_vma **p_vma)
>   {
>   	struct i915_vma *vma;
> +	struct drm_i915_gem_object *obj;
>   
>   	vma = fetch_and_zero(p_vma);
>   	if (!vma)
>   		return;
>   
> +	obj = vma->obj;
> +
>   	i915_vma_unpin(vma);
> -	i915_vma_put(vma);
> +	i915_vma_close(vma);
> +
> +	__i915_gem_object_release_unless_active(obj);
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index 95b7e9afd5f8..09cf4874c45f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -224,7 +224,8 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>   	i915_vma_move_to_active(so.vma, req, 0);
>   err_unpin:
>   	i915_vma_unpin(so.vma);
> +	i915_vma_close(so.vma);
>   err_obj:
> -	i915_gem_object_put(obj);
> +	__i915_gem_object_release_unless_active(obj);
>   	return ret;
>   }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index b60c6f09fbfd..f3dfb7ca625d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1763,14 +1763,19 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine)
>   static void cleanup_status_page(struct intel_engine_cs *engine)
>   {
>   	struct i915_vma *vma;
> +	struct drm_i915_gem_object *obj;
>   
>   	vma = fetch_and_zero(&engine->status_page.vma);
>   	if (!vma)
>   		return;
>   
> +	obj = vma->obj;
> +
>   	i915_vma_unpin(vma);
> -	i915_gem_object_unpin_map(vma->obj);
> -	i915_vma_put(vma);
> +	i915_vma_close(vma);
> +
> +	i915_gem_object_unpin_map(obj);
> +	__i915_gem_object_release_unless_active(obj);
>   }
>   
>   static int init_status_page(struct intel_engine_cs *engine)
> @@ -1968,7 +1973,11 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
>   void
>   intel_ring_free(struct intel_ring *ring)
>   {
> -	i915_vma_put(ring->vma);
> +	struct drm_i915_gem_object *obj = ring->vma->obj;
> +
> +	i915_vma_close(ring->vma);
> +	__i915_gem_object_release_unless_active(obj);
> +
>   	kfree(ring);
>   }
>   

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request
  2016-10-07 16:16       ` Tvrtko Ursulin
@ 2016-10-07 16:37         ` Chris Wilson
  2016-10-08  8:23           ` Tvrtko Ursulin
  0 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07 16:37 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Oct 07, 2016 at 05:16:17PM +0100, Tvrtko Ursulin wrote:
> 
> On 07/10/2016 17:12, Chris Wilson wrote:
> >>2. Can child be another array?
> >Yes, but we don't want to recurse (or at least need to bound the
> >recursion).
> 
> In that case could the array have been created in the signal_on_any
> mode and how would we handle that?

Hmm. Not along our paths, I think. The only fence-arrays we have come from
sync-file and are signal-on-all (except... in the case where it
wraps a single fence, that fence could be a composite). signal-on-any is
icky; to be complete we should not decompose it into its elements -
however, whether or not a fence-array is operating in signal_on_any mode
is not stored. So at the moment, the best I can do is offer a comment.

The only user of it at the moment is amdgpu, waiting for the first of
many VM managers to become available - not something we'll see
immediately via sync-file.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 11/42] drm/i915: Introduce an internal allocator for disposable private objects
  2016-10-07  9:46 ` [PATCH 11/42] drm/i915: Introduce an internal allocator for disposable private objects Chris Wilson
  2016-10-07 10:01   ` Joonas Lahtinen
@ 2016-10-07 16:52   ` Tvrtko Ursulin
  2016-10-07 17:08     ` Chris Wilson
  1 sibling, 1 reply; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-07 16:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/10/2016 10:46, Chris Wilson wrote:
> Quite a few of our objects used for internal hardware programming do not
> benefit from being swappable or from being zero initialised. As such
> they do not benefit from using a shmemfs backing storage and since they
> are internal and never directly exposed to the user, we do not need to
> worry about providing a filp. For these we can use a
> drm_i915_gem_object wrapper around a sg_table of plain struct page. They
> are not swap backed and not automatically pinned. If they are reaped
> by the shrinker, the pages are released and the contents discarded. For
> the internal use case, this is fine as for example, ringbuffers are
> pinned from being written by a request to be read by the hardware. Once
> they are idle, they can be discarded entirely. As such they are a good
> match for execlist ringbuffers and a small variety of other internal
> objects.
>
> In the first iteration, this is limited to the scratch batch buffers we
> use (for command parsing and state initialisation).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/Makefile                |   1 +
>   drivers/gpu/drm/i915/i915_drv.h              |   5 +
>   drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  28 ++---
>   drivers/gpu/drm/i915/i915_gem_internal.c     | 161 +++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
>   drivers/gpu/drm/i915/intel_engine_cs.c       |   2 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.c      |  14 ++-
>   7 files changed, 189 insertions(+), 24 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index a998c2bce70a..b94a90f34d2d 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
>   	  i915_gem_execbuffer.o \
>   	  i915_gem_fence.o \
>   	  i915_gem_gtt.o \
> +	  i915_gem_internal.o \
>   	  i915_gem.o \
>   	  i915_gem_render_state.o \
>   	  i915_gem_request.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index fee5cc92e2f2..bad97f1e5265 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3538,6 +3538,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
>   					       u32 gtt_offset,
>   					       u32 size);
>   
> +/* i915_gem_internal.c */
> +struct drm_i915_gem_object *
> +i915_gem_object_create_internal(struct drm_device *dev,
> +				unsigned int size);
> +

Wasn't size_t our convention for GEM objects?

>   /* i915_gem_shrinker.c */
>   unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
>   			      unsigned long target,
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index cb25cad3318c..3934c9103cf2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -97,9 +97,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>   			size_t size)
>   {
>   	struct drm_i915_gem_object *obj = NULL;
> -	struct drm_i915_gem_object *tmp, *next;
> +	struct drm_i915_gem_object *tmp;
>   	struct list_head *list;
> -	int n;
> +	int n, ret;
>   
>   	lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
>   
> @@ -112,19 +112,12 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>   		n = ARRAY_SIZE(pool->cache_list) - 1;
>   	list = &pool->cache_list[n];
>   
> -	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
> +	list_for_each_entry(tmp, list, batch_pool_link) {
>   		/* The batches are strictly LRU ordered */
>   		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
>   					     &tmp->base.dev->struct_mutex))
>   			break;
>   
> -		/* While we're looping, do some clean up */
> -		if (tmp->madv == __I915_MADV_PURGED) {
> -			list_del(&tmp->batch_pool_link);
> -			i915_gem_object_put(tmp);
> -			continue;
> -		}
> -
>   		if (tmp->base.size >= size) {
>   			obj = tmp;
>   			break;
> @@ -132,19 +125,16 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>   	}
>   
>   	if (obj == NULL) {
> -		int ret;
> -
> -		obj = i915_gem_object_create(&pool->engine->i915->drm, size);
> +		obj = i915_gem_object_create_internal(&pool->engine->i915->drm,
> +						      size);
>   		if (IS_ERR(obj))
>   			return obj;
> -
> -		ret = i915_gem_object_get_pages(obj);
> -		if (ret)
> -			return ERR_PTR(ret);
> -
> -		obj->madv = I915_MADV_DONTNEED;
>   	}
>   
> +	ret = i915_gem_object_get_pages(obj);
> +	if (ret)
> +		return ERR_PTR(ret);
> +
>   	list_move_tail(&obj->batch_pool_link, list);
>   	i915_gem_object_pin_pages(obj);
>   	return obj;
> diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
> new file mode 100644
> index 000000000000..534a61c1aba2
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_internal.c
> @@ -0,0 +1,161 @@
> +/*
> + * Copyright © 2014-2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include <drm/drmP.h>
> +#include <drm/i915_drm.h>
> +#include "i915_drv.h"
> +
> +static void internal_free_pages(struct sg_table *st)
> +{
> +	struct sg_page_iter sg_iter;
> +
> +	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
> +		put_page(sg_page_iter_page(&sg_iter));
> +
> +	sg_free_table(st);
> +	kfree(st);
> +}
> +
> +static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
> +{
> +	const unsigned int npages = obj->base.size / PAGE_SIZE;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
> +	unsigned long last_pfn = 0;	/* suppress gcc warning */
> +	gfp_t gfp;
> +	int i;
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (!st)
> +		return -ENOMEM;
> +
> +	if (sg_alloc_table(st, npages, GFP_KERNEL)) {
> +		kfree(st);
> +		return -ENOMEM;
> +	}
> +
> +	sg = st->sgl;
> +	st->nents = 0;
> +
> +	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
> +	if (IS_CRESTLINE(obj->base.dev) || IS_BROADWATER(obj->base.dev)) {

/O\ Nooooo... dev_priv please! :)

> +		/* 965gm cannot relocate objects above 4GiB. */
> +		gfp &= ~__GFP_HIGHMEM;
> +		gfp |= __GFP_DMA32;
> +	}
> +
> +	for (i = 0; i < npages; i++) {
> +		struct page *page;
> +
> +		page = alloc_page(gfp);
> +		if (!page)
> +			goto err;
> +
> +#ifdef CONFIG_SWIOTLB
> +		if (swiotlb_nr_tbl()) {
> +			st->nents++;
> +			sg_set_page(sg, page, PAGE_SIZE, 0);
> +			sg = sg_next(sg);
> +			continue;
> +		}
> +#endif
> +		if (!i || page_to_pfn(page) != last_pfn + 1) {
> +			if (i)
> +				sg = sg_next(sg);
> +			st->nents++;
> +			sg_set_page(sg, page, PAGE_SIZE, 0);
> +		} else {
> +			sg->length += PAGE_SIZE;
> +		}
> +		last_pfn = page_to_pfn(page);
> +	}
> +#ifdef CONFIG_SWIOTLB
> +	if (!swiotlb_nr_tbl())
> +#endif
> +		sg_mark_end(sg);

Looks like the loop above could be moved into a helper and shared with 
i915_gem_object_get_pages_gtt. Maybe just page-alloc and 
page-alloc-error callbacks would be required.
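
Something with this sort of shape, perhaps (just a sketch of a possible
interface, names made up, not a worked-out patch):

	/* The sg building/coalescing loop stays common; only where the
	 * pages come from (shmemfs, plain alloc_page, ...) differs.
	 */
	struct sg_table *
	i915_sg_from_page_source(unsigned int npages,
				 struct page *(*alloc_one)(void *data),
				 void (*free_one)(struct page *page, void *data),
				 void *data);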

> +	obj->pages = st;
> +
> +	if (i915_gem_gtt_prepare_object(obj)) {
> +		obj->pages = NULL;
> +		goto err;
> +	}
> +
> +	/* Mark the pages as dontneed whilst they are still pinned. As soon
> +	 * as they are unpinned they are allowed to be reaped by the shrinker,
> +	 * and the caller is expected to repopulate - the contents of this
> +	 * object are only valid whilst active and pinned.
> +	 */
> +	obj->madv = I915_MADV_DONTNEED;
> +	return 0;
> +
> +err:
> +	sg_mark_end(sg);
> +	internal_free_pages(st);
> +	return -ENOMEM;
> +}
> +
> +static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj)
> +{
> +	internal_free_pages(obj->pages);
> +
> +	obj->dirty = 0;
> +	obj->madv = I915_MADV_WILLNEED;
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
> +	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
> +	.get_pages = i915_gem_object_get_pages_internal,
> +	.put_pages = i915_gem_object_put_pages_internal,
> +};
> +
> +/**
> + * Creates a new object that wraps some internal memory for private use.
> + * This object is not backed by swappable storage, and as such its contents
> + * are volatile and only valid whilst pinned. If the object is reaped by the
> + * shrinker, its pages and data will be discarded. Equally, it is not a full
> + * GEM object and so not valid for access from userspace. This makes it useful
> + * for hardware interfaces like ringbuffers (which are pinned from the time
> + * the request is written to the time the hardware stops accessing it), but
> + * not for contexts (which need to be preserved when not active for later
> + * reuse). Note that it is not cleared upon allocation.
> + */
> +struct drm_i915_gem_object *
> +i915_gem_object_create_internal(struct drm_device *dev,
> +				unsigned int size)

size_t again.

> +{
> +	struct drm_i915_gem_object *obj;
> +
> +	obj = i915_gem_object_alloc(dev);
> +	if (!obj)
> +		return ERR_PTR(-ENOMEM);
> +
> +	drm_gem_private_object_init(dev, &obj->base, size);
> +	i915_gem_object_init(obj, &i915_gem_object_internal_ops);
> +
> +	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> +	obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE;

At least to_i915 would be helpful.

> +
> +	return obj;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index 09cf4874c45f..c009ee1c27fd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -187,7 +187,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>   	if (so.rodata->batch_items * 4 > 4096)
>   		return -EINVAL;
>   
> -	obj = i915_gem_object_create(&req->i915->drm, 4096);
> +	obj = i915_gem_object_create_internal(&req->i915->drm, 4096);
>   	if (IS_ERR(obj))
>   		return PTR_ERR(obj);
>   
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 480584c09306..99737b842d73 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -251,7 +251,7 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
>   
>   	obj = i915_gem_object_create_stolen(&engine->i915->drm, size);
>   	if (!obj)
> -		obj = i915_gem_object_create(&engine->i915->drm, size);
> +		obj = i915_gem_object_create_internal(&engine->i915->drm, size);
>   	if (IS_ERR(obj)) {
>   		DRM_ERROR("Failed to allocate scratch page\n");
>   		return PTR_ERR(obj);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index f3dfb7ca625d..b5d9c03f84ce 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1783,9 +1783,10 @@ static int init_status_page(struct intel_engine_cs *engine)
>   	struct drm_i915_gem_object *obj;
>   	struct i915_vma *vma;
>   	unsigned int flags;
> +	void *vaddr;
>   	int ret;
>   
> -	obj = i915_gem_object_create(&engine->i915->drm, 4096);
> +	obj = i915_gem_object_create_internal(&engine->i915->drm, 4096);
>   	if (IS_ERR(obj)) {
>   		DRM_ERROR("Failed to allocate status page\n");
>   		return PTR_ERR(obj);
> @@ -1818,15 +1819,22 @@ static int init_status_page(struct intel_engine_cs *engine)
>   	if (ret)
>   		goto err;
>   
> +	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +	if (IS_ERR(vaddr)) {
> +		ret = PTR_ERR(vaddr);
> +		goto err_unpin;
> +	}
> +

Unrelated fix, best if you could afford to extract it.

>   	engine->status_page.vma = vma;
>   	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
> -	engine->status_page.page_addr =
> -		i915_gem_object_pin_map(obj, I915_MAP_WB);
> +	engine->status_page.page_addr = memset(vaddr, 0, 4096);
>   

Very unusual, at least for me. :)

>   	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
>   			 engine->name, i915_ggtt_offset(vma));
>   	return 0;
>   
> +err_unpin:
> +	i915_vma_unpin(vma);
>   err:
>   	i915_gem_object_put(obj);
>   	return ret;

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 10/42] drm/i915: Defer active reference until required
  2016-10-07 16:35   ` Tvrtko Ursulin
@ 2016-10-07 16:58     ` Chris Wilson
  2016-10-08  8:18       ` Tvrtko Ursulin
  0 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07 16:58 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Oct 07, 2016 at 05:35:38PM +0100, Tvrtko Ursulin wrote:
> 
> On 07/10/2016 10:46, Chris Wilson wrote:
> >We only need the active reference to keep the object alive after the
> >handle has been deleted (so as to prevent a synchronous gem_close). Why
> >then pay the price of a kref on every execbuf when we can insert that
> >final active ref just in time for the handle deletion?
> 
> I really dislike this. Where there was elegance with obj/vma_put,
> it is now replaced with the out-of-place looking
> __i915_gem_object_release_unless_active. I don't see why higher
> level layers would have to concern themselves with calling
> something with such a low-level sounding name.
> 
> How much does this influence performance and in what cases? If
> significant, could we try to come up with something similar but more
> elegant?

Back in the day, this was one of the most frequent atomic operations we
did. And whilst perf overemphasizes the stalls from locked instructions,
the sheer numbers of them we do are significant (since we do one at the
start and end of every execbuf for every object in typical conditions).
Whilst it is less significant in the face of obj->resv undoing all of the
gains, it is still a deep paper cut. (At the GL level, consider about 100
objects per batch, several thousand times a second, times two - these ops
are low-hanging fruit.)
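
Back of the envelope with those numbers: ~100 objects x 2 transitions x a
few thousand batches per second is of the order of half a million locked
atomic operations per second spent on the active reference alone.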

What's needed is a function to take the place of the close_object for
internally allocated objects. It is also worth noting that they are either
already part of a cache, or are suitable for caching....
-Chris
> 
> Regards,
> 
> Tvrtko
> 
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >---
> >  drivers/gpu/drm/i915/i915_drv.h              | 28 ++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_gem.c              | 22 +++++++++++++++++++++-
> >  drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  2 +-
> >  drivers/gpu/drm/i915/i915_gem_context.c      |  2 +-
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  2 --
> >  drivers/gpu/drm/i915/i915_gem_gtt.c          |  7 ++++++-
> >  drivers/gpu/drm/i915/i915_gem_render_state.c |  3 ++-
> >  drivers/gpu/drm/i915/intel_ringbuffer.c      | 15 ++++++++++++---
> >  8 files changed, 71 insertions(+), 10 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index ee25e265416f..fee5cc92e2f2 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -2232,6 +2232,12 @@ struct drm_i915_gem_object {
> >  	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
> >  	/**
> >+	 * Have we taken a reference for the object for incomplete GPU
> >+	 * activity?
> >+	 */
> >+#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
> >+
> >+	/**
> >  	 * This is set if the object has been written to since last bound
> >  	 * to the GTT
> >  	 */
> >@@ -2399,6 +2405,28 @@ i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
> >  	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
> >  }
> >+static inline bool
> >+i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
> >+{
> >+	return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
> >+}
> >+
> >+static inline void
> >+i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
> >+{
> >+	lockdep_assert_held(&obj->base.dev->struct_mutex);
> >+	__set_bit(I915_BO_ACTIVE_REF, &obj->flags);
> >+}
> >+
> >+static inline void
> >+i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
> >+{
> >+	lockdep_assert_held(&obj->base.dev->struct_mutex);
> >+	__clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
> >+}
> >+
> >+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
> >+
> >  static inline unsigned int
> >  i915_gem_object_get_tiling(struct drm_i915_gem_object *obj)
> >  {
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >index 7fa5cb764739..b560263bf446 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -2618,7 +2618,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
> >  		list_move_tail(&obj->global_list,
> >  			       &request->i915->mm.bound_list);
> >-	i915_gem_object_put(obj);
> >+	if (i915_gem_object_has_active_reference(obj)) {
> >+		i915_gem_object_clear_active_reference(obj);
> >+		i915_gem_object_put(obj);
> >+	}
> >  }
> >  static bool i915_context_is_banned(const struct i915_gem_context *ctx)
> >@@ -2889,6 +2892,12 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
> >  	list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
> >  		if (vma->vm->file == fpriv)
> >  			i915_vma_close(vma);
> >+
> >+	if (i915_gem_object_is_active(obj) &&
> >+	    !i915_gem_object_has_active_reference(obj)) {
> >+		i915_gem_object_set_active_reference(obj);
> >+		i915_gem_object_get(obj);
> >+	}
> >  	mutex_unlock(&obj->base.dev->struct_mutex);
> >  }
> >@@ -4365,6 +4374,17 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> >  	intel_runtime_pm_put(dev_priv);
> >  }
> >+void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
> >+{
> >+	lockdep_assert_held(&obj->base.dev->struct_mutex);
> >+
> >+	GEM_BUG_ON(i915_gem_object_has_active_reference(obj));
> >+	if (i915_gem_object_is_active(obj))
> >+		i915_gem_object_set_active_reference(obj);
> >+	else
> >+		i915_gem_object_put(obj);
> >+}
> >+
> >  int i915_gem_suspend(struct drm_device *dev)
> >  {
> >  	struct drm_i915_private *dev_priv = to_i915(dev);
> >diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> >index ed989596d9a3..cb25cad3318c 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> >@@ -73,7 +73,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
> >  		list_for_each_entry_safe(obj, next,
> >  					 &pool->cache_list[n],
> >  					 batch_pool_link)
> >-			i915_gem_object_put(obj);
> >+			__i915_gem_object_release_unless_active(obj);
> >  		INIT_LIST_HEAD(&pool->cache_list[n]);
> >  	}
> >diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> >index df10f4e95736..1d2ab73a8f43 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_context.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_context.c
> >@@ -155,7 +155,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
> >  		if (ce->ring)
> >  			intel_ring_free(ce->ring);
> >-		i915_vma_put(ce->state);
> >+		__i915_gem_object_release_unless_active(ce->state->obj);
> >  	}
> >  	put_pid(ctx->pid);
> >diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> >index 72c7c1855e70..0deecd4e3b6c 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> >@@ -1299,8 +1299,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
> >  	 * add the active reference first and queue for it to be dropped
> >  	 * *last*.
> >  	 */
> >-	if (!i915_gem_object_is_active(obj))
> >-		i915_gem_object_get(obj);
> >  	i915_gem_object_set_active(obj, idx);
> >  	i915_gem_active_set(&obj->last_read[idx], req);
> >diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >index 2d846aa39ca5..1c95da8424cb 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >@@ -3712,11 +3712,16 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
> >  void i915_vma_unpin_and_release(struct i915_vma **p_vma)
> >  {
> >  	struct i915_vma *vma;
> >+	struct drm_i915_gem_object *obj;
> >  	vma = fetch_and_zero(p_vma);
> >  	if (!vma)
> >  		return;
> >+	obj = vma->obj;
> >+
> >  	i915_vma_unpin(vma);
> >-	i915_vma_put(vma);
> >+	i915_vma_close(vma);
> >+
> >+	__i915_gem_object_release_unless_active(obj);
> >  }
> >diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> >index 95b7e9afd5f8..09cf4874c45f 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> >@@ -224,7 +224,8 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
> >  	i915_vma_move_to_active(so.vma, req, 0);
> >  err_unpin:
> >  	i915_vma_unpin(so.vma);
> >+	i915_vma_close(so.vma);
> >  err_obj:
> >-	i915_gem_object_put(obj);
> >+	__i915_gem_object_release_unless_active(obj);
> >  	return ret;
> >  }
> >diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >index b60c6f09fbfd..f3dfb7ca625d 100644
> >--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> >+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >@@ -1763,14 +1763,19 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine)
> >  static void cleanup_status_page(struct intel_engine_cs *engine)
> >  {
> >  	struct i915_vma *vma;
> >+	struct drm_i915_gem_object *obj;
> >  	vma = fetch_and_zero(&engine->status_page.vma);
> >  	if (!vma)
> >  		return;
> >+	obj = vma->obj;
> >+
> >  	i915_vma_unpin(vma);
> >-	i915_gem_object_unpin_map(vma->obj);
> >-	i915_vma_put(vma);
> >+	i915_vma_close(vma);
> >+
> >+	i915_gem_object_unpin_map(obj);
> >+	__i915_gem_object_release_unless_active(obj);
> >  }
> >  static int init_status_page(struct intel_engine_cs *engine)
> >@@ -1968,7 +1973,11 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
> >  void
> >  intel_ring_free(struct intel_ring *ring)
> >  {
> >-	i915_vma_put(ring->vma);
> >+	struct drm_i915_gem_object *obj = ring->vma->obj;
> >+
> >+	i915_vma_close(ring->vma);
> >+	__i915_gem_object_release_unless_active(obj);
> >+
> >  	kfree(ring);
> >  }
> 

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 11/42] drm/i915: Introduce an internal allocator for disposable private objects
  2016-10-07 16:52   ` Tvrtko Ursulin
@ 2016-10-07 17:08     ` Chris Wilson
  2016-10-08  8:12       ` Tvrtko Ursulin
  0 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-07 17:08 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Oct 07, 2016 at 05:52:47PM +0100, Tvrtko Ursulin wrote:
> 
> On 07/10/2016 10:46, Chris Wilson wrote:
> >Quite a few of our objects used for internal hardware programming do not
> >benefit from being swappable or from being zero initialised. As such
> >they do not benefit from using a shmemfs backing storage and since they
> >are internal and never directly exposed to the user, we do not need to
> >worry about providing a filp. For these we can use an
> >drm_i915_gem_object wrapper around a sg_table of plain struct page. They
> >are not swap backed and not automatically pinned. If they are reaped
> >by the shrinker, the pages are released and the contents discarded. For
> >the internal use case, this is fine as for example, ringbuffers are
> >pinned from being written by a request to be read by the hardware. Once
> >they are idle, they can be discarded entirely. As such they are a good
> >match for execlist ringbuffers and a small variety of other internal
> >objects.
> >
> >In the first iteration, this is limited to the scratch batch buffers we
> >use (for command parsing and state initialisation).
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >---
> >  drivers/gpu/drm/i915/Makefile                |   1 +
> >  drivers/gpu/drm/i915/i915_drv.h              |   5 +
> >  drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  28 ++---
> >  drivers/gpu/drm/i915/i915_gem_internal.c     | 161 +++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
> >  drivers/gpu/drm/i915/intel_engine_cs.c       |   2 +-
> >  drivers/gpu/drm/i915/intel_ringbuffer.c      |  14 ++-
> >  7 files changed, 189 insertions(+), 24 deletions(-)
> >  create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c
> >
> >diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> >index a998c2bce70a..b94a90f34d2d 100644
> >--- a/drivers/gpu/drm/i915/Makefile
> >+++ b/drivers/gpu/drm/i915/Makefile
> >@@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
> >  	  i915_gem_execbuffer.o \
> >  	  i915_gem_fence.o \
> >  	  i915_gem_gtt.o \
> >+	  i915_gem_internal.o \
> >  	  i915_gem.o \
> >  	  i915_gem_render_state.o \
> >  	  i915_gem_request.o \
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index fee5cc92e2f2..bad97f1e5265 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -3538,6 +3538,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
> >  					       u32 gtt_offset,
> >  					       u32 size);
> >+/* i915_gem_internal.c */
> >+struct drm_i915_gem_object *
> >+i915_gem_object_create_internal(struct drm_device *dev,
> >+				unsigned int size);
> >+
> 
> Wasn't size_t our convention for GEM objects?

Not our convention, no. Using size_t has caused too many bugs, so we
started to eradicate it (in our usual piecemeal approach).

> >+		/* 965gm cannot relocate objects above 4GiB. */
> >+		gfp &= ~__GFP_HIGHMEM;
> >+		gfp |= __GFP_DMA32;
> >+	}
> >+
> >+	for (i = 0; i < npages; i++) {
> >+		struct page *page;
> >+
> >+		page = alloc_page(gfp);
> >+		if (!page)
> >+			goto err;
> >+
> >+#ifdef CONFIG_SWIOTLB
> >+		if (swiotlb_nr_tbl()) {
> >+			st->nents++;
> >+			sg_set_page(sg, page, PAGE_SIZE, 0);
> >+			sg = sg_next(sg);
> >+			continue;
> >+		}
> >+#endif
> >+		if (!i || page_to_pfn(page) != last_pfn + 1) {
> >+			if (i)
> >+				sg = sg_next(sg);
> >+			st->nents++;
> >+			sg_set_page(sg, page, PAGE_SIZE, 0);
> >+		} else {
> >+			sg->length += PAGE_SIZE;
> >+		}
> >+		last_pfn = page_to_pfn(page);
> >+	}
> >+#ifdef CONFIG_SWIOTLB
> >+	if (!swiotlb_nr_tbl())
> >+#endif
> >+		sg_mark_end(sg);
> 
> Looks like the loop above could be moved into a helper and shared
> with i915_gem_object_get_pages_gtt. Maybe just a page-alloc and
> page-alloc-error callbacks would be required.

So just the entire thing as a callback... I would have thought you might
suggest trying high order allocations and falling back to low order.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 11/42] drm/i915: Introduce an internal allocator for disposable private objects
  2016-10-07 17:08     ` Chris Wilson
@ 2016-10-08  8:12       ` Tvrtko Ursulin
  2016-10-08  8:32         ` Chris Wilson
  2016-10-08  8:34         ` [PATCH v2] " Chris Wilson
  0 siblings, 2 replies; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-08  8:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/10/2016 18:08, Chris Wilson wrote:
> On Fri, Oct 07, 2016 at 05:52:47PM +0100, Tvrtko Ursulin wrote:
>> On 07/10/2016 10:46, Chris Wilson wrote:
>>> Quite a few of our objects used for internal hardware programming do not
>>> benefit from being swappable or from being zero initialised. As such
>>> they do not benefit from using a shmemfs backing storage and since they
>>> are internal and never directly exposed to the user, we do not need to
>>> worry about providing a filp. For these we can use an
>>> drm_i915_gem_object wrapper around a sg_table of plain struct page. They
>>> are not swap backed and not automatically pinned. If they are reaped
>>> by the shrinker, the pages are released and the contents discarded. For
>>> the internal use case, this is fine as for example, ringbuffers are
>>> pinned from being written by a request to be read by the hardware. Once
>>> they are idle, they can be discarded entirely. As such they are a good
>>> match for execlist ringbuffers and a small variety of other internal
>>> objects.
>>>
>>> In the first iteration, this is limited to the scratch batch buffers we
>>> use (for command parsing and state initialisation).
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>   drivers/gpu/drm/i915/Makefile                |   1 +
>>>   drivers/gpu/drm/i915/i915_drv.h              |   5 +
>>>   drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  28 ++---
>>>   drivers/gpu/drm/i915/i915_gem_internal.c     | 161 +++++++++++++++++++++++++++
>>>   drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
>>>   drivers/gpu/drm/i915/intel_engine_cs.c       |   2 +-
>>>   drivers/gpu/drm/i915/intel_ringbuffer.c      |  14 ++-
>>>   7 files changed, 189 insertions(+), 24 deletions(-)
>>>   create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c
>>>
>>> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
>>> index a998c2bce70a..b94a90f34d2d 100644
>>> --- a/drivers/gpu/drm/i915/Makefile
>>> +++ b/drivers/gpu/drm/i915/Makefile
>>> @@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
>>>   	  i915_gem_execbuffer.o \
>>>   	  i915_gem_fence.o \
>>>   	  i915_gem_gtt.o \
>>> +	  i915_gem_internal.o \
>>>   	  i915_gem.o \
>>>   	  i915_gem_render_state.o \
>>>   	  i915_gem_request.o \
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index fee5cc92e2f2..bad97f1e5265 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -3538,6 +3538,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
>>>   					       u32 gtt_offset,
>>>   					       u32 size);
>>> +/* i915_gem_internal.c */
>>> +struct drm_i915_gem_object *
>>> +i915_gem_object_create_internal(struct drm_device *dev,
>>> +				unsigned int size);
>>> +
>> Wasn't size_t our convention for GEM objects?
> Not our convention, no. Using size_t has caused too many bugs, so we
> started to eradicate it (in our usual piecemeal approach).

Oh wow, I missed that decision. Last thing I remember was that someone 
was trying to convert it all to size_t. Fine by me.

>>> +		/* 965gm cannot relocate objects above 4GiB. */
>>> +		gfp &= ~__GFP_HIGHMEM;
>>> +		gfp |= __GFP_DMA32;
>>> +	}
>>> +
>>> +	for (i = 0; i < npages; i++) {
>>> +		struct page *page;
>>> +
>>> +		page = alloc_page(gfp);
>>> +		if (!page)
>>> +			goto err;
>>> +
>>> +#ifdef CONFIG_SWIOTLB
>>> +		if (swiotlb_nr_tbl()) {
>>> +			st->nents++;
>>> +			sg_set_page(sg, page, PAGE_SIZE, 0);
>>> +			sg = sg_next(sg);
>>> +			continue;
>>> +		}
>>> +#endif
>>> +		if (!i || page_to_pfn(page) != last_pfn + 1) {
>>> +			if (i)
>>> +				sg = sg_next(sg);
>>> +			st->nents++;
>>> +			sg_set_page(sg, page, PAGE_SIZE, 0);
>>> +		} else {
>>> +			sg->length += PAGE_SIZE;
>>> +		}
>>> +		last_pfn = page_to_pfn(page);
>>> +	}
>>> +#ifdef CONFIG_SWIOTLB
>>> +	if (!swiotlb_nr_tbl())
>>> +#endif
>>> +		sg_mark_end(sg);
>> Looks like the loop above could be moved into a helper and shared
>> with i915_gem_object_get_pages_gtt. Maybe just a page-alloc and
>> page-alloc-error callbacks would be required.
> So just the entire thing as a callback... I would have thought you might
> suggest trying high order allocations and falling back to low order.

I am not sure higher-order attempts would be worth it. What I was 
thinking was to implement the loop above generically in one place, and 
then two (or even three with userptr) callers would call that with their 
own alloc-page and alloc-page-on-error callbacks, so that there is a 
single implementation of the coalescing logic etc. (see the sketch below).
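
Purely to illustrate the idea, a minimal sketch of such a helper (the
function name, the ops struct and the callback signatures are made up
for illustration and are not existing i915 API; the SWIOTLB special
case and high-order allocations are left out):

struct i915_sg_alloc_ops {
	/* produce the next page, or NULL on failure */
	struct page *(*alloc_page)(void *data, unsigned long idx);
	/* error unwind: release the st->nents populated entries; note a
	 * coalesced entry spans several independently allocated pages
	 */
	void (*free_pages)(void *data, struct sg_table *st);
};

static int i915_sg_alloc_pages(struct sg_table *st,
			       unsigned long npages,
			       const struct i915_sg_alloc_ops *ops,
			       void *data)
{
	struct scatterlist *sg = st->sgl;
	unsigned long last_pfn = 0;
	unsigned long i;

	st->nents = 0;
	for (i = 0; i < npages; i++) {
		struct page *page = ops->alloc_page(data, i);

		if (!page) {
			ops->free_pages(data, st);
			return -ENOMEM;
		}

		/* coalesce physically contiguous pages into one entry */
		if (!i || page_to_pfn(page) != last_pfn + 1) {
			if (i)
				sg = sg_next(sg);
			st->nents++;
			sg_set_page(sg, page, PAGE_SIZE, 0);
		} else {
			sg->length += PAGE_SIZE;
		}
		last_pfn = page_to_pfn(page);
	}
	sg_mark_end(sg);

	return 0;
}

The shmem, internal (and possibly userptr) backends would then only
differ in how they obtain, and on error release, individual pages.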

Regards,

Tvrtko



_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 10/42] drm/i915: Defer active reference until required
  2016-10-07 16:58     ` Chris Wilson
@ 2016-10-08  8:18       ` Tvrtko Ursulin
  0 siblings, 0 replies; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-08  8:18 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/10/2016 17:58, Chris Wilson wrote:
> On Fri, Oct 07, 2016 at 05:35:38PM +0100, Tvrtko Ursulin wrote:
>> On 07/10/2016 10:46, Chris Wilson wrote:
>>> We only need the active reference to keep the object alive after the
>>> handle has been deleted (so as to prevent a synchronous gem_close). Why
>>> then pay the price of a kref on every execbuf when we can insert that
>>> final active ref just in time for the handle deletion?
>> I really dislike this.  Where there was elegance with obj/vma_put,
>> it is now replaced with out of place looking
>> __i915_gem_object_release_unless_active. I don't see why would
>> higher level layers have to concern themselves with calling
>> something with such a low-level sounding name.
>>
>> How much does this influence performance and in what cases? If
>> significant, could we try to come up with something similar but more
>> elegant?
> Back in the day, this was one of the most frequent atomic operations we
> did. And whilst perf overemphasizes the stalls from locked instructions,
> the sheer numbers of them we do are significant (since we do one at the
> start and end of every execbuf for every object in typical conditions).
> Whilst it is less significant in the face of obj->resv undoing all of the
> gains, it is still a deep paper cut. (At the GL level, consider about 100
> objects per batch, several thousand times a second x 2, these ops are low
> hanging fruit.)

I understand there are a lot of them (the same is true for many other 
operations we do), but how does that translate to actual benchmarks?

> What's needed is a function to take the place of the close_object for
> internally allocated objects. It is also worth noting that they are either
> already part of a cache, or are suitable for caching....

Ok but those are not x 100 per batch.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 07/42] drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate
  2016-10-07 16:22     ` Chris Wilson
@ 2016-10-08  8:21       ` Tvrtko Ursulin
  0 siblings, 0 replies; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-08  8:21 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/10/2016 17:22, Chris Wilson wrote:
> On Fri, Oct 07, 2016 at 05:10:04PM +0100, Tvrtko Ursulin wrote:
>> On 07/10/2016 10:46, Chris Wilson wrote:
>>> In forthcoming patches, we want to be able to dynamically allocate the
>>> wait_queue_t used whilst awaiting. This is more convenient if we extend
>>> the i915_sw_fence_await_sw_fence() to perform the allocation for us if
>>> we pass in a gfp mask as an alternative than a preallocated struct.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_sw_fence.c | 40 ++++++++++++++++++++++++++++++++----
>>>   drivers/gpu/drm/i915/i915_sw_fence.h |  8 ++++++++
>>>   2 files changed, 44 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
>>> index 1e5cbc585ca2..dfb59dced0ce 100644
>>> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
>>> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
>>> @@ -13,6 +13,8 @@
>>>   #include "i915_sw_fence.h"
>>> +#define I915_SW_FENCE_FLAG_ALLOC BIT(0)
>>> +
>> You collide with WQ_FLAG_EXCLUSIVE here, so I assume i915 will have
>> complete control of the wq in question?
> Hmm, we should move the bit as well, but yes, the flag is atm private to the
> wake-up function, for which we use a custom function.
>
>>> +		wq = kmalloc(sizeof(*wq), gfp);
>>> +		if (!wq) {
>>> +			if (!gfpflags_allow_blocking(gfp))
>>> +				return -ENOMEM;
>>> +
>>> +			i915_sw_fence_wait(signaler);
>> This looks like a strange API, unless I am missing something - it
>> normally sets things up for waiting, unless the allocation fails
>> when it actually does the wait?
> Yes. If we blocked on the malloc and failed, we block a little more for
> the completion. iirc, the idea came from the async task.

Still sounds strange to me. I understand it is an unlikely corner case, 
but it sounds like that sort of decision should happen in the caller. I 
will have to revisit the i915_sw_fence implementation and usage in more 
detail, it seems.

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request
  2016-10-07 16:37         ` Chris Wilson
@ 2016-10-08  8:23           ` Tvrtko Ursulin
  2016-10-08  8:58             ` Chris Wilson
  0 siblings, 1 reply; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-08  8:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/10/2016 17:37, Chris Wilson wrote:
> On Fri, Oct 07, 2016 at 05:16:17PM +0100, Tvrtko Ursulin wrote:
>> On 07/10/2016 17:12, Chris Wilson wrote:
>>>> 2. Can child be another array?
>>> Yes, but we don't want to recurse (or at least need to bound the
>>> recursion).
>> In that case could the array have been created in the signal_on_any
>> mode and how would we handle that?
> Hmm. Not along our paths, I think. The only fence-array we have are from
> sync-file which are signal-on-all (except... in the case of where it
> wraps a single fence, that fence could be a composite). signal-on-any is
> icky, to be complete we should not decompose it into its elements -
> however, whether or not a fence-array is operating in signal_on_any mode
> is not stored. So at the moment, the best I can do is offer a comment.

Yes signal-on-any sounds extremely weird. It mandates you decompose 3rd 
party fences in all cases, with recursion.

> The only user of it at the moment is amdgpu waiting for the first of
> many VM manager to become available, not something we'll see
> immediately via sync-file.

So userspace cannot engineer it to happen with some funky operations 
before giving us a fence?

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 11/42] drm/i915: Introduce an internal allocator for disposable private objects
  2016-10-08  8:12       ` Tvrtko Ursulin
@ 2016-10-08  8:32         ` Chris Wilson
  2016-10-08  8:34         ` [PATCH v2] " Chris Wilson
  1 sibling, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-08  8:32 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Sat, Oct 08, 2016 at 09:12:13AM +0100, Tvrtko Ursulin wrote:
> 
> On 07/10/2016 18:08, Chris Wilson wrote:
> >On Fri, Oct 07, 2016 at 05:52:47PM +0100, Tvrtko Ursulin wrote:
> >>On 07/10/2016 10:46, Chris Wilson wrote:
> >>>Quite a few of our objects used for internal hardware programming do not
> >>>benefit from being swappable or from being zero initialised. As such
> >>>they do not benefit from using a shmemfs backing storage and since they
> >>>are internal and never directly exposed to the user, we do not need to
> >>>worry about providing a filp. For these we can use an
> >>>drm_i915_gem_object wrapper around a sg_table of plain struct page. They
> >>>are not swap backed and not automatically pinned. If they are reaped
> >>>by the shrinker, the pages are released and the contents discarded. For
> >>>the internal use case, this is fine as for example, ringbuffers are
> >>>pinned from being written by a request to be read by the hardware. Once
> >>>they are idle, they can be discarded entirely. As such they are a good
> >>>match for execlist ringbuffers and a small variety of other internal
> >>>objects.
> >>>
> >>>In the first iteration, this is limited to the scratch batch buffers we
> >>>use (for command parsing and state initialisation).
> >>>
> >>>Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>---
> >>>  drivers/gpu/drm/i915/Makefile                |   1 +
> >>>  drivers/gpu/drm/i915/i915_drv.h              |   5 +
> >>>  drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  28 ++---
> >>>  drivers/gpu/drm/i915/i915_gem_internal.c     | 161 +++++++++++++++++++++++++++
> >>>  drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
> >>>  drivers/gpu/drm/i915/intel_engine_cs.c       |   2 +-
> >>>  drivers/gpu/drm/i915/intel_ringbuffer.c      |  14 ++-
> >>>  7 files changed, 189 insertions(+), 24 deletions(-)
> >>>  create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> >>>index a998c2bce70a..b94a90f34d2d 100644
> >>>--- a/drivers/gpu/drm/i915/Makefile
> >>>+++ b/drivers/gpu/drm/i915/Makefile
> >>>@@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
> >>>  	  i915_gem_execbuffer.o \
> >>>  	  i915_gem_fence.o \
> >>>  	  i915_gem_gtt.o \
> >>>+	  i915_gem_internal.o \
> >>>  	  i915_gem.o \
> >>>  	  i915_gem_render_state.o \
> >>>  	  i915_gem_request.o \
> >>>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >>>index fee5cc92e2f2..bad97f1e5265 100644
> >>>--- a/drivers/gpu/drm/i915/i915_drv.h
> >>>+++ b/drivers/gpu/drm/i915/i915_drv.h
> >>>@@ -3538,6 +3538,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
> >>>  					       u32 gtt_offset,
> >>>  					       u32 size);
> >>>+/* i915_gem_internal.c */
> >>>+struct drm_i915_gem_object *
> >>>+i915_gem_object_create_internal(struct drm_device *dev,
> >>>+				unsigned int size);
> >>>+
> >>Wasn't size_t our convention for GEM objects?
> >Not our convention, no. Using size_t has caused too many bugs, so we
> >started to eradicate it (in our usual piecemeal approach).
> 
> Oh wow, I missed that decision. Last thing I remember was that
> someone was trying to convert it all to size_t. Fine by me.

Who? They probably didn't have to spend the time fixing size_t variables
being wrong on 32-bit.
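
As an illustration (a made-up fragment, not code from the driver): on a
32-bit kernel size_t is only 32 bits wide, while a GTT address can need
up to 48 bits, so

	size_t offset = vma->node.start;	/* silently truncated on 32-bit */
	u64 offset_ok = vma->node.start;	/* explicit width, correct everywhere */

which is why object sizes and GTT offsets are moving to explicitly sized
types.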
 
> >>>+		/* 965gm cannot relocate objects above 4GiB. */
> >>>+		gfp &= ~__GFP_HIGHMEM;
> >>>+		gfp |= __GFP_DMA32;
> >>>+	}
> >>>+
> >>>+	for (i = 0; i < npages; i++) {
> >>>+		struct page *page;
> >>>+
> >>>+		page = alloc_page(gfp);
> >>>+		if (!page)
> >>>+			goto err;
> >>>+
> >>>+#ifdef CONFIG_SWIOTLB
> >>>+		if (swiotlb_nr_tbl()) {
> >>>+			st->nents++;
> >>>+			sg_set_page(sg, page, PAGE_SIZE, 0);
> >>>+			sg = sg_next(sg);
> >>>+			continue;
> >>>+		}
> >>>+#endif
> >>>+		if (!i || page_to_pfn(page) != last_pfn + 1) {
> >>>+			if (i)
> >>>+				sg = sg_next(sg);
> >>>+			st->nents++;
> >>>+			sg_set_page(sg, page, PAGE_SIZE, 0);
> >>>+		} else {
> >>>+			sg->length += PAGE_SIZE;
> >>>+		}
> >>>+		last_pfn = page_to_pfn(page);
> >>>+	}
> >>>+#ifdef CONFIG_SWIOTLB
> >>>+	if (!swiotlb_nr_tbl())
> >>>+#endif
> >>>+		sg_mark_end(sg);
> >>Looks like the loop above could be moved into a helper and shared
> >>with i915_gem_object_get_pages_gtt. Maybe just a page-alloc and
> >>page-alloc-error callbacks would be required.
> >So just the entire thing as a callback... I would have thought you might
> >suggest trying high order allocations and falling back to low order.
> 
> I am not sure higher order attempts would be worth it.

Looks like it is. :)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* [PATCH v2] drm/i915: Introduce an internal allocator for disposable private objects
  2016-10-08  8:12       ` Tvrtko Ursulin
  2016-10-08  8:32         ` Chris Wilson
@ 2016-10-08  8:34         ` Chris Wilson
  2016-10-10  7:01           ` Joonas Lahtinen
  2016-10-10  8:11           ` Tvrtko Ursulin
  1 sibling, 2 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-08  8:34 UTC (permalink / raw)
  To: intel-gfx

Quite a few of our objects used for internal hardware programming do not
benefit from being swappable or from being zero initialised. As such
they do not benefit from using a shmemfs backing storage and since they
are internal and never directly exposed to the user, we do not need to
worry about providing a filp. For these we can use an
drm_i915_gem_object wrapper around a sg_table of plain struct page. They
are not swap backed and not automatically pinned. If they are reaped
by the shrinker, the pages are released and the contents discarded. For
the internal use case, this is fine as for example, ringbuffers are
pinned from being written by a request to be read by the hardware. Once
they are idle, they can be discarded entirely. As such they are a good
match for execlist ringbuffers and a small variety of other internal
objects.

In the first iteration, this is limited to the scratch batch buffers we
use (for command parsing and state initialisation).

v2: Allocate physically contiguous pages, where possible.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile                |   1 +
 drivers/gpu/drm/i915/i915_drv.h              |   5 +
 drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  28 ++---
 drivers/gpu/drm/i915/i915_gem_internal.c     | 162 +++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c       |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c      |  14 ++-
 7 files changed, 190 insertions(+), 24 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index a998c2bce70a..b94a90f34d2d 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem_execbuffer.o \
 	  i915_gem_fence.o \
 	  i915_gem_gtt.o \
+	  i915_gem_internal.o \
 	  i915_gem.o \
 	  i915_gem_render_state.o \
 	  i915_gem_request.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fee5cc92e2f2..bad97f1e5265 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3538,6 +3538,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
 					       u32 gtt_offset,
 					       u32 size);
 
+/* i915_gem_internal.c */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_device *dev,
+				unsigned int size);
+
 /* i915_gem_shrinker.c */
 unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
 			      unsigned long target,
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index cb25cad3318c..3934c9103cf2 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -97,9 +97,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 			size_t size)
 {
 	struct drm_i915_gem_object *obj = NULL;
-	struct drm_i915_gem_object *tmp, *next;
+	struct drm_i915_gem_object *tmp;
 	struct list_head *list;
-	int n;
+	int n, ret;
 
 	lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
 
@@ -112,19 +112,12 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 		n = ARRAY_SIZE(pool->cache_list) - 1;
 	list = &pool->cache_list[n];
 
-	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
+	list_for_each_entry(tmp, list, batch_pool_link) {
 		/* The batches are strictly LRU ordered */
 		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
 					     &tmp->base.dev->struct_mutex))
 			break;
 
-		/* While we're looping, do some clean up */
-		if (tmp->madv == __I915_MADV_PURGED) {
-			list_del(&tmp->batch_pool_link);
-			i915_gem_object_put(tmp);
-			continue;
-		}
-
 		if (tmp->base.size >= size) {
 			obj = tmp;
 			break;
@@ -132,19 +125,16 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 	}
 
 	if (obj == NULL) {
-		int ret;
-
-		obj = i915_gem_object_create(&pool->engine->i915->drm, size);
+		obj = i915_gem_object_create_internal(&pool->engine->i915->drm,
+						      size);
 		if (IS_ERR(obj))
 			return obj;
-
-		ret = i915_gem_object_get_pages(obj);
-		if (ret)
-			return ERR_PTR(ret);
-
-		obj->madv = I915_MADV_DONTNEED;
 	}
 
+	ret = i915_gem_object_get_pages(obj);
+	if (ret)
+		return ERR_PTR(ret);
+
 	list_move_tail(&obj->batch_pool_link, list);
 	i915_gem_object_pin_pages(obj);
 	return obj;
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
new file mode 100644
index 000000000000..255b9232b8ef
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright © 2014-2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drmP.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+
+#define QUIET (__GFP_NORETRY | __GFP_NOWARN)
+
+static void internal_free_pages(struct sg_table *st)
+{
+	struct scatterlist *sg;
+
+	for (sg = st->sgl; sg; sg = __sg_next(sg))
+		__free_pages(sg_page(sg), get_order(sg->length));
+
+	sg_free_table(st);
+	kfree(st);
+}
+
+static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
+{
+	unsigned int npages = obj->base.size / PAGE_SIZE;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int max_order;
+	gfp_t gfp;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, npages, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	sg = st->sgl;
+	st->nents = 0;
+
+	max_order = MAX_ORDER;
+#ifdef CONFIG_SWIOTLB
+	if (swiotlb_nr_tbl())
+		max_order = min(max_order, ilog2(IO_TLB_SEGSIZE));
+#endif
+
+	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
+	if (IS_CRESTLINE(obj->base.dev) || IS_BROADWATER(obj->base.dev)) {
+		/* 965gm cannot relocate objects above 4GiB. */
+		gfp &= ~__GFP_HIGHMEM;
+		gfp |= __GFP_DMA32;
+	}
+
+	do {
+		int order = min(fls(npages) - 1, max_order);
+		struct page *page;
+
+		do {
+			page = alloc_pages(gfp | (order ? QUIET : 0), order);
+			if (page)
+				break;
+			if (!order--)
+				goto err;
+		} while (1);
+
+		sg_set_page(sg, page, PAGE_SIZE << order, 0);
+		st->nents++;
+
+		npages -= 1 << order;
+		if (!npages) {
+			sg_mark_end(sg);
+			break;
+		}
+
+		sg = __sg_next(sg);
+	} while (1);
+	obj->pages = st;
+
+	if (i915_gem_gtt_prepare_object(obj)) {
+		obj->pages = NULL;
+		goto err;
+	}
+
+	/* Mark the pages as dontneed whilst they are still pinned. As soon
+	 * as they are unpinned they are allowed to be reaped by the shrinker,
+	 * and the caller is expected to repopulate - the contents of this
+	 * object are only valid whilst active and pinned.
+	 */
+	obj->madv = I915_MADV_DONTNEED;
+	return 0;
+
+err:
+	sg_mark_end(sg);
+	internal_free_pages(st);
+	return -ENOMEM;
+}
+
+static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj)
+{
+	internal_free_pages(obj->pages);
+
+	obj->dirty = 0;
+	obj->madv = I915_MADV_WILLNEED;
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
+	.get_pages = i915_gem_object_get_pages_internal,
+	.put_pages = i915_gem_object_put_pages_internal,
+};
+
+/**
+ * Creates a new object that wraps some internal memory for private use.
+ * This object is not backed by swappable storage, and as such its contents
+ * are volatile and only valid whilst pinned. If the object is reaped by the
+ * shrinker, its pages and data will be discarded. Equally, it is not a full
+ * GEM object and so not valid for access from userspace. This makes it useful
+ * for hardware interfaces like ringbuffers (which are pinned from the time
+ * the request is written to the time the hardware stops accessing it), but
+ * not for contexts (which need to be preserved when not active for later
+ * reuse). Note that it is not cleared upon allocation.
+ */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_device *dev,
+				unsigned int size)
+{
+	struct drm_i915_gem_object *obj;
+
+	obj = i915_gem_object_alloc(dev);
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	drm_gem_private_object_init(dev, &obj->base, size);
+	i915_gem_object_init(obj, &i915_gem_object_internal_ops);
+
+	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+	obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE;
+
+	return obj;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 09cf4874c45f..c009ee1c27fd 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -187,7 +187,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
 	if (so.rodata->batch_items * 4 > 4096)
 		return -EINVAL;
 
-	obj = i915_gem_object_create(&req->i915->drm, 4096);
+	obj = i915_gem_object_create_internal(&req->i915->drm, 4096);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 480584c09306..99737b842d73 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -251,7 +251,7 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
 
 	obj = i915_gem_object_create_stolen(&engine->i915->drm, size);
 	if (!obj)
-		obj = i915_gem_object_create(&engine->i915->drm, size);
+		obj = i915_gem_object_create_internal(&engine->i915->drm, size);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("Failed to allocate scratch page\n");
 		return PTR_ERR(obj);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f3dfb7ca625d..b5d9c03f84ce 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1783,9 +1783,10 @@ static int init_status_page(struct intel_engine_cs *engine)
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	unsigned int flags;
+	void *vaddr;
 	int ret;
 
-	obj = i915_gem_object_create(&engine->i915->drm, 4096);
+	obj = i915_gem_object_create_internal(&engine->i915->drm, 4096);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("Failed to allocate status page\n");
 		return PTR_ERR(obj);
@@ -1818,15 +1819,22 @@ static int init_status_page(struct intel_engine_cs *engine)
 	if (ret)
 		goto err;
 
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		ret = PTR_ERR(vaddr);
+		goto err_unpin;
+	}
+
 	engine->status_page.vma = vma;
 	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
-	engine->status_page.page_addr =
-		i915_gem_object_pin_map(obj, I915_MAP_WB);
+	engine->status_page.page_addr = memset(vaddr, 0, 4096);
 
 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
 			 engine->name, i915_ggtt_offset(vma));
 	return 0;
 
+err_unpin:
+	i915_vma_unpin(vma);
 err:
 	i915_gem_object_put(obj);
 	return ret;
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 107+ messages in thread

* Re: [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request
  2016-10-08  8:23           ` Tvrtko Ursulin
@ 2016-10-08  8:58             ` Chris Wilson
  0 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-08  8:58 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Sat, Oct 08, 2016 at 09:23:25AM +0100, Tvrtko Ursulin wrote:
> 
> On 07/10/2016 17:37, Chris Wilson wrote:
> >On Fri, Oct 07, 2016 at 05:16:17PM +0100, Tvrtko Ursulin wrote:
> >>On 07/10/2016 17:12, Chris Wilson wrote:
> >>>>2. Can child be another array?
> >>>Yes, but we don't want to recurse (or at least need to bound the
> >>>recursion).
> >>In that case could the array have been created in the signal_on_any
> >>mode and how would we handle that?
> >Hmm. Not along our paths, I think. The only fence-array we have are from
> >sync-file which are signal-on-all (except... in the case of where it
> >wraps a single fence, that fence could be a composite). signal-on-any is
> >icky, to be complete we should not decompose it into its elements -
> >however, whether or not a fence-array is operating in signal_on_any mode
> >is not stored. So at the moment, the best I can do is offer a comment.
> 
> Yes signal-on-any sounds extremely weird. It mandates you decompose
> 3rd party fences in all cases, with recursion.
> 
> >The only user of it at the moment is amdgpu waiting for the first of
> >many VM manager to become available, not something we'll see
> >immediately via sync-file.
> 
> So userspace cannot engineer it to happen with some funky operations
> before giving us a fence?

No. sync_file is a fence-array (in normal signal-on-all mode) or a
single fence (which via merging sync_files cannot be another fence-array,
though the sync_file could be constructed pointing to a single fence-array
fence). If that is a signal-on-any fence, it is broken if the sync_file
is ever merged. And reviewing the existing code, no one should be
creating sync_file from a signal-on-any fence-array. There is no ABI to
allow the user to create signal-on-any sync_file. I'm feeling safer that
there are quite a few issues to resolve before signal-on-any fences are
allowed to pollute the sync_file ABI.
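
To make the decomposition concrete, the await path under discussion has
roughly this shape (a sketch only, not the posted patch; await_single_fence
is a stand-in name, and the accessors are the ones from
<linux/fence-array.h>):

static int await_external_fence(struct drm_i915_gem_request *req,
				struct fence *fence)
{
	struct fence_array *array;
	unsigned int i;
	int ret;

	if (!fence_is_array(fence))
		return await_single_fence(req, fence);

	/* Decompose the composite into its elements. struct fence_array
	 * does not record whether it was created in signal-on-any mode,
	 * so such an array would be wrongly flattened here; as noted,
	 * sync_file only produces signal-on-all arrays today, hence the
	 * comment-only resolution.
	 */
	array = to_fence_array(fence);
	for (i = 0; i < array->num_fences; i++) {
		ret = await_single_fence(req, array->fences[i]);
		if (ret < 0)
			return ret;
	}

	return 0;
}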
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH v2] drm/i915: Introduce an internal allocator for disposable private objects
  2016-10-08  8:34         ` [PATCH v2] " Chris Wilson
@ 2016-10-10  7:01           ` Joonas Lahtinen
  2016-10-10  8:11           ` Tvrtko Ursulin
  1 sibling, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-10  7:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On la, 2016-10-08 at 09:34 +0100, Chris Wilson wrote:
> Quite a few of our objects used for internal hardware programming do not
> benefit from being swappable or from being zero initialised. As such
> they do not benefit from using a shmemfs backing storage and since they
> are internal and never directly exposed to the user, we do not need to
> worry about providing a filp. For these we can use an
> drm_i915_gem_object wrapper around a sg_table of plain struct page. They
> are not swap backed and not automatically pinned. If they are reaped
> by the shrinker, the pages are released and the contents discarded. For
> the internal use case, this is fine as for example, ringbuffers are
> pinned from being written by a request to be read by the hardware. Once
> they are idle, they can be discarded entirely. As such they are a good
> match for execlist ringbuffers and a small variety of other internal
> objects.
> 
> In the first iteration, this is limited to the scratch batch buffers we
> use (for command parsing and state initialisation).
> 
> v2: Allocate physically contiguous pages, where possible.

Does not hurt, but what does it gain us, exactly?

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* ✗ Fi.CI.BAT: warning for series starting with [01/42] drm/i915: Allow disabling error capture
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (42 preceding siblings ...)
  2016-10-07 10:19 ` ✗ Fi.CI.BAT: warning for series starting with [01/42] drm/i915: Allow disabling error capture Patchwork
@ 2016-10-10  7:23 ` Patchwork
  2016-10-10 15:31 ` ✗ Fi.CI.BAT: failure for series starting with [01/42] drm/i915: Allow disabling error capture (rev2) Patchwork
  44 siblings, 0 replies; 107+ messages in thread
From: Patchwork @ 2016-10-10  7:23 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/42] drm/i915: Allow disabling error capture
URL   : https://patchwork.freedesktop.org/series/13436/
State : warning

== Summary ==

Series 13436v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/13436/revisions/1/mbox/

Test drv_module_reload_basic:
                pass       -> DMESG-WARN (fi-skl-6770hq)
                dmesg-warn -> PASS       (fi-ilk-650)
Test kms_pipe_crc_basic:
        Subgroup hang-read-crc-pipe-b:
                pass       -> DMESG-WARN (fi-skl-6700k)
Test vgem_basic:
        Subgroup unload:
                skip       -> PASS       (fi-byt-n2820)
                skip       -> PASS       (fi-bsw-n3050)
                pass       -> SKIP       (fi-skl-6770hq)
                skip       -> PASS       (fi-hsw-4770)

fi-bdw-5557u     total:248  pass:231  dwarn:0   dfail:0   fail:0   skip:17 
fi-bsw-n3050     total:248  pass:205  dwarn:0   dfail:0   fail:0   skip:43 
fi-bxt-t5700     total:248  pass:217  dwarn:0   dfail:0   fail:0   skip:31 
fi-byt-j1900     total:248  pass:214  dwarn:1   dfail:0   fail:1   skip:32 
fi-byt-n2820     total:248  pass:211  dwarn:0   dfail:0   fail:1   skip:36 
fi-hsw-4770      total:248  pass:225  dwarn:0   dfail:0   fail:0   skip:23 
fi-hsw-4770r     total:248  pass:224  dwarn:0   dfail:0   fail:0   skip:24 
fi-ilk-650       total:248  pass:185  dwarn:0   dfail:0   fail:2   skip:61 
fi-ivb-3520m     total:248  pass:221  dwarn:0   dfail:0   fail:0   skip:27 
fi-ivb-3770      total:248  pass:207  dwarn:0   dfail:0   fail:0   skip:41 
fi-skl-6260u     total:248  pass:232  dwarn:0   dfail:0   fail:0   skip:16 
fi-skl-6700hq    total:248  pass:224  dwarn:1   dfail:0   fail:0   skip:23 
fi-skl-6700k     total:248  pass:221  dwarn:2   dfail:0   fail:0   skip:25 
fi-skl-6770hq    total:248  pass:229  dwarn:2   dfail:0   fail:1   skip:16 
fi-snb-2520m     total:248  pass:211  dwarn:0   dfail:0   fail:0   skip:37 
fi-snb-2600      total:248  pass:209  dwarn:0   dfail:0   fail:0   skip:39 

Results at /archive/results/CI_IGT_test/Patchwork_2647/

81b22c4383bfe6290f7b9d821bf56768567c1718 drm-intel-nightly: 2016y-10m-07d-07h-29m-30s UTC integration manifest
33cf896 drm/i915: Support explicit fencing for execbuf
01f0787 drm/i915: Enable userspace to opt-out of implicit fencing
0815015 drm/i915: Enable multiple timelines
6409884 drm/i915: Defer setting of global seqno on request to submission
3f9e348 drm/i915: Reserve space in the global seqno during request allocation
d9ec193 drm/i915: Create a unique name for the context
b3bebb7 drm/i915: Move the global sync optimisation to the timeline
48703d4 drm/i915: Defer breadcrumb emission
d673086 drm/i915: Record space required for breadcrumb emission
2493555 drm/i915: Rename ->emit_request to ->emit_breadcrumb
46618d0 drm/i915: Introduce a global_seqno for each request
454114b drm/i915: Wait first for submission, before waiting for request completion
271653c drm/i915: Queue the idling context switch after all other timelines
a8990af drm/i915: Combine seqno + tracking into a global timeline struct
0c4f326 drm/i915: Restore nonblocking awaits for modesetting
609dbff drm: Add reference counting to drm_atomic_state
a5cd0fa drm/i915: Move GEM activity tracking into a common struct reservation_object
6dfa747 drm/i915: Use lockless object free
51be5fa drm/i915: Treat a framebuffer reference as an active reference whilst shrinking
89cefa9 drm/i915: Move object release to a freelist + worker
af3efa6 drm/i915: Acquire the backing storage outside of struct_mutex in set-domain
bdaf19a drm/i915: Implement pwrite without struct-mutex
8856275 drm/i915: Implement pread without struct-mutex
3bc7efe drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex
8824701 drm/i915: Move object backing storage manipulation to its own locking
b4f1045 drm/i915: Pass around sg_table to get_pages/put_pages backend
e9184f6 drm/i915: Refactor object page API
897bc5e drm/i915: Use radixtree to jump start intel_partial_pages()
2885c3f drm/i915: Use a radixtree for random access to the object's backing storage
ba563be drm/i915: Markup GEM API with lockdep asserts
2c12286 drm/i915: Reuse the active golden render state batch
7a26c4d0 drm/i915: Introduce an internal allocator for disposable private objects
5f0cb1d drm/i915: Defer active reference until required
5dafc01 drm/i915: Remove unused i915_gem_active_wait() in favour of _unlocked()
21447b1 drm/i915: Rearrange i915_wait_request() accounting with callers
983cd49 drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate
42b0316 drm/i915: Support asynchronous waits on struct fence from i915_gem_request
16d0a2a drm/i915: Compress GPU objects in error state
5b2d629 drm/i915: Consolidate error object printing
d5c6209f drm/i915: Always use the GTT for error capture
6220554 drm/i915: Stop the machine whilst capturing the GPU crash dump
a8d19e4 drm/i915: Allow disabling error capture

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH v2] drm/i915: Introduce an internal allocator for disposable private objects
  2016-10-08  8:34         ` [PATCH v2] " Chris Wilson
  2016-10-10  7:01           ` Joonas Lahtinen
@ 2016-10-10  8:11           ` Tvrtko Ursulin
  2016-10-10  8:19             ` Chris Wilson
  1 sibling, 1 reply; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-10  8:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 08/10/2016 09:34, Chris Wilson wrote:
> Quite a few of our objects used for internal hardware programming do not
> benefit from being swappable or from being zero initialised. As such
> they do not benefit from using a shmemfs backing storage and since they
> are internal and never directly exposed to the user, we do not need to
> worry about providing a filp. For these we can use an
> drm_i915_gem_object wrapper around a sg_table of plain struct page. They
> are not swap backed and not automatically pinned. If they are reaped
> by the shrinker, the pages are released and the contents discarded. For
> the internal use case, this is fine as for example, ringbuffers are
> pinned from being written by a request to be read by the hardware. Once
> they are idle, they can be discarded entirely. As such they are a good
> match for execlist ringbuffers and a small variety of other internal
> objects.
>
> In the first iteration, this is limited to the scratch batch buffers we
> use (for command parsing and state initialisation).
>
> v2: Allocate physically contiguous pages, where possible.

Since the allocator will be used constantly at runtime, my recollection 
is that higher-order allocations can easily become next to impossible, 
so I am wondering why. Also, in your last reply you did not say why 
you think this is interesting to try.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/Makefile                |   1 +
>   drivers/gpu/drm/i915/i915_drv.h              |   5 +
>   drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  28 ++---
>   drivers/gpu/drm/i915/i915_gem_internal.c     | 162 +++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
>   drivers/gpu/drm/i915/intel_engine_cs.c       |   2 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.c      |  14 ++-
>   7 files changed, 190 insertions(+), 24 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index a998c2bce70a..b94a90f34d2d 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
>   	  i915_gem_execbuffer.o \
>   	  i915_gem_fence.o \
>   	  i915_gem_gtt.o \
> +	  i915_gem_internal.o \
>   	  i915_gem.o \
>   	  i915_gem_render_state.o \
>   	  i915_gem_request.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index fee5cc92e2f2..bad97f1e5265 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3538,6 +3538,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
>   					       u32 gtt_offset,
>   					       u32 size);
>   
> +/* i915_gem_internal.c */
> +struct drm_i915_gem_object *
> +i915_gem_object_create_internal(struct drm_device *dev,
> +				unsigned int size);
> +
>   /* i915_gem_shrinker.c */
>   unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
>   			      unsigned long target,
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index cb25cad3318c..3934c9103cf2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -97,9 +97,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>   			size_t size)
>   {
>   	struct drm_i915_gem_object *obj = NULL;
> -	struct drm_i915_gem_object *tmp, *next;
> +	struct drm_i915_gem_object *tmp;
>   	struct list_head *list;
> -	int n;
> +	int n, ret;
>   
>   	lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
>   
> @@ -112,19 +112,12 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>   		n = ARRAY_SIZE(pool->cache_list) - 1;
>   	list = &pool->cache_list[n];
>   
> -	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
> +	list_for_each_entry(tmp, list, batch_pool_link) {
>   		/* The batches are strictly LRU ordered */
>   		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
>   					     &tmp->base.dev->struct_mutex))
>   			break;
>   
> -		/* While we're looping, do some clean up */
> -		if (tmp->madv == __I915_MADV_PURGED) {
> -			list_del(&tmp->batch_pool_link);
> -			i915_gem_object_put(tmp);
> -			continue;
> -		}
> -
>   		if (tmp->base.size >= size) {
>   			obj = tmp;
>   			break;
> @@ -132,19 +125,16 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>   	}
>   
>   	if (obj == NULL) {
> -		int ret;
> -
> -		obj = i915_gem_object_create(&pool->engine->i915->drm, size);
> +		obj = i915_gem_object_create_internal(&pool->engine->i915->drm,
> +						      size);
>   		if (IS_ERR(obj))
>   			return obj;
> -
> -		ret = i915_gem_object_get_pages(obj);
> -		if (ret)
> -			return ERR_PTR(ret);
> -
> -		obj->madv = I915_MADV_DONTNEED;
>   	}
>   
> +	ret = i915_gem_object_get_pages(obj);
> +	if (ret)
> +		return ERR_PTR(ret);
> +
>   	list_move_tail(&obj->batch_pool_link, list);
>   	i915_gem_object_pin_pages(obj);
>   	return obj;
> diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
> new file mode 100644
> index 000000000000..255b9232b8ef
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_internal.c
> @@ -0,0 +1,162 @@
> +/*
> + * Copyright © 2014-2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include <drm/drmP.h>
> +#include <drm/i915_drm.h>
> +#include "i915_drv.h"
> +
> +#define QUIET (__GFP_NORETRY | __GFP_NOWARN)
> +
> +static void internal_free_pages(struct sg_table *st)
> +{
> +	struct scatterlist *sg;
> +
> +	for (sg = st->sgl; sg; sg = __sg_next(sg))
> +		__free_pages(sg_page(sg), get_order(sg->length));

Fragile, since it won't work together with coalescing (get_order(sg->length) 
assumes each sg entry is a single higher-order allocation, which no longer 
holds once independently allocated pages are coalesced into one entry), 
which I am sad to see not implemented. For some reason it makes me feel 
real good when it is there.

> +
> +	sg_free_table(st);
> +	kfree(st);
> +}
> +
> +static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
> +{
> +	unsigned int npages = obj->base.size / PAGE_SIZE;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
> +	int max_order;
> +	gfp_t gfp;
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (!st)
> +		return -ENOMEM;
> +
> +	if (sg_alloc_table(st, npages, GFP_KERNEL)) {
> +		kfree(st);
> +		return -ENOMEM;
> +	}
> +
> +	sg = st->sgl;
> +	st->nents = 0;
> +
> +	max_order = MAX_ORDER;
> +#ifdef CONFIG_SWIOTLB
> +	if (swiotlb_nr_tbl())
> +		max_order = min(max_order, ilog2(IO_TLB_SEGSIZE));
> +#endif
> +
> +	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
> +	if (IS_CRESTLINE(obj->base.dev) || IS_BROADWATER(obj->base.dev)) {

You don't want to avoid more usages of dev?

> +		/* 965gm cannot relocate objects above 4GiB. */
> +		gfp &= ~__GFP_HIGHMEM;
> +		gfp |= __GFP_DMA32;
> +	}
> +
> +	do {
> +		int order = min(fls(npages) - 1, max_order);
> +		struct page *page;
> +
> +		do {
> +			page = alloc_pages(gfp | (order ? QUIET : 0), order);
> +			if (page)
> +				break;
> +			if (!order--)
> +				goto err;
> +		} while (1);

Feels like it could hammer hard on a fragmented system; I have big 
concerns about this.

> +
> +		sg_set_page(sg, page, PAGE_SIZE << order, 0);
> +		st->nents++;
> +
> +		npages -= 1 << order;
> +		if (!npages) {
> +			sg_mark_end(sg);
> +			break;
> +		}
> +
> +		sg = __sg_next(sg);
> +	} while (1);
> +	obj->pages = st;
> +
> +	if (i915_gem_gtt_prepare_object(obj)) {
> +		obj->pages = NULL;
> +		goto err;
> +	}
> +
> +	/* Mark the pages as dontneed whilst they are still pinned. As soon
> +	 * as they are unpinned they are allowed to be reaped by the shrinker,
> +	 * and the caller is expected to repopulate - the contents of this
> +	 * object are only valid whilst active and pinned.
> +	 */
> +	obj->madv = I915_MADV_DONTNEED;
> +	return 0;
> +
> +err:
> +	sg_mark_end(sg);
> +	internal_free_pages(st);
> +	return -ENOMEM;
> +}
> +
> +static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj)
> +{
> +	internal_free_pages(obj->pages);
> +
> +	obj->dirty = 0;
> +	obj->madv = I915_MADV_WILLNEED;
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
> +	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
> +	.get_pages = i915_gem_object_get_pages_internal,
> +	.put_pages = i915_gem_object_put_pages_internal,
> +};
> +
> +/**
> + * Creates a new object that wraps some internal memory for private use.
> + * This object is not backed by swappable storage, and as such its contents
> + * are volatile and only valid whilst pinned. If the object is reaped by the
> + * shrinker, its pages and data will be discarded. Equally, it is not a full
> + * GEM object and so not valid for access from userspace. This makes it useful
> + * for hardware interfaces like ringbuffers (which are pinned from the time
> + * the request is written to the time the hardware stops accessing it), but
> + * not for contexts (which need to be preserved when not active for later
> + * reuse). Note that it is not cleared upon allocation.
> + */
> +struct drm_i915_gem_object *
> +i915_gem_object_create_internal(struct drm_device *dev,
> +				unsigned int size)
> +{
> +	struct drm_i915_gem_object *obj;
> +
> +	obj = i915_gem_object_alloc(dev);
> +	if (!obj)
> +		return ERR_PTR(-ENOMEM);
> +
> +	drm_gem_private_object_init(dev, &obj->base, size);
> +	i915_gem_object_init(obj, &i915_gem_object_internal_ops);
> +
> +	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> +	obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE;
> +
> +	return obj;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index 09cf4874c45f..c009ee1c27fd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -187,7 +187,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>   	if (so.rodata->batch_items * 4 > 4096)
>   		return -EINVAL;
>   
> -	obj = i915_gem_object_create(&req->i915->drm, 4096);
> +	obj = i915_gem_object_create_internal(&req->i915->drm, 4096);
>   	if (IS_ERR(obj))
>   		return PTR_ERR(obj);
>   
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 480584c09306..99737b842d73 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -251,7 +251,7 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
>   
>   	obj = i915_gem_object_create_stolen(&engine->i915->drm, size);
>   	if (!obj)
> -		obj = i915_gem_object_create(&engine->i915->drm, size);
> +		obj = i915_gem_object_create_internal(&engine->i915->drm, size);
>   	if (IS_ERR(obj)) {
>   		DRM_ERROR("Failed to allocate scratch page\n");
>   		return PTR_ERR(obj);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index f3dfb7ca625d..b5d9c03f84ce 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1783,9 +1783,10 @@ static int init_status_page(struct intel_engine_cs *engine)
>   	struct drm_i915_gem_object *obj;
>   	struct i915_vma *vma;
>   	unsigned int flags;
> +	void *vaddr;
>   	int ret;
>   
> -	obj = i915_gem_object_create(&engine->i915->drm, 4096);
> +	obj = i915_gem_object_create_internal(&engine->i915->drm, 4096);
>   	if (IS_ERR(obj)) {
>   		DRM_ERROR("Failed to allocate status page\n");
>   		return PTR_ERR(obj);
> @@ -1818,15 +1819,22 @@ static int init_status_page(struct intel_engine_cs *engine)
>   	if (ret)
>   		goto err;
>   
> +	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
> +	if (IS_ERR(vaddr)) {
> +		ret = PTR_ERR(vaddr);
> +		goto err_unpin;
> +	}

And you don't want to split the bugfix out?

> +
>   	engine->status_page.vma = vma;
>   	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
> -	engine->status_page.page_addr =
> -		i915_gem_object_pin_map(obj, I915_MAP_WB);
> +	engine->status_page.page_addr = memset(vaddr, 0, 4096);
>   
>   	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
>   			 engine->name, i915_ggtt_offset(vma));
>   	return 0;
>   
> +err_unpin:
> +	i915_vma_unpin(vma);
>   err:
>   	i915_gem_object_put(obj);
>   	return ret;

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH v2] drm/i915: Introduce an internal allocator for disposable private objects
  2016-10-10  8:11           ` Tvrtko Ursulin
@ 2016-10-10  8:19             ` Chris Wilson
  2016-10-10  8:25               ` Tvrtko Ursulin
  0 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-10  8:19 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Mon, Oct 10, 2016 at 09:11:49AM +0100, Tvrtko Ursulin wrote:
> On 08/10/2016 09:34, Chris Wilson wrote:
> >Quite a few of our objects used for internal hardware programming do not
> >benefit from being swappable or from being zero initialised. As such
> >they do not benefit from using a shmemfs backing storage and since they
> >are internal and never directly exposed to the user, we do not need to
> >worry about providing a filp. For these we can use a
> >drm_i915_gem_object wrapper around a sg_table of plain struct page. They
> >are not swap backed and not automatically pinned. If they are reaped
> >by the shrinker, the pages are released and the contents discarded. For
> >the internal use case, this is fine as for example, ringbuffers are
> >pinned from being written by a request to be read by the hardware. Once
> >they are idle, they can be discarded entirely. As such they are a good
> >match for execlist ringbuffers and a small variety of other internal
> >objects.
> >
> >In the first iteration, this is limited to the scratch batch buffers we
> >use (for command parsing and state initialisation).
> >
> >v2: Allocate physically contiguous pages, where possible.
> 
> Since the allocator will be used constantly at runtime, my
> recollection is that higher-order allocations can easily become next
> to impossible, so I am wondering why? Also, in your last reply you
> did not explain why you think this is interesting to try.
> 
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> >---
> >  drivers/gpu/drm/i915/Makefile                |   1 +
> >  drivers/gpu/drm/i915/i915_drv.h              |   5 +
> >  drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  28 ++---
> >  drivers/gpu/drm/i915/i915_gem_internal.c     | 162 +++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
> >  drivers/gpu/drm/i915/intel_engine_cs.c       |   2 +-
> >  drivers/gpu/drm/i915/intel_ringbuffer.c      |  14 ++-
> >  7 files changed, 190 insertions(+), 24 deletions(-)
> >  create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c
> >
> >diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> >index a998c2bce70a..b94a90f34d2d 100644
> >--- a/drivers/gpu/drm/i915/Makefile
> >+++ b/drivers/gpu/drm/i915/Makefile
> >@@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
> >  	  i915_gem_execbuffer.o \
> >  	  i915_gem_fence.o \
> >  	  i915_gem_gtt.o \
> >+	  i915_gem_internal.o \
> >  	  i915_gem.o \
> >  	  i915_gem_render_state.o \
> >  	  i915_gem_request.o \
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index fee5cc92e2f2..bad97f1e5265 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -3538,6 +3538,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
> >  					       u32 gtt_offset,
> >  					       u32 size);
> >+/* i915_gem_internal.c */
> >+struct drm_i915_gem_object *
> >+i915_gem_object_create_internal(struct drm_device *dev,
> >+				unsigned int size);
> >+
> >  /* i915_gem_shrinker.c */
> >  unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
> >  			      unsigned long target,
> >diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> >index cb25cad3318c..3934c9103cf2 100644
> >--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> >+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> >@@ -97,9 +97,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
> >  			size_t size)
> >  {
> >  	struct drm_i915_gem_object *obj = NULL;
> >-	struct drm_i915_gem_object *tmp, *next;
> >+	struct drm_i915_gem_object *tmp;
> >  	struct list_head *list;
> >-	int n;
> >+	int n, ret;
> >  	lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
> >@@ -112,19 +112,12 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
> >  		n = ARRAY_SIZE(pool->cache_list) - 1;
> >  	list = &pool->cache_list[n];
> >-	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
> >+	list_for_each_entry(tmp, list, batch_pool_link) {
> >  		/* The batches are strictly LRU ordered */
> >  		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
> >  					     &tmp->base.dev->struct_mutex))
> >  			break;
> >-		/* While we're looping, do some clean up */
> >-		if (tmp->madv == __I915_MADV_PURGED) {
> >-			list_del(&tmp->batch_pool_link);
> >-			i915_gem_object_put(tmp);
> >-			continue;
> >-		}
> >-
> >  		if (tmp->base.size >= size) {
> >  			obj = tmp;
> >  			break;
> >@@ -132,19 +125,16 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
> >  	}
> >  	if (obj == NULL) {
> >-		int ret;
> >-
> >-		obj = i915_gem_object_create(&pool->engine->i915->drm, size);
> >+		obj = i915_gem_object_create_internal(&pool->engine->i915->drm,
> >+						      size);
> >  		if (IS_ERR(obj))
> >  			return obj;
> >-
> >-		ret = i915_gem_object_get_pages(obj);
> >-		if (ret)
> >-			return ERR_PTR(ret);
> >-
> >-		obj->madv = I915_MADV_DONTNEED;
> >  	}
> >+	ret = i915_gem_object_get_pages(obj);
> >+	if (ret)
> >+		return ERR_PTR(ret);
> >+
> >  	list_move_tail(&obj->batch_pool_link, list);
> >  	i915_gem_object_pin_pages(obj);
> >  	return obj;
> >diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
> >new file mode 100644
> >index 000000000000..255b9232b8ef
> >--- /dev/null
> >+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
> >@@ -0,0 +1,162 @@
> >+/*
> >+ * Copyright © 2014-2016 Intel Corporation
> >+ *
> >+ * Permission is hereby granted, free of charge, to any person obtaining a
> >+ * copy of this software and associated documentation files (the "Software"),
> >+ * to deal in the Software without restriction, including without limitation
> >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> >+ * and/or sell copies of the Software, and to permit persons to whom the
> >+ * Software is furnished to do so, subject to the following conditions:
> >+ *
> >+ * The above copyright notice and this permission notice (including the next
> >+ * paragraph) shall be included in all copies or substantial portions of the
> >+ * Software.
> >+ *
> >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> >+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> >+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> >+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> >+ * IN THE SOFTWARE.
> >+ *
> >+ */
> >+
> >+#include <drm/drmP.h>
> >+#include <drm/i915_drm.h>
> >+#include "i915_drv.h"
> >+
> >+#define QUIET (__GFP_NORETRY | __GFP_NOWARN)
> >+
> >+static void internal_free_pages(struct sg_table *st)
> >+{
> >+	struct scatterlist *sg;
> >+
> >+	for (sg = st->sgl; sg; sg = __sg_next(sg))
> >+		__free_pages(sg_page(sg), get_order(sg->length));
> 
> Fragile, since it won't work together with coalescing, which I am sad
> to see not implemented. For some reason it makes me feel really good
> when it is there.

We do the coalescing.

> >+		do {
> >+			page = alloc_pages(gfp | (order ? QUIET : 0), order);
> >+			if (page)
> >+				break;
> >+			if (!order--)
> >+				goto err;
> >+		} while (1);
> 
> Feels like it could hammer hard on a fragmented system; I have big
> concerns about this.

You did notice that these were marked as temporary allocations (because
they are reclaimed under memory pressure), and on an already fragmented
system we do the right thing anyway?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH v2] drm/i915: Introduce an internal allocator for disposable private objects
  2016-10-10  8:19             ` Chris Wilson
@ 2016-10-10  8:25               ` Tvrtko Ursulin
  0 siblings, 0 replies; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-10  8:25 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 10/10/2016 09:19, Chris Wilson wrote:
> On Mon, Oct 10, 2016 at 09:11:49AM +0100, Tvrtko Ursulin wrote:
>> On 08/10/2016 09:34, Chris Wilson wrote:
>>> Quite a few of our objects used for internal hardware programming do not
>>> benefit from being swappable or from being zero initialised. As such
>>> they do not benefit from using a shmemfs backing storage and since they
>>> are internal and never directly exposed to the user, we do not need to
>>> worry about providing a filp. For these we can use a
>>> drm_i915_gem_object wrapper around a sg_table of plain struct page. They
>>> are not swap backed and not automatically pinned. If they are reaped
>>> by the shrinker, the pages are released and the contents discarded. For
>>> the internal use case, this is fine as for example, ringbuffers are
>>> pinned from being written by a request to be read by the hardware. Once
>>> they are idle, they can be discarded entirely. As such they are a good
>>> match for execlist ringbuffers and a small variety of other internal
>>> objects.
>>>
>>> In the first iteration, this is limited to the scratch batch buffers we
>>> use (for command parsing and state initialisation).
>>>
>>> v2: Allocate physically contiguous pages, where possible.
>> Since the allocator will be used constantly at runtime, my
>> recollection is that higher-order allocations can easily become next
>> to impossible, so I am wondering why? Also, in your last reply you
>> did not explain why you think this is interesting to try.
>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/Makefile                |   1 +
>>>   drivers/gpu/drm/i915/i915_drv.h              |   5 +
>>>   drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  28 ++---
>>>   drivers/gpu/drm/i915/i915_gem_internal.c     | 162 +++++++++++++++++++++++++++
>>>   drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
>>>   drivers/gpu/drm/i915/intel_engine_cs.c       |   2 +-
>>>   drivers/gpu/drm/i915/intel_ringbuffer.c      |  14 ++-
>>>   7 files changed, 190 insertions(+), 24 deletions(-)
>>>   create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c
>>>
>>> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
>>> index a998c2bce70a..b94a90f34d2d 100644
>>> --- a/drivers/gpu/drm/i915/Makefile
>>> +++ b/drivers/gpu/drm/i915/Makefile
>>> @@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
>>>   	  i915_gem_execbuffer.o \
>>>   	  i915_gem_fence.o \
>>>   	  i915_gem_gtt.o \
>>> +	  i915_gem_internal.o \
>>>   	  i915_gem.o \
>>>   	  i915_gem_render_state.o \
>>>   	  i915_gem_request.o \
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index fee5cc92e2f2..bad97f1e5265 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -3538,6 +3538,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
>>>   					       u32 gtt_offset,
>>>   					       u32 size);
>>> +/* i915_gem_internal.c */
>>> +struct drm_i915_gem_object *
>>> +i915_gem_object_create_internal(struct drm_device *dev,
>>> +				unsigned int size);
>>> +
>>>   /* i915_gem_shrinker.c */
>>>   unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv,
>>>   			      unsigned long target,
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
>>> index cb25cad3318c..3934c9103cf2 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
>>> @@ -97,9 +97,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>>>   			size_t size)
>>>   {
>>>   	struct drm_i915_gem_object *obj = NULL;
>>> -	struct drm_i915_gem_object *tmp, *next;
>>> +	struct drm_i915_gem_object *tmp;
>>>   	struct list_head *list;
>>> -	int n;
>>> +	int n, ret;
>>>   	lockdep_assert_held(&pool->engine->i915->drm.struct_mutex);
>>> @@ -112,19 +112,12 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>>>   		n = ARRAY_SIZE(pool->cache_list) - 1;
>>>   	list = &pool->cache_list[n];
>>> -	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
>>> +	list_for_each_entry(tmp, list, batch_pool_link) {
>>>   		/* The batches are strictly LRU ordered */
>>>   		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
>>>   					     &tmp->base.dev->struct_mutex))
>>>   			break;
>>> -		/* While we're looping, do some clean up */
>>> -		if (tmp->madv == __I915_MADV_PURGED) {
>>> -			list_del(&tmp->batch_pool_link);
>>> -			i915_gem_object_put(tmp);
>>> -			continue;
>>> -		}
>>> -
>>>   		if (tmp->base.size >= size) {
>>>   			obj = tmp;
>>>   			break;
>>> @@ -132,19 +125,16 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>>>   	}
>>>   	if (obj == NULL) {
>>> -		int ret;
>>> -
>>> -		obj = i915_gem_object_create(&pool->engine->i915->drm, size);
>>> +		obj = i915_gem_object_create_internal(&pool->engine->i915->drm,
>>> +						      size);
>>>   		if (IS_ERR(obj))
>>>   			return obj;
>>> -
>>> -		ret = i915_gem_object_get_pages(obj);
>>> -		if (ret)
>>> -			return ERR_PTR(ret);
>>> -
>>> -		obj->madv = I915_MADV_DONTNEED;
>>>   	}
>>> +	ret = i915_gem_object_get_pages(obj);
>>> +	if (ret)
>>> +		return ERR_PTR(ret);
>>> +
>>>   	list_move_tail(&obj->batch_pool_link, list);
>>>   	i915_gem_object_pin_pages(obj);
>>>   	return obj;
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
>>> new file mode 100644
>>> index 000000000000..255b9232b8ef
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/i915/i915_gem_internal.c
>>> @@ -0,0 +1,162 @@
>>> +/*
>>> + * Copyright © 2014-2016 Intel Corporation
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person obtaining a
>>> + * copy of this software and associated documentation files (the "Software"),
>>> + * to deal in the Software without restriction, including without limitation
>>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>>> + * and/or sell copies of the Software, and to permit persons to whom the
>>> + * Software is furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice (including the next
>>> + * paragraph) shall be included in all copies or substantial portions of the
>>> + * Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>>> + * IN THE SOFTWARE.
>>> + *
>>> + */
>>> +
>>> +#include <drm/drmP.h>
>>> +#include <drm/i915_drm.h>
>>> +#include "i915_drv.h"
>>> +
>>> +#define QUIET (__GFP_NORETRY | __GFP_NOWARN)
>>> +
>>> +static void internal_free_pages(struct sg_table *st)
>>> +{
>>> +	struct scatterlist *sg;
>>> +
>>> +	for (sg = st->sgl; sg; sg = __sg_next(sg))
>>> +		__free_pages(sg_page(sg), get_order(sg->length));
>> Fragile, since it won't work together with coalescing, which I am sad
>> to see not implemented. For some reason it makes me feel really good
>> when it is there.
> We do the coalescing.

Where? It is not in this patch? One allocation == one st->nents++. If 
the next allocation is contiguous there is no coalescing.
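
Just to illustrate what I mean, here is a rough, untested sketch of the
kind of coalescing I would expect in the allocation loop ('prev' is a
new local, initialised to NULL before the loop; the rest uses the names
from your patch):

	if (prev &&
	    page_to_phys(sg_page(prev)) + prev->length == page_to_phys(page)) {
		/* Physically contiguous with the previous chunk, so grow
		 * that sg entry instead of starting a new one.
		 */
		prev->length += PAGE_SIZE << order;
	} else {
		sg_set_page(sg, page, PAGE_SIZE << order, 0);
		prev = sg;
		sg = __sg_next(sg);
		st->nents++;
	}

(And then internal_free_pages() can no longer rely on
get_order(sg->length) per entry, which is exactly the fragility I meant
above; the free path would have to release each original allocation
instead.)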

>>> +		do {
>>> +			page = alloc_pages(gfp | (order ? QUIET : 0), order);
>>> +			if (page)
>>> +				break;
>>> +			if (!order--)
>>> +				goto err;
>>> +		} while (1);
>> Feels like it could hammer hard on a fragmented system; I have big
>> concerns about this.
> You did notice that these were marked as temporary allocations (because
> they are reclaimed under memory pressure), and on an already fragmented
> system we do the right thing anyway?

You try smaller orders, but then for the next allocation you start again
from the initial order which already failed previously.
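
Something along these lines is what I had in mind (rough sketch only;
'max_order' is a made-up local kept across the chunks, the rest follows
your patch):

	unsigned int max_order = MAX_ORDER - 1;	/* before the loop */
	...
	/* never start a new chunk above an order that already failed */
	if (order > max_order)
		order = max_order;
	do {
		page = alloc_pages(gfp | (order ? QUIET : 0), order);
		if (page)
			break;
		if (!order--)
			goto err;
		/* remember the failure for all the following chunks */
		max_order = order;
	} while (1);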

Regards,

Tvrtko


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 16/42] drm/i915: Refactor object page API
  2016-10-07  9:46 ` [PATCH 16/42] drm/i915: Refactor object page API Chris Wilson
@ 2016-10-10 10:54   ` John Harrison
  2016-10-11 11:23   ` Tvrtko Ursulin
  2016-10-13 11:04   ` Joonas Lahtinen
  2 siblings, 0 replies; 107+ messages in thread
From: John Harrison @ 2016-10-10 10:54 UTC (permalink / raw)
  To: intel-gfx

On 07/10/2016 10:46, Chris Wilson wrote:
> The plan is to make obtaining the backing storage for the object avoid
> struct_mutex (i.e. use its own locking). The first step is to update the
> API so that normal users only call pin/unpin whilst working on the
> backing storage.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_cmd_parser.c       |   2 +-
>   drivers/gpu/drm/i915/i915_debugfs.c          |  17 +--
>   drivers/gpu/drm/i915/i915_drv.h              |  89 ++++++++----
>   drivers/gpu/drm/i915/i915_gem.c              | 207 +++++++++++++--------------
>   drivers/gpu/drm/i915/i915_gem_batch_pool.c   |   3 +-
>   drivers/gpu/drm/i915/i915_gem_dmabuf.c       |  14 +-
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c   |   2 +-
>   drivers/gpu/drm/i915/i915_gem_fence.c        |   4 +-
>   drivers/gpu/drm/i915/i915_gem_gtt.c          |  10 +-
>   drivers/gpu/drm/i915/i915_gem_internal.c     |  19 +--
>   drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
>   drivers/gpu/drm/i915/i915_gem_shrinker.c     |  10 +-
>   drivers/gpu/drm/i915/i915_gem_stolen.c       |  24 ++--
>   drivers/gpu/drm/i915/i915_gem_tiling.c       |   8 +-
>   drivers/gpu/drm/i915/i915_gem_userptr.c      |  30 ++--
>   drivers/gpu/drm/i915/i915_gpu_error.c        |   4 +-
>   drivers/gpu/drm/i915/intel_lrc.c             |   6 +-
>   17 files changed, 234 insertions(+), 217 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index 70980f82a15b..8d20020cb9f9 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -1290,7 +1290,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
>   	}
>   
>   	if (ret == 0 && needs_clflush_after)
> -		drm_clflush_virt_range(shadow_batch_obj->mapping, batch_len);
> +		drm_clflush_virt_range(shadow_batch_obj->mm.mapping, batch_len);
>   	i915_gem_object_unpin_map(shadow_batch_obj);
>   
>   	return ret;
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index e4b5ba771bea..b807ddf65e04 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -112,7 +112,7 @@ static char get_global_flag(struct drm_i915_gem_object *obj)
>   
>   static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
>   {
> -	return obj->mapping ? 'M' : ' ';
> +	return obj->mm.mapping ? 'M' : ' ';
>   }
>   
>   static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
> @@ -158,8 +158,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>   		   i915_gem_active_get_seqno(&obj->last_write,
>   					     &obj->base.dev->struct_mutex),
>   		   i915_cache_level_str(dev_priv, obj->cache_level),
> -		   obj->dirty ? " dirty" : "",
> -		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
> +		   obj->mm.dirty ? " dirty" : "",
> +		   obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
>   	if (obj->base.name)
>   		seq_printf(m, " (name: %d)", obj->base.name);
>   	list_for_each_entry(vma, &obj->vma_list, obj_link) {
> @@ -411,12 +411,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
>   		size += obj->base.size;
>   		++count;
>   
> -		if (obj->madv == I915_MADV_DONTNEED) {
> +		if (obj->mm.madv == I915_MADV_DONTNEED) {
>   			purgeable_size += obj->base.size;
>   			++purgeable_count;
>   		}
>   
> -		if (obj->mapping) {
> +		if (obj->mm.mapping) {
>   			mapped_count++;
>   			mapped_size += obj->base.size;
>   		}
> @@ -433,12 +433,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
>   			++dpy_count;
>   		}
>   
> -		if (obj->madv == I915_MADV_DONTNEED) {
> +		if (obj->mm.madv == I915_MADV_DONTNEED) {
>   			purgeable_size += obj->base.size;
>   			++purgeable_count;
>   		}
>   
> -		if (obj->mapping) {
> +		if (obj->mm.mapping) {
>   			mapped_count++;
>   			mapped_size += obj->base.size;
>   		}
> @@ -2018,7 +2018,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
>   		seq_printf(m, "\tBound in GGTT at 0x%08x\n",
>   			   i915_ggtt_offset(vma));
>   
> -	if (i915_gem_object_get_pages(vma->obj)) {
> +	if (i915_gem_object_pin_pages(vma->obj)) {
>   		seq_puts(m, "\tFailed to get pages for context object\n\n");
Error message needs updating to match the function call change.

>   		return;
>   	}
> @@ -2037,6 +2037,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
>   		kunmap_atomic(reg_state);
>   	}
>   
> +	i915_gem_object_unpin_pages(vma->obj);
>   	seq_putc(m, '\n');
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a96b446d8db4..3c22d49005fe 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2238,17 +2238,6 @@ struct drm_i915_gem_object {
>   #define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
>   
>   	/**
> -	 * This is set if the object has been written to since last bound
> -	 * to the GTT
> -	 */
> -	unsigned int dirty:1;
> -
> -	/**
> -	 * Advice: are the backing pages purgeable?
> -	 */
> -	unsigned int madv:2;
> -
> -	/**
>   	 * Whether the current gtt mapping needs to be mappable (and isn't just
>   	 * mappable by accident). Track pin and fault separate for a more
>   	 * accurate mappable working set.
> @@ -2276,16 +2265,31 @@ struct drm_i915_gem_object {
>   	unsigned int bind_count;
>   	unsigned int pin_display;
>   
> -	struct sg_table *pages;
> -	int pages_pin_count;
> -	struct i915_gem_object_page_iter {
> -		struct scatterlist *sg_pos;
> -		unsigned long sg_idx;
> +	struct {
> +		unsigned int pages_pin_count;
> +
> +		struct sg_table *pages;
> +		void *mapping;
> +
> +		struct i915_gem_object_page_iter {
> +			struct scatterlist *sg_pos;
> +			unsigned long sg_idx;
>   
> -		struct radix_tree_root radix;
> -		struct mutex lock;
> -	} get_page;
> -	void *mapping;
> +			struct radix_tree_root radix;
> +			struct mutex lock;
> +		} get_page;
> +
> +		/**
> +		 * Advice: are the backing pages purgeable?
> +		 */
> +		unsigned int madv:2;
> +
> +		/**
> +		 * This is set if the object has been written to since the
> +		 * pages were last acquired.
> +		 */
> +		unsigned int dirty:1;
> +	} mm;
>   
>   	/** Breadcrumb of last rendering to the buffer.
>   	 * There can only be one writer, but we allow for multiple readers.
> @@ -3160,13 +3164,10 @@ void i915_vma_close(struct i915_vma *vma);
>   void i915_vma_destroy(struct i915_vma *vma);
>   
>   int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
> -int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
>   void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
>   void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
>   
> -int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
> -
> -static inline int __sg_page_count(struct scatterlist *sg)
> +static inline int __sg_page_count(const struct scatterlist *sg)
>   {
>   	return sg->length >> PAGE_SHIFT;
>   }
> @@ -3187,18 +3188,48 @@ dma_addr_t
>   i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
>   				unsigned long n);
>   
> -static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
> +int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
> +
> +static inline int __must_check
> +i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>   {
> -	BUG_ON(obj->pages == NULL);
> -	obj->pages_pin_count++;
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	if (obj->mm.pages_pin_count++)
> +		return 0;
> +
> +	return __i915_gem_object_get_pages(obj);
> +}
> +
> +static inline void
> +__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	GEM_BUG_ON(!obj->mm.pages);
> +	obj->mm.pages_pin_count++;
> +}
> +
> +static inline bool
> +i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
> +{
> +	return obj->mm.pages_pin_count;
> +}
> +
> +static inline void
> +__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> +	GEM_BUG_ON(!obj->mm.pages);
> +	obj->mm.pages_pin_count--;
>   }
>   
>   static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>   {
> -	BUG_ON(obj->pages_pin_count == 0);
> -	obj->pages_pin_count--;
> +	__i915_gem_object_unpin_pages(obj);
>   }
Am I missing something, or is the above identical to simply '#define
i915_gem_object_unpin_pages __i915_gem_object_unpin_pages'? In which
case, why bother having two separate functions?

>   
> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
> +
>   enum i915_map_type {
>   	I915_MAP_WB = 0,
>   	I915_MAP_WC,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index af7d51f16658..df774ddf62ae 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -216,7 +216,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>   	sg_dma_address(sg) = obj->phys_handle->busaddr;
>   	sg_dma_len(sg) = obj->base.size;
>   
> -	obj->pages = st;
> +	obj->mm.pages = st;
>   	return 0;
>   }
>   
> @@ -225,7 +225,7 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
>   {
>   	int ret;
>   
> -	BUG_ON(obj->madv == __I915_MADV_PURGED);
> +	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
>   
>   	ret = i915_gem_object_set_to_cpu_domain(obj, true);
>   	if (WARN_ON(ret)) {
> @@ -235,10 +235,10 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
>   		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
>   	}
>   
> -	if (obj->madv == I915_MADV_DONTNEED)
> -		obj->dirty = 0;
> +	if (obj->mm.madv == I915_MADV_DONTNEED)
> +		obj->mm.dirty = false;
>   
> -	if (obj->dirty) {
> +	if (obj->mm.dirty) {
>   		struct address_space *mapping = obj->base.filp->f_mapping;
>   		char *vaddr = obj->phys_handle->vaddr;
>   		int i;
> @@ -257,22 +257,23 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
>   			kunmap_atomic(dst);
>   
>   			set_page_dirty(page);
> -			if (obj->madv == I915_MADV_WILLNEED)
> +			if (obj->mm.madv == I915_MADV_WILLNEED)
>   				mark_page_accessed(page);
>   			put_page(page);
>   			vaddr += PAGE_SIZE;
>   		}
> -		obj->dirty = 0;
> +		obj->mm.dirty = false;
>   	}
>   
> -	sg_free_table(obj->pages);
> -	kfree(obj->pages);
> +	sg_free_table(obj->mm.pages);
> +	kfree(obj->mm.pages);
>   }
>   
>   static void
>   i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
>   {
>   	drm_pci_free(obj->base.dev, obj->phys_handle);
> +	i915_gem_object_unpin_pages(obj);
>   }
>   
>   static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
> @@ -506,7 +507,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>   		return 0;
>   	}
>   
> -	if (obj->madv != I915_MADV_WILLNEED)
> +	if (obj->mm.madv != I915_MADV_WILLNEED)
>   		return -EFAULT;
>   
>   	if (obj->base.filp == NULL)
> @@ -516,7 +517,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>   	if (ret)
>   		return ret;
>   
> -	ret = i915_gem_object_put_pages(obj);
> +	ret = __i915_gem_object_put_pages(obj);
>   	if (ret)
>   		return ret;
>   
> @@ -528,7 +529,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>   	obj->phys_handle = phys;
>   	obj->ops = &i915_gem_phys_ops;
>   
> -	return i915_gem_object_get_pages(obj);
> +	return i915_gem_object_pin_pages(obj);
>   }
>   
>   static int
> @@ -724,12 +725,10 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
>   	if (ret)
>   		return ret;
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ret;
>   
> -	i915_gem_object_pin_pages(obj);
> -
>   	i915_gem_object_flush_gtt_write_domain(obj);
>   
>   	/* If we're not in the cpu read domain, set ourself into the gtt
> @@ -777,12 +776,10 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
>   	if (ret)
>   		return ret;
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ret;
>   
> -	i915_gem_object_pin_pages(obj);
> -
>   	i915_gem_object_flush_gtt_write_domain(obj);
>   
>   	/* If we're not in the cpu write domain, set ourself into the
> @@ -812,7 +809,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
>   		obj->cache_dirty = true;
>   
>   	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
> -	obj->dirty = 1;
> +	obj->mm.dirty = true;
>   	/* return with the pages pinned */
>   	return 0;
>   
> @@ -949,13 +946,11 @@ i915_gem_gtt_pread(struct drm_device *dev,
>   		if (ret)
>   			goto out;
>   
> -		ret = i915_gem_object_get_pages(obj);
> +		ret = i915_gem_object_pin_pages(obj);
>   		if (ret) {
>   			remove_mappable_node(&node);
>   			goto out;
>   		}
> -
> -		i915_gem_object_pin_pages(obj);
>   	}
>   
>   	ret = i915_gem_object_set_to_gtt_domain(obj, false);
> @@ -1062,7 +1057,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
>   	offset = args->offset;
>   	remain = args->size;
>   
> -	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
> +	for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents,
>   			 offset >> PAGE_SHIFT) {
>   		struct page *page = sg_page_iter_page(&sg_iter);
>   
> @@ -1254,13 +1249,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
>   		if (ret)
>   			goto out;
>   
> -		ret = i915_gem_object_get_pages(obj);
> +		ret = i915_gem_object_pin_pages(obj);
>   		if (ret) {
>   			remove_mappable_node(&node);
>   			goto out;
>   		}
> -
> -		i915_gem_object_pin_pages(obj);
>   	}
>   
>   	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> @@ -1268,7 +1261,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
>   		goto out_unpin;
>   
>   	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
> -	obj->dirty = true;
> +	obj->mm.dirty = true;
>   
>   	user_data = u64_to_user_ptr(args->data_ptr);
>   	offset = args->offset;
> @@ -1439,7 +1432,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>   	offset = args->offset;
>   	remain = args->size;
>   
> -	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
> +	for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents,
>   			 offset >> PAGE_SHIFT) {
>   		struct page *page = sg_page_iter_page(&sg_iter);
>   		int partial_cacheline_write;
> @@ -2228,7 +2221,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
>   	 * backing pages, *now*.
>   	 */
>   	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
> -	obj->madv = __I915_MADV_PURGED;
> +	obj->mm.madv = __I915_MADV_PURGED;
>   }
>   
>   /* Try to discard unwanted pages */
> @@ -2237,7 +2230,7 @@ i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
>   {
>   	struct address_space *mapping;
>   
> -	switch (obj->madv) {
> +	switch (obj->mm.madv) {
>   	case I915_MADV_DONTNEED:
>   		i915_gem_object_truncate(obj);
>   	case __I915_MADV_PURGED:
> @@ -2258,7 +2251,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>   	struct page *page;
>   	int ret;
>   
> -	BUG_ON(obj->madv == __I915_MADV_PURGED);
> +	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
>   
>   	ret = i915_gem_object_set_to_cpu_domain(obj, true);
>   	if (WARN_ON(ret)) {
> @@ -2274,22 +2267,22 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>   	if (i915_gem_object_needs_bit17_swizzle(obj))
>   		i915_gem_object_save_bit_17_swizzle(obj);
>   
> -	if (obj->madv == I915_MADV_DONTNEED)
> -		obj->dirty = 0;
> +	if (obj->mm.madv == I915_MADV_DONTNEED)
> +		obj->mm.dirty = false;
>   
> -	for_each_sgt_page(page, sgt_iter, obj->pages) {
> -		if (obj->dirty)
> +	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
> +		if (obj->mm.dirty)
>   			set_page_dirty(page);
>   
> -		if (obj->madv == I915_MADV_WILLNEED)
> +		if (obj->mm.madv == I915_MADV_WILLNEED)
>   			mark_page_accessed(page);
>   
>   		put_page(page);
>   	}
> -	obj->dirty = 0;
> +	obj->mm.dirty = false;
>   
> -	sg_free_table(obj->pages);
> -	kfree(obj->pages);
> +	sg_free_table(obj->mm.pages);
> +	kfree(obj->mm.pages);
>   }
>   
>   static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
> @@ -2297,21 +2290,20 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
>   	struct radix_tree_iter iter;
>   	void **slot;
>   
> -	radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0)
> -		radix_tree_delete(&obj->get_page.radix, iter.index);
> +	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
> +		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
>   }
>   
> -int
> -i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>   {
>   	const struct drm_i915_gem_object_ops *ops = obj->ops;
>   
>   	lockdep_assert_held(&obj->base.dev->struct_mutex);
>   
> -	if (obj->pages == NULL)
> +	if (!obj->mm.pages)
>   		return 0;
>   
> -	if (obj->pages_pin_count)
> +	if (i915_gem_object_has_pinned_pages(obj))
>   		return -EBUSY;
>   
>   	GEM_BUG_ON(obj->bind_count);
> @@ -2321,22 +2313,22 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>   	 * lists early. */
>   	list_del(&obj->global_list);
>   
> -	if (obj->mapping) {
> +	if (obj->mm.mapping) {
>   		void *ptr;
>   
> -		ptr = ptr_mask_bits(obj->mapping);
> +		ptr = ptr_mask_bits(obj->mm.mapping);
>   		if (is_vmalloc_addr(ptr))
>   			vunmap(ptr);
>   		else
>   			kunmap(kmap_to_page(ptr));
>   
> -		obj->mapping = NULL;
> +		obj->mm.mapping = NULL;
>   	}
>   
>   	__i915_gem_object_reset_page_iter(obj);
>   
>   	ops->put_pages(obj);
> -	obj->pages = NULL;
> +	obj->mm.pages = NULL;
>   
>   	i915_gem_object_invalidate(obj);
>   
> @@ -2431,7 +2423,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>   	if (!swiotlb_nr_tbl())
>   #endif
>   		sg_mark_end(sg);
> -	obj->pages = st;
> +	obj->mm.pages = st;
>   
>   	ret = i915_gem_gtt_prepare_object(obj);
>   	if (ret)
> @@ -2442,7 +2434,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>   
>   	if (i915_gem_object_is_tiled(obj) &&
>   	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
> -		i915_gem_object_pin_pages(obj);
> +		__i915_gem_object_pin_pages(obj);
>   
>   	return 0;
>   
> @@ -2474,8 +2466,7 @@ err_pages:
>    * either as a result of memory pressure (reaping pages under the shrinker)
>    * or as the object is itself released.
>    */
> -int
> -i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> +int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>   	const struct drm_i915_gem_object_ops *ops = obj->ops;
> @@ -2483,24 +2474,25 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>   
>   	lockdep_assert_held(&obj->base.dev->struct_mutex);
>   
> -	if (obj->pages)
> +	if (obj->mm.pages)
>   		return 0;
>   
> -	if (obj->madv != I915_MADV_WILLNEED) {
> +	if (obj->mm.madv != I915_MADV_WILLNEED) {
>   		DRM_DEBUG("Attempting to obtain a purgeable object\n");
> +		__i915_gem_object_unpin_pages(obj);
>   		return -EFAULT;
>   	}
>   
> -	BUG_ON(obj->pages_pin_count);
> -
>   	ret = ops->get_pages(obj);
> -	if (ret)
> +	if (ret) {
> +		__i915_gem_object_unpin_pages(obj);
>   		return ret;
> +	}
>   
>   	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
>   
> -	obj->get_page.sg_pos = obj->pages->sgl;
> -	obj->get_page.sg_idx = 0;
> +	obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
> +	obj->mm.get_page.sg_idx = 0;
>   
>   	return 0;
>   }
> @@ -2510,7 +2502,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
>   				 enum i915_map_type type)
>   {
>   	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
> -	struct sg_table *sgt = obj->pages;
> +	struct sg_table *sgt = obj->mm.pages;
>   	struct sgt_iter sgt_iter;
>   	struct page *page;
>   	struct page *stack_pages[32];
> @@ -2564,14 +2556,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>   	lockdep_assert_held(&obj->base.dev->struct_mutex);
>   	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ERR_PTR(ret);
>   
> -	i915_gem_object_pin_pages(obj);
> -	pinned = obj->pages_pin_count > 1;
> +	pinned = obj->mm.pages_pin_count > 1;
>   
> -	ptr = ptr_unpack_bits(obj->mapping, has_type);
> +	ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
>   	if (ptr && has_type != type) {
>   		if (pinned) {
>   			ret = -EBUSY;
> @@ -2583,7 +2574,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>   		else
>   			kunmap(kmap_to_page(ptr));
>   
> -		ptr = obj->mapping = NULL;
> +		ptr = obj->mm.mapping = NULL;
>   	}
>   
>   	if (!ptr) {
> @@ -2593,7 +2584,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>   			goto err;
>   		}
>   
> -		obj->mapping = ptr_pack_bits(ptr, type);
> +		obj->mm.mapping = ptr_pack_bits(ptr, type);
>   	}
>   
>   	return ptr;
> @@ -3030,7 +3021,7 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		goto destroy;
>   
>   	GEM_BUG_ON(obj->bind_count == 0);
> -	GEM_BUG_ON(!obj->pages);
> +	GEM_BUG_ON(!obj->mm.pages);
>   
>   	if (i915_vma_is_map_and_fenceable(vma)) {
>   		/* release the fence reg _after_ flushing */
> @@ -3054,7 +3045,7 @@ int i915_vma_unbind(struct i915_vma *vma)
>   	drm_mm_remove_node(&vma->node);
>   	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
>   
> -	if (vma->pages != obj->pages) {
> +	if (vma->pages != obj->mm.pages) {
>   		GEM_BUG_ON(!vma->pages);
>   		sg_free_table(vma->pages);
>   		kfree(vma->pages);
> @@ -3186,12 +3177,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   		return -E2BIG;
>   	}
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ret;
>   
> -	i915_gem_object_pin_pages(obj);
> -
>   	if (flags & PIN_OFFSET_FIXED) {
>   		u64 offset = flags & PIN_OFFSET_MASK;
>   		if (offset & (alignment - 1) || offset > end - size) {
> @@ -3270,7 +3259,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>   	 * to GPU, and we can ignore the cache flush because it'll happen
>   	 * again at bind time.
>   	 */
> -	if (obj->pages == NULL)
> +	if (!obj->mm.pages)
>   		return false;
>   
>   	/*
> @@ -3294,7 +3283,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>   	}
>   
>   	trace_i915_gem_object_clflush(obj);
> -	drm_clflush_sg(obj->pages);
> +	drm_clflush_sg(obj->mm.pages);
>   	obj->cache_dirty = false;
>   
>   	return true;
> @@ -3408,7 +3397,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>   	 * continue to assume that the obj remained out of the CPU cached
>   	 * domain.
>   	 */
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ret;
>   
> @@ -3432,7 +3421,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>   	if (write) {
>   		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
>   		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
> -		obj->dirty = 1;
> +		obj->mm.dirty = true;
>   	}
>   
>   	trace_i915_gem_object_change_domain(obj,
> @@ -3441,6 +3430,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>   
>   	/* And bump the LRU for this access */
>   	i915_gem_object_bump_inactive_ggtt(obj);
> +	i915_gem_object_unpin_pages(obj);
>   
>   	return 0;
>   }
> @@ -4210,23 +4200,23 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
>   		goto unlock;
>   	}
>   
> -	if (obj->pages &&
> +	if (obj->mm.pages &&
>   	    i915_gem_object_is_tiled(obj) &&
>   	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
> -		if (obj->madv == I915_MADV_WILLNEED)
> -			i915_gem_object_unpin_pages(obj);
> +		if (obj->mm.madv == I915_MADV_WILLNEED)
> +			__i915_gem_object_unpin_pages(obj);
>   		if (args->madv == I915_MADV_WILLNEED)
> -			i915_gem_object_pin_pages(obj);
> +			__i915_gem_object_pin_pages(obj);
>   	}
>   
> -	if (obj->madv != __I915_MADV_PURGED)
> -		obj->madv = args->madv;
> +	if (obj->mm.madv != __I915_MADV_PURGED)
> +		obj->mm.madv = args->madv;
>   
>   	/* if the object is no longer attached, discard its backing storage */
> -	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
> +	if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages)
>   		i915_gem_object_truncate(obj);
>   
> -	args->retained = obj->madv != __I915_MADV_PURGED;
> +	args->retained = obj->mm.madv != __I915_MADV_PURGED;
>   
>   	i915_gem_object_put(obj);
>   unlock:
> @@ -4252,9 +4242,10 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>   	obj->ops = ops;
>   
>   	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
> -	obj->madv = I915_MADV_WILLNEED;
> -	INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL);
> -	mutex_init(&obj->get_page.lock);
> +
> +	obj->mm.madv = I915_MADV_WILLNEED;
> +	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL);
> +	mutex_init(&obj->mm.get_page.lock);
>   
>   	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
>   }
> @@ -4331,7 +4322,7 @@ static bool discard_backing_storage(struct drm_i915_gem_object *obj)
>   	 * back the contents from the GPU.
>   	 */
>   
> -	if (obj->madv != I915_MADV_WILLNEED)
> +	if (obj->mm.madv != I915_MADV_WILLNEED)
>   		return false;
>   
>   	if (obj->base.filp == NULL)
> @@ -4373,32 +4364,27 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>   	}
>   	GEM_BUG_ON(obj->bind_count);
>   
> -	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
> -	 * before progressing. */
> -	if (obj->stolen)
> -		i915_gem_object_unpin_pages(obj);
> -
>   	WARN_ON(atomic_read(&obj->frontbuffer_bits));
>   
> -	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
> +	if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED &&
>   	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
>   	    i915_gem_object_is_tiled(obj))
> -		i915_gem_object_unpin_pages(obj);
> +		__i915_gem_object_unpin_pages(obj);
>   
> -	if (WARN_ON(obj->pages_pin_count))
> -		obj->pages_pin_count = 0;
> +	if (obj->ops->release)
> +		obj->ops->release(obj);
> +
> +	if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
> +		obj->mm.pages_pin_count = 0;
>   	if (discard_backing_storage(obj))
> -		obj->madv = I915_MADV_DONTNEED;
> -	i915_gem_object_put_pages(obj);
> +		obj->mm.madv = I915_MADV_DONTNEED;
> +	__i915_gem_object_put_pages(obj);
>   
> -	BUG_ON(obj->pages);
> +	GEM_BUG_ON(obj->mm.pages);
>   
>   	if (obj->base.import_attach)
>   		drm_prime_gem_destroy(&obj->base, NULL);
>   
> -	if (obj->ops->release)
> -		obj->ops->release(obj);
> -
>   	drm_gem_object_release(&obj->base);
>   	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>   
> @@ -4935,14 +4921,13 @@ i915_gem_object_create_from_data(struct drm_device *dev,
>   	if (ret)
>   		goto fail;
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		goto fail;
>   
> -	i915_gem_object_pin_pages(obj);
> -	sg = obj->pages;
> +	sg = obj->mm.pages;
>   	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
> -	obj->dirty = 1;		/* Backing store is now out of date */
> +	obj->mm.dirty = true; /* Backing store is now out of date */
>   	i915_gem_object_unpin_pages(obj);
>   
>   	if (WARN_ON(bytes != size)) {
> @@ -4962,7 +4947,7 @@ static struct scatterlist *
>   __i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>   			 unsigned long n, unsigned int *offset)
>   {
> -	struct scatterlist *sg = obj->pages->sgl;
> +	struct scatterlist *sg = obj->mm.pages->sgl;
>   	int idx = 0;
>   
>   	while (idx + __sg_page_count(sg) <= n) {
> @@ -4980,11 +4965,11 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>   		       unsigned long n,
>   		       unsigned int *offset)
>   {
> -	struct i915_gem_object_page_iter *iter = &obj->get_page;
> +	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
>   	struct scatterlist *sg;
>   
>   	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
> -	GEM_BUG_ON(obj->pages_pin_count == 0);
> +	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
>   
>   	if (n < READ_ONCE(iter->sg_idx))
>   		goto lookup;
> @@ -5058,7 +5043,7 @@ i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
>   	struct page *page;
>   
>   	page = i915_gem_object_get_page(obj, n);
> -	if (!obj->dirty)
> +	if (!obj->mm.dirty)
>   		set_page_dirty(page);
>   
>   	return page;
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index 3934c9103cf2..6b656822bb3a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -131,11 +131,10 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>   			return obj;
>   	}
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ERR_PTR(ret);
>   
>   	list_move_tail(&obj->batch_pool_link, list);
> -	i915_gem_object_pin_pages(obj);
>   	return obj;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 97c9d68b45df..10441dc72e73 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -48,12 +48,10 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
>   	if (ret)
>   		goto err;
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		goto err_unlock;
>   
> -	i915_gem_object_pin_pages(obj);
> -
>   	/* Copy sg so that we make an independent mapping */
>   	st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
>   	if (st == NULL) {
> @@ -61,13 +59,13 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
>   		goto err_unpin;
>   	}
>   
> -	ret = sg_alloc_table(st, obj->pages->nents, GFP_KERNEL);
> +	ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
>   	if (ret)
>   		goto err_free;
>   
> -	src = obj->pages->sgl;
> +	src = obj->mm.pages->sgl;
>   	dst = st->sgl;
> -	for (i = 0; i < obj->pages->nents; i++) {
> +	for (i = 0; i < obj->mm.pages->nents; i++) {
>   		sg_set_page(dst, sg_page(src), src->length, 0);
>   		dst = sg_next(dst);
>   		src = sg_next(src);
> @@ -299,14 +297,14 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
>   	if (IS_ERR(sg))
>   		return PTR_ERR(sg);
>   
> -	obj->pages = sg;
> +	obj->mm.pages = sg;
>   	return 0;
>   }
>   
>   static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj)
>   {
>   	dma_buf_unmap_attachment(obj->base.import_attach,
> -				 obj->pages, DMA_BIDIRECTIONAL);
> +				 obj->mm.pages, DMA_BIDIRECTIONAL);
>   }
>   
>   static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0deecd4e3b6c..062e098b0909 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1290,7 +1290,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>   
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   
> -	obj->dirty = 1; /* be paranoid  */
> +	obj->mm.dirty = true; /* be paranoid  */
>   
>   	/* Add a reference if we're newly entering the active list.
>   	 * The order in which we add operations to the retirement queue is
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
> index 8df1fa7234e8..398856160656 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence.c
> @@ -648,7 +648,7 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
>   		return;
>   
>   	i = 0;
> -	for_each_sgt_page(page, sgt_iter, obj->pages) {
> +	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
>   		char new_bit_17 = page_to_phys(page) >> 17;
>   		if ((new_bit_17 & 0x1) !=
>   		    (test_bit(i, obj->bit_17) != 0)) {
> @@ -687,7 +687,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
>   
>   	i = 0;
>   
> -	for_each_sgt_page(page, sgt_iter, obj->pages) {
> +	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
>   		if (page_to_phys(page) & (1 << 17))
>   			__set_bit(i, obj->bit_17);
>   		else
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7e0c98576e72..3c711e0b8a3f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -175,7 +175,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
>   {
>   	u32 pte_flags = 0;
>   
> -	vma->pages = vma->obj->pages;
> +	vma->pages = vma->obj->mm.pages;
>   
>   	/* Currently applicable only to VLV */
>   	if (vma->obj->gt_ro)
> @@ -2295,7 +2295,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
>   int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
>   {
>   	if (!dma_map_sg(&obj->base.dev->pdev->dev,
> -			obj->pages->sgl, obj->pages->nents,
> +			obj->mm.pages->sgl, obj->mm.pages->nents,
>   			PCI_DMA_BIDIRECTIONAL))
>   		return -ENOSPC;
>   
> @@ -2685,7 +2685,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
>   		}
>   	}
>   
> -	dma_unmap_sg(kdev, obj->pages->sgl, obj->pages->nents,
> +	dma_unmap_sg(kdev, obj->mm.pages->sgl, obj->mm.pages->nents,
>   		     PCI_DMA_BIDIRECTIONAL);
>   }
>   
> @@ -3526,7 +3526,7 @@ intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
>   
>   	/* Populate source page list from the object. */
>   	i = 0;
> -	for_each_sgt_dma(dma_addr, sgt_iter, obj->pages)
> +	for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
>   		page_addr_list[i++] = dma_addr;
>   
>   	GEM_BUG_ON(i != n_pages);
> @@ -3617,7 +3617,7 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
>   		return 0;
>   
>   	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
> -		vma->pages = vma->obj->pages;
> +		vma->pages = vma->obj->mm.pages;
>   	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
>   		vma->pages =
>   			intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
> diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
> index 534a61c1aba2..24eb347f9e0a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_internal.c
> +++ b/drivers/gpu/drm/i915/i915_gem_internal.c
> @@ -28,10 +28,11 @@
>   
>   static void internal_free_pages(struct sg_table *st)
>   {
> -	struct sg_page_iter sg_iter;
> +	struct sgt_iter iter;
> +	struct page *page;
>   
> -	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
> -		put_page(sg_page_iter_page(&sg_iter));
> +	for_each_sgt_page(page, iter, st)
> +		put_page(page);
>   
>   	sg_free_table(st);
>   	kfree(st);
> @@ -94,10 +95,10 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
>   	if (!swiotlb_nr_tbl())
>   #endif
>   		sg_mark_end(sg);
> -	obj->pages = st;
> +	obj->mm.pages = st;
>   
>   	if (i915_gem_gtt_prepare_object(obj)) {
> -		obj->pages = NULL;
> +		obj->mm.pages = NULL;
>   		goto err;
>   	}
>   
> @@ -106,7 +107,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
>   	 * and the caller is expected to repopulate - the contents of this
>   	 * object are only valid whilst active and pinned.
>   	 */
> -	obj->madv = I915_MADV_DONTNEED;
> +	obj->mm.madv = I915_MADV_DONTNEED;
>   	return 0;
>   
>   err:
> @@ -117,10 +118,10 @@ err:
>   
>   static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj)
>   {
> -	internal_free_pages(obj->pages);
> +	internal_free_pages(obj->mm.pages);
>   
> -	obj->dirty = 0;
> -	obj->madv = I915_MADV_WILLNEED;
> +	obj->mm.dirty = false;
> +	obj->mm.madv = I915_MADV_WILLNEED;
>   }
>   
>   static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index adb00c5125ad..64d9712ec789 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -230,7 +230,7 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
>   		return 0;
>   
>   	/* Recreate the page after shrinking */
> -	if (!so->vma->obj->pages)
> +	if (!so->vma->obj->mm.pages)
>   		so->batch_offset = -1;
>   
>   	ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 1c237d02f30b..e0e6d68fe470 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -78,7 +78,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
>   	 * to the GPU, simply unbinding from the GPU is not going to succeed
>   	 * in releasing our pin count on the pages themselves.
>   	 */
> -	if (obj->pages_pin_count > obj->bind_count)
> +	if (obj->mm.pages_pin_count > obj->bind_count)
>   		return false;
>   
>   	if (any_vma_pinned(obj))
> @@ -88,7 +88,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
>   	 * discard the contents (because the user has marked them as being
>   	 * purgeable) or if we can move their contents out to swap.
>   	 */
> -	return swap_available() || obj->madv == I915_MADV_DONTNEED;
> +	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
>   }
>   
>   /**
> @@ -175,11 +175,11 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>   			list_move_tail(&obj->global_list, &still_in_list);
>   
>   			if (flags & I915_SHRINK_PURGEABLE &&
> -			    obj->madv != I915_MADV_DONTNEED)
> +			    obj->mm.madv != I915_MADV_DONTNEED)
>   				continue;
>   
>   			if (flags & I915_SHRINK_VMAPS &&
> -			    !is_vmalloc_addr(obj->mapping))
> +			    !is_vmalloc_addr(obj->mm.mapping))
>   				continue;
>   
>   			if ((flags & I915_SHRINK_ACTIVE) == 0 &&
> @@ -193,7 +193,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>   
>   			/* For the unbound phase, this should be a no-op! */
>   			i915_gem_object_unbind(obj);
> -			if (i915_gem_object_put_pages(obj) == 0)
> +			if (__i915_gem_object_put_pages(obj) == 0)
>   				count += obj->base.size >> PAGE_SHIFT;
>   
>   			i915_gem_object_put(obj);
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 24bad4e60ef0..6d139d6b4609 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -554,16 +554,17 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
>   static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj)
>   {
>   	/* Should only be called during free */
> -	sg_free_table(obj->pages);
> -	kfree(obj->pages);
> +	sg_free_table(obj->mm.pages);
> +	kfree(obj->mm.pages);
>   }
>   
> -
>   static void
>   i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>   
> +	__i915_gem_object_unpin_pages(obj);
> +
>   	if (obj->stolen) {
>   		i915_gem_stolen_remove_node(dev_priv, obj->stolen);
>   		kfree(obj->stolen);
> @@ -589,15 +590,16 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>   	drm_gem_private_object_init(dev, &obj->base, stolen->size);
>   	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
>   
> -	obj->pages = i915_pages_create_for_stolen(dev,
> -						  stolen->start, stolen->size);
> -	if (obj->pages == NULL)
> +	obj->mm.pages = i915_pages_create_for_stolen(dev,
> +						     stolen->start,
> +						     stolen->size);
> +	if (!obj->mm.pages)
>   		goto cleanup;
>   
> -	obj->get_page.sg_pos = obj->pages->sgl;
> -	obj->get_page.sg_idx = 0;
> +	obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
> +	obj->mm.get_page.sg_idx = 0;
>   
> -	i915_gem_object_pin_pages(obj);
> +	__i915_gem_object_pin_pages(obj);
>   	obj->stolen = stolen;
>   
>   	obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
> @@ -717,14 +719,14 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
>   		goto err;
>   	}
>   
> -	vma->pages = obj->pages;
> +	vma->pages = obj->mm.pages;
>   	vma->flags |= I915_VMA_GLOBAL_BIND;
>   	__i915_vma_set_map_and_fenceable(vma);
>   	list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
>   	obj->bind_count++;
>   
>   	list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
> -	i915_gem_object_pin_pages(obj);
> +	__i915_gem_object_pin_pages(obj);
>   
>   	return obj;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index a14b1e3d4c78..7286de7bd25e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -260,13 +260,13 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
>   		if (!err) {
>   			struct i915_vma *vma;
>   
> -			if (obj->pages &&
> -			    obj->madv == I915_MADV_WILLNEED &&
> +			if (obj->mm.pages &&
> +			    obj->mm.madv == I915_MADV_WILLNEED &&
>   			    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
>   				if (args->tiling_mode == I915_TILING_NONE)
> -					i915_gem_object_unpin_pages(obj);
> +					__i915_gem_object_unpin_pages(obj);
>   				if (!i915_gem_object_is_tiled(obj))
> -					i915_gem_object_pin_pages(obj);
> +					__i915_gem_object_pin_pages(obj);
>   			}
>   
>   			list_for_each_entry(vma, &obj->vma_list, obj_link) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index cb95789da76e..0fdd1d6723d1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -73,10 +73,10 @@ static void cancel_userptr(struct work_struct *work)
>   	/* Cancel any active worker and force us to re-evaluate gup */
>   	obj->userptr.work = NULL;
>   
> -	if (obj->pages != NULL) {
> +	if (obj->mm.pages) {
>   		/* We are inside a kthread context and can't be interrupted */
>   		WARN_ON(i915_gem_object_unbind(obj));
> -		WARN_ON(i915_gem_object_put_pages(obj));
> +		WARN_ON(__i915_gem_object_put_pages(obj));
>   	}
>   
>   	i915_gem_object_put(obj);
> @@ -432,15 +432,15 @@ __i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj,
>   {
>   	int ret;
>   
> -	ret = st_set_pages(&obj->pages, pvec, num_pages);
> +	ret = st_set_pages(&obj->mm.pages, pvec, num_pages);
>   	if (ret)
>   		return ret;
>   
>   	ret = i915_gem_gtt_prepare_object(obj);
>   	if (ret) {
> -		sg_free_table(obj->pages);
> -		kfree(obj->pages);
> -		obj->pages = NULL;
> +		sg_free_table(obj->mm.pages);
> +		kfree(obj->mm.pages);
> +		obj->mm.pages = NULL;
>   	}
>   
>   	return ret;
> @@ -526,8 +526,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>   			if (ret == 0) {
>   				list_add_tail(&obj->global_list,
>   					      &to_i915(dev)->mm.unbound_list);
> -				obj->get_page.sg_pos = obj->pages->sgl;
> -				obj->get_page.sg_idx = 0;
> +				obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
> +				obj->mm.get_page.sg_idx = 0;
>   				pinned = 0;
>   			}
>   		}
> @@ -668,22 +668,22 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
>   	BUG_ON(obj->userptr.work != NULL);
>   	__i915_gem_userptr_set_active(obj, false);
>   
> -	if (obj->madv != I915_MADV_WILLNEED)
> -		obj->dirty = 0;
> +	if (obj->mm.madv != I915_MADV_WILLNEED)
> +		obj->mm.dirty = false;
>   
>   	i915_gem_gtt_finish_object(obj);
>   
> -	for_each_sgt_page(page, sgt_iter, obj->pages) {
> -		if (obj->dirty)
> +	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
> +		if (obj->mm.dirty)
>   			set_page_dirty(page);
>   
>   		mark_page_accessed(page);
>   		put_page(page);
>   	}
> -	obj->dirty = 0;
> +	obj->mm.dirty = false;
>   
> -	sg_free_table(obj->pages);
> -	kfree(obj->pages);
> +	sg_free_table(obj->mm.pages);
> +	kfree(obj->mm.pages);
>   }
>   
>   static void
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 6c22be28ba01..628d5cdf9200 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -881,8 +881,8 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>   	err->write_domain = obj->base.write_domain;
>   	err->fence_reg = vma->fence ? vma->fence->id : -1;
>   	err->tiling = i915_gem_object_get_tiling(obj);
> -	err->dirty = obj->dirty;
> -	err->purgeable = obj->madv != I915_MADV_WILLNEED;
> +	err->dirty = obj->mm.dirty;
> +	err->purgeable = obj->mm.madv != I915_MADV_WILLNEED;
>   	err->userptr = obj->userptr.mm != NULL;
>   	err->cache_level = obj->cache_level;
>   }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 02c48d390148..6acecfc41f5e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -744,7 +744,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
>   	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
>   		i915_ggtt_offset(ce->ring->vma);
>   
> -	ce->state->obj->dirty = true;
> +	ce->state->obj->mm.dirty = true;
>   
>   	/* Invalidate GuC TLB. */
>   	if (i915.enable_guc_submission) {
> @@ -2042,7 +2042,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>   		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
>   		return ret;
>   	}
> -	ctx_obj->dirty = true;
> +	ctx_obj->mm.dirty = true;
>   
>   	/* The second page of the context object contains some fields which must
>   	 * be set up prior to the first execution. */
> @@ -2179,7 +2179,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
>   			reg[CTX_RING_HEAD+1] = 0;
>   			reg[CTX_RING_TAIL+1] = 0;
>   
> -			ce->state->obj->dirty = true;
> +			ce->state->obj->mm.dirty = true;
>   			i915_gem_object_unpin_map(ce->state->obj);
>   
>   			ce->ring->head = ce->ring->tail = 0;

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [01/42] drm/i915: Allow disabling error capture (rev2)
  2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
                   ` (43 preceding siblings ...)
  2016-10-10  7:23 ` Patchwork
@ 2016-10-10 15:31 ` Patchwork
  44 siblings, 0 replies; 107+ messages in thread
From: Patchwork @ 2016-10-10 15:31 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: series starting with [01/42] drm/i915: Allow disabling error capture (rev2)
URL   : https://patchwork.freedesktop.org/series/13436/
State : failure

== Summary ==

  CC      arch/x86/kernel/cpu/capflags.o
  LD      fs/btrfs/btrfs.o
  LD      arch/x86/kernel/cpu/built-in.o
  LD      arch/x86/kernel/built-in.o
  LD      drivers/usb/core/usbcore.o
  LD      fs/btrfs/built-in.o
  LD      drivers/usb/core/built-in.o
  LD      arch/x86/built-in.o
  LD      fs/ext4/ext4.o
  LD      fs/ext4/built-in.o
  LD      drivers/usb/host/built-in.o
  LD      fs/built-in.o
  LD      drivers/usb/built-in.o
  LD      net/ipv6/ipv6.o
  LD      net/ipv6/built-in.o
  LD [M]  drivers/net/ethernet/intel/igb/igb.o
  LD [M]  drivers/net/ethernet/intel/e1000e/e1000e.o
  LD      net/ipv4/built-in.o
  LD      net/core/built-in.o
  LD      net/built-in.o
  LD      drivers/net/ethernet/built-in.o
  LD      drivers/net/built-in.o
scripts/Makefile.build:440: recipe for target 'drivers/gpu/drm/i915' failed
make[3]: *** [drivers/gpu/drm/i915] Error 2
scripts/Makefile.build:440: recipe for target 'drivers/gpu/drm' failed
make[2]: *** [drivers/gpu/drm] Error 2
scripts/Makefile.build:440: recipe for target 'drivers/gpu' failed
make[1]: *** [drivers/gpu] Error 2
Makefile:968: recipe for target 'drivers' failed
make: *** [drivers] Error 2

Full logs at /archive/deploy/logs/Patchwork_2658

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage
  2016-10-07  9:46 ` [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage Chris Wilson
  2016-10-07 10:12   ` Joonas Lahtinen
  2016-10-07 13:36   ` John Harrison
@ 2016-10-11  9:32   ` Tvrtko Ursulin
  2016-10-11 10:15     ` John Harrison
  2 siblings, 1 reply; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-11  9:32 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/10/2016 10:46, Chris Wilson wrote:
> A while ago we switched from a contiguous array of pages into an sglist,
> for that was both more convenient for mapping to hardware and avoided
> the requirement for a vmalloc array of pages on every object. However,
> certain GEM API calls (like pwrite, pread as well as performing
> relocations) do desire access to individual struct pages. A quick hack
> was to introduce a cache of the last access such that finding the
> following page was quick - this works so long as the caller desired
> sequential access. Walking backwards, or multiple callers, still hits a
> slow linear search for each page. One solution is to store each
> successful lookup in a radix tree.
>
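In outline, the new path walks the sg list forward from the last cached
position and records every entry it passes in the radix tree, so any later
lookup at or below that point becomes a tree hit instead of another linear
rescan. A much-simplified sketch of that idea (illustrative only, not the
patch's exact code - the real version also inserts per-page exceptional
entries and serialises on get_page.lock):

static struct scatterlist *
walk_and_cache(struct i915_gem_object_page_iter *iter, unsigned long n)
{
        /* Advance until iter->sg_pos covers page n, caching as we go. */
        while (iter->sg_idx + __sg_page_count(iter->sg_pos) <= n) {
                radix_tree_insert(&iter->radix, iter->sg_idx, iter->sg_pos);
                iter->sg_idx += __sg_page_count(iter->sg_pos);
                iter->sg_pos = __sg_next(iter->sg_pos);
        }
        return iter->sg_pos; /* first page index covered is iter->sg_idx */
}
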
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h         |  57 ++++--------
>   drivers/gpu/drm/i915/i915_gem.c         | 149 ++++++++++++++++++++++++++++----
>   drivers/gpu/drm/i915/i915_gem_stolen.c  |   4 +-
>   drivers/gpu/drm/i915/i915_gem_userptr.c |   4 +-
>   4 files changed, 154 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bad97f1e5265..a96b446d8db4 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2278,9 +2278,12 @@ struct drm_i915_gem_object {
>   
>   	struct sg_table *pages;
>   	int pages_pin_count;
> -	struct get_page {
> -		struct scatterlist *sg;
> -		int last;
> +	struct i915_gem_object_page_iter {
> +		struct scatterlist *sg_pos;
> +		unsigned long sg_idx;
> +
> +		struct radix_tree_root radix;
> +		struct mutex lock;
>   	} get_page;
>   	void *mapping;
>   
> @@ -3168,45 +3171,21 @@ static inline int __sg_page_count(struct scatterlist *sg)
>   	return sg->length >> PAGE_SHIFT;
>   }
>   
> -struct page *
> -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n);
> -
> -static inline dma_addr_t
> -i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
> -{
> -	if (n < obj->get_page.last) {
> -		obj->get_page.sg = obj->pages->sgl;
> -		obj->get_page.last = 0;
> -	}
> -
> -	while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
> -		obj->get_page.last += __sg_page_count(obj->get_page.sg++);
> -		if (unlikely(sg_is_chain(obj->get_page.sg)))
> -			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
> -	}
> -
> -	return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT);
> -}
> -
> -static inline struct page *
> -i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
> -{
> -	if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
> -		return NULL;
> +struct scatterlist *
> +i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
> +		       unsigned long n, unsigned int *offset);
>   
> -	if (n < obj->get_page.last) {
> -		obj->get_page.sg = obj->pages->sgl;
> -		obj->get_page.last = 0;
> -	}
> +struct page *
> +i915_gem_object_get_page(struct drm_i915_gem_object *obj,
> +			 unsigned long n);
>   
> -	while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
> -		obj->get_page.last += __sg_page_count(obj->get_page.sg++);
> -		if (unlikely(sg_is_chain(obj->get_page.sg)))
> -			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
> -	}
> +struct page *
> +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
> +			       unsigned long n);
>   
> -	return nth_page(sg_page(obj->get_page.sg), n - obj->get_page.last);
> -}
> +dma_addr_t
> +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
> +				unsigned long n);
>   
>   static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>   {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ada837e393a7..af7d51f16658 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2292,6 +2292,15 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>   	kfree(obj->pages);
>   }
>   
> +static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
> +{
> +	struct radix_tree_iter iter;
> +	void **slot;
> +
> +	radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0)
> +		radix_tree_delete(&obj->get_page.radix, iter.index);
> +}
> +
>   int
>   i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>   {
> @@ -2324,6 +2333,8 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>   		obj->mapping = NULL;
>   	}
>   
> +	__i915_gem_object_reset_page_iter(obj);
> +
>   	ops->put_pages(obj);
>   	obj->pages = NULL;
>   
> @@ -2488,8 +2499,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>   
>   	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
>   
> -	obj->get_page.sg = obj->pages->sgl;
> -	obj->get_page.last = 0;
> +	obj->get_page.sg_pos = obj->pages->sgl;
> +	obj->get_page.sg_idx = 0;
>   
>   	return 0;
>   }
> @@ -4242,6 +4253,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>   
>   	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
>   	obj->madv = I915_MADV_WILLNEED;
> +	INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL);
> +	mutex_init(&obj->get_page.lock);
>   
>   	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
>   }
> @@ -4904,21 +4917,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>   	}
>   }
>   
> -/* Like i915_gem_object_get_page(), but mark the returned page dirty */
> -struct page *
> -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
> -{
> -	struct page *page;
> -
> -	/* Only default objects have per-page dirty tracking */
> -	if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
> -		return NULL;
> -
> -	page = i915_gem_object_get_page(obj, n);
> -	set_page_dirty(page);
> -	return page;
> -}
> -
>   /* Allocate a new GEM object and fill it with the supplied data */
>   struct drm_i915_gem_object *
>   i915_gem_object_create_from_data(struct drm_device *dev,
> @@ -4959,3 +4957,120 @@ fail:
>   	i915_gem_object_put(obj);
>   	return ERR_PTR(ret);
>   }
> +
> +static struct scatterlist *
> +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
> +			 unsigned long n, unsigned int *offset)
> +{
> +	struct scatterlist *sg = obj->pages->sgl;
> +	int idx = 0;
> +
> +	while (idx + __sg_page_count(sg) <= n) {
> +		idx += __sg_page_count(sg++);
> +		if (unlikely(sg_is_chain(sg)))
> +			sg = sg_chain_ptr(sg);
> +	}
> +
> +	*offset = n - idx;
> +	return sg;
> +}
> +
> +struct scatterlist *
> +i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
> +		       unsigned long n,
> +		       unsigned int *offset)
> +{
> +	struct i915_gem_object_page_iter *iter = &obj->get_page;
> +	struct scatterlist *sg;
> +
> +	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
> +	GEM_BUG_ON(obj->pages_pin_count == 0);
> +
> +	if (n < READ_ONCE(iter->sg_idx))
> +		goto lookup;
> +

Ok, so on lookup of "n" you build the radix tree for all sg entries from 
zero to n. Therefore for a lookup below the current index, there must be 
a radix tree entry, correct?

> +	mutex_lock(&iter->lock);
> +	if (n >= iter->sg_idx &&
> +	    n < iter->sg_idx + __sg_page_count(iter->sg_pos)) {
> +		sg = iter->sg_pos;
> +		*offset = n - iter->sg_idx;
> +		mutex_unlock(&iter->lock);
> +		return sg;
> +	}
> +
> +	while (iter->sg_idx <= n) {
> +		unsigned long exception;
> +		unsigned int count, i;
> +
> +		radix_tree_insert(&iter->radix,
> +				  iter->sg_idx,
> +				  iter->sg_pos);
> +
> +		exception =
> +			RADIX_TREE_EXCEPTIONAL_ENTRY |
> +			iter->sg_idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
> +		count = __sg_page_count(iter->sg_pos);
> +		for (i = 1; i < count; i++)
> +			radix_tree_insert(&iter->radix,
> +					  iter->sg_idx + i,
> +					  (void *)exception);
> +
> +		iter->sg_idx += count;
> +		iter->sg_pos = __sg_next(iter->sg_pos);
> +	}
> +	mutex_unlock(&iter->lock);

Why not avoid falling through to the lookup and instead return the previous
sg directly from here?
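
Something along these lines (sketch only; it assumes the loop body runs at
least once, i.e. that n >= iter->sg_idx still holds once the mutex is taken):

        struct scatterlist *prev = NULL;
        unsigned long base = 0;

        while (iter->sg_idx <= n) {
                prev = iter->sg_pos;
                base = iter->sg_idx;
                /* ... radix_tree_insert() calls as in the patch ... */
                iter->sg_idx += __sg_page_count(iter->sg_pos);
                iter->sg_pos = __sg_next(iter->sg_pos);
        }
        mutex_unlock(&iter->lock);

        *offset = n - base;
        return prev;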

> +
> +lookup:
> +	rcu_read_lock();
> +	sg = radix_tree_lookup(&iter->radix, n);
> +	rcu_read_unlock();
> +
> +	if (unlikely(!sg))
> +		return __i915_gem_object_get_sg(obj, n, offset);
> +

Considering the first observation from above, when does it then happen 
that there is no radix tree entry at this point?

Or in other words, maybe some comments should be added to this patch - 
there are none and it is not that trivial.
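
For reference, my reading of the scheme, which such comments could spell out,
is roughly this (sketch, comments mine):

        /* Insertion stores the sg pointer itself at the entry's first page
         * index; every further page of a multi-page entry gets an
         * "exceptional" (non-pointer) entry whose payload is that base index.
         * A direct hit therefore means n is the first page of its entry
         * (offset 0); an exceptional hit is decoded back to the base index
         * and looked up a second time to reach the real sg pointer:
         */
        sg = radix_tree_lookup(&iter->radix, n);
        *offset = 0;
        if (radix_tree_exception(sg)) {
                unsigned long base =
                        (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;

                sg = radix_tree_lookup(&iter->radix, base);
                *offset = n - base;
        }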

> +	*offset = 0;
> +	if (unlikely(radix_tree_exception(sg))) {
> +		unsigned long base =
> +			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
> +		sg = radix_tree_lookup(&iter->radix, base);
> +		*offset = n - base;
> +	}
> +	return sg;
> +}
> +
> +struct page *
> +i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned long n)
> +{
> +	struct scatterlist *sg;
> +	unsigned int offset;
> +
> +	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
> +
> +	sg = i915_gem_object_get_sg(obj, n, &offset);
> +	return nth_page(sg_page(sg), offset);
> +}
> +
> +/* Like i915_gem_object_get_page(), but mark the returned page dirty */
> +struct page *
> +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
> +			       unsigned long n)
> +{
> +	struct page *page;
> +
> +	page = i915_gem_object_get_page(obj, n);
> +	if (!obj->dirty)
> +		set_page_dirty(page);
> +
> +	return page;
> +}
> +
> +dma_addr_t
> +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
> +				unsigned long n)
> +{
> +	struct scatterlist *sg;
> +	unsigned int offset;
> +
> +	sg = i915_gem_object_get_sg(obj, n, &offset);
> +	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 59989e8ee5dc..24bad4e60ef0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -594,8 +594,8 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>   	if (obj->pages == NULL)
>   		goto cleanup;
>   
> -	obj->get_page.sg = obj->pages->sgl;
> -	obj->get_page.last = 0;
> +	obj->get_page.sg_pos = obj->pages->sgl;
> +	obj->get_page.sg_idx = 0;
>   
>   	i915_gem_object_pin_pages(obj);
>   	obj->stolen = stolen;
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 1c891b92ac80..cb95789da76e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -526,8 +526,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>   			if (ret == 0) {
>   				list_add_tail(&obj->global_list,
>   					      &to_i915(dev)->mm.unbound_list);
> -				obj->get_page.sg = obj->pages->sgl;
> -				obj->get_page.last = 0;
> +				obj->get_page.sg_pos = obj->pages->sgl;
> +				obj->get_page.sg_idx = 0;
>   				pinned = 0;
>   			}
>   		}

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 23/42] drm/i915: Move object release to a freelist + worker
  2016-10-07  9:46 ` [PATCH 23/42] drm/i915: Move object release to a freelist + worker Chris Wilson
@ 2016-10-11  9:52   ` John Harrison
  0 siblings, 0 replies; 107+ messages in thread
From: John Harrison @ 2016-10-11  9:52 UTC (permalink / raw)
  To: intel-gfx

On 07/10/2016 10:46, Chris Wilson wrote:
> We want to hide the latency of releasing objects and their backing
> storage from the submission, so we move the actual free to a worker.
> This allows us to switch to struct_mutex freeing of the object in the
> next patch.
>
> Furthermore, if we know that the object we are dereferencing remains valid
> for the duration of our access, we can forgo the usual synchronisation
> barriers and atomic reference counting. To ensure this we defer freeing
> an object til after an RCU grace period, such that any lookup of the
> object within an RCU read critical section will remain valid until
> after we exit that critical section. We also employ this delay for
> rate-limiting the serialisation on reallocation - we have to slow down
> object creation in order to prevent resource starvation (in particular,
> files).
>
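In short the free path becomes a three-stage pattern: the final unreference
queues an RCU callback, the callback (softirq context, cannot sleep) pushes
the object onto a lock-free list and kicks a worker, and the worker performs
the actual unbind and free under struct_mutex. A condensed sketch of the code
below (simplified, error paths and pin handling omitted):

static void __i915_gem_free_work(struct work_struct *work)
{
        struct drm_i915_private *i915 =
                container_of(work, struct drm_i915_private, mm.free_work);
        struct llist_node *freed;

        while ((freed = llist_del_all(&i915->mm.free_list)))
                __i915_gem_free_objects(i915, freed); /* takes struct_mutex */
}

static void __i915_gem_free_object_rcu(struct rcu_head *head)
{
        struct drm_i915_gem_object *obj =
                container_of(head, typeof(*obj), rcu);
        struct drm_i915_private *i915 = to_i915(obj->base.dev);

        /* Softirq context: defer the sleeping teardown to the worker. */
        if (llist_add(&obj->freed, &i915->mm.free_list))
                schedule_work(&i915->mm.free_work);
}

void i915_gem_free_object(struct drm_gem_object *gem_obj)
{
        /* Wait out an RCU grace period so that any concurrent
         * i915_gem_object_lookup_rcu() caller has finished with obj. */
        call_rcu(&to_intel_bo(gem_obj)->rcu, __i915_gem_free_object_rcu);
}
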
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c      |  15 ++-
>   drivers/gpu/drm/i915/i915_drv.c          |   3 +
>   drivers/gpu/drm/i915/i915_drv.h          |  44 +++++++-
>   drivers/gpu/drm/i915/i915_gem.c          | 166 +++++++++++++++++++++----------
>   drivers/gpu/drm/i915/i915_gem_shrinker.c |  14 ++-
>   drivers/gpu/drm/i915/i915_gem_tiling.c   |  21 ++--
>   6 files changed, 193 insertions(+), 70 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index b807ddf65e04..144013875513 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4872,10 +4872,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
>   #define DROP_BOUND 0x2
>   #define DROP_RETIRE 0x4
>   #define DROP_ACTIVE 0x8
> -#define DROP_ALL (DROP_UNBOUND | \
> -		  DROP_BOUND | \
> -		  DROP_RETIRE | \
> -		  DROP_ACTIVE)
> +#define DROP_FREED 0x10
> +#define DROP_ALL (DROP_UNBOUND	| \
> +		  DROP_BOUND	| \
> +		  DROP_RETIRE	| \
> +		  DROP_ACTIVE	| \
> +		  DROP_FREED)
>   static int
>   i915_drop_caches_get(void *data, u64 *val)
>   {
> @@ -4919,6 +4921,11 @@ i915_drop_caches_set(void *data, u64 val)
>   unlock:
>   	mutex_unlock(&dev->struct_mutex);
>   
> +	if (val & DROP_FREED) {
> +		synchronize_rcu();
> +		flush_work(&dev_priv->mm.free_work);
> +	}
> +
>   	return ret;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 89d322215c84..f7d48f97993d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -563,6 +563,9 @@ static void i915_gem_fini(struct drm_device *dev)
>   	i915_gem_context_fini(dev);
>   	mutex_unlock(&dev->struct_mutex);
>   
> +	synchronize_rcu();
> +	flush_work(&dev_priv->mm.free_work);
> +
>   	WARN_ON(!list_empty(&to_i915(dev)->context_list));
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7e9df190c891..e79a5cb78b5d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1354,11 +1354,17 @@ struct i915_gem_mm {
>   	struct list_head bound_list;
>   	/**
>   	 * List of objects which are not bound to the GTT (thus
> -	 * are idle and not used by the GPU) but still have
> -	 * (presumably uncached) pages still attached.
> +	 * are idle and not used by the GPU). These objects may or may
> +	 * not actually have any pages attached.
>   	 */
>   	struct list_head unbound_list;
>   
> +	/**
> +	 * List of objects which are pending destruction.
> +	 */
> +	struct llist_head free_list;
> +	struct work_struct free_work;
> +
>   	/** Usable portion of the GTT for GEM */
>   	unsigned long stolen_base; /* limited to low memory (32-bit) */
>   
> @@ -2214,6 +2220,10 @@ struct drm_i915_gem_object {
>   	/** Stolen memory for this object, instead of being backed by shmem. */
>   	struct drm_mm_node *stolen;
>   	struct list_head global_list;
> +	union {
> +		struct rcu_head rcu;
> +		struct llist_node freed;
> +	};
>   
>   	/** Used in execbuf to temporarily hold a ref */
>   	struct list_head obj_exec_link;
> @@ -2333,10 +2343,38 @@ to_intel_bo(struct drm_gem_object *gem)
>   	return container_of(gem, struct drm_i915_gem_object, base);
>   }
>   
> +/**
> + * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
> + * @filp: DRM file private date
> + * @handle: userspace handle
> + *
> + * Returns:
> + *
> + * A pointer to the object named by the handle if such exists on @filp, NULL
> + * otherwise. This object is only valid whilst under the RCU read lock, and
> + * note carefully the object may be in the process of being destroyed.
> + */
> +static inline struct drm_i915_gem_object *
> +i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
> +{
> +#ifdef CONFIG_LOCKDEP
> +	WARN_ON(!lock_is_held(&rcu_lock_map));
> +#endif
> +	return idr_find(&file->object_idr, handle);
> +}
> +
>   static inline struct drm_i915_gem_object *
>   i915_gem_object_lookup(struct drm_file *file, u32 handle)
>   {
> -	return to_intel_bo(drm_gem_object_lookup(file, handle));
> +	struct drm_i915_gem_object *obj;
> +
> +	rcu_read_lock();
> +	obj = i915_gem_object_lookup_rcu(file, handle);
> +	if (obj && !kref_get_unless_zero(&obj->base.refcount))
> +		obj = NULL;
> +	rcu_read_unlock();
> +
> +	return obj;
>   }
>   
>   __deprecated
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 80069f0fa9ac..7cd49dd1d3f8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -42,6 +42,7 @@
>   #include <linux/pci.h>
>   #include <linux/dma-buf.h>
>   
> +static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
>   static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
>   static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
>   
> @@ -647,6 +648,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
>   {
>   	struct drm_i915_gem_create *args = data;
>   
> +	i915_gem_flush_free_objects(to_i915(dev));
> +
>   	return i915_gem_create(file, dev,
>   			       args->size, &args->handle);
>   }
> @@ -3469,10 +3472,14 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
>   {
>   	struct drm_i915_gem_caching *args = data;
>   	struct drm_i915_gem_object *obj;
> +	int err = 0;
>   
> -	obj = i915_gem_object_lookup(file, args->handle);
> -	if (!obj)
> -		return -ENOENT;
> +	rcu_read_lock();
> +	obj = i915_gem_object_lookup_rcu(file, args->handle);
> +	if (!obj) {
> +		err = -ENOENT;
> +		goto out;
> +	}
>   
>   	switch (obj->cache_level) {
>   	case I915_CACHE_LLC:
> @@ -3488,9 +3495,9 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
>   		args->caching = I915_CACHING_NONE;
>   		break;
>   	}
> -
> -	i915_gem_object_put_unlocked(obj);
> -	return 0;
> +out:
> +	rcu_read_unlock();
> +	return err;
>   }
>   
>   int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
> @@ -4001,10 +4008,14 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>   	struct drm_i915_gem_busy *args = data;
>   	struct drm_i915_gem_object *obj;
>   	unsigned long active;
> +	int err;
>   
> -	obj = i915_gem_object_lookup(file, args->handle);
> -	if (!obj)
> -		return -ENOENT;
> +	rcu_read_lock();
> +	obj = i915_gem_object_lookup_rcu(file, args->handle);
> +	if (!obj) {
> +		err = -ENOENT;
> +		goto out;
> +	}
>   
>   	args->busy = 0;
>   	active = __I915_BO_ACTIVE(obj);
> @@ -4034,7 +4045,6 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>   		 * are busy is not completely reliable - we only guarantee
>   		 * that the object was busy.
>   		 */
> -		rcu_read_lock();
>   
>   		for_each_active(active, idx)
>   			args->busy |= busy_check_reader(&obj->last_read[idx]);
> @@ -4052,12 +4062,11 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>   		 * the result.
>   		 */
>   		args->busy |= busy_check_writer(&obj->last_write);
> -
> -		rcu_read_unlock();
>   	}
>   
> -	i915_gem_object_put_unlocked(obj);
> -	return 0;
> +out:
> +	rcu_read_unlock();
> +	return err;
>   }
>   
>   int
> @@ -4204,7 +4213,6 @@ struct drm_i915_gem_object *i915_gem_object_create(struct drm_device *dev,
>   
>   fail:
>   	i915_gem_object_free(obj);
> -
>   	return ERR_PTR(ret);
>   }
>   
> @@ -4232,16 +4240,69 @@ static bool discard_backing_storage(struct drm_i915_gem_object *obj)
>   	return atomic_long_read(&obj->base.filp->f_count) == 1;
>   }
>   
> -void i915_gem_free_object(struct drm_gem_object *gem_obj)
> +static void __i915_gem_free_objects(struct drm_i915_private *i915,
> +				    struct llist_node *freed)
>   {
> -	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
> -	struct drm_device *dev = obj->base.dev;
> -	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct i915_vma *vma, *next;
> +	struct drm_i915_gem_object *obj, *on;
>   
> -	intel_runtime_pm_get(dev_priv);
> +	mutex_lock(&i915->drm.struct_mutex);
> +	intel_runtime_pm_get(i915);
> +	llist_for_each_entry(obj, freed, freed) {
> +		struct i915_vma *vma, *vn;
> +
> +		trace_i915_gem_object_destroy(obj);
> +
> +		GEM_BUG_ON(i915_gem_object_is_active(obj));
> +		list_for_each_entry_safe(vma, vn,
> +					 &obj->vma_list, obj_link) {
> +			GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> +			GEM_BUG_ON(i915_vma_is_active(vma));
> +			vma->flags &= ~I915_VMA_PIN_MASK;
> +			i915_vma_close(vma);
> +		}
> +
> +		list_del(&obj->global_list);
> +	}
> +	intel_runtime_pm_put(i915);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +
> +	llist_for_each_entry_safe(obj, on, freed, freed) {
> +		GEM_BUG_ON(obj->bind_count);
> +		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
> +
> +		if (obj->ops->release)
> +			obj->ops->release(obj);
> +
> +		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
> +			atomic_set(&obj->mm.pages_pin_count, 0);
> +		__i915_gem_object_put_pages(obj);
> +		GEM_BUG_ON(obj->mm.pages);
> +
> +		if (obj->base.import_attach)
> +			drm_prime_gem_destroy(&obj->base, NULL);
> +
> +		drm_gem_object_release(&obj->base);
> +		i915_gem_info_remove_obj(i915, obj->base.size);
> +
> +		kfree(obj->bit_17);
> +		i915_gem_object_free(obj);
> +	}
> +}
> +
> +static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
> +{
> +	struct llist_node *freed;
>   
> -	trace_i915_gem_object_destroy(obj);
> +	freed = llist_del_all(&i915->mm.free_list);
> +	if (unlikely(freed))
> +		__i915_gem_free_objects(i915, freed);
> +}
> +
> +static void __i915_gem_free_work(struct work_struct *work)
> +{
> +	struct drm_i915_private *i915 =
> +		container_of(work, struct drm_i915_private, mm.free_work);
> +	struct llist_node *freed;
>   
>   	/* All file-owned VMA should have been released by this point through
>   	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
> @@ -4250,42 +4311,44 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>   	 * the GTT either for the user or for scanout). Those VMA still need to
>   	 * unbound now.
>   	 */
> -	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
> -		GEM_BUG_ON(!i915_vma_is_ggtt(vma));
> -		GEM_BUG_ON(i915_vma_is_active(vma));
> -		vma->flags &= ~I915_VMA_PIN_MASK;
> -		i915_vma_close(vma);
> -	}
> -	GEM_BUG_ON(obj->bind_count);
>   
> -	WARN_ON(atomic_read(&obj->frontbuffer_bits));
> +	while ((freed = llist_del_all(&i915->mm.free_list)))
> +		__i915_gem_free_objects(i915, freed);
> +}
>   
> -	if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED &&
> -	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
> -	    i915_gem_object_is_tiled(obj))
> -		__i915_gem_object_unpin_pages(obj);
> +static void __i915_gem_free_object_rcu(struct rcu_head *head)
> +{
> +	struct drm_i915_gem_object *obj =
> +		container_of(head, typeof(*obj), rcu);
> +	struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +
> +	/* We can't simply use call_rcu() from i915_gem_free_object()
> +	 * as we need to block whilst unbinding, and the call_rcu
> +	 * task may be called from softirq context. So we take a
> +	 * detour through a worker.
> +	 */
> +	if (llist_add(&obj->freed, &i915->mm.free_list))
> +		schedule_work(&i915->mm.free_work);
> +}
>   
> -	if (obj->ops->release)
> -		obj->ops->release(obj);
> +void i915_gem_free_object(struct drm_gem_object *gem_obj)
> +{
> +	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
>   
> -	if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
> -		atomic_set(&obj->mm.pages_pin_count, 0);
>   	if (discard_backing_storage(obj))
>   		obj->mm.madv = I915_MADV_DONTNEED;
> -	__i915_gem_object_put_pages(obj);
>   
> -	GEM_BUG_ON(obj->mm.pages);
> -
> -	if (obj->base.import_attach)
> -		drm_prime_gem_destroy(&obj->base, NULL);
> -
> -	drm_gem_object_release(&obj->base);
> -	i915_gem_info_remove_obj(dev_priv, obj->base.size);
> -
> -	kfree(obj->bit_17);
> -	i915_gem_object_free(obj);
> +	if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED &&
> +	    to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
> +	    i915_gem_object_is_tiled(obj))
> +		__i915_gem_object_unpin_pages(obj);
>   
> -	intel_runtime_pm_put(dev_priv);
> +	/* Before we free the object, make sure any pure RCU-only
> +	 * read-side critical sections are complete, e.g.
> +	 * i915_gem_busy_ioctl(). For the corresponding synchronized
> +	 * lookup see i915_gem_object_lookup_rcu().
> +	 */
> +	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
>   }
>   
>   void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
> @@ -4334,6 +4397,7 @@ int i915_gem_suspend(struct drm_device *dev)
>   	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
>   	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
>   	flush_delayed_work(&dev_priv->gt.idle_work);
> +	flush_work(&dev_priv->mm.free_work);
>   
>   	/* Assert that we sucessfully flushed all the work and
>   	 * reset the GPU back to its idle, low power state.
> @@ -4630,6 +4694,8 @@ i915_gem_load_init(struct drm_device *dev)
>   				  NULL);
>   
>   	INIT_LIST_HEAD(&dev_priv->context_list);
> +	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
> +	init_llist_head(&dev_priv->mm.free_list);
>   	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
>   	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
>   	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 868da0bae751..fa72473dc7f5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -201,6 +201,10 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>   						       typeof(*obj),
>   						       global_list))) {
>   			list_move_tail(&obj->global_list, &still_in_list);
> +			if (!obj->mm.pages) {
> +				list_del_init(&obj->global_list);
> +				continue;
> +			}
>   
>   			if (flags & I915_SHRINK_PURGEABLE &&
>   			    obj->mm.madv != I915_MADV_DONTNEED)
> @@ -217,8 +221,6 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>   			if (!can_release_pages(obj))
>   				continue;
>   
> -			i915_gem_object_get(obj);
> -
>   			if (unsafe_drop_pages(obj)) {
>   				mutex_lock(&obj->mm.lock);
>   				if (!obj->mm.pages) {
> @@ -227,8 +229,6 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>   				}
>   				mutex_unlock(&obj->mm.lock);
>   			}
> -
> -			i915_gem_object_put(obj);
>   		}
>   		list_splice(&still_in_list, phase->list);
>   	}
> @@ -395,12 +395,18 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
>   	 */
>   	unbound = bound = unevictable = 0;
>   	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
> +		if (!obj->mm.pages)
> +			continue;
> +
>   		if (!can_release_pages(obj))
>   			unevictable += obj->base.size >> PAGE_SHIFT;
>   		else
>   			unbound += obj->base.size >> PAGE_SHIFT;
>   	}
>   	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> +		if (!obj->mm.pages)
> +			continue;
> +
>   		if (!can_release_pages(obj))
>   			unevictable += obj->base.size >> PAGE_SHIFT;
>   		else
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index 11e2e0f57ac1..ec48e403adfe 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -328,12 +328,17 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
>   	struct drm_i915_gem_get_tiling *args = data;
>   	struct drm_i915_private *dev_priv = to_i915(dev);
>   	struct drm_i915_gem_object *obj;
> +	int err = -ENOENT;
> +
> +	rcu_read_lock();
> +	obj = i915_gem_object_lookup_rcu(file, args->handle);
> +	if (obj) {
> +		args->tiling_mode =
> +			READ_ONCE(obj->tiling_and_stride) & TILING_MASK;
> +		err = 0;
> +	}
> +	rcu_read_unlock();
>   
Should it not just return -ENOENT here if !obj, rather than going through
the rest of the function and having to modify it to ignore unknown tiling
modes (since the switch would otherwise run on uninitialised data)?
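
I.e. something like this (sketch; it would also let the switch keep its
original DRM_ERROR default, since tiling_mode is then always read from a
real object):

        rcu_read_lock();
        obj = i915_gem_object_lookup_rcu(file, args->handle);
        if (!obj) {
                rcu_read_unlock();
                return -ENOENT;
        }
        args->tiling_mode =
                READ_ONCE(obj->tiling_and_stride) & TILING_MASK;
        rcu_read_unlock();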

> -	obj = i915_gem_object_lookup(file, args->handle);
> -	if (!obj)
> -		return -ENOENT;
> -
> -	args->tiling_mode = READ_ONCE(obj->tiling_and_stride) & TILING_MASK;
>   	switch (args->tiling_mode) {
>   	case I915_TILING_X:
>   		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
> @@ -341,11 +346,10 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
>   	case I915_TILING_Y:
>   		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y;
>   		break;
> +	default:
>   	case I915_TILING_NONE:
>   		args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
>   		break;
> -	default:
> -		DRM_ERROR("unknown tiling mode\n");
>   	}
>   
>   	/* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
> @@ -358,6 +362,5 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
>   	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
>   		args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
>   
> -	i915_gem_object_put_unlocked(obj);
> -	return 0;
> +	return err;
>   }

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 24/42] drm/i915: Treat a framebuffer reference as an active reference whilst shrinking
  2016-10-07  9:46 ` [PATCH 24/42] drm/i915: Treat a framebuffer reference as an active reference whilst shrinking Chris Wilson
@ 2016-10-11  9:54   ` John Harrison
  0 siblings, 0 replies; 107+ messages in thread
From: John Harrison @ 2016-10-11  9:54 UTC (permalink / raw)
  To: intel-gfx


On 07/10/2016 10:46, Chris Wilson wrote:
> Treat a framebuffer reference with the same priority as an active
> reference whilst shrinking. Framebuffers are likely to be reused and
> typically cost more to migrate to and from GPU memory (on LLC
> architectures we need to clflush), so defer the temptation to purge them
> during a kswapd run until we have run out of cheap buffers.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_shrinker.c | 5 +++--
>   1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index fa72473dc7f5..0241658af16b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -214,8 +214,9 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>   			    !is_vmalloc_addr(obj->mm.mapping))
>   				continue;
>   
> -			if ((flags & I915_SHRINK_ACTIVE) == 0 &&
> -			    i915_gem_object_is_active(obj))
> +			if (!(flags & I915_SHRINK_ACTIVE) &&
> +			    (i915_gem_object_is_active(obj) ||
> +			     obj->framebuffer_references))
>   				continue;
>   
>   			if (!can_release_pages(obj))

Reviewed-by: John Harrison <john.c.harrison@intel.com>



_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 25/42] drm/i915: Use lockless object free
  2016-10-07  9:46 ` [PATCH 25/42] drm/i915: Use lockless object free Chris Wilson
@ 2016-10-11  9:56   ` John Harrison
  0 siblings, 0 replies; 107+ messages in thread
From: John Harrison @ 2016-10-11  9:56 UTC (permalink / raw)
  To: intel-gfx

Needs a description?

On 07/10/2016 10:46, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.c         |  2 +-
>   drivers/gpu/drm/i915/i915_drv.h         | 10 +---------
>   drivers/gpu/drm/i915/i915_gem.c         | 20 ++++++++++----------
>   drivers/gpu/drm/i915/i915_gem_tiling.c  |  2 +-
>   drivers/gpu/drm/i915/i915_gem_userptr.c |  4 ++--
>   drivers/gpu/drm/i915/intel_display.c    |  6 +++---
>   drivers/gpu/drm/i915/intel_overlay.c    |  4 ++--
>   drivers/gpu/drm/i915/intel_pm.c         |  2 +-
>   8 files changed, 21 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index f7d48f97993d..2e29eedd21b2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -2606,7 +2606,7 @@ static struct drm_driver driver = {
>   	.set_busid = drm_pci_set_busid,
>   
>   	.gem_close_object = i915_gem_close_object,
> -	.gem_free_object = i915_gem_free_object,
> +	.gem_free_object_unlocked = i915_gem_free_object,
>   	.gem_vm_ops = &i915_gem_vm_ops,
>   
>   	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index e79a5cb78b5d..89d3b5a16826 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2396,19 +2396,12 @@ __attribute__((nonnull))
>   static inline void
>   i915_gem_object_put(struct drm_i915_gem_object *obj)
>   {
> -	drm_gem_object_unreference(&obj->base);
> +	__drm_gem_object_unreference(&obj->base);
>   }
>   
>   __deprecated
>   extern void drm_gem_object_unreference(struct drm_gem_object *);
>   
> -__attribute__((nonnull))
> -static inline void
> -i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj)
> -{
> -	drm_gem_object_unreference_unlocked(&obj->base);
> -}
> -
>   __deprecated
>   extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
>   
> @@ -2510,7 +2503,6 @@ static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
>   
>   static inline void i915_vma_put(struct i915_vma *vma)
>   {
> -	lockdep_assert_held(&vma->vm->dev->struct_mutex);
>   	i915_gem_object_put(vma->obj);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 7cd49dd1d3f8..28e1064baad5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -616,7 +616,7 @@ i915_gem_create(struct drm_file *file,
>   
>   	ret = drm_gem_handle_create(file, &obj->base, &handle);
>   	/* drop reference from allocate - handle holds it now */
> -	i915_gem_object_put_unlocked(obj);
> +	i915_gem_object_put(obj);
>   	if (ret)
>   		return ret;
>   
> @@ -1115,7 +1115,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
>   
>   	i915_gem_object_unpin_pages(obj);
>   out:
> -	i915_gem_object_put_unlocked(obj);
> +	i915_gem_object_put(obj);
>   	return ret;
>   }
>   
> @@ -1450,7 +1450,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>   
>   	i915_gem_object_unpin_pages(obj);
>   err:
> -	i915_gem_object_put_unlocked(obj);
> +	i915_gem_object_put(obj);
>   	return ret;
>   }
>   
> @@ -1560,7 +1560,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>   err_pages:
>   	i915_gem_object_unpin_pages(obj);
>   err_unlocked:
> -	i915_gem_object_put_unlocked(obj);
> +	i915_gem_object_put(obj);
>   	return ret;
>   }
>   
> @@ -1591,7 +1591,7 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
>   		}
>   	}
>   
> -	i915_gem_object_put_unlocked(obj);
> +	i915_gem_object_put(obj);
>   	return err;
>   }
>   
> @@ -1637,7 +1637,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>   	 * pages from.
>   	 */
>   	if (!obj->base.filp) {
> -		i915_gem_object_put_unlocked(obj);
> +		i915_gem_object_put(obj);
>   		return -EINVAL;
>   	}
>   
> @@ -1649,7 +1649,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>   		struct vm_area_struct *vma;
>   
>   		if (down_write_killable(&mm->mmap_sem)) {
> -			i915_gem_object_put_unlocked(obj);
> +			i915_gem_object_put(obj);
>   			return -EINTR;
>   		}
>   		vma = find_vma(mm, addr);
> @@ -1663,7 +1663,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>   		/* This may race, but that's ok, it only gets set */
>   		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
>   	}
> -	i915_gem_object_put_unlocked(obj);
> +	i915_gem_object_put(obj);
>   	if (IS_ERR((void *)addr))
>   		return addr;
>   
> @@ -2073,7 +2073,7 @@ i915_gem_mmap_gtt(struct drm_file *file,
>   	if (ret == 0)
>   		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
>   
> -	i915_gem_object_put_unlocked(obj);
> +	i915_gem_object_put(obj);
>   	return ret;
>   }
>   
> @@ -2881,7 +2881,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>   			args->timeout_ns = 0;
>   	}
>   
> -	i915_gem_object_put_unlocked(obj);
> +	i915_gem_object_put(obj);
>   	return ret;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index ec48e403adfe..d8fc4782b8db 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -200,7 +200,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
>   
>   	if (!i915_tiling_ok(dev,
>   			    args->stride, obj->base.size, args->tiling_mode)) {
> -		i915_gem_object_put_unlocked(obj);
> +		i915_gem_object_put(obj);
>   		return -EINVAL;
>   	}
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 136c493b15b2..6f7ac0e783ec 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -542,7 +542,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>   	release_pages(pvec, pinned, 0);
>   	drm_free_large(pvec);
>   
> -	i915_gem_object_put_unlocked(obj);
> +	i915_gem_object_put(obj);
>   	put_task_struct(work->task);
>   	kfree(work);
>   }
> @@ -802,7 +802,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file
>   		ret = drm_gem_handle_create(file, &obj->base, &handle);
>   
>   	/* drop reference from allocate - handle holds it now */
> -	i915_gem_object_put_unlocked(obj);
> +	i915_gem_object_put(obj);
>   	if (ret)
>   		return ret;
>   
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 5ee7bab6e560..8beec0b651cd 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11026,7 +11026,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev,
>   
>   	fb = intel_framebuffer_create(dev, &mode_cmd, obj);
>   	if (IS_ERR(fb))
> -		i915_gem_object_put_unlocked(obj);
> +		i915_gem_object_put(obj);
>   
>   	return fb;
>   }
> @@ -12330,7 +12330,7 @@ cleanup:
>   	crtc->primary->fb = old_fb;
>   	update_state_fb(crtc->primary);
>   
> -	i915_gem_object_put_unlocked(obj);
> +	i915_gem_object_put(obj);
>   	drm_framebuffer_unreference(work->old_fb);
>   
>   	spin_lock_irq(&dev->event_lock);
> @@ -15837,7 +15837,7 @@ intel_user_framebuffer_create(struct drm_device *dev,
>   
>   	fb = intel_framebuffer_create(dev, &mode_cmd, obj);
>   	if (IS_ERR(fb))
> -		i915_gem_object_put_unlocked(obj);
> +		i915_gem_object_put(obj);
>   
>   	return fb;
>   }
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index 7c392547711f..58d5c5eed487 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -1222,7 +1222,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
>   out_unlock:
>   	mutex_unlock(&dev->struct_mutex);
>   	drm_modeset_unlock_all(dev);
> -	i915_gem_object_put_unlocked(new_bo);
> +	i915_gem_object_put(new_bo);
>   out_free:
>   	kfree(params);
>   
> @@ -1466,7 +1466,7 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv)
>   	 * hardware should be off already */
>   	WARN_ON(dev_priv->overlay->active);
>   
> -	i915_gem_object_put_unlocked(dev_priv->overlay->reg_bo);
> +	i915_gem_object_put(dev_priv->overlay->reg_bo);
>   	kfree(dev_priv->overlay);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 7f1748a1e614..27fc4c9c8cd0 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5864,7 +5864,7 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv)
>   	if (WARN_ON(!dev_priv->vlv_pctx))
>   		return;
>   
> -	i915_gem_object_put_unlocked(dev_priv->vlv_pctx);
> +	i915_gem_object_put(dev_priv->vlv_pctx);
>   	dev_priv->vlv_pctx = NULL;
>   }
>   

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage
  2016-10-11  9:32   ` Tvrtko Ursulin
@ 2016-10-11 10:15     ` John Harrison
  0 siblings, 0 replies; 107+ messages in thread
From: John Harrison @ 2016-10-11 10:15 UTC (permalink / raw)
  To: intel-gfx

On 11/10/2016 10:32, Tvrtko Ursulin wrote:
>
> On 07/10/2016 10:46, Chris Wilson wrote:
>> A while ago we switched from a contiguous array of pages into an sglist,
>> for that was both more convenient for mapping to hardware and avoided
>> the requirement for a vmalloc array of pages on every object. However,
>> certain GEM API calls (like pwrite, pread as well as performing
>> relocations) do desire access to individual struct pages. A quick hack
>> was to introduce a cache of the last access such that finding the
>> following page was quick - this works so long as the caller desired
>> sequential access. Walking backwards, or multiple callers, still hits a
>> slow linear search for each page. One solution is to store each
>> successful lookup in a radix tree.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h         |  57 ++++--------
>>   drivers/gpu/drm/i915/i915_gem.c         | 149 
>> ++++++++++++++++++++++++++++----
>>   drivers/gpu/drm/i915/i915_gem_stolen.c  |   4 +-
>>   drivers/gpu/drm/i915/i915_gem_userptr.c |   4 +-
>>   4 files changed, 154 insertions(+), 60 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index bad97f1e5265..a96b446d8db4 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2278,9 +2278,12 @@ struct drm_i915_gem_object {
>>         struct sg_table *pages;
>>       int pages_pin_count;
>> -    struct get_page {
>> -        struct scatterlist *sg;
>> -        int last;
>> +    struct i915_gem_object_page_iter {
>> +        struct scatterlist *sg_pos;
>> +        unsigned long sg_idx;
>> +
>> +        struct radix_tree_root radix;
>> +        struct mutex lock;
>>       } get_page;
>>       void *mapping;
>>   @@ -3168,45 +3171,21 @@ static inline int __sg_page_count(struct 
>> scatterlist *sg)
>>       return sg->length >> PAGE_SHIFT;
>>   }
>>   -struct page *
>> -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n);
>> -
>> -static inline dma_addr_t
>> -i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
>> -{
>> -    if (n < obj->get_page.last) {
>> -        obj->get_page.sg = obj->pages->sgl;
>> -        obj->get_page.last = 0;
>> -    }
>> -
>> -    while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= 
>> n) {
>> -        obj->get_page.last += __sg_page_count(obj->get_page.sg++);
>> -        if (unlikely(sg_is_chain(obj->get_page.sg)))
>> -            obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
>> -    }
>> -
>> -    return sg_dma_address(obj->get_page.sg) + ((n - 
>> obj->get_page.last) << PAGE_SHIFT);
>> -}
>> -
>> -static inline struct page *
>> -i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
>> -{
>> -    if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT))
>> -        return NULL;
>> +struct scatterlist *
>> +i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> +               unsigned long n, unsigned int *offset);
>>   -    if (n < obj->get_page.last) {
>> -        obj->get_page.sg = obj->pages->sgl;
>> -        obj->get_page.last = 0;
>> -    }
>> +struct page *
>> +i915_gem_object_get_page(struct drm_i915_gem_object *obj,
>> +             unsigned long n);
>>   -    while (obj->get_page.last + __sg_page_count(obj->get_page.sg) 
>> <= n) {
>> -        obj->get_page.last += __sg_page_count(obj->get_page.sg++);
>> -        if (unlikely(sg_is_chain(obj->get_page.sg)))
>> -            obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
>> -    }
>> +struct page *
>> +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
>> +                   unsigned long n);
>>   -    return nth_page(sg_page(obj->get_page.sg), n - 
>> obj->get_page.last);
>> -}
>> +dma_addr_t
>> +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
>> +                unsigned long n);
>>     static inline void i915_gem_object_pin_pages(struct 
>> drm_i915_gem_object *obj)
>>   {
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index ada837e393a7..af7d51f16658 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2292,6 +2292,15 @@ i915_gem_object_put_pages_gtt(struct 
>> drm_i915_gem_object *obj)
>>       kfree(obj->pages);
>>   }
>>   +static void __i915_gem_object_reset_page_iter(struct 
>> drm_i915_gem_object *obj)
>> +{
>> +    struct radix_tree_iter iter;
>> +    void **slot;
>> +
>> +    radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0)
>> +        radix_tree_delete(&obj->get_page.radix, iter.index);
>> +}
>> +
>>   int
>>   i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>>   {
>> @@ -2324,6 +2333,8 @@ i915_gem_object_put_pages(struct 
>> drm_i915_gem_object *obj)
>>           obj->mapping = NULL;
>>       }
>>   +    __i915_gem_object_reset_page_iter(obj);
>> +
>>       ops->put_pages(obj);
>>       obj->pages = NULL;
>>   @@ -2488,8 +2499,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>>         list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
>>   -    obj->get_page.sg = obj->pages->sgl;
>> -    obj->get_page.last = 0;
>> +    obj->get_page.sg_pos = obj->pages->sgl;
>> +    obj->get_page.sg_idx = 0;
>>         return 0;
>>   }
>> @@ -4242,6 +4253,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>>         obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
>>       obj->madv = I915_MADV_WILLNEED;
>> +    INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL);
>> +    mutex_init(&obj->get_page.lock);
>>         i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
>>   }
>> @@ -4904,21 +4917,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>>       }
>>   }
>>   -/* Like i915_gem_object_get_page(), but mark the returned page dirty */
>> -struct page *
>> -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
>> -{
>> -    struct page *page;
>> -
>> -    /* Only default objects have per-page dirty tracking */
>> -    if (WARN_ON(!i915_gem_object_has_struct_page(obj)))
>> -        return NULL;
>> -
>> -    page = i915_gem_object_get_page(obj, n);
>> -    set_page_dirty(page);
>> -    return page;
>> -}
>> -
>>   /* Allocate a new GEM object and fill it with the supplied data */
>>   struct drm_i915_gem_object *
>>   i915_gem_object_create_from_data(struct drm_device *dev,
>> @@ -4959,3 +4957,120 @@ fail:
>>       i915_gem_object_put(obj);
>>       return ERR_PTR(ret);
>>   }
>> +
>> +static struct scatterlist *
>> +__i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> +             unsigned long n, unsigned int *offset)
>> +{
>> +    struct scatterlist *sg = obj->pages->sgl;
>> +    int idx = 0;
>> +
>> +    while (idx + __sg_page_count(sg) <= n) {
>> +        idx += __sg_page_count(sg++);
>> +        if (unlikely(sg_is_chain(sg)))
>> +            sg = sg_chain_ptr(sg);
>> +    }
>> +
>> +    *offset = n - idx;
>> +    return sg;
>> +}
>> +
>> +struct scatterlist *
>> +i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>> +               unsigned long n,
>> +               unsigned int *offset)
>> +{
>> +    struct i915_gem_object_page_iter *iter = &obj->get_page;
>> +    struct scatterlist *sg;
>> +
>> +    GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
>> +    GEM_BUG_ON(obj->pages_pin_count == 0);
>> +
>> +    if (n < READ_ONCE(iter->sg_idx))
>> +        goto lookup;
>> +
>
> Ok, so on lookup of "n" you build the radix tree for all sg entries 
> from zero to n. Therefore for a lookup below the current index, there 
> must be a radix tree entry, correct?
That is my understanding.
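
For reference, a rough standalone sketch of the two entry kinds, outside
the kernel -- the shift/tag values below are only stand-ins for
RADIX_TREE_EXCEPTIONAL_ENTRY/_SHIFT, and the helpers are made up for the
illustration:

#include <stdint.h>
#include <stdio.h>

#define EXC_ENTRY 0x2ul	/* assumed stand-in for RADIX_TREE_EXCEPTIONAL_ENTRY */
#define EXC_SHIFT 2	/* assumed stand-in for RADIX_TREE_EXCEPTIONAL_SHIFT */

/* Pages after the first one of an sg chunk store an "exceptional"
 * value that encodes the index of the chunk's first page. */
static uintptr_t encode_tail_page(unsigned long first_page_of_sg)
{
	return EXC_ENTRY | (first_page_of_sg << EXC_SHIFT);
}

static void lookup(uintptr_t entry, unsigned long n)
{
	if (entry & EXC_ENTRY) {
		unsigned long base = (unsigned long)entry >> EXC_SHIFT;
		printf("page %lu: sg starts at page %lu, offset %lu\n",
		       n, base, n - base);
	} else {
		printf("page %lu: direct sg entry, offset 0\n", n);
	}
}

int main(void)
{
	lookup(0, 4);			/* first page of its sg: direct entry */
	lookup(encode_tail_page(4), 7);	/* page 7 lives in the sg at page 4 */
	return 0;
}

So a hit below sg_idx either returns the sg pointer directly (offset 0)
or decodes the base index and recomputes the offset, which is what the
exception handling at the end of the lookup path does.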

>
>> +    mutex_lock(&iter->lock);
>> +    if (n >= iter->sg_idx &&
>> +        n < iter->sg_idx + __sg_page_count(iter->sg_pos)) {
>> +        sg = iter->sg_pos;
>> +        *offset = n - iter->sg_idx;
>> +        mutex_unlock(&iter->lock);
>> +        return sg;
>> +    }
>> +
>> +    while (iter->sg_idx <= n) {
>> +        unsigned long exception;
>> +        unsigned int count, i;
>> +
>> +        radix_tree_insert(&iter->radix,
>> +                  iter->sg_idx,
>> +                  iter->sg_pos);
>> +
>> +        exception =
>> +            RADIX_TREE_EXCEPTIONAL_ENTRY |
>> +            iter->sg_idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
>> +        count = __sg_page_count(iter->sg_pos);
>> +        for (i = 1; i < count; i++)
>> +            radix_tree_insert(&iter->radix,
>> +                      iter->sg_idx + i,
>> +                      (void *)exception);
>> +
>> +        iter->sg_idx += count;
>> +        iter->sg_pos = __sg_next(iter->sg_pos);
>> +    }
>> +    mutex_unlock(&iter->lock);
>
> Why not avoid falling through the lookup and return the previous sg 
> from here?
I thought I worked through this before and decided there was a good 
reason. However, looking again a few days later, I can't obviously see 
why. As you note below, this is non-trivial stuff and would definitely 
benefit from some explanatory comments.

>
>> +
>> +lookup:
>> +    rcu_read_lock();
>> +    sg = radix_tree_lookup(&iter->radix, n);
>> +    rcu_read_unlock();
>> +
>> +    if (unlikely(!sg))
>> +        return __i915_gem_object_get_sg(obj, n, offset);
>> +
>
> Considering the first observation from above, when does it then happen 
> that there is no radix tree entry at this point?
>
> Or in other words, maybe some comments should be added to this patch - 
> there are none and it is not that trivial.
>
>> +    *offset = 0;
>> +    if (unlikely(radix_tree_exception(sg))) {
>> +        unsigned long base =
>> +            (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
>> +        sg = radix_tree_lookup(&iter->radix, base);
>> +        *offset = n - base;
>> +    }
>> +    return sg;
>> +}
>> +
>> +struct page *
>> +i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned long n)
>> +{
>> +    struct scatterlist *sg;
>> +    unsigned int offset;
>> +
>> +    GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
>> +
>> +    sg = i915_gem_object_get_sg(obj, n, &offset);
>> +    return nth_page(sg_page(sg), offset);
>> +}
>> +
>> +/* Like i915_gem_object_get_page(), but mark the returned page dirty */
>> +struct page *
>> +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
>> +                   unsigned long n)
>> +{
>> +    struct page *page;
>> +
>> +    page = i915_gem_object_get_page(obj, n);
>> +    if (!obj->dirty)
>> +        set_page_dirty(page);
>> +
>> +    return page;
>> +}
>> +
>> +dma_addr_t
>> +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
>> +                unsigned long n)
>> +{
>> +    struct scatterlist *sg;
>> +    unsigned int offset;
>> +
>> +    sg = i915_gem_object_get_sg(obj, n, &offset);
>> +    return sg_dma_address(sg) + (offset << PAGE_SHIFT);
>> +}
>> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
>> index 59989e8ee5dc..24bad4e60ef0 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
>> @@ -594,8 +594,8 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>>       if (obj->pages == NULL)
>>           goto cleanup;
>>   -    obj->get_page.sg = obj->pages->sgl;
>> -    obj->get_page.last = 0;
>> +    obj->get_page.sg_pos = obj->pages->sgl;
>> +    obj->get_page.sg_idx = 0;
>>         i915_gem_object_pin_pages(obj);
>>       obj->stolen = stolen;
>> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
>> index 1c891b92ac80..cb95789da76e 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
>> @@ -526,8 +526,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>>               if (ret == 0) {
>>                   list_add_tail(&obj->global_list,
>> &to_i915(dev)->mm.unbound_list);
>> -                obj->get_page.sg = obj->pages->sgl;
>> -                obj->get_page.last = 0;
>> +                obj->get_page.sg_pos = obj->pages->sgl;
>> +                obj->get_page.sg_idx = 0;
>>                   pinned = 0;
>>               }
>>           }
>
> Regards,
>
> Tvrtko
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 16/42] drm/i915: Refactor object page API
  2016-10-07  9:46 ` [PATCH 16/42] drm/i915: Refactor object page API Chris Wilson
  2016-10-10 10:54   ` John Harrison
@ 2016-10-11 11:23   ` Tvrtko Ursulin
  2016-10-13 11:04   ` Joonas Lahtinen
  2 siblings, 0 replies; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-11 11:23 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 07/10/2016 10:46, Chris Wilson wrote:
> The plan is to make obtaining the backing storage for the object avoid
> struct_mutex (i.e. use its own locking). The first step is to update the
> API so that normal users only call pin/unpin whilst working on the
> backing storage.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_cmd_parser.c       |   2 +-
>   drivers/gpu/drm/i915/i915_debugfs.c          |  17 +--
>   drivers/gpu/drm/i915/i915_drv.h              |  89 ++++++++----
>   drivers/gpu/drm/i915/i915_gem.c              | 207 +++++++++++++--------------
>   drivers/gpu/drm/i915/i915_gem_batch_pool.c   |   3 +-
>   drivers/gpu/drm/i915/i915_gem_dmabuf.c       |  14 +-
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c   |   2 +-
>   drivers/gpu/drm/i915/i915_gem_fence.c        |   4 +-
>   drivers/gpu/drm/i915/i915_gem_gtt.c          |  10 +-
>   drivers/gpu/drm/i915/i915_gem_internal.c     |  19 +--
>   drivers/gpu/drm/i915/i915_gem_render_state.c |   2 +-
>   drivers/gpu/drm/i915/i915_gem_shrinker.c     |  10 +-
>   drivers/gpu/drm/i915/i915_gem_stolen.c       |  24 ++--
>   drivers/gpu/drm/i915/i915_gem_tiling.c       |   8 +-
>   drivers/gpu/drm/i915/i915_gem_userptr.c      |  30 ++--
>   drivers/gpu/drm/i915/i915_gpu_error.c        |   4 +-
>   drivers/gpu/drm/i915/intel_lrc.c             |   6 +-
>   17 files changed, 234 insertions(+), 217 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
> index 70980f82a15b..8d20020cb9f9 100644
> --- a/drivers/gpu/drm/i915/i915_cmd_parser.c
> +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
> @@ -1290,7 +1290,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
>   	}
>   
>   	if (ret == 0 && needs_clflush_after)
> -		drm_clflush_virt_range(shadow_batch_obj->mapping, batch_len);
> +		drm_clflush_virt_range(shadow_batch_obj->mm.mapping, batch_len);
>   	i915_gem_object_unpin_map(shadow_batch_obj);
>   
>   	return ret;
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index e4b5ba771bea..b807ddf65e04 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -112,7 +112,7 @@ static char get_global_flag(struct drm_i915_gem_object *obj)
>   
>   static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
>   {
> -	return obj->mapping ? 'M' : ' ';
> +	return obj->mm.mapping ? 'M' : ' ';
>   }
>   
>   static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
> @@ -158,8 +158,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>   		   i915_gem_active_get_seqno(&obj->last_write,
>   					     &obj->base.dev->struct_mutex),
>   		   i915_cache_level_str(dev_priv, obj->cache_level),
> -		   obj->dirty ? " dirty" : "",
> -		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
> +		   obj->mm.dirty ? " dirty" : "",
> +		   obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
>   	if (obj->base.name)
>   		seq_printf(m, " (name: %d)", obj->base.name);
>   	list_for_each_entry(vma, &obj->vma_list, obj_link) {
> @@ -411,12 +411,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
>   		size += obj->base.size;
>   		++count;
>   
> -		if (obj->madv == I915_MADV_DONTNEED) {
> +		if (obj->mm.madv == I915_MADV_DONTNEED) {
>   			purgeable_size += obj->base.size;
>   			++purgeable_count;
>   		}
>   
> -		if (obj->mapping) {
> +		if (obj->mm.mapping) {
>   			mapped_count++;
>   			mapped_size += obj->base.size;
>   		}
> @@ -433,12 +433,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
>   			++dpy_count;
>   		}
>   
> -		if (obj->madv == I915_MADV_DONTNEED) {
> +		if (obj->mm.madv == I915_MADV_DONTNEED) {
>   			purgeable_size += obj->base.size;
>   			++purgeable_count;
>   		}
>   
> -		if (obj->mapping) {
> +		if (obj->mm.mapping) {
>   			mapped_count++;
>   			mapped_size += obj->base.size;
>   		}
> @@ -2018,7 +2018,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
>   		seq_printf(m, "\tBound in GGTT at 0x%08x\n",
>   			   i915_ggtt_offset(vma));
>   
> -	if (i915_gem_object_get_pages(vma->obj)) {
> +	if (i915_gem_object_pin_pages(vma->obj)) {
>   		seq_puts(m, "\tFailed to get pages for context object\n\n");
>   		return;
>   	}
> @@ -2037,6 +2037,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
>   		kunmap_atomic(reg_state);
>   	}
>   
> +	i915_gem_object_unpin_pages(vma->obj);
>   	seq_putc(m, '\n');
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a96b446d8db4..3c22d49005fe 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2238,17 +2238,6 @@ struct drm_i915_gem_object {
>   #define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
>   
>   	/**
> -	 * This is set if the object has been written to since last bound
> -	 * to the GTT
> -	 */
> -	unsigned int dirty:1;
> -
> -	/**
> -	 * Advice: are the backing pages purgeable?
> -	 */
> -	unsigned int madv:2;
> -
> -	/**
>   	 * Whether the current gtt mapping needs to be mappable (and isn't just
>   	 * mappable by accident). Track pin and fault separate for a more
>   	 * accurate mappable working set.
> @@ -2276,16 +2265,31 @@ struct drm_i915_gem_object {
>   	unsigned int bind_count;
>   	unsigned int pin_display;
>   
> -	struct sg_table *pages;
> -	int pages_pin_count;
> -	struct i915_gem_object_page_iter {
> -		struct scatterlist *sg_pos;
> -		unsigned long sg_idx;
> +	struct {
> +		unsigned int pages_pin_count;
> +
> +		struct sg_table *pages;
> +		void *mapping;
> +
> +		struct i915_gem_object_page_iter {
> +			struct scatterlist *sg_pos;
> +			unsigned long sg_idx;
>   
> -		struct radix_tree_root radix;
> -		struct mutex lock;
> -	} get_page;
> -	void *mapping;
> +			struct radix_tree_root radix;
> +			struct mutex lock;
> +		} get_page;
> +
> +		/**
> +		 * Advice: are the backing pages purgeable?
> +		 */
> +		unsigned int madv:2;
> +
> +		/**
> +		 * This is set if the object has been written to since the
> +		 * pages were last acquired.
> +		 */
> +		unsigned int dirty:1;
> +	} mm;
>   
>   	/** Breadcrumb of last rendering to the buffer.
>   	 * There can only be one writer, but we allow for multiple readers.
> @@ -3160,13 +3164,10 @@ void i915_vma_close(struct i915_vma *vma);
>   void i915_vma_destroy(struct i915_vma *vma);
>   
>   int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
> -int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
>   void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
>   void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
>   
> -int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
> -
> -static inline int __sg_page_count(struct scatterlist *sg)
> +static inline int __sg_page_count(const struct scatterlist *sg)
>   {
>   	return sg->length >> PAGE_SHIFT;
>   }
> @@ -3187,18 +3188,48 @@ dma_addr_t
>   i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
>   				unsigned long n);
>   
> -static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
> +int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
> +
> +static inline int __must_check
> +i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>   {
> -	BUG_ON(obj->pages == NULL);
> -	obj->pages_pin_count++;
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	if (obj->mm.pages_pin_count++)
> +		return 0;
> +
> +	return __i915_gem_object_get_pages(obj);
> +}
> +
> +static inline void
> +__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	GEM_BUG_ON(!obj->mm.pages);
> +	obj->mm.pages_pin_count++;
> +}
> +
> +static inline bool
> +i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
> +{
> +	return obj->mm.pages_pin_count;
> +}
> +
> +static inline void
> +__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> +	GEM_BUG_ON(!obj->mm.pages);
> +	obj->mm.pages_pin_count--;
>   }
>   
>   static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>   {
> -	BUG_ON(obj->pages_pin_count == 0);
> -	obj->pages_pin_count--;
> +	__i915_gem_object_unpin_pages(obj);
>   }
>   
> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
> +
>   enum i915_map_type {
>   	I915_MAP_WB = 0,
>   	I915_MAP_WC,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index af7d51f16658..df774ddf62ae 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -216,7 +216,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>   	sg_dma_address(sg) = obj->phys_handle->busaddr;
>   	sg_dma_len(sg) = obj->base.size;
>   
> -	obj->pages = st;
> +	obj->mm.pages = st;
>   	return 0;
>   }
>   
> @@ -225,7 +225,7 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
>   {
>   	int ret;
>   
> -	BUG_ON(obj->madv == __I915_MADV_PURGED);
> +	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
>   
>   	ret = i915_gem_object_set_to_cpu_domain(obj, true);
>   	if (WARN_ON(ret)) {
> @@ -235,10 +235,10 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
>   		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
>   	}
>   
> -	if (obj->madv == I915_MADV_DONTNEED)
> -		obj->dirty = 0;
> +	if (obj->mm.madv == I915_MADV_DONTNEED)
> +		obj->mm.dirty = false;
>   
> -	if (obj->dirty) {
> +	if (obj->mm.dirty) {
>   		struct address_space *mapping = obj->base.filp->f_mapping;
>   		char *vaddr = obj->phys_handle->vaddr;
>   		int i;
> @@ -257,22 +257,23 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
>   			kunmap_atomic(dst);
>   
>   			set_page_dirty(page);
> -			if (obj->madv == I915_MADV_WILLNEED)
> +			if (obj->mm.madv == I915_MADV_WILLNEED)
>   				mark_page_accessed(page);
>   			put_page(page);
>   			vaddr += PAGE_SIZE;
>   		}
> -		obj->dirty = 0;
> +		obj->mm.dirty = false;
>   	}
>   
> -	sg_free_table(obj->pages);
> -	kfree(obj->pages);
> +	sg_free_table(obj->mm.pages);
> +	kfree(obj->mm.pages);
>   }
>   
>   static void
>   i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
>   {
>   	drm_pci_free(obj->base.dev, obj->phys_handle);
> +	i915_gem_object_unpin_pages(obj);
>   }
>   
>   static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
> @@ -506,7 +507,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>   		return 0;
>   	}
>   
> -	if (obj->madv != I915_MADV_WILLNEED)
> +	if (obj->mm.madv != I915_MADV_WILLNEED)
>   		return -EFAULT;
>   
>   	if (obj->base.filp == NULL)
> @@ -516,7 +517,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>   	if (ret)
>   		return ret;
>   
> -	ret = i915_gem_object_put_pages(obj);
> +	ret = __i915_gem_object_put_pages(obj);
>   	if (ret)
>   		return ret;
>   
> @@ -528,7 +529,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>   	obj->phys_handle = phys;
>   	obj->ops = &i915_gem_phys_ops;
>   
> -	return i915_gem_object_get_pages(obj);
> +	return i915_gem_object_pin_pages(obj);
>   }
>   
>   static int
> @@ -724,12 +725,10 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
>   	if (ret)
>   		return ret;
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ret;
>   
> -	i915_gem_object_pin_pages(obj);
> -
>   	i915_gem_object_flush_gtt_write_domain(obj);
>   
>   	/* If we're not in the cpu read domain, set ourself into the gtt
> @@ -777,12 +776,10 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
>   	if (ret)
>   		return ret;
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ret;
>   
> -	i915_gem_object_pin_pages(obj);
> -
>   	i915_gem_object_flush_gtt_write_domain(obj);
>   
>   	/* If we're not in the cpu write domain, set ourself into the
> @@ -812,7 +809,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
>   		obj->cache_dirty = true;
>   
>   	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
> -	obj->dirty = 1;
> +	obj->mm.dirty = true;
>   	/* return with the pages pinned */
>   	return 0;
>   
> @@ -949,13 +946,11 @@ i915_gem_gtt_pread(struct drm_device *dev,
>   		if (ret)
>   			goto out;
>   
> -		ret = i915_gem_object_get_pages(obj);
> +		ret = i915_gem_object_pin_pages(obj);
>   		if (ret) {
>   			remove_mappable_node(&node);
>   			goto out;
>   		}
> -
> -		i915_gem_object_pin_pages(obj);
>   	}
>   
>   	ret = i915_gem_object_set_to_gtt_domain(obj, false);
> @@ -1062,7 +1057,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
>   	offset = args->offset;
>   	remain = args->size;
>   
> -	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
> +	for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents,
>   			 offset >> PAGE_SHIFT) {
>   		struct page *page = sg_page_iter_page(&sg_iter);
>   
> @@ -1254,13 +1249,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
>   		if (ret)
>   			goto out;
>   
> -		ret = i915_gem_object_get_pages(obj);
> +		ret = i915_gem_object_pin_pages(obj);
>   		if (ret) {
>   			remove_mappable_node(&node);
>   			goto out;
>   		}
> -
> -		i915_gem_object_pin_pages(obj);
>   	}
>   
>   	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> @@ -1268,7 +1261,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
>   		goto out_unpin;
>   
>   	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
> -	obj->dirty = true;
> +	obj->mm.dirty = true;
>   
>   	user_data = u64_to_user_ptr(args->data_ptr);
>   	offset = args->offset;
> @@ -1439,7 +1432,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>   	offset = args->offset;
>   	remain = args->size;
>   
> -	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
> +	for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents,
>   			 offset >> PAGE_SHIFT) {
>   		struct page *page = sg_page_iter_page(&sg_iter);
>   		int partial_cacheline_write;
> @@ -2228,7 +2221,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
>   	 * backing pages, *now*.
>   	 */
>   	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
> -	obj->madv = __I915_MADV_PURGED;
> +	obj->mm.madv = __I915_MADV_PURGED;
>   }
>   
>   /* Try to discard unwanted pages */
> @@ -2237,7 +2230,7 @@ i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
>   {
>   	struct address_space *mapping;
>   
> -	switch (obj->madv) {
> +	switch (obj->mm.madv) {
>   	case I915_MADV_DONTNEED:
>   		i915_gem_object_truncate(obj);
>   	case __I915_MADV_PURGED:
> @@ -2258,7 +2251,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>   	struct page *page;
>   	int ret;
>   
> -	BUG_ON(obj->madv == __I915_MADV_PURGED);
> +	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
>   
>   	ret = i915_gem_object_set_to_cpu_domain(obj, true);
>   	if (WARN_ON(ret)) {
> @@ -2274,22 +2267,22 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>   	if (i915_gem_object_needs_bit17_swizzle(obj))
>   		i915_gem_object_save_bit_17_swizzle(obj);
>   
> -	if (obj->madv == I915_MADV_DONTNEED)
> -		obj->dirty = 0;
> +	if (obj->mm.madv == I915_MADV_DONTNEED)
> +		obj->mm.dirty = false;
>   
> -	for_each_sgt_page(page, sgt_iter, obj->pages) {
> -		if (obj->dirty)
> +	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
> +		if (obj->mm.dirty)
>   			set_page_dirty(page);
>   
> -		if (obj->madv == I915_MADV_WILLNEED)
> +		if (obj->mm.madv == I915_MADV_WILLNEED)
>   			mark_page_accessed(page);
>   
>   		put_page(page);
>   	}
> -	obj->dirty = 0;
> +	obj->mm.dirty = false;
>   
> -	sg_free_table(obj->pages);
> -	kfree(obj->pages);
> +	sg_free_table(obj->mm.pages);
> +	kfree(obj->mm.pages);
>   }
>   
>   static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
> @@ -2297,21 +2290,20 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
>   	struct radix_tree_iter iter;
>   	void **slot;
>   
> -	radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0)
> -		radix_tree_delete(&obj->get_page.radix, iter.index);
> +	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
> +		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
>   }
>   
> -int
> -i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>   {
>   	const struct drm_i915_gem_object_ops *ops = obj->ops;
>   
>   	lockdep_assert_held(&obj->base.dev->struct_mutex);
>   
> -	if (obj->pages == NULL)
> +	if (!obj->mm.pages)
>   		return 0;
>   
> -	if (obj->pages_pin_count)
> +	if (i915_gem_object_has_pinned_pages(obj))
>   		return -EBUSY;
>   
>   	GEM_BUG_ON(obj->bind_count);
> @@ -2321,22 +2313,22 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>   	 * lists early. */
>   	list_del(&obj->global_list);
>   
> -	if (obj->mapping) {
> +	if (obj->mm.mapping) {
>   		void *ptr;
>   
> -		ptr = ptr_mask_bits(obj->mapping);
> +		ptr = ptr_mask_bits(obj->mm.mapping);
>   		if (is_vmalloc_addr(ptr))
>   			vunmap(ptr);
>   		else
>   			kunmap(kmap_to_page(ptr));
>   
> -		obj->mapping = NULL;
> +		obj->mm.mapping = NULL;
>   	}
>   
>   	__i915_gem_object_reset_page_iter(obj);
>   
>   	ops->put_pages(obj);
> -	obj->pages = NULL;
> +	obj->mm.pages = NULL;
>   
>   	i915_gem_object_invalidate(obj);
>   
> @@ -2431,7 +2423,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>   	if (!swiotlb_nr_tbl())
>   #endif
>   		sg_mark_end(sg);
> -	obj->pages = st;
> +	obj->mm.pages = st;
>   
>   	ret = i915_gem_gtt_prepare_object(obj);
>   	if (ret)
> @@ -2442,7 +2434,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>   
>   	if (i915_gem_object_is_tiled(obj) &&
>   	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
> -		i915_gem_object_pin_pages(obj);
> +		__i915_gem_object_pin_pages(obj);
>   
>   	return 0;
>   
> @@ -2474,8 +2466,7 @@ err_pages:
>    * either as a result of memory pressure (reaping pages under the shrinker)
>    * or as the object is itself released.
>    */
> -int
> -i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> +int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>   	const struct drm_i915_gem_object_ops *ops = obj->ops;
> @@ -2483,24 +2474,25 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>   
>   	lockdep_assert_held(&obj->base.dev->struct_mutex);
>   
> -	if (obj->pages)
> +	if (obj->mm.pages)
>   		return 0;
>   
> -	if (obj->madv != I915_MADV_WILLNEED) {
> +	if (obj->mm.madv != I915_MADV_WILLNEED) {
>   		DRM_DEBUG("Attempting to obtain a purgeable object\n");
> +		__i915_gem_object_unpin_pages(obj);
>   		return -EFAULT;
>   	}
>   
> -	BUG_ON(obj->pages_pin_count);
> -
>   	ret = ops->get_pages(obj);
> -	if (ret)
> +	if (ret) {
> +		__i915_gem_object_unpin_pages(obj);
>   		return ret;
> +	}
>   
>   	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
>   
> -	obj->get_page.sg_pos = obj->pages->sgl;
> -	obj->get_page.sg_idx = 0;
> +	obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
> +	obj->mm.get_page.sg_idx = 0;
>   
>   	return 0;
>   }
> @@ -2510,7 +2502,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
>   				 enum i915_map_type type)
>   {
>   	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
> -	struct sg_table *sgt = obj->pages;
> +	struct sg_table *sgt = obj->mm.pages;
>   	struct sgt_iter sgt_iter;
>   	struct page *page;
>   	struct page *stack_pages[32];
> @@ -2564,14 +2556,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>   	lockdep_assert_held(&obj->base.dev->struct_mutex);
>   	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ERR_PTR(ret);
>   
> -	i915_gem_object_pin_pages(obj);
> -	pinned = obj->pages_pin_count > 1;
> +	pinned = obj->mm.pages_pin_count > 1;
>   
> -	ptr = ptr_unpack_bits(obj->mapping, has_type);
> +	ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
>   	if (ptr && has_type != type) {
>   		if (pinned) {
>   			ret = -EBUSY;
> @@ -2583,7 +2574,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>   		else
>   			kunmap(kmap_to_page(ptr));
>   
> -		ptr = obj->mapping = NULL;
> +		ptr = obj->mm.mapping = NULL;
>   	}
>   
>   	if (!ptr) {
> @@ -2593,7 +2584,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
>   			goto err;
>   		}
>   
> -		obj->mapping = ptr_pack_bits(ptr, type);
> +		obj->mm.mapping = ptr_pack_bits(ptr, type);
>   	}
>   
>   	return ptr;
> @@ -3030,7 +3021,7 @@ int i915_vma_unbind(struct i915_vma *vma)
>   		goto destroy;
>   
>   	GEM_BUG_ON(obj->bind_count == 0);
> -	GEM_BUG_ON(!obj->pages);
> +	GEM_BUG_ON(!obj->mm.pages);
>   
>   	if (i915_vma_is_map_and_fenceable(vma)) {
>   		/* release the fence reg _after_ flushing */
> @@ -3054,7 +3045,7 @@ int i915_vma_unbind(struct i915_vma *vma)
>   	drm_mm_remove_node(&vma->node);
>   	list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
>   
> -	if (vma->pages != obj->pages) {
> +	if (vma->pages != obj->mm.pages) {
>   		GEM_BUG_ON(!vma->pages);
>   		sg_free_table(vma->pages);
>   		kfree(vma->pages);
> @@ -3186,12 +3177,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>   		return -E2BIG;
>   	}
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ret;
>   
> -	i915_gem_object_pin_pages(obj);
> -
>   	if (flags & PIN_OFFSET_FIXED) {
>   		u64 offset = flags & PIN_OFFSET_MASK;
>   		if (offset & (alignment - 1) || offset > end - size) {
> @@ -3270,7 +3259,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>   	 * to GPU, and we can ignore the cache flush because it'll happen
>   	 * again at bind time.
>   	 */
> -	if (obj->pages == NULL)
> +	if (!obj->mm.pages)
>   		return false;
>   
>   	/*
> @@ -3294,7 +3283,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>   	}
>   
>   	trace_i915_gem_object_clflush(obj);
> -	drm_clflush_sg(obj->pages);
> +	drm_clflush_sg(obj->mm.pages);
>   	obj->cache_dirty = false;
>   
>   	return true;
> @@ -3408,7 +3397,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>   	 * continue to assume that the obj remained out of the CPU cached
>   	 * domain.
>   	 */
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ret;
>   
> @@ -3432,7 +3421,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>   	if (write) {
>   		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
>   		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
> -		obj->dirty = 1;
> +		obj->mm.dirty = true;
>   	}
>   
>   	trace_i915_gem_object_change_domain(obj,
> @@ -3441,6 +3430,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>   
>   	/* And bump the LRU for this access */
>   	i915_gem_object_bump_inactive_ggtt(obj);
> +	i915_gem_object_unpin_pages(obj);
>   
>   	return 0;
>   }
> @@ -4210,23 +4200,23 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
>   		goto unlock;
>   	}
>   
> -	if (obj->pages &&
> +	if (obj->mm.pages &&
>   	    i915_gem_object_is_tiled(obj) &&
>   	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
> -		if (obj->madv == I915_MADV_WILLNEED)
> -			i915_gem_object_unpin_pages(obj);
> +		if (obj->mm.madv == I915_MADV_WILLNEED)
> +			__i915_gem_object_unpin_pages(obj);
>   		if (args->madv == I915_MADV_WILLNEED)
> -			i915_gem_object_pin_pages(obj);
> +			__i915_gem_object_pin_pages(obj);
>   	}
>   
> -	if (obj->madv != __I915_MADV_PURGED)
> -		obj->madv = args->madv;
> +	if (obj->mm.madv != __I915_MADV_PURGED)
> +		obj->mm.madv = args->madv;
>   
>   	/* if the object is no longer attached, discard its backing storage */
> -	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
> +	if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages)
>   		i915_gem_object_truncate(obj);
>   
> -	args->retained = obj->madv != __I915_MADV_PURGED;
> +	args->retained = obj->mm.madv != __I915_MADV_PURGED;
>   
>   	i915_gem_object_put(obj);
>   unlock:
> @@ -4252,9 +4242,10 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>   	obj->ops = ops;
>   
>   	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
> -	obj->madv = I915_MADV_WILLNEED;
> -	INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL);
> -	mutex_init(&obj->get_page.lock);
> +
> +	obj->mm.madv = I915_MADV_WILLNEED;
> +	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL);
> +	mutex_init(&obj->mm.get_page.lock);
>   
>   	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
>   }
> @@ -4331,7 +4322,7 @@ static bool discard_backing_storage(struct drm_i915_gem_object *obj)
>   	 * back the contents from the GPU.
>   	 */
>   
> -	if (obj->madv != I915_MADV_WILLNEED)
> +	if (obj->mm.madv != I915_MADV_WILLNEED)
>   		return false;
>   
>   	if (obj->base.filp == NULL)
> @@ -4373,32 +4364,27 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>   	}
>   	GEM_BUG_ON(obj->bind_count);
>   
> -	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
> -	 * before progressing. */
> -	if (obj->stolen)
> -		i915_gem_object_unpin_pages(obj);
> -
>   	WARN_ON(atomic_read(&obj->frontbuffer_bits));
>   
> -	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
> +	if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED &&
>   	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
>   	    i915_gem_object_is_tiled(obj))
> -		i915_gem_object_unpin_pages(obj);
> +		__i915_gem_object_unpin_pages(obj);
>   
> -	if (WARN_ON(obj->pages_pin_count))
> -		obj->pages_pin_count = 0;
> +	if (obj->ops->release)
> +		obj->ops->release(obj);
> +
> +	if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
> +		obj->mm.pages_pin_count = 0;
>   	if (discard_backing_storage(obj))
> -		obj->madv = I915_MADV_DONTNEED;
> -	i915_gem_object_put_pages(obj);
> +		obj->mm.madv = I915_MADV_DONTNEED;
> +	__i915_gem_object_put_pages(obj);
>   
> -	BUG_ON(obj->pages);
> +	GEM_BUG_ON(obj->mm.pages);
>   
>   	if (obj->base.import_attach)
>   		drm_prime_gem_destroy(&obj->base, NULL);
>   
> -	if (obj->ops->release)
> -		obj->ops->release(obj);
> -
>   	drm_gem_object_release(&obj->base);
>   	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>   
> @@ -4935,14 +4921,13 @@ i915_gem_object_create_from_data(struct drm_device *dev,
>   	if (ret)
>   		goto fail;
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		goto fail;
>   
> -	i915_gem_object_pin_pages(obj);
> -	sg = obj->pages;
> +	sg = obj->mm.pages;
>   	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
> -	obj->dirty = 1;		/* Backing store is now out of date */
> +	obj->mm.dirty = true; /* Backing store is now out of date */
>   	i915_gem_object_unpin_pages(obj);
>   
>   	if (WARN_ON(bytes != size)) {
> @@ -4962,7 +4947,7 @@ static struct scatterlist *
>   __i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>   			 unsigned long n, unsigned int *offset)
>   {
> -	struct scatterlist *sg = obj->pages->sgl;
> +	struct scatterlist *sg = obj->mm.pages->sgl;
>   	int idx = 0;
>   
>   	while (idx + __sg_page_count(sg) <= n) {
> @@ -4980,11 +4965,11 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
>   		       unsigned long n,
>   		       unsigned int *offset)
>   {
> -	struct i915_gem_object_page_iter *iter = &obj->get_page;
> +	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
>   	struct scatterlist *sg;
>   
>   	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
> -	GEM_BUG_ON(obj->pages_pin_count == 0);
> +	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
>   
>   	if (n < READ_ONCE(iter->sg_idx))
>   		goto lookup;
> @@ -5058,7 +5043,7 @@ i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
>   	struct page *page;
>   
>   	page = i915_gem_object_get_page(obj, n);
> -	if (!obj->dirty)
> +	if (!obj->mm.dirty)
>   		set_page_dirty(page);
>   
>   	return page;
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index 3934c9103cf2..6b656822bb3a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -131,11 +131,10 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>   			return obj;
>   	}
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		return ERR_PTR(ret);
>   
>   	list_move_tail(&obj->batch_pool_link, list);
> -	i915_gem_object_pin_pages(obj);
>   	return obj;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 97c9d68b45df..10441dc72e73 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -48,12 +48,10 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
>   	if (ret)
>   		goto err;
>   
> -	ret = i915_gem_object_get_pages(obj);
> +	ret = i915_gem_object_pin_pages(obj);
>   	if (ret)
>   		goto err_unlock;
>   
> -	i915_gem_object_pin_pages(obj);
> -
>   	/* Copy sg so that we make an independent mapping */
>   	st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
>   	if (st == NULL) {
> @@ -61,13 +59,13 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
>   		goto err_unpin;
>   	}
>   
> -	ret = sg_alloc_table(st, obj->pages->nents, GFP_KERNEL);
> +	ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
>   	if (ret)
>   		goto err_free;
>   
> -	src = obj->pages->sgl;
> +	src = obj->mm.pages->sgl;
>   	dst = st->sgl;
> -	for (i = 0; i < obj->pages->nents; i++) {
> +	for (i = 0; i < obj->mm.pages->nents; i++) {
>   		sg_set_page(dst, sg_page(src), src->length, 0);
>   		dst = sg_next(dst);
>   		src = sg_next(src);
> @@ -299,14 +297,14 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
>   	if (IS_ERR(sg))
>   		return PTR_ERR(sg);
>   
> -	obj->pages = sg;
> +	obj->mm.pages = sg;
>   	return 0;
>   }
>   
>   static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj)
>   {
>   	dma_buf_unmap_attachment(obj->base.import_attach,
> -				 obj->pages, DMA_BIDIRECTIONAL);
> +				 obj->mm.pages, DMA_BIDIRECTIONAL);
>   }
>   
>   static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0deecd4e3b6c..062e098b0909 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1290,7 +1290,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>   
>   	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>   
> -	obj->dirty = 1; /* be paranoid  */
> +	obj->mm.dirty = true; /* be paranoid  */
>   
>   	/* Add a reference if we're newly entering the active list.
>   	 * The order in which we add operations to the retirement queue is
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
> index 8df1fa7234e8..398856160656 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence.c
> @@ -648,7 +648,7 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
>   		return;
>   
>   	i = 0;
> -	for_each_sgt_page(page, sgt_iter, obj->pages) {
> +	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
>   		char new_bit_17 = page_to_phys(page) >> 17;
>   		if ((new_bit_17 & 0x1) !=
>   		    (test_bit(i, obj->bit_17) != 0)) {
> @@ -687,7 +687,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
>   
>   	i = 0;
>   
> -	for_each_sgt_page(page, sgt_iter, obj->pages) {
> +	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
>   		if (page_to_phys(page) & (1 << 17))
>   			__set_bit(i, obj->bit_17);
>   		else
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7e0c98576e72..3c711e0b8a3f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -175,7 +175,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
>   {
>   	u32 pte_flags = 0;
>   
> -	vma->pages = vma->obj->pages;
> +	vma->pages = vma->obj->mm.pages;
>   
>   	/* Currently applicable only to VLV */
>   	if (vma->obj->gt_ro)
> @@ -2295,7 +2295,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
>   int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
>   {
>   	if (!dma_map_sg(&obj->base.dev->pdev->dev,
> -			obj->pages->sgl, obj->pages->nents,
> +			obj->mm.pages->sgl, obj->mm.pages->nents,
>   			PCI_DMA_BIDIRECTIONAL))
>   		return -ENOSPC;
>   
> @@ -2685,7 +2685,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
>   		}
>   	}
>   
> -	dma_unmap_sg(kdev, obj->pages->sgl, obj->pages->nents,
> +	dma_unmap_sg(kdev, obj->mm.pages->sgl, obj->mm.pages->nents,
>   		     PCI_DMA_BIDIRECTIONAL);
>   }
>   
> @@ -3526,7 +3526,7 @@ intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info,
>   
>   	/* Populate source page list from the object. */
>   	i = 0;
> -	for_each_sgt_dma(dma_addr, sgt_iter, obj->pages)
> +	for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
>   		page_addr_list[i++] = dma_addr;
>   
>   	GEM_BUG_ON(i != n_pages);
> @@ -3617,7 +3617,7 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
>   		return 0;
>   
>   	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
> -		vma->pages = vma->obj->pages;
> +		vma->pages = vma->obj->mm.pages;
>   	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
>   		vma->pages =
>   			intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
> diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
> index 534a61c1aba2..24eb347f9e0a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_internal.c
> +++ b/drivers/gpu/drm/i915/i915_gem_internal.c
> @@ -28,10 +28,11 @@
>   
>   static void internal_free_pages(struct sg_table *st)
>   {
> -	struct sg_page_iter sg_iter;
> +	struct sgt_iter iter;
> +	struct page *page;
>   
> -	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
> -		put_page(sg_page_iter_page(&sg_iter));
> +	for_each_sgt_page(page, iter, st)
> +		put_page(page);
>   
>   	sg_free_table(st);
>   	kfree(st);
> @@ -94,10 +95,10 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
>   	if (!swiotlb_nr_tbl())
>   #endif
>   		sg_mark_end(sg);
> -	obj->pages = st;
> +	obj->mm.pages = st;
>   
>   	if (i915_gem_gtt_prepare_object(obj)) {
> -		obj->pages = NULL;
> +		obj->mm.pages = NULL;
>   		goto err;
>   	}
>   
> @@ -106,7 +107,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
>   	 * and the caller is expected to repopulate - the contents of this
>   	 * object are only valid whilst active and pinned.
>   	 */
> -	obj->madv = I915_MADV_DONTNEED;
> +	obj->mm.madv = I915_MADV_DONTNEED;
>   	return 0;
>   
>   err:
> @@ -117,10 +118,10 @@ err:
>   
>   static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj)
>   {
> -	internal_free_pages(obj->pages);
> +	internal_free_pages(obj->mm.pages);
>   
> -	obj->dirty = 0;
> -	obj->madv = I915_MADV_WILLNEED;
> +	obj->mm.dirty = false;
> +	obj->mm.madv = I915_MADV_WILLNEED;
>   }
>   
>   static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index adb00c5125ad..64d9712ec789 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -230,7 +230,7 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
>   		return 0;
>   
>   	/* Recreate the page after shrinking */
> -	if (!so->vma->obj->pages)
> +	if (!so->vma->obj->mm.pages)
>   		so->batch_offset = -1;
>   
>   	ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 1c237d02f30b..e0e6d68fe470 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -78,7 +78,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
>   	 * to the GPU, simply unbinding from the GPU is not going to succeed
>   	 * in releasing our pin count on the pages themselves.
>   	 */
> -	if (obj->pages_pin_count > obj->bind_count)
> +	if (obj->mm.pages_pin_count > obj->bind_count)
>   		return false;
>   
>   	if (any_vma_pinned(obj))
> @@ -88,7 +88,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
>   	 * discard the contents (because the user has marked them as being
>   	 * purgeable) or if we can move their contents out to swap.
>   	 */
> -	return swap_available() || obj->madv == I915_MADV_DONTNEED;
> +	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
>   }
>   
>   /**
> @@ -175,11 +175,11 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>   			list_move_tail(&obj->global_list, &still_in_list);
>   
>   			if (flags & I915_SHRINK_PURGEABLE &&
> -			    obj->madv != I915_MADV_DONTNEED)
> +			    obj->mm.madv != I915_MADV_DONTNEED)
>   				continue;
>   
>   			if (flags & I915_SHRINK_VMAPS &&
> -			    !is_vmalloc_addr(obj->mapping))
> +			    !is_vmalloc_addr(obj->mm.mapping))
>   				continue;
>   
>   			if ((flags & I915_SHRINK_ACTIVE) == 0 &&
> @@ -193,7 +193,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>   
>   			/* For the unbound phase, this should be a no-op! */
>   			i915_gem_object_unbind(obj);
> -			if (i915_gem_object_put_pages(obj) == 0)
> +			if (__i915_gem_object_put_pages(obj) == 0)
>   				count += obj->base.size >> PAGE_SHIFT;
>   
>   			i915_gem_object_put(obj);
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 24bad4e60ef0..6d139d6b4609 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -554,16 +554,17 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
>   static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj)
>   {
>   	/* Should only be called during free */
> -	sg_free_table(obj->pages);
> -	kfree(obj->pages);
> +	sg_free_table(obj->mm.pages);
> +	kfree(obj->mm.pages);
>   }
>   
> -
>   static void
>   i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>   
> +	__i915_gem_object_unpin_pages(obj);
> +
>   	if (obj->stolen) {
>   		i915_gem_stolen_remove_node(dev_priv, obj->stolen);
>   		kfree(obj->stolen);
> @@ -589,15 +590,16 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>   	drm_gem_private_object_init(dev, &obj->base, stolen->size);
>   	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
>   
> -	obj->pages = i915_pages_create_for_stolen(dev,
> -						  stolen->start, stolen->size);
> -	if (obj->pages == NULL)
> +	obj->mm.pages = i915_pages_create_for_stolen(dev,
> +						     stolen->start,
> +						     stolen->size);
> +	if (!obj->mm.pages)
>   		goto cleanup;
>   
> -	obj->get_page.sg_pos = obj->pages->sgl;
> -	obj->get_page.sg_idx = 0;
> +	obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
> +	obj->mm.get_page.sg_idx = 0;
>   
> -	i915_gem_object_pin_pages(obj);
> +	__i915_gem_object_pin_pages(obj);
>   	obj->stolen = stolen;
>   
>   	obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
> @@ -717,14 +719,14 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
>   		goto err;
>   	}
>   
> -	vma->pages = obj->pages;
> +	vma->pages = obj->mm.pages;
>   	vma->flags |= I915_VMA_GLOBAL_BIND;
>   	__i915_vma_set_map_and_fenceable(vma);
>   	list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
>   	obj->bind_count++;
>   
>   	list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
> -	i915_gem_object_pin_pages(obj);
> +	__i915_gem_object_pin_pages(obj);
>   
>   	return obj;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index a14b1e3d4c78..7286de7bd25e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -260,13 +260,13 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
>   		if (!err) {
>   			struct i915_vma *vma;
>   
> -			if (obj->pages &&
> -			    obj->madv == I915_MADV_WILLNEED &&
> +			if (obj->mm.pages &&
> +			    obj->mm.madv == I915_MADV_WILLNEED &&
>   			    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
>   				if (args->tiling_mode == I915_TILING_NONE)
> -					i915_gem_object_unpin_pages(obj);
> +					__i915_gem_object_unpin_pages(obj);
>   				if (!i915_gem_object_is_tiled(obj))
> -					i915_gem_object_pin_pages(obj);
> +					__i915_gem_object_pin_pages(obj);
>   			}
>   
>   			list_for_each_entry(vma, &obj->vma_list, obj_link) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index cb95789da76e..0fdd1d6723d1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -73,10 +73,10 @@ static void cancel_userptr(struct work_struct *work)
>   	/* Cancel any active worker and force us to re-evaluate gup */
>   	obj->userptr.work = NULL;
>   
> -	if (obj->pages != NULL) {
> +	if (obj->mm.pages) {
>   		/* We are inside a kthread context and can't be interrupted */
>   		WARN_ON(i915_gem_object_unbind(obj));
> -		WARN_ON(i915_gem_object_put_pages(obj));
> +		WARN_ON(__i915_gem_object_put_pages(obj));
>   	}
>   
>   	i915_gem_object_put(obj);
> @@ -432,15 +432,15 @@ __i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj,
>   {
>   	int ret;
>   
> -	ret = st_set_pages(&obj->pages, pvec, num_pages);
> +	ret = st_set_pages(&obj->mm.pages, pvec, num_pages);
>   	if (ret)
>   		return ret;
>   
>   	ret = i915_gem_gtt_prepare_object(obj);
>   	if (ret) {
> -		sg_free_table(obj->pages);
> -		kfree(obj->pages);
> -		obj->pages = NULL;
> +		sg_free_table(obj->mm.pages);
> +		kfree(obj->mm.pages);
> +		obj->mm.pages = NULL;
>   	}
>   
>   	return ret;
> @@ -526,8 +526,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>   			if (ret == 0) {
>   				list_add_tail(&obj->global_list,
>   					      &to_i915(dev)->mm.unbound_list);
> -				obj->get_page.sg_pos = obj->pages->sgl;
> -				obj->get_page.sg_idx = 0;
> +				obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
> +				obj->mm.get_page.sg_idx = 0;
>   				pinned = 0;
>   			}
>   		}
> @@ -668,22 +668,22 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
>   	BUG_ON(obj->userptr.work != NULL);
>   	__i915_gem_userptr_set_active(obj, false);
>   
> -	if (obj->madv != I915_MADV_WILLNEED)
> -		obj->dirty = 0;
> +	if (obj->mm.madv != I915_MADV_WILLNEED)
> +		obj->mm.dirty = false;
>   
>   	i915_gem_gtt_finish_object(obj);
>   
> -	for_each_sgt_page(page, sgt_iter, obj->pages) {
> -		if (obj->dirty)
> +	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
> +		if (obj->mm.dirty)
>   			set_page_dirty(page);
>   
>   		mark_page_accessed(page);
>   		put_page(page);
>   	}
> -	obj->dirty = 0;
> +	obj->mm.dirty = false;
>   
> -	sg_free_table(obj->pages);
> -	kfree(obj->pages);
> +	sg_free_table(obj->mm.pages);
> +	kfree(obj->mm.pages);
>   }
>   
>   static void
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 6c22be28ba01..628d5cdf9200 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -881,8 +881,8 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>   	err->write_domain = obj->base.write_domain;
>   	err->fence_reg = vma->fence ? vma->fence->id : -1;
>   	err->tiling = i915_gem_object_get_tiling(obj);
> -	err->dirty = obj->dirty;
> -	err->purgeable = obj->madv != I915_MADV_WILLNEED;
> +	err->dirty = obj->mm.dirty;
> +	err->purgeable = obj->mm.madv != I915_MADV_WILLNEED;
>   	err->userptr = obj->userptr.mm != NULL;
>   	err->cache_level = obj->cache_level;
>   }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 02c48d390148..6acecfc41f5e 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -744,7 +744,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx,
>   	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
>   		i915_ggtt_offset(ce->ring->vma);
>   
> -	ce->state->obj->dirty = true;
> +	ce->state->obj->mm.dirty = true;
>   
>   	/* Invalidate GuC TLB. */
>   	if (i915.enable_guc_submission) {
> @@ -2042,7 +2042,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>   		DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
>   		return ret;
>   	}
> -	ctx_obj->dirty = true;
> +	ctx_obj->mm.dirty = true;
>   
>   	/* The second page of the context object contains some fields which must
>   	 * be set up prior to the first execution. */
> @@ -2179,7 +2179,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
>   			reg[CTX_RING_HEAD+1] = 0;
>   			reg[CTX_RING_TAIL+1] = 0;
>   
> -			ce->state->obj->dirty = true;
> +			ce->state->obj->mm.dirty = true;
>   			i915_gem_object_unpin_map(ce->state->obj);
>   
>   			ce->ring->head = ce->ring->tail = 0;

Did not spot any issues. Hopefully CI is good enough to help us if we 
missed something.

It even looks better (simpler) than the current API, which was always 
slightly confusing.
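
For anyone skimming, the typical caller conversion (simplified from the 
hunks above) shows why:

	/* before: acquire the pages, then pin them, as two steps */
	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;
	i915_gem_object_pin_pages(obj);

	/* after: one call both acquires the pages and takes the pin */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;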

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 20/42] drm/i915: Implement pread without struct-mutex
  2016-10-07  9:46 ` [PATCH 20/42] drm/i915: Implement pread without struct-mutex Chris Wilson
@ 2016-10-12 12:53   ` Joonas Lahtinen
  0 siblings, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-12 12:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> @@ -871,7 +845,7 @@ shmem_clflush_swizzled_range(char *addr, unsigned long length,
>  /* Only difference to the fast-path function is that this can handle bit17
>   * and uses non-atomic copy and kmap functions. */
>  static int
> -shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pread_slow(struct page *page, int offset, int length,

Let's maybe call this shmem_pread_cpu? I understand it's been faster on
recent generations even for swizzling. Can be added as a follow-up.

> +static int
> +i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
> +		     struct drm_i915_gem_pread *args)
> +{
> +	char __user *user_data;
> +	u64 remain;
> +	unsigned int obj_do_bit17_swizzling;
> +	unsigned int needs_clflush;
> +	unsigned int idx, offset;
> +	int ret;
> +
> +	obj_do_bit17_swizzling = 0;
> +	if (i915_gem_object_needs_bit17_swizzle(obj))
> +		obj_do_bit17_swizzling = 1 << 17;

Could use BIT(17) to make it super explicit.
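
i.e. something like this (illustration only, not part of the patch):

	obj_do_bit17_swizzling = 0;
	if (i915_gem_object_needs_bit17_swizzle(obj))
		obj_do_bit17_swizzling = BIT(17);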

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas

PS. Something like a C context-aware diff would be super; it would make
reading these much more fun. Or maybe a patchwork 2-way diff view.
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 16/42] drm/i915: Refactor object page API
  2016-10-07  9:46 ` [PATCH 16/42] drm/i915: Refactor object page API Chris Wilson
  2016-10-10 10:54   ` John Harrison
  2016-10-11 11:23   ` Tvrtko Ursulin
@ 2016-10-13 11:04   ` Joonas Lahtinen
  2016-10-13 11:10     ` Chris Wilson
  2 siblings, 1 reply; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-13 11:04 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> +static inline int __must_check
> +i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>  {
> -	BUG_ON(obj->pages == NULL);
> -	obj->pages_pin_count++;
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);

\n here.

> +	if (obj->mm.pages_pin_count++)
> +		return 0;
> +
> +	return __i915_gem_object_get_pages(obj);

I also second John here: what's up with the dummy wrapper convention?
Add TODO comments if you intend to introduce new stuff relying on it.

> +}
> +
> +static inline void
> +__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);

Ditto.

> +	GEM_BUG_ON(!obj->mm.pages);
> +	obj->mm.pages_pin_count++;
> +}
> +
> +static inline bool
> +i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
> +{
> +	return obj->mm.pages_pin_count;
> +}
> +
> +static inline void
> +__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> +	GEM_BUG_ON(!obj->mm.pages);

\n at least here.

> +	obj->mm.pages_pin_count--;
>  }
>  

<SNIP>

> @@ -812,7 +809,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
>  		obj->cache_dirty = true;
>  
>  	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
> -	obj->dirty = 1;
> +	obj->mm.dirty = true;

The type is not 'bool dirty : 1'; do we get yelled at by the compiler?

> @@ -1268,7 +1261,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
>  		goto out_unpin;
>  
>  	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
> -	obj->dirty = true;
> +	obj->mm.dirty = true;

I assume not, as this has not been changed. Consistency is good anyway.
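
For what it is worth, a quick illustration of why the compiler should stay
quiet either way (assuming the field is a plain unsigned bitfield rather
than a bool; the layout below is my guess, not taken from the patch):

	struct { unsigned int dirty:1; } mm;

	mm.dirty = true;	/* bool true promotes to 1 and fits the bitfield */
	mm.dirty = 1;		/* equivalent, no warning in either form */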

> @@ -2483,24 +2474,25 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>  
>  	lockdep_assert_held(&obj->base.dev->struct_mutex);
>  
> -	if (obj->pages)
> +	if (obj->mm.pages)
>  		return 0;
>  
> -	if (obj->madv != I915_MADV_WILLNEED) {
> +	if (obj->mm.madv != I915_MADV_WILLNEED) {
>  		DRM_DEBUG("Attempting to obtain a purgeable object\n");
> +		__i915_gem_object_unpin_pages(obj);

It is confusing to have the teardown of another function in here.

>  		return -EFAULT;
>  	}
>  
> -	BUG_ON(obj->pages_pin_count);
> -
>  	ret = ops->get_pages(obj);
> -	if (ret)
> +	if (ret) {
> +		__i915_gem_object_unpin_pages(obj);

And if you *really* have to, at least try not to duplicate code.
Bonus points for moving this to the proper teardown path where it has a
counterpart.

Strange side effects on a failing function call do not lead to a very
understandable code structure, but to major spaghetti. Can be fixed in a
follow-up patch too.
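
For illustration, the kind of single teardown path I mean; just a rough
sketch against the quoted code, not a real patch:

	if (obj->mm.madv != I915_MADV_WILLNEED) {
		DRM_DEBUG("Attempting to obtain a purgeable object\n");
		ret = -EFAULT;
		goto err_unpin;
	}

	ret = ops->get_pages(obj);
	if (ret)
		goto err_unpin;

	/* ... rest of the success path unchanged ... */
	return 0;

err_unpin:
	__i915_gem_object_unpin_pages(obj);
	return ret;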

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 16/42] drm/i915: Refactor object page API
  2016-10-13 11:04   ` Joonas Lahtinen
@ 2016-10-13 11:10     ` Chris Wilson
  0 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-13 11:10 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Thu, Oct 13, 2016 at 02:04:18PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> > @@ -2483,24 +2474,25 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> >  
> >  	lockdep_assert_held(&obj->base.dev->struct_mutex);
> >  
> > -	if (obj->pages)
> > +	if (obj->mm.pages)
> >  		return 0;
> >  
> > -	if (obj->madv != I915_MADV_WILLNEED) {
> > +	if (obj->mm.madv != I915_MADV_WILLNEED) {
> >  		DRM_DEBUG("Attempting to obtain a purgeable object\n");
> > +		__i915_gem_object_unpin_pages(obj);
> 
> Confusing to have teardown of another function in here.
> 
> >  		return -EFAULT;
> >  	}
> >  
> > -	BUG_ON(obj->pages_pin_count);
> > -
> >  	ret = ops->get_pages(obj);
> > -	if (ret)
> > +	if (ret) {
> > +		__i915_gem_object_unpin_pages(obj);
> 
> And if you like *really* have to, at least try not to duplicate code.
> Bonus points form moving this to be proper teardown path where it has a
> counter-part.

It is here so that the unlikely error handling is not inside the
inlined function, but in the place where we expect the code to grow to
handle the more complex locking requirements.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 21/42] drm/i915: Implement pwrite without struct-mutex
  2016-10-07  9:46 ` [PATCH 21/42] drm/i915: Implement pwrite " Chris Wilson
@ 2016-10-13 11:17   ` Joonas Lahtinen
  2016-10-13 11:54     ` Chris Wilson
  0 siblings, 1 reply; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-13 11:17 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> +/* Per-page copy function for the shmem pwrite fastpath.
> + * Flushes invalid cachelines before writing to the target if
> + * needs_clflush_before is set and flushes out any written cachelines after
> + * writing if needs_clflush is set.
> + */
>  static int
> -i915_gem_shmem_pwrite(struct drm_device *dev,
> -		      struct drm_i915_gem_object *obj,
> -		      struct drm_i915_gem_pwrite *args,
> -		      struct drm_file *file)
> +shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
> +	     bool page_do_bit17_swizzling,
> +	     bool needs_clflush_before,
> +	     bool needs_clflush_after)

I remember there having been complaints about two bool arguments in the
same func. Could these three be fixed while mangling the code otherwise
too? Or as a follow-up.

> +static int
> +i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
> +		      const struct drm_i915_gem_pwrite *args)
> +{
> +	void __user *user_data;
> +	u64 remain;
> +	unsigned int obj_do_bit17_swizzling;
> +	unsigned int partial_cacheline_write;

partial_cacheline_mask might be more descriptive?

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 22/42] drm/i915: Acquire the backing storage outside of struct_mutex in set-domain
  2016-10-07  9:46 ` [PATCH 22/42] drm/i915: Acquire the backing storage outside of struct_mutex in set-domain Chris Wilson
@ 2016-10-13 11:47   ` Joonas Lahtinen
  0 siblings, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-13 11:47 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> @@ -1499,25 +1523,40 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
>  				   MAX_SCHEDULE_TIMEOUT,
>  				   to_rps_client(file));
>  	if (ret)
> -		goto err;
> +		goto err_unlocked;
> +
> +	/* Flush and acquire obj->pages so that we are coherent through
> +	 * direct access in memory with previous cached writes through
> +	 * shmemfs and that our cache domain tracking remains valid.
> +	 * For example, if the obj->filp was moved to swap without us
> +	 * being notified and releasing the pages, we would mistakenly
> +	 * continue to assume that the obj remained out of the CPU cached
> +	 * domain.
> +	 */
> +	ret = i915_gem_object_pin_pages(obj);
> +	if (ret)
> +		goto err_unlocked;
>  
>  	ret = i915_mutex_lock_interruptible(dev);
>  	if (ret)
> -		goto err;
> +		goto err_pages;
>  
>  	if (read_domains & I915_GEM_DOMAIN_GTT)
>  		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
>  	else
>  		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
>  
> -	if (write_domain != 0)
> -		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
> +	/* And bump the LRU for this access */
> +	i915_gem_object_bump_inactive_ggtt(obj);
>  
> -	i915_gem_object_put(obj);
>  	mutex_unlock(&dev->struct_mutex);
> -	return ret;
>  
> -err:
> +	if (write_domain != 0)
> +		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
> +
> +err_pages:

out_pages: as it's a shared path now.

Code motion would still be great as separate patches.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 21/42] drm/i915: Implement pwrite without struct-mutex
  2016-10-13 11:17   ` Joonas Lahtinen
@ 2016-10-13 11:54     ` Chris Wilson
  2016-10-14  7:08       ` Joonas Lahtinen
  0 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-13 11:54 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Thu, Oct 13, 2016 at 02:17:52PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> > +/* Per-page copy function for the shmem pwrite fastpath.
> > + * Flushes invalid cachelines before writing to the target if
> > + * needs_clflush_before is set and flushes out any written cachelines after
> > + * writing if needs_clflush is set.
> > + */
> >  static int
> > -i915_gem_shmem_pwrite(struct drm_device *dev,
> > -		      struct drm_i915_gem_object *obj,
> > -		      struct drm_i915_gem_pwrite *args,
> > -		      struct drm_file *file)
> > +shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
> > +	     bool page_do_bit17_swizzling,
> > +	     bool needs_clflush_before,
> > +	     bool needs_clflush_after)
> 
> I remember having complaints of two bool arguments in same func. Could
> these tree be fixed while mangling the code otherwise too? Or as a
> follow-up.

They are three different evaluations on each loop. Looks to be a bit of a
hassle to amalgamate them into one in the caller. Though

(pages_to_phys() & BIT(17)) |
((offset | length) & 63) |
(needs_clflush & 2)

almost works (just need to choose another bit for needs_clflush). I'm
not convinced you'd like the unpacking in shmem_pwrite() ;)
Something like:

static int
shmem_pwrite_slow(struct page *page, int offset, int length,
                  char __user *user_data,
                  unsigned flags)
{
        char *vaddr;
        int ret;

        vaddr = kmap(page);
        if (unlikely(flags & (BIT(17) | 63)))
                shmem_clflush_swizzled_range(vaddr + offset, length, flags);
        if (flags & BIT(17))
                ret = __copy_from_user_swizzled(vaddr, offset, user_data,
                                                length);
        else
                ret = __copy_from_user(vaddr + offset, user_data, length);
        if (flags & BIT(16))
                shmem_clflush_swizzled_range(vaddr + offset, length, flags);
        kunmap(page);

        return ret ? -EFAULT : 0;
}
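
And the caller would presumably pack the flags along these lines (again
just a sketch, same bit choices as above):

	unsigned flags = 0;

	if (page_do_bit17_swizzling)
		flags |= BIT(17);
	if (needs_clflush_before)
		flags |= (offset | length) & 63;	/* partial cachelines */
	if (needs_clflush_after)
		flags |= BIT(16);

	ret = shmem_pwrite_slow(page, offset, length, user_data, flags);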

> > +static int
> > +i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
> > +		      const struct drm_i915_gem_pwrite *args)
> > +{
> > +	void __user *user_data;
> > +	u64 remain;
> > +	unsigned int obj_do_bit17_swizzling;
> > +	unsigned int partial_cacheline_write;
> 
> partial_cacheline_mask might be more descriptive?

To me that says we want to mask off the partial cachelines, or use them
constructively. Imho,

	len & partial_cacheline_write

reads more clearly as a boolean question than

	len & partial_cacheline_mask.
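
For reference, roughly how it is set up and read at the call-site (sketch
only, assuming the mask is the clflush line size minus one):

	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;

	/* per page: does this write only partially cover a cacheline? */
	needs_flush_before = (offset | len) & partial_cacheline_write;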

-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 19/42] drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex
  2016-10-07  9:46 ` [PATCH 19/42] drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex Chris Wilson
@ 2016-10-13 11:54   ` Joonas Lahtinen
  0 siblings, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-13 11:54 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> @@ -72,21 +68,18 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
>  	}
>  
>  	if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
> -		ret =-ENOMEM;
> +		ret = -ENOMEM;
>  		goto err_free_sg;
>  	}
>  
> -	mutex_unlock(&obj->base.dev->struct_mutex);
>  	return st;
>  
>  err_free_sg:
>  	sg_free_table(st);
>  err_free:
>  	kfree(st);
> -err_unpin:
> +err_put_pages:
>  	i915_gem_object_unpin_pages(obj);

I do not quite follow the logic leading to the rename here.

> @@ -177,15 +155,22 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
>  {
>  	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
>  	struct drm_device *dev = obj->base.dev;
> -	int ret;
>  	bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
> +	int ret;
>  
> -	ret = i915_mutex_lock_interruptible(dev);
> +	ret = i915_gem_object_pin_pages(obj);
>  	if (ret)
>  		return ret;
>  
> +	ret = i915_mutex_lock_interruptible(dev);
> +	if (ret)
> +		goto err;
> +
>  	ret = i915_gem_object_set_to_cpu_domain(obj, write);
>  	mutex_unlock(&dev->struct_mutex);
> +
> +err:

combined path, 'out:'
 
> @@ -195,13 +180,19 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
>  	struct drm_device *dev = obj->base.dev;
>  	int ret;
>  
> -	ret = i915_mutex_lock_interruptible(dev);
> +	ret = i915_gem_object_pin_pages(obj);
>  	if (ret)
>  		return ret;
>  
> +	ret = i915_mutex_lock_interruptible(dev);
> +	if (ret)
> +		goto err;
> +
>  	ret = i915_gem_object_set_to_gtt_domain(obj, false);
> 	mutex_unlock(&dev->struct_mutex);
>  
> +err:

Ditto.

With the above fixed;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 18/42] drm/i915: Move object backing storage manipulation to its own locking
  2016-10-07  9:46 ` [PATCH 18/42] drm/i915: Move object backing storage manipulation to its own locking Chris Wilson
@ 2016-10-13 12:46   ` Joonas Lahtinen
  2016-10-13 12:56     ` Chris Wilson
  0 siblings, 1 reply; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-13 12:46 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> @@ -4211,10 +4240,10 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
>  		i915_gem_object_truncate(obj);
>  
>  	args->retained = obj->mm.madv != __I915_MADV_PURGED;
> +	mutex_unlock(&obj->mm.lock);
>  
> +err:

Shared, so 'out:'

Other than that, looks good.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Am I to assume that now that we have introduced a new lock, every single
function that assumes the lock is held is now tagged with
lockdep_assert_held?

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 18/42] drm/i915: Move object backing storage manipulation to its own locking
  2016-10-13 12:46   ` Joonas Lahtinen
@ 2016-10-13 12:56     ` Chris Wilson
  0 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-13 12:56 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Thu, Oct 13, 2016 at 03:46:04PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> > @@ -4211,10 +4240,10 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
> >  		i915_gem_object_truncate(obj);
> >  
> >  	args->retained = obj->mm.madv != __I915_MADV_PURGED;
> > +	mutex_unlock(&obj->mm.lock);
> >  
> > +err:
> 
> Shared, so 'out:'
> 
> Other than that, looks good.
> 
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> 
> Am I to assume that now that we have introduced a new lock, every single
> function that assumes the lock is held is now tagged with
> lockdep_assert_held?

There are two external calls that assume the caller holds obj->mm.lock,
and yes they are so marked.
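
Illustrative only, the kind of annotation being referred to:

	void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
	{
		lockdep_assert_held(&obj->mm.lock);

		/* ... release obj->mm.pages ... */
	}
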
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 21/42] drm/i915: Implement pwrite without struct-mutex
  2016-10-13 11:54     ` Chris Wilson
@ 2016-10-14  7:08       ` Joonas Lahtinen
  0 siblings, 0 replies; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-14  7:08 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On to, 2016-10-13 at 12:54 +0100, Chris Wilson wrote:
> To me that says we want to mask off the partial cacheline, or use them
> constructively. Imho, 
> 
> 	len & partial_cacheline_write
> 
> reads more clearly as a boolean question than
> 
> 	len & partial_cacheline_mask.

For me they convey the same meaning in this context, but on its own,
partial_cacheline_write sounds like a bool. You could combine it into
write_mask as an option, too. But not a big deal after all.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 17/42] drm/i915: Pass around sg_table to get_pages/put_pages backend
  2016-10-07  9:46 ` [PATCH 17/42] drm/i915: Pass around sg_table to get_pages/put_pages backend Chris Wilson
@ 2016-10-14  9:12   ` Joonas Lahtinen
  2016-10-14  9:24     ` Chris Wilson
  2016-10-14  9:28   ` Tvrtko Ursulin
  1 sibling, 1 reply; 107+ messages in thread
From: Joonas Lahtinen @ 2016-10-14  9:12 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> @@ -2376,6 +2374,19 @@ __deprecated
>  extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
>  
>  static inline bool
> +i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
> +{
> +	return atomic_read(&obj->base.refcount.refcount) == 0;
> +}

Seems like this ought to be in DRM? Todo item.
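
Something along these lines, I guess (hypothetical helper, not existing
DRM API):

	static inline bool
	drm_gem_object_is_dead(const struct drm_gem_object *obj)
	{
		return atomic_read(&obj->refcount.refcount) == 0;
	}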

> +
> +#if IS_ENABLED(CONFIG_LOCKDEP)
> +#define lockdep_assert_held_unless(lock, cond) \
> +	GEM_BUG_ON(!lockdep_is_held(lock) && !(cond))
> +#else
> +#define lockdep_assert_held_unless(lock, cond)
> +#endif

Ugh.

> @@ -646,21 +647,22 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>  
>  	active = false;
>  	if (pinned < 0)
> -		ret = pinned, pinned = 0;
> +		pages = ERR_PTR(pinned), pinned = 0;

Just convert to curly braces.
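
I.e., purely as an illustration:

	if (pinned < 0) {
		pages = ERR_PTR(pinned);
		pinned = 0;
	}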

> 	else if (pinned < num_pages)
> -		ret = __i915_gem_userptr_get_pages_schedule(obj, &active);
> +		pages = __i915_gem_userptr_get_pages_schedule(obj, &active);
>  	else
> -		ret = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
> -	if (ret) {
> +		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
> +	if (IS_ERR(pages)) {
>  		__i915_gem_userptr_set_active(obj, active);
>  		release_pages(pvec, pinned, 0);
>  	}
>  	drm_free_large(pvec);
> -	return ret;
> +	return pages;
>  }
>  

Horrible patch to review; a mix of different changes all in one.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 17/42] drm/i915: Pass around sg_table to get_pages/put_pages backend
  2016-10-14  9:12   ` Joonas Lahtinen
@ 2016-10-14  9:24     ` Chris Wilson
  0 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-14  9:24 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, Oct 14, 2016 at 12:12:32PM +0300, Joonas Lahtinen wrote:
> On pe, 2016-10-07 at 10:46 +0100, Chris Wilson wrote:
> > @@ -2376,6 +2374,19 @@ __deprecated
> >  extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
> >  
> >  static inline bool
> > +i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
> > +{
> > +	return atomic_read(&obj->base.refcount.refcount) == 0;
> > +}
> 
> Seems like this ought to be in DRM? Todo item.
> > +
> > +#if IS_ENABLED(CONFIG_LOCKDEP)
> > +#define lockdep_assert_held_unless(lock, cond) \
> > +	GEM_BUG_ON(!lockdep_is_held(lock) && !(cond))
> > +#else
> > +#define lockdep_assert_held_unless(lock, cond)
> > +#endif
> 
> Ugh.

These are temporaries to keep lockdep asserts working until removed later
in the series (but I hope they illustrate the change in locking regime
being prepared for).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 107+ messages in thread

* Re: [PATCH 17/42] drm/i915: Pass around sg_table to get_pages/put_pages backend
  2016-10-07  9:46 ` [PATCH 17/42] drm/i915: Pass around sg_table to get_pages/put_pages backend Chris Wilson
  2016-10-14  9:12   ` Joonas Lahtinen
@ 2016-10-14  9:28   ` Tvrtko Ursulin
  2016-10-14  9:43     ` Chris Wilson
  1 sibling, 1 reply; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-14  9:28 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx




[sending the draft reply until I get another look at it]

On 07/10/2016 10:46, Chris Wilson wrote:
> The plan is to move obj->pages out from under the struct_mutex into its
> own per-object lock. We need to prune any assumption of the struct_mutex
> from the get_pages/put_pages backends, and to make it easier we pass
> around the sg_table to operate on rather than indirectly via the obj.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |  36 +++++--
>   drivers/gpu/drm/i915/i915_gem.c          | 172 +++++++++++++++----------------
>   drivers/gpu/drm/i915/i915_gem_dmabuf.c   |  20 ++--
>   drivers/gpu/drm/i915/i915_gem_fence.c    |  18 ++--
>   drivers/gpu/drm/i915/i915_gem_gtt.c      |  19 ++--
>   drivers/gpu/drm/i915/i915_gem_gtt.h      |   6 +-
>   drivers/gpu/drm/i915/i915_gem_internal.c |  22 ++--
>   drivers/gpu/drm/i915/i915_gem_shrinker.c |  11 +-
>   drivers/gpu/drm/i915/i915_gem_stolen.c   |  43 ++++----
>   drivers/gpu/drm/i915/i915_gem_userptr.c  |  88 ++++++++--------
>   10 files changed, 227 insertions(+), 208 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3c22d49005fe..271e63c8f037 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2175,8 +2175,8 @@ struct drm_i915_gem_object_ops {
>   	 * being released or under memory pressure (where we attempt to
>   	 * reap pages for the shrinker).
>   	 */
> -	int (*get_pages)(struct drm_i915_gem_object *);
> -	void (*put_pages)(struct drm_i915_gem_object *);
> +	struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
> +	void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);

Is the idea that the put_pages vfunc does not need struct_mutex? Or does
it acquire it on demand inside, which means struct_mutex will nest inside
mm.lock?

Would it be cleaner to have two vfuncs?
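
For example something like this, just thinking out loud rather than a
concrete proposal:

	/* one callback per locking context */
	void (*put_pages)(struct drm_i915_gem_object *obj,
			  struct sg_table *pages);	/* obj->mm.lock only */
	void (*put_pages_locked)(struct drm_i915_gem_object *obj,
				 struct sg_table *pages);	/* struct_mutex held */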

>   
>   	int (*dmabuf_export)(struct drm_i915_gem_object *);
>   	void (*release)(struct drm_i915_gem_object *);
> @@ -2313,8 +2313,6 @@ struct drm_i915_gem_object {
>   	struct i915_gem_userptr {
>   		uintptr_t ptr;
>   		unsigned read_only :1;
> -		unsigned workers :4;
> -#define I915_GEM_USERPTR_MAX_WORKERS 15

Why are you removing this? Perhaps I am misremembering, but wasn't the
reason for having it to avoid queuing up too many userptr get_pages
workers? Can that not happen any longer?

[this is as far as I got the other day; then I started looking ahead in
the patches to try to pick up the design, but it became too much]

Regards,

Tvrtko

>   
>   		struct i915_mm_struct *mm;
>   		struct i915_mmu_object *mmu_object;
> @@ -2376,6 +2374,19 @@ __deprecated
>   extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
>   
>   static inline bool
> +i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
> +{
> +	return atomic_read(&obj->base.refcount.refcount) == 0;
> +}
> +
> +#if IS_ENABLED(CONFIG_LOCKDEP)
> +#define lockdep_assert_held_unless(lock, cond) \
> +	GEM_BUG_ON(!lockdep_is_held(lock) && !(cond))
> +#else
> +#define lockdep_assert_held_unless(lock, cond)
> +#endif
> +
> +static inline bool
>   i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
>   {
>   	return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
> @@ -3188,6 +3199,8 @@ dma_addr_t
>   i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
>   				unsigned long n);
>   
> +void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
> +				 struct sg_table *pages);
>   int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
>   
>   static inline int __must_check
> @@ -3203,7 +3216,8 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>   static inline void
>   __i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
>   {
> -	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	lockdep_assert_held_unless(&obj->base.dev->struct_mutex,
> +				   i915_gem_object_is_dead(obj));
>   	GEM_BUG_ON(!obj->mm.pages);
>   	obj->mm.pages_pin_count++;
>   }
> @@ -3217,7 +3231,8 @@ i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
>   static inline void
>   __i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>   {
> -	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +	lockdep_assert_held_unless(&obj->base.dev->struct_mutex,
> +				   i915_gem_object_is_dead(obj));
>   	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
>   	GEM_BUG_ON(!obj->mm.pages);
>   	obj->mm.pages_pin_count--;
> @@ -3228,7 +3243,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>   	__i915_gem_object_unpin_pages(obj);
>   }
>   
> -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
> +void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
> +void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
>   
>   enum i915_map_type {
>   	I915_MAP_WB = 0,
> @@ -3451,8 +3467,10 @@ i915_vma_unpin_fence(struct i915_vma *vma)
>   void i915_gem_restore_fences(struct drm_device *dev);
>   
>   void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
> -void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj);
> -void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
> +void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
> +				       struct sg_table *pages);
> +void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
> +					 struct sg_table *pages);
>   
>   /* i915_gem_context.c */
>   int __must_check i915_gem_context_init(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index df774ddf62ae..620fe8ac6477 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -169,7 +169,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>   	return 0;
>   }
>   
> -static int
> +static struct sg_table *
>   i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>   {
>   	struct address_space *mapping = obj->base.filp->f_mapping;
> @@ -179,7 +179,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>   	int i;
>   
>   	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
> -		return -EINVAL;
> +		return ERR_PTR(-EINVAL);
>   
>   	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
>   		struct page *page;
> @@ -187,7 +187,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>   
>   		page = shmem_read_mapping_page(mapping, i);
>   		if (IS_ERR(page))
> -			return PTR_ERR(page);
> +			return ERR_CAST(page);
>   
>   		src = kmap_atomic(page);
>   		memcpy(vaddr, src, PAGE_SIZE);
> @@ -202,11 +202,11 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>   
>   	st = kmalloc(sizeof(*st), GFP_KERNEL);
>   	if (st == NULL)
> -		return -ENOMEM;
> +		return ERR_PTR(-ENOMEM);
>   
>   	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
>   		kfree(st);
> -		return -ENOMEM;
> +		return ERR_PTR(-ENOMEM);
>   	}
>   
>   	sg = st->sgl;
> @@ -216,28 +216,30 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>   	sg_dma_address(sg) = obj->phys_handle->busaddr;
>   	sg_dma_len(sg) = obj->base.size;
>   
> -	obj->mm.pages = st;
> -	return 0;
> +	return st;
>   }
>   
>   static void
> -i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
> +__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj)
>   {
> -	int ret;
> -
>   	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
>   
> -	ret = i915_gem_object_set_to_cpu_domain(obj, true);
> -	if (WARN_ON(ret)) {
> -		/* In the event of a disaster, abandon all caches and
> -		 * hope for the best.
> -		 */
> -		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> -	}
> -
>   	if (obj->mm.madv == I915_MADV_DONTNEED)
>   		obj->mm.dirty = false;
>   
> +	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
> +		i915_gem_clflush_object(obj, false);
> +
> +	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> +	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +}
> +
> +static void
> +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
> +			       struct sg_table *pages)
> +{
> +	__i915_gem_object_release_shmem(obj);
> +
>   	if (obj->mm.dirty) {
>   		struct address_space *mapping = obj->base.filp->f_mapping;
>   		char *vaddr = obj->phys_handle->vaddr;
> @@ -265,8 +267,8 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
>   		obj->mm.dirty = false;
>   	}
>   
> -	sg_free_table(obj->mm.pages);
> -	kfree(obj->mm.pages);
> +	sg_free_table(pages);
> +	kfree(pages);
>   }
>   
>   static void
> @@ -517,9 +519,9 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>   	if (ret)
>   		return ret;
>   
> -	ret = __i915_gem_object_put_pages(obj);
> -	if (ret)
> -		return ret;
> +	__i915_gem_object_put_pages(obj);
> +	if (obj->mm.pages)
> +		return -EBUSY;
>   
>   	/* create a new object */
>   	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
> @@ -535,7 +537,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>   static int
>   i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>   		     struct drm_i915_gem_pwrite *args,
> -		     struct drm_file *file_priv)
> +		     struct drm_file *file)
>   {
>   	struct drm_device *dev = obj->base.dev;
>   	void *vaddr = obj->phys_handle->vaddr + args->offset;
> @@ -551,7 +553,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>   				   I915_WAIT_LOCKED |
>   				   I915_WAIT_ALL,
>   				   MAX_SCHEDULE_TIMEOUT,
> -				   to_rps_client(file_priv));
> +				   to_rps_client(file));
>   	if (ret)
>   		return ret;
>   
> @@ -2225,8 +2227,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
>   }
>   
>   /* Try to discard unwanted pages */
> -static void
> -i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
> +void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
>   {
>   	struct address_space *mapping;
>   
> @@ -2245,32 +2246,20 @@ i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
>   }
>   
>   static void
> -i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
> +i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
> +			      struct sg_table *pages)
>   {
>   	struct sgt_iter sgt_iter;
>   	struct page *page;
> -	int ret;
>   
> -	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
> -
> -	ret = i915_gem_object_set_to_cpu_domain(obj, true);
> -	if (WARN_ON(ret)) {
> -		/* In the event of a disaster, abandon all caches and
> -		 * hope for the best.
> -		 */
> -		i915_gem_clflush_object(obj, true);
> -		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> -	}
> +	__i915_gem_object_release_shmem(obj);
>   
> -	i915_gem_gtt_finish_object(obj);
> +	i915_gem_gtt_finish_pages(obj, pages);
>   
>   	if (i915_gem_object_needs_bit17_swizzle(obj))
> -		i915_gem_object_save_bit_17_swizzle(obj);
> -
> -	if (obj->mm.madv == I915_MADV_DONTNEED)
> -		obj->mm.dirty = false;
> +		i915_gem_object_save_bit_17_swizzle(obj, pages);
>   
> -	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
> +	for_each_sgt_page(page, sgt_iter, pages) {
>   		if (obj->mm.dirty)
>   			set_page_dirty(page);
>   
> @@ -2281,8 +2270,8 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
>   	}
>   	obj->mm.dirty = false;
>   
> -	sg_free_table(obj->mm.pages);
> -	kfree(obj->mm.pages);
> +	sg_free_table(pages);
> +	kfree(pages);
>   }
>   
>   static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
> @@ -2294,24 +2283,22 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
>   		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
>   }
>   
> -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
> +void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>   {
> -	const struct drm_i915_gem_object_ops *ops = obj->ops;
> +	struct sg_table *pages;
>   
>   	lockdep_assert_held(&obj->base.dev->struct_mutex);
>   
> -	if (!obj->mm.pages)
> -		return 0;
> -
>   	if (i915_gem_object_has_pinned_pages(obj))
> -		return -EBUSY;
> +		return;
>   
>   	GEM_BUG_ON(obj->bind_count);
>   
>   	/* ->put_pages might need to allocate memory for the bit17 swizzle
>   	 * array, hence protect them from being reaped by removing them from gtt
>   	 * lists early. */
> -	list_del(&obj->global_list);
> +	pages = fetch_and_zero(&obj->mm.pages);
> +	GEM_BUG_ON(!pages);
>   
>   	if (obj->mm.mapping) {
>   		void *ptr;
> @@ -2327,15 +2314,10 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
>   
>   	__i915_gem_object_reset_page_iter(obj);
>   
> -	ops->put_pages(obj);
> -	obj->mm.pages = NULL;
> -
> -	i915_gem_object_invalidate(obj);
> -
> -	return 0;
> +	obj->ops->put_pages(obj, pages);
>   }
>   
> -static int
> +static struct sg_table *
>   i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> @@ -2353,17 +2335,17 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>   	 * wasn't in the GTT, there shouldn't be any way it could have been in
>   	 * a GPU cache
>   	 */
> -	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
> -	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
> +	GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
> +	GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
>   
>   	st = kmalloc(sizeof(*st), GFP_KERNEL);
>   	if (st == NULL)
> -		return -ENOMEM;
> +		return ERR_PTR(-ENOMEM);
>   
>   	page_count = obj->base.size / PAGE_SIZE;
>   	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
>   		kfree(st);
> -		return -ENOMEM;
> +		return ERR_PTR(-ENOMEM);
>   	}
>   
>   	/* Get the list of pages out of our struct file.  They'll be pinned
> @@ -2423,20 +2405,19 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>   	if (!swiotlb_nr_tbl())
>   #endif
>   		sg_mark_end(sg);
> -	obj->mm.pages = st;
>   
> -	ret = i915_gem_gtt_prepare_object(obj);
> +	ret = i915_gem_gtt_prepare_pages(obj, st);
>   	if (ret)
>   		goto err_pages;
>   
>   	if (i915_gem_object_needs_bit17_swizzle(obj))
> -		i915_gem_object_do_bit_17_swizzle(obj);
> +		i915_gem_object_do_bit_17_swizzle(obj, st);
>   
>   	if (i915_gem_object_is_tiled(obj) &&
>   	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
>   		__i915_gem_object_pin_pages(obj);
>   
> -	return 0;
> +	return st;
>   
>   err_pages:
>   	sg_mark_end(sg);
> @@ -2456,7 +2437,35 @@ err_pages:
>   	if (ret == -ENOSPC)
>   		ret = -ENOMEM;
>   
> -	return ret;
> +	return ERR_PTR(ret);
> +}
> +
> +void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
> +				 struct sg_table *pages)
> +{
> +	lockdep_assert_held(&obj->base.dev->struct_mutex);
> +
> +	obj->mm.get_page.sg_pos = pages->sgl;
> +	obj->mm.get_page.sg_idx = 0;
> +
> +	obj->mm.pages = pages;
> +}
> +
> +static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct sg_table *pages;
> +
> +	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
> +		DRM_DEBUG("Attempting to obtain a purgeable object\n");
> +		return -EFAULT;
> +	}
> +
> +	pages = obj->ops->get_pages(obj);
> +	if (unlikely(IS_ERR(pages)))
> +		return PTR_ERR(pages);
> +
> +	__i915_gem_object_set_pages(obj, pages);
> +	return 0;
>   }
>   
>   /* Ensure that the associated pages are gathered from the backing storage
> @@ -2468,33 +2477,18 @@ err_pages:
>    */
>   int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>   {
> -	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> -	const struct drm_i915_gem_object_ops *ops = obj->ops;
> -	int ret;
> +	int err;
>   
>   	lockdep_assert_held(&obj->base.dev->struct_mutex);
>   
>   	if (obj->mm.pages)
>   		return 0;
>   
> -	if (obj->mm.madv != I915_MADV_WILLNEED) {
> -		DRM_DEBUG("Attempting to obtain a purgeable object\n");
> -		__i915_gem_object_unpin_pages(obj);
> -		return -EFAULT;
> -	}
> -
> -	ret = ops->get_pages(obj);
> -	if (ret) {
> +	err = ____i915_gem_object_get_pages(obj);
> +	if (err)
>   		__i915_gem_object_unpin_pages(obj);
> -		return ret;
> -	}
>   
> -	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
> -
> -	obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
> -	obj->mm.get_page.sg_idx = 0;
> -
> -	return 0;
> +	return err;
>   }
>   
>   /* The 'mapping' part of i915_gem_object_pin_map() below */
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 10441dc72e73..2abd524aba14 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -289,22 +289,18 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
>   	return dma_buf;
>   }
>   
> -static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
> +static struct sg_table *
> +i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
>   {
> -	struct sg_table *sg;
> -
> -	sg = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL);
> -	if (IS_ERR(sg))
> -		return PTR_ERR(sg);
> -
> -	obj->mm.pages = sg;
> -	return 0;
> +	return dma_buf_map_attachment(obj->base.import_attach,
> +				      DMA_BIDIRECTIONAL);
>   }
>   
> -static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj)
> +static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
> +					     struct sg_table *pages)
>   {
> -	dma_buf_unmap_attachment(obj->base.import_attach,
> -				 obj->mm.pages, DMA_BIDIRECTIONAL);
> +	dma_buf_unmap_attachment(obj->base.import_attach, pages,
> +				 DMA_BIDIRECTIONAL);
>   }
>   
>   static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
> index 398856160656..834d649496f3 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence.c
> @@ -628,6 +628,7 @@ i915_gem_swizzle_page(struct page *page)
>   /**
>    * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling
>    * @obj: i915 GEM buffer object
> + * @pages: the scattergather list of physical pages
>    *
>    * This function fixes up the swizzling in case any page frame number for this
>    * object has changed in bit 17 since that state has been saved with
> @@ -638,7 +639,8 @@ i915_gem_swizzle_page(struct page *page)
>    * by swapping them out and back in again).
>    */
>   void
> -i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
> +i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
> +				  struct sg_table *pages)
>   {
>   	struct sgt_iter sgt_iter;
>   	struct page *page;
> @@ -648,10 +650,9 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
>   		return;
>   
>   	i = 0;
> -	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
> +	for_each_sgt_page(page, sgt_iter, pages) {
>   		char new_bit_17 = page_to_phys(page) >> 17;
> -		if ((new_bit_17 & 0x1) !=
> -		    (test_bit(i, obj->bit_17) != 0)) {
> +		if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) {
>   			i915_gem_swizzle_page(page);
>   			set_page_dirty(page);
>   		}
> @@ -662,21 +663,22 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
>   /**
>    * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling
>    * @obj: i915 GEM buffer object
> + * @pages: the scattergather list of physical pages
>    *
>    * This function saves the bit 17 of each page frame number so that swizzling
>    * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must
>    * be called before the backing storage can be unpinned.
>    */
>   void
> -i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
> +i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
> +				    struct sg_table *pages)
>   {
>   	struct sgt_iter sgt_iter;
>   	struct page *page;
> -	int page_count = obj->base.size >> PAGE_SHIFT;
>   	int i;
>   
>   	if (obj->bit_17 == NULL) {
> -		obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
> +		obj->bit_17 = kcalloc(BITS_TO_LONGS(obj->base.size >> PAGE_SHIFT),
>   				      sizeof(long), GFP_KERNEL);
>   		if (obj->bit_17 == NULL) {
>   			DRM_ERROR("Failed to allocate memory for bit 17 "
> @@ -687,7 +689,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
>   
>   	i = 0;
>   
> -	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
> +	for_each_sgt_page(page, sgt_iter, pages) {
>   		if (page_to_phys(page) & (1 << 17))
>   			__set_bit(i, obj->bit_17);
>   		else
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 3c711e0b8a3f..578026584f42 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2292,14 +2292,15 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
>   	i915_ggtt_flush(dev_priv);
>   }
>   
> -int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
> +int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
> +			       struct sg_table *pages)
>   {
> -	if (!dma_map_sg(&obj->base.dev->pdev->dev,
> -			obj->mm.pages->sgl, obj->mm.pages->nents,
> -			PCI_DMA_BIDIRECTIONAL))
> -		return -ENOSPC;
> +	if (dma_map_sg(&obj->base.dev->pdev->dev,
> +		       pages->sgl, pages->nents,
> +		       PCI_DMA_BIDIRECTIONAL))
> +		return 0;
>   
> -	return 0;
> +	return -ENOSPC;
>   }
>   
>   static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
> @@ -2671,7 +2672,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
>   					 true);
>   }
>   
> -void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
> +void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
> +			       struct sg_table *pages)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
>   	struct device *kdev = &dev_priv->drm.pdev->dev;
> @@ -2685,8 +2687,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
>   		}
>   	}
>   
> -	dma_unmap_sg(kdev, obj->mm.pages->sgl, obj->mm.pages->nents,
> -		     PCI_DMA_BIDIRECTIONAL);
> +	dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
>   }
>   
>   static void i915_gtt_color_adjust(struct drm_mm_node *node,
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index bd93fb8f99d2..c34945f21323 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -629,8 +629,10 @@ void i915_check_and_clear_faults(struct drm_i915_private *dev_priv);
>   void i915_gem_suspend_gtt_mappings(struct drm_device *dev);
>   void i915_gem_restore_gtt_mappings(struct drm_device *dev);
>   
> -int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
> -void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
> +int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
> +					    struct sg_table *pages);
> +void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
> +			       struct sg_table *pages);
>   
>   /* Flags used by pin/bind&friends. */
>   #define PIN_NONBLOCK		BIT(0)
> diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
> index 24eb347f9e0a..b0c56b311248 100644
> --- a/drivers/gpu/drm/i915/i915_gem_internal.c
> +++ b/drivers/gpu/drm/i915/i915_gem_internal.c
> @@ -38,7 +38,8 @@ static void internal_free_pages(struct sg_table *st)
>   	kfree(st);
>   }
>   
> -static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
> +static struct sg_table *
> +i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
>   {
>   	const unsigned int npages = obj->base.size / PAGE_SIZE;
>   	struct sg_table *st;
> @@ -49,11 +50,11 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
>   
>   	st = kmalloc(sizeof(*st), GFP_KERNEL);
>   	if (!st)
> -		return -ENOMEM;
> +		return ERR_PTR(-ENOMEM);
>   
>   	if (sg_alloc_table(st, npages, GFP_KERNEL)) {
>   		kfree(st);
> -		return -ENOMEM;
> +		return ERR_PTR(-ENOMEM);
>   	}
>   
>   	sg = st->sgl;
> @@ -95,12 +96,9 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
>   	if (!swiotlb_nr_tbl())
>   #endif
>   		sg_mark_end(sg);
> -	obj->mm.pages = st;
>   
> -	if (i915_gem_gtt_prepare_object(obj)) {
> -		obj->mm.pages = NULL;
> +	if (i915_gem_gtt_prepare_pages(obj, st))
>   		goto err;
> -	}
>   
>   	/* Mark the pages as dontneed whilst they are still pinned. As soon
>   	 * as they are unpinned they are allowed to be reaped by the shrinker,
> @@ -108,17 +106,19 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
>   	 * object are only valid whilst active and pinned.
>   	 */
>   	obj->mm.madv = I915_MADV_DONTNEED;
> -	return 0;
> +	return st;
>   
>   err:
>   	sg_mark_end(sg);
>   	internal_free_pages(st);
> -	return -ENOMEM;
> +	return ERR_PTR(-ENOMEM);
>   }
>   
> -static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj)
> +static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
> +					       struct sg_table *pages)
>   {
> -	internal_free_pages(obj->mm.pages);
> +	i915_gem_gtt_finish_pages(obj, pages);
> +	internal_free_pages(pages);
>   
>   	obj->mm.dirty = false;
>   	obj->mm.madv = I915_MADV_WILLNEED;
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index e0e6d68fe470..a8c3d37b95b9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -91,6 +91,13 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
>   	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
>   }
>   
> +static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
> +{
> +	if (i915_gem_object_unbind(obj) == 0)
> +		__i915_gem_object_put_pages(obj);
> +	return !obj->mm.pages;
> +}
> +
>   /**
>    * i915_gem_shrink - Shrink buffer object caches
>    * @dev_priv: i915 device
> @@ -191,9 +198,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>   
>   			i915_gem_object_get(obj);
>   
> -			/* For the unbound phase, this should be a no-op! */
> -			i915_gem_object_unbind(obj);
> -			if (__i915_gem_object_put_pages(obj) == 0)
> +			if (unsafe_drop_pages(obj))
>   				count += obj->base.size >> PAGE_SHIFT;
>   
>   			i915_gem_object_put(obj);
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 6d139d6b4609..4166eafa25dd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -545,17 +545,20 @@ i915_pages_create_for_stolen(struct drm_device *dev,
>   	return st;
>   }
>   
> -static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
> +static struct sg_table *
> +i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
>   {
> -	BUG();
> -	return -EINVAL;
> +	return i915_pages_create_for_stolen(obj->base.dev,
> +					    obj->stolen->start,
> +					    obj->stolen->size);
>   }
>   
> -static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj)
> +static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
> +					     struct sg_table *pages)
>   {
>   	/* Should only be called during free */
> -	sg_free_table(obj->mm.pages);
> -	kfree(obj->mm.pages);
> +	sg_free_table(pages);
> +	kfree(pages);
>   }
>   
>   static void
> @@ -590,21 +593,13 @@ _i915_gem_object_create_stolen(struct drm_device *dev,
>   	drm_gem_private_object_init(dev, &obj->base, stolen->size);
>   	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
>   
> -	obj->mm.pages = i915_pages_create_for_stolen(dev,
> -						     stolen->start,
> -						     stolen->size);
> -	if (!obj->mm.pages)
> -		goto cleanup;
> -
> -	obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
> -	obj->mm.get_page.sg_idx = 0;
> -
> -	__i915_gem_object_pin_pages(obj);
>   	obj->stolen = stolen;
> -
>   	obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
>   	obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE;
>   
> +	if (i915_gem_object_pin_pages(obj))
> +		goto cleanup;
> +
>   	return obj;
>   
>   cleanup:
> @@ -699,10 +694,14 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
>   	if (gtt_offset == I915_GTT_OFFSET_NONE)
>   		return obj;
>   
> +	ret = i915_gem_object_pin_pages(obj);
> +	if (ret)
> +		goto err;
> +
>   	vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL);
>   	if (IS_ERR(vma)) {
>   		ret = PTR_ERR(vma);
> -		goto err;
> +		goto err_pages;
>   	}
>   
>   	/* To simplify the initialisation sequence between KMS and GTT,
> @@ -716,20 +715,20 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev,
>   	ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node);
>   	if (ret) {
>   		DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
> -		goto err;
> +		goto err_pages;
>   	}
>   
>   	vma->pages = obj->mm.pages;
>   	vma->flags |= I915_VMA_GLOBAL_BIND;
>   	__i915_vma_set_map_and_fenceable(vma);
>   	list_move_tail(&vma->vm_link, &ggtt->base.inactive_list);
> +	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
>   	obj->bind_count++;
>   
> -	list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
> -	__i915_gem_object_pin_pages(obj);
> -
>   	return obj;
>   
> +err_pages:
> +	i915_gem_object_unpin_pages(obj);
>   err:
>   	i915_gem_object_put(obj);
>   	return NULL;
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 0fdd1d6723d1..e97c7b589439 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -73,11 +73,14 @@ static void cancel_userptr(struct work_struct *work)
>   	/* Cancel any active worker and force us to re-evaluate gup */
>   	obj->userptr.work = NULL;
>   
> -	if (obj->mm.pages) {
> -		/* We are inside a kthread context and can't be interrupted */
> -		WARN_ON(i915_gem_object_unbind(obj));
> -		WARN_ON(__i915_gem_object_put_pages(obj));
> -	}
> +	/* We are inside a kthread context and can't be interrupted */
> +	if (i915_gem_object_unbind(obj) == 0)
> +		__i915_gem_object_put_pages(obj);
> +	WARN_ONCE(obj->mm.pages,
> +		  "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_display=%d\n",
> +		  obj->bind_count,
> +		  obj->mm.pages_pin_count,
> +		  obj->pin_display);
>   
>   	i915_gem_object_put(obj);
>   	mutex_unlock(&dev->struct_mutex);
> @@ -426,24 +429,25 @@ err:
>   	return ret;
>   }
>   
> -static int
> +static struct sg_table *
>   __i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj,
>   			     struct page **pvec, int num_pages)
>   {
> +	struct sg_table *pages;
>   	int ret;
>   
> -	ret = st_set_pages(&obj->mm.pages, pvec, num_pages);
> +	ret = st_set_pages(&pages, pvec, num_pages);
>   	if (ret)
> -		return ret;
> +		return ERR_PTR(ret);
>   
> -	ret = i915_gem_gtt_prepare_object(obj);
> +	ret = i915_gem_gtt_prepare_pages(obj, pages);
>   	if (ret) {
> -		sg_free_table(obj->mm.pages);
> -		kfree(obj->mm.pages);
> -		obj->mm.pages = NULL;
> +		sg_free_table(pages);
> +		kfree(pages);
> +		return ERR_PTR(ret);
>   	}
>   
> -	return ret;
> +	return pages;
>   }
>   
>   static int
> @@ -521,20 +525,20 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>   
>   	mutex_lock(&dev->struct_mutex);
>   	if (obj->userptr.work == &work->work) {
> +		struct sg_table *pages = ERR_PTR(ret);
> +
>   		if (pinned == npages) {
> -			ret = __i915_gem_userptr_set_pages(obj, pvec, npages);
> -			if (ret == 0) {
> -				list_add_tail(&obj->global_list,
> -					      &to_i915(dev)->mm.unbound_list);
> -				obj->mm.get_page.sg_pos = obj->mm.pages->sgl;
> -				obj->mm.get_page.sg_idx = 0;
> +			pages = __i915_gem_userptr_set_pages(obj, pvec, npages);
> +			if (!IS_ERR(pages)) {
> +				__i915_gem_object_set_pages(obj, pages);
>   				pinned = 0;
> +				pages = NULL;
>   			}
>   		}
> -		obj->userptr.work = ERR_PTR(ret);
> +
> +		obj->userptr.work = ERR_CAST(pages);
>   	}
>   
> -	obj->userptr.workers--;
>   	i915_gem_object_put(obj);
>   	mutex_unlock(&dev->struct_mutex);
>   
> @@ -545,7 +549,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>   	kfree(work);
>   }
>   
> -static int
> +static struct sg_table *
>   __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
>   				      bool *active)
>   {
> @@ -570,15 +574,11 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
>   	 * that error back to this function through
>   	 * obj->userptr.work = ERR_PTR.
>   	 */
> -	if (obj->userptr.workers >= I915_GEM_USERPTR_MAX_WORKERS)
> -		return -EAGAIN;
> -
>   	work = kmalloc(sizeof(*work), GFP_KERNEL);
>   	if (work == NULL)
> -		return -ENOMEM;
> +		return ERR_PTR(-ENOMEM);
>   
>   	obj->userptr.work = &work->work;
> -	obj->userptr.workers++;
>   
>   	work->obj = i915_gem_object_get(obj);
>   
> @@ -589,14 +589,15 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj,
>   	schedule_work(&work->work);
>   
>   	*active = true;
> -	return -EAGAIN;
> +	return ERR_PTR(-EAGAIN);
>   }
>   
> -static int
> +static struct sg_table *
>   i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>   {
>   	const int num_pages = obj->base.size >> PAGE_SHIFT;
>   	struct page **pvec;
> +	struct sg_table *pages;
>   	int pinned, ret;
>   	bool active;
>   
> @@ -620,15 +621,15 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>   	if (obj->userptr.work) {
>   		/* active flag should still be held for the pending work */
>   		if (IS_ERR(obj->userptr.work))
> -			return PTR_ERR(obj->userptr.work);
> +			return ERR_CAST(obj->userptr.work);
>   		else
> -			return -EAGAIN;
> +			return ERR_PTR(-EAGAIN);
>   	}
>   
>   	/* Let the mmu-notifier know that we have begun and need cancellation */
>   	ret = __i915_gem_userptr_set_active(obj, true);
>   	if (ret)
> -		return ret;
> +		return ERR_PTR(ret);
>   
>   	pvec = NULL;
>   	pinned = 0;
> @@ -637,7 +638,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>   				      GFP_TEMPORARY);
>   		if (pvec == NULL) {
>   			__i915_gem_userptr_set_active(obj, false);
> -			return -ENOMEM;
> +			return ERR_PTR(-ENOMEM);
>   		}
>   
>   		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
> @@ -646,21 +647,22 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>   
>   	active = false;
>   	if (pinned < 0)
> -		ret = pinned, pinned = 0;
> +		pages = ERR_PTR(pinned), pinned = 0;
>   	else if (pinned < num_pages)
> -		ret = __i915_gem_userptr_get_pages_schedule(obj, &active);
> +		pages = __i915_gem_userptr_get_pages_schedule(obj, &active);
>   	else
> -		ret = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
> -	if (ret) {
> +		pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
> +	if (IS_ERR(pages)) {
>   		__i915_gem_userptr_set_active(obj, active);
>   		release_pages(pvec, pinned, 0);
>   	}
>   	drm_free_large(pvec);
> -	return ret;
> +	return pages;
>   }
>   
>   static void
> -i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
> +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
> +			   struct sg_table *pages)
>   {
>   	struct sgt_iter sgt_iter;
>   	struct page *page;
> @@ -671,9 +673,9 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
>   	if (obj->mm.madv != I915_MADV_WILLNEED)
>   		obj->mm.dirty = false;
>   
> -	i915_gem_gtt_finish_object(obj);
> +	i915_gem_gtt_finish_pages(obj, pages);
>   
> -	for_each_sgt_page(page, sgt_iter, obj->mm.pages) {
> +	for_each_sgt_page(page, sgt_iter, pages) {
>   		if (obj->mm.dirty)
>   			set_page_dirty(page);
>   
> @@ -682,8 +684,8 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
>   	}
>   	obj->mm.dirty = false;
>   
> -	sg_free_table(obj->mm.pages);
> -	kfree(obj->mm.pages);
> +	sg_free_table(pages);
> +	kfree(pages);
>   }
>   
>   static void



* Re: [PATCH 17/42] drm/i915: Pass around sg_table to get_pages/put_pages backend
  2016-10-14  9:28   ` Tvrtko Ursulin
@ 2016-10-14  9:43     ` Chris Wilson
  2016-10-17 10:52       ` Tvrtko Ursulin
  0 siblings, 1 reply; 107+ messages in thread
From: Chris Wilson @ 2016-10-14  9:43 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Oct 14, 2016 at 10:28:42AM +0100, Tvrtko Ursulin wrote:
>  diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>  index 3c22d49005fe..271e63c8f037 100644
>  --- a/drivers/gpu/drm/i915/i915_drv.h
>  +++ b/drivers/gpu/drm/i915/i915_drv.h
>  @@ -2175,8 +2175,8 @@ struct drm_i915_gem_object_ops {
>           * being released or under memory pressure (where we attempt to
>           * reap pages for the shrinker).
>           */
>  -       int (*get_pages)(struct drm_i915_gem_object *);
>  -       void (*put_pages)(struct drm_i915_gem_object *);
>  +       struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
>  +       void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
> 
>    Idea is that put_pages vfunc does not need struct mutex?  Or it acquires
>    it on demand inside it, which means struct mutex will nest inside the
>    mm.lock?

Right, it loses the mutex within get/put pages and I am no longer
concerned about the number of workers. Though that is mainly because of
another patch to improve execbuf + userptr workloads.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
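To make the interface change above concrete, here is a minimal, self-contained
user-space sketch of the same shape: get_pages builds and returns the sg_table
(or an ERR_PTR-encoded error) without any object lock held, and the caller only
takes its own lock to install the result. Every name below (demo_object,
demo_ops, demo_pin_pages, mm_lock) is invented for illustration; only the two
vfunc shapes mirror the quoted patch.

/*
 * Hypothetical sketch of the reworked get_pages/put_pages interface.
 * ERR_PTR()/IS_ERR()/PTR_ERR() are re-implemented here only so the
 * example compiles outside the kernel.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-4095;
}

struct demo_sg_table { unsigned int nents; };
struct demo_object;

struct demo_ops {
        /* Builds and returns the table (or ERR_PTR); no object lock held. */
        struct demo_sg_table *(*get_pages)(struct demo_object *obj);
        /* Releases exactly the table it handed out earlier. */
        void (*put_pages)(struct demo_object *obj, struct demo_sg_table *pages);
};

struct demo_object {
        const struct demo_ops *ops;
        pthread_mutex_t mm_lock;        /* guards only obj->pages below */
        struct demo_sg_table *pages;
};

static struct demo_sg_table *demo_get_pages(struct demo_object *obj)
{
        struct demo_sg_table *st = calloc(1, sizeof(*st));

        (void)obj;
        if (!st)
                return ERR_PTR(-ENOMEM);
        st->nents = 1;                  /* pretend we gathered one segment */
        return st;
}

static void demo_put_pages(struct demo_object *obj, struct demo_sg_table *pages)
{
        (void)obj;
        free(pages);
}

static const struct demo_ops demo_ops = {
        .get_pages = demo_get_pages,
        .put_pages = demo_put_pages,
};

/* Caller: do the slow allocation unlocked, lock only to install the result. */
static int demo_pin_pages(struct demo_object *obj)
{
        struct demo_sg_table *pages = obj->ops->get_pages(obj);

        if (IS_ERR(pages))
                return (int)PTR_ERR(pages);

        pthread_mutex_lock(&obj->mm_lock);
        obj->pages = pages;
        pthread_mutex_unlock(&obj->mm_lock);
        return 0;
}

int main(void)
{
        struct demo_object obj = {
                .ops = &demo_ops,
                .mm_lock = PTHREAD_MUTEX_INITIALIZER,
        };

        if (demo_pin_pages(&obj))
                return 1;
        printf("installed sg_table with %u segment(s)\n", obj.pages->nents);
        obj.ops->put_pages(&obj, obj.pages);
        return 0;
}

The practical upshot is the one described in the reply: the backend never has
to take (or nest under) the caller's lock, because the only locked step is
storing the already-built table.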

* Re: [PATCH 17/42] drm/i915: Pass around sg_table to get_pages/put_pages backend
  2016-10-14  9:43     ` Chris Wilson
@ 2016-10-17 10:52       ` Tvrtko Ursulin
  2016-10-17 11:08         ` Chris Wilson
  0 siblings, 1 reply; 107+ messages in thread
From: Tvrtko Ursulin @ 2016-10-17 10:52 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx


On 14/10/2016 10:43, Chris Wilson wrote:
> On Fri, Oct 14, 2016 at 10:28:42AM +0100, Tvrtko Ursulin wrote:
>>   diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>   index 3c22d49005fe..271e63c8f037 100644
>>   --- a/drivers/gpu/drm/i915/i915_drv.h
>>   +++ b/drivers/gpu/drm/i915/i915_drv.h
>>   @@ -2175,8 +2175,8 @@ struct drm_i915_gem_object_ops {
>>            * being released or under memory pressure (where we attempt to
>>            * reap pages for the shrinker).
>>            */
>>   -       int (*get_pages)(struct drm_i915_gem_object *);
>>   -       void (*put_pages)(struct drm_i915_gem_object *);
>>   +       struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
>>   +       void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
>>
>>     Idea is that put_pages vfunc does not need struct mutex?  Or it acquires
>>     it on demand inside it, which means struct mutex will nest inside the
>>     mm.lock?
> Right, it loses the mutex within get/put pages and I am no longer
> concerned about the number of workers. Though that is mainly because of
> another patch to improve execbuf + userptr workloads.

Is that other patch before or after this one? Or in a different series 
altogether? In any case, why should this change be in this patch, 
especially when it is not commented anywhere?

Regards,

Tvrtko


* Re: [PATCH 17/42] drm/i915: Pass around sg_table to get_pages/put_pages backend
  2016-10-17 10:52       ` Tvrtko Ursulin
@ 2016-10-17 11:08         ` Chris Wilson
  0 siblings, 0 replies; 107+ messages in thread
From: Chris Wilson @ 2016-10-17 11:08 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Mon, Oct 17, 2016 at 11:52:27AM +0100, Tvrtko Ursulin wrote:
> 
> On 14/10/2016 10:43, Chris Wilson wrote:
> >On Fri, Oct 14, 2016 at 10:28:42AM +0100, Tvrtko Ursulin wrote:
> >>  diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >>  index 3c22d49005fe..271e63c8f037 100644
> >>  --- a/drivers/gpu/drm/i915/i915_drv.h
> >>  +++ b/drivers/gpu/drm/i915/i915_drv.h
> >>  @@ -2175,8 +2175,8 @@ struct drm_i915_gem_object_ops {
> >>           * being released or under memory pressure (where we attempt to
> >>           * reap pages for the shrinker).
> >>           */
> >>  -       int (*get_pages)(struct drm_i915_gem_object *);
> >>  -       void (*put_pages)(struct drm_i915_gem_object *);
> >>  +       struct sg_table *(*get_pages)(struct drm_i915_gem_object *);
> >>  +       void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
> >>
> >>    Idea is that put_pages vfunc does not need struct mutex?  Or it acquires
> >>    it on demand inside it, which means struct mutex will nest inside the
> >>    mm.lock?
> >Right, it loses the mutex within get/put pages and I am no longer
> >concerned about the number of workers. Though that is mainly because of
> >another patch to improve execbuf + userptr workloads.
> 
> Is that other patch before or after this one? Or in a different
> series altogether? In any case, why should this change be in this
> patch, especially when it is not commented anywhere?

Actually, it follows from 

commit 364c8172edb5ff0b4650bbd4c45eaab46b84b008
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu Aug 18 17:16:58 2016 +0100

    drm/i915/userptr: Make gup errors stickier

which was the prelude to the patch I was thinking of. The current code
already doesn't use more than one worker simultaneously.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
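The "one worker at a time" behaviour referred to above can also be sketched
outside the driver: a single pointer is NULL while idle, points at the pending
work while a worker is outstanding, and afterwards keeps a sticky
ERR_PTR-encoded error, which is why a separate workers counter can be dropped.
All names below are made up for illustration; only the NULL / pending /
ERR_PTR states mirror the quoted hunks.

/*
 * Hypothetical sketch of single-outstanding-worker tracking with a
 * sticky error.  ERR_PTR()/IS_ERR()/PTR_ERR() are re-implemented so the
 * example compiles outside the kernel.
 */
#include <errno.h>
#include <stdio.h>

static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-4095;
}

struct demo_work { int queued; };

struct demo_userptr {
        /* NULL: idle; valid pointer: worker pending; ERR_PTR: sticky error. */
        struct demo_work *work;
};

static int demo_get_pages(struct demo_userptr *u, struct demo_work *w)
{
        if (u->work) {
                if (IS_ERR(u->work))            /* a previous worker failed */
                        return (int)PTR_ERR(u->work);
                return -EAGAIN;                 /* worker still outstanding */
        }

        u->work = w;            /* at most one worker can ever be queued */
        w->queued = 1;
        return -EAGAIN;         /* caller retries once the worker completes */
}

static void demo_worker_done(struct demo_userptr *u, int err)
{
        /* Success clears the slot; failure leaves a sticky error behind. */
        u->work = err ? ERR_PTR(err) : NULL;
}

int main(void)
{
        struct demo_userptr u = { NULL };
        struct demo_work w = { 0 };

        printf("first call:  %d\n", demo_get_pages(&u, &w)); /* -EAGAIN, queues */
        printf("second call: %d\n", demo_get_pages(&u, &w)); /* -EAGAIN, pending */
        demo_worker_done(&u, -EFAULT);
        printf("third call:  %d\n", demo_get_pages(&u, &w)); /* sticky -EFAULT */
        return 0;
}

With the error made sticky in the work pointer itself, callers see either
-EAGAIN (worker pending) or the stored error, so nothing else needs to count
how many workers exist.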

end of thread

Thread overview: 107+ messages
2016-10-07  9:45 Explicit fencing on multiple timelines, again Chris Wilson
2016-10-07  9:45 ` [PATCH 01/42] drm/i915: Allow disabling error capture Chris Wilson
2016-10-07  9:45 ` [PATCH 02/42] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
2016-10-07 10:11   ` Joonas Lahtinen
2016-10-07  9:45 ` [PATCH 03/42] drm/i915: Always use the GTT for error capture Chris Wilson
2016-10-07  9:45 ` [PATCH 04/42] drm/i915: Consolidate error object printing Chris Wilson
2016-10-07  9:45 ` [PATCH 05/42] drm/i915: Compress GPU objects in error state Chris Wilson
2016-10-07  9:45 ` [PATCH 06/42] drm/i915: Support asynchronous waits on struct fence from i915_gem_request Chris Wilson
2016-10-07  9:56   ` Joonas Lahtinen
2016-10-07 15:51   ` Tvrtko Ursulin
2016-10-07 16:12     ` Chris Wilson
2016-10-07 16:16       ` Tvrtko Ursulin
2016-10-07 16:37         ` Chris Wilson
2016-10-08  8:23           ` Tvrtko Ursulin
2016-10-08  8:58             ` Chris Wilson
2016-10-07  9:46 ` [PATCH 07/42] drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate Chris Wilson
2016-10-07 16:10   ` Tvrtko Ursulin
2016-10-07 16:22     ` Chris Wilson
2016-10-08  8:21       ` Tvrtko Ursulin
2016-10-07  9:46 ` [PATCH 08/42] drm/i915: Rearrange i915_wait_request() accounting with callers Chris Wilson
2016-10-07  9:58   ` Joonas Lahtinen
2016-10-07  9:46 ` [PATCH 09/42] drm/i915: Remove unused i915_gem_active_wait() in favour of _unlocked() Chris Wilson
2016-10-07  9:46 ` [PATCH 10/42] drm/i915: Defer active reference until required Chris Wilson
2016-10-07 16:35   ` Tvrtko Ursulin
2016-10-07 16:58     ` Chris Wilson
2016-10-08  8:18       ` Tvrtko Ursulin
2016-10-07  9:46 ` [PATCH 11/42] drm/i915: Introduce an internal allocator for disposable private objects Chris Wilson
2016-10-07 10:01   ` Joonas Lahtinen
2016-10-07 16:52   ` Tvrtko Ursulin
2016-10-07 17:08     ` Chris Wilson
2016-10-08  8:12       ` Tvrtko Ursulin
2016-10-08  8:32         ` Chris Wilson
2016-10-08  8:34         ` [PATCH v2] " Chris Wilson
2016-10-10  7:01           ` Joonas Lahtinen
2016-10-10  8:11           ` Tvrtko Ursulin
2016-10-10  8:19             ` Chris Wilson
2016-10-10  8:25               ` Tvrtko Ursulin
2016-10-07  9:46 ` [PATCH 12/42] drm/i915: Reuse the active golden render state batch Chris Wilson
2016-10-07  9:46 ` [PATCH 13/42] drm/i915: Markup GEM API with lockdep asserts Chris Wilson
2016-10-07  9:46 ` [PATCH 14/42] drm/i915: Use a radixtree for random access to the object's backing storage Chris Wilson
2016-10-07 10:12   ` Joonas Lahtinen
2016-10-07 11:05     ` Chris Wilson
2016-10-07 11:33       ` Joonas Lahtinen
2016-10-07 13:36   ` John Harrison
2016-10-11  9:32   ` Tvrtko Ursulin
2016-10-11 10:15     ` John Harrison
2016-10-07  9:46 ` [PATCH 15/42] drm/i915: Use radixtree to jump start intel_partial_pages() Chris Wilson
2016-10-07 13:46   ` John Harrison
2016-10-07  9:46 ` [PATCH 16/42] drm/i915: Refactor object page API Chris Wilson
2016-10-10 10:54   ` John Harrison
2016-10-11 11:23   ` Tvrtko Ursulin
2016-10-13 11:04   ` Joonas Lahtinen
2016-10-13 11:10     ` Chris Wilson
2016-10-07  9:46 ` [PATCH 17/42] drm/i915: Pass around sg_table to get_pages/put_pages backend Chris Wilson
2016-10-14  9:12   ` Joonas Lahtinen
2016-10-14  9:24     ` Chris Wilson
2016-10-14  9:28   ` Tvrtko Ursulin
2016-10-14  9:43     ` Chris Wilson
2016-10-17 10:52       ` Tvrtko Ursulin
2016-10-17 11:08         ` Chris Wilson
2016-10-07  9:46 ` [PATCH 18/42] drm/i915: Move object backing storage manipulation to its own locking Chris Wilson
2016-10-13 12:46   ` Joonas Lahtinen
2016-10-13 12:56     ` Chris Wilson
2016-10-07  9:46 ` [PATCH 19/42] drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex Chris Wilson
2016-10-13 11:54   ` Joonas Lahtinen
2016-10-07  9:46 ` [PATCH 20/42] drm/i915: Implement pread without struct-mutex Chris Wilson
2016-10-12 12:53   ` Joonas Lahtinen
2016-10-07  9:46 ` [PATCH 21/42] drm/i915: Implement pwrite " Chris Wilson
2016-10-13 11:17   ` Joonas Lahtinen
2016-10-13 11:54     ` Chris Wilson
2016-10-14  7:08       ` Joonas Lahtinen
2016-10-07  9:46 ` [PATCH 22/42] drm/i915: Acquire the backing storage outside of struct_mutex in set-domain Chris Wilson
2016-10-13 11:47   ` Joonas Lahtinen
2016-10-07  9:46 ` [PATCH 23/42] drm/i915: Move object release to a freelist + worker Chris Wilson
2016-10-11  9:52   ` John Harrison
2016-10-07  9:46 ` [PATCH 24/42] drm/i915: Treat a framebuffer reference as an active reference whilst shrinking Chris Wilson
2016-10-11  9:54   ` John Harrison
2016-10-07  9:46 ` [PATCH 25/42] drm/i915: Use lockless object free Chris Wilson
2016-10-11  9:56   ` John Harrison
2016-10-07  9:46 ` [PATCH 26/42] drm/i915: Move GEM activity tracking into a common struct reservation_object Chris Wilson
2016-10-07 10:10   ` Joonas Lahtinen
2016-10-07  9:46 ` [PATCH 27/42] drm: Add reference counting to drm_atomic_state Chris Wilson
2016-10-07  9:46 ` [PATCH 28/42] drm/i915: Restore nonblocking awaits for modesetting Chris Wilson
2016-10-07  9:46 ` [PATCH 29/42] drm/i915: Combine seqno + tracking into a global timeline struct Chris Wilson
2016-10-07  9:46 ` [PATCH 30/42] drm/i915: Queue the idling context switch after all other timelines Chris Wilson
2016-10-07  9:46 ` [PATCH 31/42] drm/i915: Wait first for submission, before waiting for request completion Chris Wilson
2016-10-07  9:46 ` [PATCH 32/42] drm/i915: Introduce a global_seqno for each request Chris Wilson
2016-10-07  9:46 ` [PATCH 33/42] drm/i915: Rename ->emit_request to ->emit_breadcrumb Chris Wilson
2016-10-07  9:46 ` [PATCH 34/42] drm/i915: Record space required for breadcrumb emission Chris Wilson
2016-10-07  9:46 ` [PATCH 35/42] drm/i915: Defer " Chris Wilson
2016-10-07  9:46 ` [PATCH 36/42] drm/i915: Move the global sync optimisation to the timeline Chris Wilson
2016-10-07  9:46 ` [PATCH 37/42] drm/i915: Create a unique name for the context Chris Wilson
2016-10-07  9:46 ` [PATCH 38/42] drm/i915: Reserve space in the global seqno during request allocation Chris Wilson
2016-10-07  9:46 ` [PATCH 39/42] drm/i915: Defer setting of global seqno on request to submission Chris Wilson
2016-10-07 10:25   ` Joonas Lahtinen
2016-10-07 10:27   ` Joonas Lahtinen
2016-10-07 11:03     ` Chris Wilson
2016-10-07 11:10       ` Joonas Lahtinen
2016-10-07  9:46 ` [PATCH 40/42] drm/i915: Enable multiple timelines Chris Wilson
2016-10-07 10:29   ` Joonas Lahtinen
2016-10-07 11:00     ` Chris Wilson
2016-10-07 11:07       ` Joonas Lahtinen
2016-10-07  9:46 ` [PATCH 41/42] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
2016-10-07  9:46 ` [PATCH 42/42] drm/i915: Support explicit fencing for execbuf Chris Wilson
2016-10-07 10:19 ` ✗ Fi.CI.BAT: warning for series starting with [01/42] drm/i915: Allow disabling error capture Patchwork
2016-10-10  7:23 ` Patchwork
2016-10-10 15:31 ` ✗ Fi.CI.BAT: failure for series starting with [01/42] drm/i915: Allow disabling error capture (rev2) Patchwork
