All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 0/3] Support for mapping an object page by page
@ 2015-11-09 10:56 ankitprasad.r.sharma
  2015-11-09 10:56 ` [PATCH 1/3] drm/i915: Add support " ankitprasad.r.sharma
                   ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: ankitprasad.r.sharma @ 2015-11-09 10:56 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel

From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>

It is possible that when we want to map an object to the aperture, either
we run out of aperture space or the size of the object is larger than
the mappable aperture. In such cases we might not be able to map the whole
object to the aperture. For cases as such, here we introduce insert_page()
which allows us to map a single page in to the mappable aperture space
(which has a higher probabilty of succeeding). This can be iterated
over to access the whole object by using space as meagre as page size.

Here we try to use insert_page() for pwrite_fast in case a nonblocking
pin for the whole object fails, which helps us to iterate over the whole
object and perform the pwrite without mapping the whole object to the
mappable aperture.

We also introduce i915_gem_object_get_dma_address() to perform fast
sequential lookup of the dma address associated with any page within the
object.

Ankitprasad Sharma (1):
  drm/i915: Use insert_page for pwrite_fast

Chris Wilson (2):
  drm/i915: Add support for mapping an object page by page
  drm/i915: Introduce i915_gem_object_get_dma_address()

 drivers/char/agp/intel-gtt.c        |  9 +++++
 drivers/gpu/drm/i915/i915_drv.h     | 17 +++++++++
 drivers/gpu/drm/i915/i915_gem.c     | 75 ++++++++++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_gtt.c | 49 ++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.h |  5 +++
 include/drm/intel-gtt.h             |  3 ++
 6 files changed, 136 insertions(+), 22 deletions(-)

-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/3] drm/i915: Add support for mapping an object page by page
  2015-11-09 10:56 [PATCH v3 0/3] Support for mapping an object page by page ankitprasad.r.sharma
@ 2015-11-09 10:56 ` ankitprasad.r.sharma
  2015-11-13 14:57   ` Tvrtko Ursulin
  2015-11-09 10:56 ` [PATCH 2/3] drm/i915: Introduce i915_gem_object_get_dma_address() ankitprasad.r.sharma
  2015-11-09 10:56 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
  2 siblings, 1 reply; 26+ messages in thread
From: ankitprasad.r.sharma @ 2015-11-09 10:56 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel

From: Chris Wilson <chris@chris-wilson.co.uk>

Introduced a new vm specfic callback insert_page() to program a single pte in
ggtt or ppgtt. This allows us to map a single page in to the mappable aperture
space. This can be iterated over to access the whole object by using space as
meagre as page size.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
 drivers/char/agp/intel-gtt.c        |  9 +++++++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 49 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.h |  5 ++++
 include/drm/intel-gtt.h             |  3 +++
 4 files changed, 66 insertions(+)

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 1341a94..7c68576 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -838,6 +838,15 @@ static bool i830_check_flags(unsigned int flags)
 	return false;
 }
 
+void intel_gtt_insert_page(dma_addr_t addr,
+			   unsigned int pg,
+			   unsigned int flags)
+{
+	intel_private.driver->write_entry(addr, pg, flags);
+	wmb();
+}
+EXPORT_SYMBOL(intel_gtt_insert_page);
+
 void intel_gtt_insert_sg_entries(struct sg_table *st,
 				 unsigned int pg_start,
 				 unsigned int flags)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 016739e..9fd1f857 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2345,6 +2345,23 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 #endif
 }
 
+static void gen8_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  uint64_t offset,
+				  enum i915_cache_level level,
+				  u32 unused)
+{
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
+	gen8_pte_t __iomem *pte =
+		(gen8_pte_t __iomem *)dev_priv->gtt.gsm +
+		(offset >> PAGE_SHIFT);
+
+	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
+	wmb();
+
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+}
+
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     uint64_t start,
@@ -2385,6 +2402,23 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
 }
 
+static void gen6_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  uint64_t offset,
+				  enum i915_cache_level level,
+				  u32 flags)
+{
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
+	gen6_pte_t __iomem *pte =
+		(gen6_pte_t __iomem *)dev_priv->gtt.gsm +
+		(offset >> PAGE_SHIFT);
+
+	iowrite32(vm->pte_encode(addr, level, true, flags), pte);
+	wmb();
+
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+}
+
 /*
  * Binds an object into the global gtt with the specified cache level. The object
  * will be accessible to the GPU via commands whose operands reference offsets
@@ -2481,6 +2515,18 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 	readl(gtt_base);
 }
 
+static void i915_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  uint64_t offset,
+				  enum i915_cache_level cache_level,
+				  u32 unused)
+{
+	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
+}
+
 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *pages,
 				     uint64_t start,
@@ -2996,6 +3042,7 @@ static int gen8_gmch_probe(struct drm_device *dev,
 	ret = ggtt_probe_common(dev, gtt_size);
 
 	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
+	dev_priv->gtt.base.insert_page = gen8_ggtt_insert_page;
 	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
 	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
 	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
@@ -3038,6 +3085,7 @@ static int gen6_gmch_probe(struct drm_device *dev,
 	ret = ggtt_probe_common(dev, gtt_size);
 
 	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
+	dev_priv->gtt.base.insert_page = gen6_ggtt_insert_page;
 	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
 	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
 	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
@@ -3072,6 +3120,7 @@ static int i915_gmch_probe(struct drm_device *dev,
 	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
 
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
+	dev_priv->gtt.base.insert_page = i915_ggtt_insert_page;
 	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
 	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
 	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a216397..aae30df 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -320,6 +320,11 @@ struct i915_address_space {
 			    uint64_t start,
 			    uint64_t length,
 			    bool use_scratch);
+	void (*insert_page)(struct i915_address_space *vm,
+			    dma_addr_t addr,
+			    uint64_t offset,
+			    enum i915_cache_level cache_level,
+			    u32 flags);
 	void (*insert_entries)(struct i915_address_space *vm,
 			       struct sg_table *st,
 			       uint64_t start,
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 9e9bddaa5..f49edec 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -13,6 +13,9 @@ void intel_gmch_remove(void);
 bool intel_enable_gtt(void);
 
 void intel_gtt_chipset_flush(void);
+void intel_gtt_insert_page(dma_addr_t addr,
+			   unsigned int pg,
+			   unsigned int flags);
 void intel_gtt_insert_sg_entries(struct sg_table *st,
 				 unsigned int pg_start,
 				 unsigned int flags);
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/3] drm/i915: Introduce i915_gem_object_get_dma_address()
  2015-11-09 10:56 [PATCH v3 0/3] Support for mapping an object page by page ankitprasad.r.sharma
  2015-11-09 10:56 ` [PATCH 1/3] drm/i915: Add support " ankitprasad.r.sharma
@ 2015-11-09 10:56 ` ankitprasad.r.sharma
  2015-11-09 10:56 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
  2 siblings, 0 replies; 26+ messages in thread
From: ankitprasad.r.sharma @ 2015-11-09 10:56 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel

From: Chris Wilson <chris@chris-wilson.co.uk>

This utility function is a companion to i915_gem_object_get_page() that
uses the same cached iterator for the scatterlist to perform fast
sequential lookup of the dma address associated with any page within the
object.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d2a546a..548a0eb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2862,6 +2862,23 @@ static inline int __sg_page_count(struct scatterlist *sg)
 	return sg->length >> PAGE_SHIFT;
 }
 
+static inline dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
+{
+	if (n < obj->get_page.last) {
+		obj->get_page.sg = obj->pages->sgl;
+		obj->get_page.last = 0;
+	}
+
+	while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
+		obj->get_page.last += __sg_page_count(obj->get_page.sg++);
+		if (unlikely(sg_is_chain(obj->get_page.sg)))
+			obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
+	}
+
+	return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT);
+}
+
 static inline struct page *
 i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
 {
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-09 10:56 [PATCH v3 0/3] Support for mapping an object page by page ankitprasad.r.sharma
  2015-11-09 10:56 ` [PATCH 1/3] drm/i915: Add support " ankitprasad.r.sharma
  2015-11-09 10:56 ` [PATCH 2/3] drm/i915: Introduce i915_gem_object_get_dma_address() ankitprasad.r.sharma
@ 2015-11-09 10:56 ` ankitprasad.r.sharma
  2015-11-09 12:20   ` kbuild test robot
  2015-11-10  7:55   ` Mika Kuoppala
  2 siblings, 2 replies; 26+ messages in thread
From: ankitprasad.r.sharma @ 2015-11-09 10:56 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel

From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>

In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
we try a nonblocking pin for the whole object (since that is fastest if
reused), then failing that we try to grab one page in the mappable
aperture. It also allows us to handle objects larger than the mappable
aperture (e.g. if we need to pwrite with vGPU restricting the aperture
to a measely 8MiB or something like that).

v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)

v3: Combined loops based on local patch by Chris (Chris)

Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 75 +++++++++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f1e3fde..9d2e6e3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -760,20 +760,33 @@ fast_user_write(struct io_mapping *mapping,
  * user into the GTT, uncached.
  */
 static int
-i915_gem_gtt_pwrite_fast(struct drm_device *dev,
+i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
 			 struct drm_i915_gem_object *obj,
 			 struct drm_i915_gem_pwrite *args,
 			 struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	ssize_t remain;
-	loff_t offset, page_base;
+	struct drm_mm_node node;
+	uint64_t remain, offset;
 	char __user *user_data;
-	int page_offset, page_length, ret;
+	int ret;
 
 	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
-	if (ret)
-		goto out;
+	if (ret) {
+		memset(&node, 0, sizeof(node));
+		ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm,
+							  &node, 4096, 0,
+							  I915_CACHE_NONE, 0,
+							  i915->gtt.mappable_end,
+							  DRM_MM_SEARCH_DEFAULT,
+							  DRM_MM_CREATE_DEFAULT);
+		if (ret)
+			goto out;
+
+		i915_gem_object_pin_pages(obj);
+	} else {
+		node.start = i915_gem_obj_ggtt_offset(obj);
+		node.allocated = false;
+	}
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
@@ -783,31 +796,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 	if (ret)
 		goto out_unpin;
 
-	user_data = to_user_ptr(args->data_ptr);
-	remain = args->size;
-
-	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
-
 	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
+	obj->dirty = true;
 
-	while (remain > 0) {
+	user_data = to_user_ptr(args->data_ptr);
+	offset = args->offset;
+	remain = args->size;
+	while (remain) {
 		/* Operation in this page
 		 *
 		 * page_base = page offset within aperture
 		 * page_offset = offset within page
 		 * page_length = bytes to copy for this page
 		 */
-		page_base = offset & PAGE_MASK;
-		page_offset = offset_in_page(offset);
-		page_length = remain;
-		if ((page_offset + remain) > PAGE_SIZE)
-			page_length = PAGE_SIZE - page_offset;
-
+		u32 page_base = node.start;
+		unsigned page_offset = offset_in_page(offset);
+		unsigned page_length = PAGE_SIZE - page_offset;
+		page_length = remain < page_length ? remain : page_length;
+		if (node.allocated) {
+			wmb();
+			i915->gtt.base.insert_page(&i915->gtt.base,
+						   i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
+						   node.start,
+						   I915_CACHE_NONE,
+						   0);
+			wmb();
+		} else {
+			page_base += offset & PAGE_MASK;
+		}
 		/* If we get a fault while copying data, then (presumably) our
 		 * source page isn't available.  Return the error and we'll
 		 * retry in the slow path.
 		 */
-		if (fast_user_write(dev_priv->gtt.mappable, page_base,
+		if (fast_user_write(i915->gtt.mappable, page_base,
 				    page_offset, user_data, page_length)) {
 			ret = -EFAULT;
 			goto out_flush;
@@ -821,7 +842,17 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 out_flush:
 	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
 out_unpin:
-	i915_gem_object_ggtt_unpin(obj);
+	if (node.allocated) {
+		wmb();
+		i915->gtt.base.clear_range(&i915->gtt.base,
+				node.start, node.size,
+				true);
+		drm_mm_remove_node(&node);
+		i915_gem_object_unpin_pages(obj);
+	}
+	else {
+		i915_gem_object_ggtt_unpin(obj);
+	}
 out:
 	return ret;
 }
@@ -1086,7 +1117,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (obj->tiling_mode == I915_TILING_NONE &&
 	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
 	    cpu_write_needs_clflush(obj)) {
-		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
+		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
 		/* Note that the gtt paths might fail with non-page-backed user
 		 * pointers (e.g. gtt mappings when moving data between
 		 * textures). Fallback to the shmem path in that case. */
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-09 10:56 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
@ 2015-11-09 12:20   ` kbuild test robot
  2015-11-10  7:55   ` Mika Kuoppala
  1 sibling, 0 replies; 26+ messages in thread
From: kbuild test robot @ 2015-11-09 12:20 UTC (permalink / raw)
  Cc: akash.goel, Ankitprasad Sharma, intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 19921 bytes --]

Hi Ankitprasad,

[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on v4.3 next-20151109]

url:    https://github.com/0day-ci/linux/commits/ankitprasad-r-sharma-intel-com/Support-for-mapping-an-object-page-by-page/20151109-191910
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
reproduce: make htmldocs

All warnings (new ones prefixed by >>):

   include/drm/drm_crtc.h:875: warning: No description found for parameter 'mutex'
   include/drm/drm_crtc.h:875: warning: No description found for parameter 'helper_private'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tile_idr'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'delayed_event'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'edid_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'dpms_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'path_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tile_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'plane_type_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'rotation_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_src_x'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_src_y'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_src_w'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_src_h'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_crtc_x'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_crtc_y'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_crtc_w'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_crtc_h'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_fb_id'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_crtc_id'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_active'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_mode_id'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'dvi_i_subconnector_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'dvi_i_select_subconnector_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_subconnector_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_select_subconnector_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_mode_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_left_margin_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_right_margin_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_top_margin_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_bottom_margin_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_brightness_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_contrast_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_flicker_reduction_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_overscan_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_saturation_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_hue_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'scaling_mode_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'aspect_ratio_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'dirty_info_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'suggested_x_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'suggested_y_property'
   include/drm/drm_crtc.h:1160: warning: No description found for parameter 'allow_fb_modifiers'
   include/drm/drm_fb_helper.h:148: warning: No description found for parameter 'connector_info'
   include/drm/drm_dp_helper.h:709: warning: No description found for parameter 'i2c_nack_count'
   include/drm/drm_dp_helper.h:709: warning: No description found for parameter 'i2c_defer_count'
   drivers/gpu/drm/drm_dp_mst_topology.c:2211: warning: No description found for parameter 'connector'
   include/drm/drm_dp_mst_helper.h:97: warning: No description found for parameter 'cached_edid'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'max_dpcd_transaction_bytes'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'sink_count'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'total_slots'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'avail_slots'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'total_pbn'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'qlock'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'tx_msg_downq'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'tx_msg_upq'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'tx_down_in_progress'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'tx_up_in_progress'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'payload_lock'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'proposed_vcpis'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'payloads'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'payload_mask'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'vcpi_mask'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'tx_waitq'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'work'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'tx_work'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'destroy_connector_list'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'destroy_connector_lock'
   include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'destroy_connector_work'
   drivers/gpu/drm/drm_dp_mst_topology.c:2211: warning: No description found for parameter 'connector'
   drivers/gpu/drm/drm_irq.c:173: warning: No description found for parameter 'flags'
   include/drm/drmP.h:164: warning: No description found for parameter 'fmt'
   include/drm/drmP.h:180: warning: No description found for parameter 'fmt'
   include/drm/drmP.h:198: warning: No description found for parameter 'fmt'
   include/drm/drmP.h:238: warning: No description found for parameter 'dev'
   include/drm/drmP.h:238: warning: No description found for parameter 'data'
   include/drm/drmP.h:238: warning: No description found for parameter 'file_priv'
   include/drm/drmP.h:271: warning: No description found for parameter 'ioctl'
   include/drm/drmP.h:271: warning: No description found for parameter '_func'
   include/drm/drmP.h:271: warning: No description found for parameter '_flags'
   include/drm/drmP.h:344: warning: cannot understand function prototype: 'struct drm_lock_data '
   include/drm/drmP.h:397: warning: cannot understand function prototype: 'struct drm_driver '
   include/drm/drmP.h:648: warning: cannot understand function prototype: 'struct drm_info_list '
   include/drm/drmP.h:658: warning: cannot understand function prototype: 'struct drm_info_node '
   include/drm/drmP.h:668: warning: cannot understand function prototype: 'struct drm_minor '
   include/drm/drmP.h:716: warning: cannot understand function prototype: 'struct drm_device '
   drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'wedged'
   drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'fmt'
   drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'wedged'
   drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'fmt'
   drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'wedged'
   drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'fmt'
   drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'wedged'
   drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'fmt'
   drivers/gpu/drm/i915/i915_gem.c:421: warning: No description found for parameter 'dev'
   drivers/gpu/drm/i915/i915_gem.c:421: warning: No description found for parameter 'data'
   drivers/gpu/drm/i915/i915_gem.c:421: warning: No description found for parameter 'file'
   drivers/gpu/drm/i915/i915_gem.c:686: warning: No description found for parameter 'dev'
   drivers/gpu/drm/i915/i915_gem.c:686: warning: No description found for parameter 'data'
   drivers/gpu/drm/i915/i915_gem.c:686: warning: No description found for parameter 'file'
>> drivers/gpu/drm/i915/i915_gem.c:767: warning: No description found for parameter 'i915'
   drivers/gpu/drm/i915/i915_gem.c:767: warning: No description found for parameter 'obj'
   drivers/gpu/drm/i915/i915_gem.c:767: warning: No description found for parameter 'args'
   drivers/gpu/drm/i915/i915_gem.c:767: warning: No description found for parameter 'file'
   drivers/gpu/drm/i915/i915_gem.c:1060: warning: No description found for parameter 'dev'
   drivers/gpu/drm/i915/i915_gem.c:1060: warning: No description found for parameter 'data'
   drivers/gpu/drm/i915/i915_gem.c:1060: warning: No description found for parameter 'file'
   drivers/gpu/drm/i915/i915_gem.c:1225: warning: No description found for parameter 'rps'
   drivers/gpu/drm/i915/i915_gem.c:1431: warning: No description found for parameter 'req'
   drivers/gpu/drm/i915/i915_gem.c:1466: warning: No description found for parameter 'obj'
   drivers/gpu/drm/i915/i915_gem.c:1466: warning: No description found for parameter 'readonly'
   drivers/gpu/drm/i915/i915_gem.c:1589: warning: No description found for parameter 'dev'
   drivers/gpu/drm/i915/i915_gem.c:1589: warning: No description found for parameter 'data'
   drivers/gpu/drm/i915/i915_gem.c:1589: warning: No description found for parameter 'file'
   drivers/gpu/drm/i915/i915_gem.c:1652: warning: No description found for parameter 'dev'
   drivers/gpu/drm/i915/i915_gem.c:1652: warning: No description found for parameter 'data'
   drivers/gpu/drm/i915/i915_gem.c:1652: warning: No description found for parameter 'file'
   drivers/gpu/drm/i915/i915_gem.c:1697: warning: No description found for parameter 'dev'
   drivers/gpu/drm/i915/i915_gem.c:1697: warning: No description found for parameter 'data'
   drivers/gpu/drm/i915/i915_gem.c:1697: warning: No description found for parameter 'file'
   drivers/gpu/drm/i915/i915_gem.c:1985: warning: No description found for parameter 'dev'
   drivers/gpu/drm/i915/i915_gem.c:1985: warning: No description found for parameter 'size'
   drivers/gpu/drm/i915/i915_gem.c:1985: warning: No description found for parameter 'tiling_mode'
   drivers/gpu/drm/i915/i915_gem.c:1985: warning: No description found for parameter 'fenced'
   drivers/gpu/drm/i915/i915_gem.c:1985: warning: Excess function parameter 'obj' description in 'i915_gem_get_gtt_alignment'
   drivers/gpu/drm/i915/i915_gem.c:2848: warning: No description found for parameter 'ring'
   drivers/gpu/drm/i915/i915_gem.c:2977: warning: No description found for parameter 'obj'
   drivers/gpu/drm/i915/i915_gem.c:3027: warning: No description found for parameter 'dev'
   drivers/gpu/drm/i915/i915_gem.c:3027: warning: No description found for parameter 'data'
   drivers/gpu/drm/i915/i915_gem.c:3027: warning: No description found for parameter 'file'
   drivers/gpu/drm/i915/i915_gem.c:3027: warning: Excess function parameter 'DRM_IOCTL_ARGS' description in 'i915_gem_wait_ioctl'
   drivers/gpu/drm/i915/i915_gem.c:3396: warning: No description found for parameter 'obj'
   drivers/gpu/drm/i915/i915_gem.c:3396: warning: No description found for parameter 'vm'
   drivers/gpu/drm/i915/i915_gem.c:3396: warning: No description found for parameter 'ggtt_view'
   drivers/gpu/drm/i915/i915_gem.c:3396: warning: No description found for parameter 'alignment'
   drivers/gpu/drm/i915/i915_gem.c:3396: warning: No description found for parameter 'flags'
   drivers/gpu/drm/i915/i915_gem.c:3631: warning: No description found for parameter 'obj'
   drivers/gpu/drm/i915/i915_gem.c:3631: warning: No description found for parameter 'write'
   drivers/gpu/drm/i915/i915_gem.c:3706: warning: No description found for parameter 'obj'
   drivers/gpu/drm/i915/i915_gem.c:3706: warning: No description found for parameter 'cache_level'
   drivers/gpu/drm/i915/i915_gem.c:3980: warning: No description found for parameter 'obj'
   drivers/gpu/drm/i915/i915_gem.c:3980: warning: No description found for parameter 'write'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: No description found for parameter 'params'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'dev' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'file' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'ring' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'ctx' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'batch_obj' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'exec_start' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'dispatch_flags' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: No description found for parameter 'params'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'dev' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'file' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'ring' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'ctx' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'batch_obj' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'exec_start' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'dispatch_flags' description in 'intel_execlists_submission'
   drivers/gpu/drm/i915/intel_guc_loader.c:1: warning: no structured comments found
       Was looking for 'GuC-specific firmware loader'.
   drivers/gpu/drm/i915/i915_guc_submission.c:598: warning: No description found for parameter 'rq'
   drivers/gpu/drm/i915/i915_guc_submission.c:598: warning: Excess function parameter 'ctx' description in 'i915_guc_submit'
   drivers/gpu/drm/i915/i915_guc_submission.c:598: warning: Excess function parameter 'ring' description in 'i915_guc_submit'
   drivers/gpu/drm/i915/i915_guc_submission.c:741: warning: No description found for parameter 'ctx'
   drivers/gpu/drm/i915/i915_guc_submission.c:1: warning: no structured comments found
       Was looking for 'GuC-based command submissison'.
   drivers/gpu/drm/i915/i915_guc_submission.c:598: warning: No description found for parameter 'rq'
   drivers/gpu/drm/i915/i915_guc_submission.c:598: warning: Excess function parameter 'ctx' description in 'i915_guc_submit'
   drivers/gpu/drm/i915/i915_guc_submission.c:598: warning: Excess function parameter 'ring' description in 'i915_guc_submit'
   drivers/gpu/drm/i915/i915_guc_submission.c:741: warning: No description found for parameter 'ctx'
   Warning: didn't use docs for i915_hotplug_interrupt_update
   Warning: didn't use docs for ilk_update_display_irq
   Warning: didn't use docs for ilk_update_gt_irq
   Warning: didn't use docs for snb_update_pm_irq
   Warning: didn't use docs for bdw_update_port_irq
   Warning: didn't use docs for ibx_display_interrupt_update
   Warning: didn't use docs for i915_enable_asle_pipestat
   Warning: didn't use docs for ivybridge_parity_work
   Warning: didn't use docs for i915_reset_and_wakeup
   Warning: didn't use docs for i915_handle_error
   Warning: didn't use docs for intel_irq_install
   Warning: didn't use docs for intel_irq_uninstall

vim +/i915 +767 drivers/gpu/drm/i915/i915_gem.c

4f0c7cfb Ben Widawsky       2012-04-16  751  	vaddr = (void __force*)vaddr_atomic + page_offset;
4f0c7cfb Ben Widawsky       2012-04-16  752  	unwritten = __copy_from_user_inatomic_nocache(vaddr,
0839ccb8 Keith Packard      2008-10-30  753  						      user_data, length);
3e4d3af5 Peter Zijlstra     2010-10-26  754  	io_mapping_unmap_atomic(vaddr_atomic);
fbd5a26d Chris Wilson       2010-10-14  755  	return unwritten;
0839ccb8 Keith Packard      2008-10-30  756  }
0839ccb8 Keith Packard      2008-10-30  757  
3de09aa3 Eric Anholt        2009-03-09  758  /**
3de09aa3 Eric Anholt        2009-03-09  759   * This is the fast pwrite path, where we copy the data directly from the
3de09aa3 Eric Anholt        2009-03-09  760   * user into the GTT, uncached.
3de09aa3 Eric Anholt        2009-03-09  761   */
673a394b Eric Anholt        2008-07-30  762  static int
25ec998a Ankitprasad Sharma 2015-11-09  763  i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
05394f39 Chris Wilson       2010-11-08  764  			 struct drm_i915_gem_object *obj,
673a394b Eric Anholt        2008-07-30  765  			 struct drm_i915_gem_pwrite *args,
05394f39 Chris Wilson       2010-11-08  766  			 struct drm_file *file)
673a394b Eric Anholt        2008-07-30 @767  {
25ec998a Ankitprasad Sharma 2015-11-09  768  	struct drm_mm_node node;
25ec998a Ankitprasad Sharma 2015-11-09  769  	uint64_t remain, offset;
673a394b Eric Anholt        2008-07-30  770  	char __user *user_data;
25ec998a Ankitprasad Sharma 2015-11-09  771  	int ret;
935aaa69 Daniel Vetter      2012-03-25  772  
1ec9e26d Daniel Vetter      2014-02-14  773  	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
25ec998a Ankitprasad Sharma 2015-11-09  774  	if (ret) {
25ec998a Ankitprasad Sharma 2015-11-09  775  		memset(&node, 0, sizeof(node));

:::::: The code at line 767 was first introduced by commit
:::::: 673a394b1e3b69be886ff24abfd6df97c52e8d08 drm: Add GEM ("graphics execution manager") to i915 driver.

:::::: TO: Eric Anholt <eric@anholt.net>
:::::: CC: Dave Airlie <airlied@linux.ie>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 6062 bytes --]

[-- Attachment #3: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-09 10:56 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
  2015-11-09 12:20   ` kbuild test robot
@ 2015-11-10  7:55   ` Mika Kuoppala
  2015-11-10  8:05     ` Ankitprasad Sharma
  2015-11-10  8:44     ` Chris Wilson
  1 sibling, 2 replies; 26+ messages in thread
From: Mika Kuoppala @ 2015-11-10  7:55 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel

ankitprasad.r.sharma@intel.com writes:

> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> we try a nonblocking pin for the whole object (since that is fastest if
> reused), then failing that we try to grab one page in the mappable
> aperture. It also allows us to handle objects larger than the mappable
> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> to a measely 8MiB or something like that).
>
> v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
>
> v3: Combined loops based on local patch by Chris (Chris)
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 75 +++++++++++++++++++++++++++++------------
>  1 file changed, 53 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f1e3fde..9d2e6e3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -760,20 +760,33 @@ fast_user_write(struct io_mapping *mapping,
>   * user into the GTT, uncached.
>   */
>  static int
> -i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
>  			 struct drm_i915_gem_object *obj,
>  			 struct drm_i915_gem_pwrite *args,
>  			 struct drm_file *file)
>  {
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	ssize_t remain;
> -	loff_t offset, page_base;
> +	struct drm_mm_node node;
> +	uint64_t remain, offset;
>  	char __user *user_data;
> -	int page_offset, page_length, ret;
> +	int ret;
>  
>  	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> -	if (ret)
> -		goto out;
> +	if (ret) {
> +		memset(&node, 0, sizeof(node));
> +		ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm,
> +							  &node, 4096, 0,
> +							  I915_CACHE_NONE, 0,
> +							  i915->gtt.mappable_end,
> +							  DRM_MM_SEARCH_DEFAULT,
> +							  DRM_MM_CREATE_DEFAULT);
> +		if (ret)
> +			goto out;
> +
> +		i915_gem_object_pin_pages(obj);
> +	} else {
> +		node.start = i915_gem_obj_ggtt_offset(obj);
> +		node.allocated = false;
> +	}
>  
>  	ret = i915_gem_object_set_to_gtt_domain(obj, true);
>  	if (ret)
> @@ -783,31 +796,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
>  	if (ret)
>  		goto out_unpin;
>  
> -	user_data = to_user_ptr(args->data_ptr);
> -	remain = args->size;
> -
> -	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
> -
>  	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
> +	obj->dirty = true;
>  
> -	while (remain > 0) {
> +	user_data = to_user_ptr(args->data_ptr);
> +	offset = args->offset;
> +	remain = args->size;
> +	while (remain) {
>  		/* Operation in this page
>  		 *
>  		 * page_base = page offset within aperture
>  		 * page_offset = offset within page
>  		 * page_length = bytes to copy for this page
>  		 */
> -		page_base = offset & PAGE_MASK;
> -		page_offset = offset_in_page(offset);
> -		page_length = remain;
> -		if ((page_offset + remain) > PAGE_SIZE)
> -			page_length = PAGE_SIZE - page_offset;
> -
> +		u32 page_base = node.start;

You truncate here as node.start is 64bit offset into the vm area.

-Mika


> +		unsigned page_offset = offset_in_page(offset);
> +		unsigned page_length = PAGE_SIZE - page_offset;
> +		page_length = remain < page_length ? remain : page_length;
> +		if (node.allocated) {
> +			wmb();
> +			i915->gtt.base.insert_page(&i915->gtt.base,
> +						   i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
> +						   node.start,
> +						   I915_CACHE_NONE,
> +						   0);
> +			wmb();
> +		} else {
> +			page_base += offset & PAGE_MASK;
> +		}
>  		/* If we get a fault while copying data, then (presumably) our
>  		 * source page isn't available.  Return the error and we'll
>  		 * retry in the slow path.
>  		 */
> -		if (fast_user_write(dev_priv->gtt.mappable, page_base,
> +		if (fast_user_write(i915->gtt.mappable, page_base,
>  				    page_offset, user_data, page_length)) {
>  			ret = -EFAULT;
>  			goto out_flush;
> @@ -821,7 +842,17 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
>  out_flush:
>  	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
>  out_unpin:
> -	i915_gem_object_ggtt_unpin(obj);
> +	if (node.allocated) {
> +		wmb();
> +		i915->gtt.base.clear_range(&i915->gtt.base,
> +				node.start, node.size,
> +				true);
> +		drm_mm_remove_node(&node);
> +		i915_gem_object_unpin_pages(obj);
> +	}
> +	else {
> +		i915_gem_object_ggtt_unpin(obj);
> +	}
>  out:
>  	return ret;
>  }
> @@ -1086,7 +1117,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  	if (obj->tiling_mode == I915_TILING_NONE &&
>  	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
>  	    cpu_write_needs_clflush(obj)) {
> -		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
> +		ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
>  		/* Note that the gtt paths might fail with non-page-backed user
>  		 * pointers (e.g. gtt mappings when moving data between
>  		 * textures). Fallback to the shmem path in that case. */
> -- 
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-10  7:55   ` Mika Kuoppala
@ 2015-11-10  8:05     ` Ankitprasad Sharma
  2015-11-10  8:44     ` Chris Wilson
  1 sibling, 0 replies; 26+ messages in thread
From: Ankitprasad Sharma @ 2015-11-10  8:05 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: intel-gfx, akash.goel

On Tue, 2015-11-10 at 09:55 +0200, Mika Kuoppala wrote:
> ankitprasad.r.sharma@intel.com writes:
> 
> > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > we try a nonblocking pin for the whole object (since that is fastest if
> > reused), then failing that we try to grab one page in the mappable
> > aperture. It also allows us to handle objects larger than the mappable
> > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > to a measely 8MiB or something like that).
> >
> > v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
> >
> > v3: Combined loops based on local patch by Chris (Chris)
> >
> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c | 75 +++++++++++++++++++++++++++++------------
> >  1 file changed, 53 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index f1e3fde..9d2e6e3 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -760,20 +760,33 @@ fast_user_write(struct io_mapping *mapping,
> >   * user into the GTT, uncached.
> >   */
> >  static int
> > -i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> > +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> >  			 struct drm_i915_gem_object *obj,
> >  			 struct drm_i915_gem_pwrite *args,
> >  			 struct drm_file *file)
> >  {
> > -	struct drm_i915_private *dev_priv = dev->dev_private;
> > -	ssize_t remain;
> > -	loff_t offset, page_base;
> > +	struct drm_mm_node node;
> > +	uint64_t remain, offset;
> >  	char __user *user_data;
> > -	int page_offset, page_length, ret;
> > +	int ret;
> >  
> >  	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> > -	if (ret)
> > -		goto out;
> > +	if (ret) {
> > +		memset(&node, 0, sizeof(node));
> > +		ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm,
> > +							  &node, 4096, 0,
> > +							  I915_CACHE_NONE, 0,
> > +							  i915->gtt.mappable_end,
> > +							  DRM_MM_SEARCH_DEFAULT,
> > +							  DRM_MM_CREATE_DEFAULT);
> > +		if (ret)
> > +			goto out;
> > +
> > +		i915_gem_object_pin_pages(obj);
> > +	} else {
> > +		node.start = i915_gem_obj_ggtt_offset(obj);
> > +		node.allocated = false;
> > +	}
> >  
> >  	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> >  	if (ret)
> > @@ -783,31 +796,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> >  	if (ret)
> >  		goto out_unpin;
> >  
> > -	user_data = to_user_ptr(args->data_ptr);
> > -	remain = args->size;
> > -
> > -	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
> > -
> >  	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
> > +	obj->dirty = true;
> >  
> > -	while (remain > 0) {
> > +	user_data = to_user_ptr(args->data_ptr);
> > +	offset = args->offset;
> > +	remain = args->size;
> > +	while (remain) {
> >  		/* Operation in this page
> >  		 *
> >  		 * page_base = page offset within aperture
> >  		 * page_offset = offset within page
> >  		 * page_length = bytes to copy for this page
> >  		 */
> > -		page_base = offset & PAGE_MASK;
> > -		page_offset = offset_in_page(offset);
> > -		page_length = remain;
> > -		if ((page_offset + remain) > PAGE_SIZE)
> > -			page_length = PAGE_SIZE - page_offset;
> > -
> > +		u32 page_base = node.start;
> 
> You truncate here as node.start is 64bit offset into the vm area.
> 
True, but it will work here always as node.start is going to be less
than 256 MB for both the cases (whole object pin or page by page pwrite)

Thanks,
Ankit


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-10  7:55   ` Mika Kuoppala
  2015-11-10  8:05     ` Ankitprasad Sharma
@ 2015-11-10  8:44     ` Chris Wilson
  2015-11-10 11:51       ` Chris Wilson
  1 sibling, 1 reply; 26+ messages in thread
From: Chris Wilson @ 2015-11-10  8:44 UTC (permalink / raw)
  To: Mika Kuoppala; +Cc: ankitprasad.r.sharma, intel-gfx, akash.goel

On Tue, Nov 10, 2015 at 09:55:18AM +0200, Mika Kuoppala wrote:
> ankitprasad.r.sharma@intel.com writes:
> 
> > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > we try a nonblocking pin for the whole object (since that is fastest if
> > reused), then failing that we try to grab one page in the mappable
> > aperture. It also allows us to handle objects larger than the mappable
> > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > to a measely 8MiB or something like that).
> >
> > v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
> >
> > v3: Combined loops based on local patch by Chris (Chris)
> >
> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c | 75 +++++++++++++++++++++++++++++------------
> >  1 file changed, 53 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index f1e3fde..9d2e6e3 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -760,20 +760,33 @@ fast_user_write(struct io_mapping *mapping,
> >   * user into the GTT, uncached.
> >   */
> >  static int
> > -i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> > +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> >  			 struct drm_i915_gem_object *obj,
> >  			 struct drm_i915_gem_pwrite *args,
> >  			 struct drm_file *file)
> >  {
> > -	struct drm_i915_private *dev_priv = dev->dev_private;
> > -	ssize_t remain;
> > -	loff_t offset, page_base;
> > +	struct drm_mm_node node;
> > +	uint64_t remain, offset;
> >  	char __user *user_data;
> > -	int page_offset, page_length, ret;
> > +	int ret;
> >  
> >  	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> > -	if (ret)
> > -		goto out;
> > +	if (ret) {
> > +		memset(&node, 0, sizeof(node));
> > +		ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm,
> > +							  &node, 4096, 0,
> > +							  I915_CACHE_NONE, 0,
> > +							  i915->gtt.mappable_end,
> > +							  DRM_MM_SEARCH_DEFAULT,
> > +							  DRM_MM_CREATE_DEFAULT);
> > +		if (ret)
> > +			goto out;
> > +
> > +		i915_gem_object_pin_pages(obj);
> > +	} else {
> > +		node.start = i915_gem_obj_ggtt_offset(obj);
> > +		node.allocated = false;
> > +	}
> >  
> >  	ret = i915_gem_object_set_to_gtt_domain(obj, true);
> >  	if (ret)
> > @@ -783,31 +796,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> >  	if (ret)
> >  		goto out_unpin;
> >  
> > -	user_data = to_user_ptr(args->data_ptr);
> > -	remain = args->size;
> > -
> > -	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
> > -
> >  	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
> > +	obj->dirty = true;
> >  
> > -	while (remain > 0) {
> > +	user_data = to_user_ptr(args->data_ptr);
> > +	offset = args->offset;
> > +	remain = args->size;
> > +	while (remain) {
> >  		/* Operation in this page
> >  		 *
> >  		 * page_base = page offset within aperture
> >  		 * page_offset = offset within page
> >  		 * page_length = bytes to copy for this page
> >  		 */
> > -		page_base = offset & PAGE_MASK;
> > -		page_offset = offset_in_page(offset);
> > -		page_length = remain;
> > -		if ((page_offset + remain) > PAGE_SIZE)
> > -			page_length = PAGE_SIZE - page_offset;
> > -
> > +		u32 page_base = node.start;
> 
> You truncate here as node.start is 64bit offset into the vm area.

It's a bit of cheat since it can't be 64bit, but the code is equally
inconsistent. The io-mapping can only handle unsigned long.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-10  8:44     ` Chris Wilson
@ 2015-11-10 11:51       ` Chris Wilson
  0 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2015-11-10 11:51 UTC (permalink / raw)
  To: Mika Kuoppala, ankitprasad.r.sharma, intel-gfx, akash.goel

On Tue, Nov 10, 2015 at 08:44:59AM +0000, Chris Wilson wrote:
> On Tue, Nov 10, 2015 at 09:55:18AM +0200, Mika Kuoppala wrote:
> > > +	while (remain) {
> > >  		/* Operation in this page
> > >  		 *
> > >  		 * page_base = page offset within aperture
> > >  		 * page_offset = offset within page
> > >  		 * page_length = bytes to copy for this page
> > >  		 */
> > > -		page_base = offset & PAGE_MASK;
> > > -		page_offset = offset_in_page(offset);
> > > -		page_length = remain;
> > > -		if ((page_offset + remain) > PAGE_SIZE)
> > > -			page_length = PAGE_SIZE - page_offset;
> > > -
> > > +		u32 page_base = node.start;
> > 
> > You truncate here as node.start is 64bit offset into the vm area.
> 
> It's a bit of cheat since it can't be 64bit, but the code is equally
> inconsistent. The io-mapping can only handle unsigned long.

And for reference, it is preserving current behaviour.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/3] drm/i915: Add support for mapping an object page by page
  2015-11-09 10:56 ` [PATCH 1/3] drm/i915: Add support " ankitprasad.r.sharma
@ 2015-11-13 14:57   ` Tvrtko Ursulin
  0 siblings, 0 replies; 26+ messages in thread
From: Tvrtko Ursulin @ 2015-11-13 14:57 UTC (permalink / raw)
  To: ankitprasad.r.sharma, intel-gfx; +Cc: akash.goel


Hi,

On 09/11/15 10:56, ankitprasad.r.sharma@intel.com wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> Introduced a new vm specfic callback insert_page() to program a single pte in
> ggtt or ppgtt. This allows us to map a single page in to the mappable aperture
> space. This can be iterated over to access the whole object by using space as
> meagre as page size.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
>   drivers/char/agp/intel-gtt.c        |  9 +++++++
>   drivers/gpu/drm/i915/i915_gem_gtt.c | 49 +++++++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_gtt.h |  5 ++++
>   include/drm/intel-gtt.h             |  3 +++
>   4 files changed, 66 insertions(+)
>
> diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
> index 1341a94..7c68576 100644
> --- a/drivers/char/agp/intel-gtt.c
> +++ b/drivers/char/agp/intel-gtt.c
> @@ -838,6 +838,15 @@ static bool i830_check_flags(unsigned int flags)
>   	return false;
>   }
>
> +void intel_gtt_insert_page(dma_addr_t addr,
> +			   unsigned int pg,
> +			   unsigned int flags)
> +{
> +	intel_private.driver->write_entry(addr, pg, flags);
> +	wmb();
> +}
> +EXPORT_SYMBOL(intel_gtt_insert_page);
> +
>   void intel_gtt_insert_sg_entries(struct sg_table *st,
>   				 unsigned int pg_start,
>   				 unsigned int flags)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 016739e..9fd1f857 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2345,6 +2345,23 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
>   #endif
>   }
>
> +static void gen8_ggtt_insert_page(struct i915_address_space *vm,
> +				  dma_addr_t addr,
> +				  uint64_t offset,
> +				  enum i915_cache_level level,
> +				  u32 unused)
> +{

I am not sure about the interface consistency. insert_entries works on a 
sg and this would now need a DMA address which is encapsulated in the 
former.

How about extending insert_pages to take a page_offset and size in 
pages? The latter could default to obj size if zero.

> +	struct drm_i915_private *dev_priv = to_i915(vm->dev);
> +	gen8_pte_t __iomem *pte =
> +		(gen8_pte_t __iomem *)dev_priv->gtt.gsm +
> +		(offset >> PAGE_SHIFT);
> +
> +	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
> +	wmb();
> +
> +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);

gen8_ggtt_insert_entries doesn't do the wmb() but does a posting read on 
GFX_FLSH_CNTL_GEN6. Why is that difference?

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-24 12:22         ` Daniel Vetter
@ 2015-12-14  8:19           ` Ankitprasad Sharma
  0 siblings, 0 replies; 26+ messages in thread
From: Ankitprasad Sharma @ 2015-12-14  8:19 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, akash.goel

On Tue, 2015-11-24 at 13:22 +0100, Daniel Vetter wrote:
> On Fri, Nov 20, 2015 at 10:06:16AM +0000, Chris Wilson wrote:
> > On Fri, Nov 20, 2015 at 03:07:58PM +0530, Ankitprasad Sharma wrote:
> > > On Wed, 2015-11-18 at 10:59 +0100, Daniel Vetter wrote:
> > > > On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > > > > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > > > > 
> > > > > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > > > > we try a nonblocking pin for the whole object (since that is fastest if
> > > > > reused), then failing that we try to grab one page in the mappable
> > > > > aperture. It also allows us to handle objects larger than the mappable
> > > > > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > > > > to a measely 8MiB or something like that).
> > > > 
> > > > We already have a fallback to the shmem pwrite. Why do we need this?
> > > This is mainly for the non-shmem backed objects, as we do not have
> > > fallback path for that. Agree for the shmem backed objects, as we
> > > already have a fallback.
> > > 
> > > Would like to request Chris, if he can clarify further.
> > 
> > Exactly that, with stolen we cannot use the shmem path so there exists
> > no fallback. In order to pwrite to stolen, the GTT path must be fully
> > capable.
> 
> Ok, in that case this should probably be part of the stolen obj series,
> just for clarification.
> -Daniel
Daniel,
I have moved this patch to the stolen memory series with the latest
version. Can we please move ahead for the review and merge of the first
2 patches?

Thanks,
Ankit

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-20 10:06       ` Chris Wilson
@ 2015-11-24 12:22         ` Daniel Vetter
  2015-12-14  8:19           ` Ankitprasad Sharma
  0 siblings, 1 reply; 26+ messages in thread
From: Daniel Vetter @ 2015-11-24 12:22 UTC (permalink / raw)
  To: Chris Wilson, Ankitprasad Sharma, Daniel Vetter, intel-gfx, akash.goel

On Fri, Nov 20, 2015 at 10:06:16AM +0000, Chris Wilson wrote:
> On Fri, Nov 20, 2015 at 03:07:58PM +0530, Ankitprasad Sharma wrote:
> > On Wed, 2015-11-18 at 10:59 +0100, Daniel Vetter wrote:
> > > On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > > > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > > > 
> > > > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > > > we try a nonblocking pin for the whole object (since that is fastest if
> > > > reused), then failing that we try to grab one page in the mappable
> > > > aperture. It also allows us to handle objects larger than the mappable
> > > > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > > > to a measely 8MiB or something like that).
> > > 
> > > We already have a fallback to the shmem pwrite. Why do we need this?
> > This is mainly for the non-shmem backed objects, as we do not have
> > fallback path for that. Agree for the shmem backed objects, as we
> > already have a fallback.
> > 
> > Would like to request Chris, if he can clarify further.
> 
> Exactly that, with stolen we cannot use the shmem path so there exists
> no fallback. In order to pwrite to stolen, the GTT path must be fully
> capable.

Ok, in that case this should probably be part of the stolen obj series,
just for clarification.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-20  9:37     ` Ankitprasad Sharma
@ 2015-11-20 10:06       ` Chris Wilson
  2015-11-24 12:22         ` Daniel Vetter
  0 siblings, 1 reply; 26+ messages in thread
From: Chris Wilson @ 2015-11-20 10:06 UTC (permalink / raw)
  To: Ankitprasad Sharma; +Cc: intel-gfx, akash.goel

On Fri, Nov 20, 2015 at 03:07:58PM +0530, Ankitprasad Sharma wrote:
> On Wed, 2015-11-18 at 10:59 +0100, Daniel Vetter wrote:
> > On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > > 
> > > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > > we try a nonblocking pin for the whole object (since that is fastest if
> > > reused), then failing that we try to grab one page in the mappable
> > > aperture. It also allows us to handle objects larger than the mappable
> > > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > > to a measely 8MiB or something like that).
> > 
> > We already have a fallback to the shmem pwrite. Why do we need this?
> This is mainly for the non-shmem backed objects, as we do not have
> fallback path for that. Agree for the shmem backed objects, as we
> already have a fallback.
> 
> Would like to request Chris, if he can clarify further.

Exactly that, with stolen we cannot use the shmem path so there exists
no fallback. In order to pwrite to stolen, the GTT path must be fully
capable.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-18  9:59   ` Daniel Vetter
@ 2015-11-20  9:37     ` Ankitprasad Sharma
  2015-11-20 10:06       ` Chris Wilson
  0 siblings, 1 reply; 26+ messages in thread
From: Ankitprasad Sharma @ 2015-11-20  9:37 UTC (permalink / raw)
  To: Daniel Vetter, Chris Wilson; +Cc: intel-gfx, akash.goel

On Wed, 2015-11-18 at 10:59 +0100, Daniel Vetter wrote:
> On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > 
> > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > we try a nonblocking pin for the whole object (since that is fastest if
> > reused), then failing that we try to grab one page in the mappable
> > aperture. It also allows us to handle objects larger than the mappable
> > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > to a measely 8MiB or something like that).
> 
> We already have a fallback to the shmem pwrite. Why do we need this?
This is mainly for the non-shmem backed objects, as we do not have
fallback path for that. Agree for the shmem backed objects, as we
already have a fallback.

Would like to request Chris, if he can clarify further.

Thanks, 
Ankit

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-05 11:45 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
  2015-11-05 12:34   ` Chris Wilson
  2015-11-05 12:37   ` Tvrtko Ursulin
@ 2015-11-18  9:59   ` Daniel Vetter
  2015-11-20  9:37     ` Ankitprasad Sharma
  2 siblings, 1 reply; 26+ messages in thread
From: Daniel Vetter @ 2015-11-18  9:59 UTC (permalink / raw)
  To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel

On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> 
> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> we try a nonblocking pin for the whole object (since that is fastest if
> reused), then failing that we try to grab one page in the mappable
> aperture. It also allows us to handle objects larger than the mappable
> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> to a measely 8MiB or something like that).

We already have a fallback to the shmem pwrite. Why do we need this?
-Daniel

> 
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 92 ++++++++++++++++++++++++++++++-----------
>  1 file changed, 69 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index bf5ef7a..9132240 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -766,14 +766,26 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
>  			 struct drm_file *file)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_mm_node node;
>  	ssize_t remain;
>  	loff_t offset, page_base;
>  	char __user *user_data;
> -	int page_offset, page_length, ret;
> +	int page_offset, page_length, ret, i;
> +	bool pinned = true;
>  
>  	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> -	if (ret)
> -		goto out;
> +	if (ret) {
> +		pinned = false;
> +		memset(&node, 0, sizeof(node));
> +		ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
> +							  &node, 4096, 0,
> +							  I915_CACHE_NONE, 0,
> +							  dev_priv->gtt.mappable_end,
> +							  DRM_MM_SEARCH_DEFAULT,
> +							  DRM_MM_CREATE_DEFAULT);
> +		if (ret)
> +			goto out;
> +	}
>  
>  	ret = i915_gem_object_set_to_gtt_domain(obj, true);
>  	if (ret)
> @@ -786,42 +798,76 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
>  	user_data = to_user_ptr(args->data_ptr);
>  	remain = args->size;
>  
> -	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
> -
>  	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
>  
> -	while (remain > 0) {
> -		/* Operation in this page
> +	if (likely(pinned)) {
> +		offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
> +		/* Operation in the page
>  		 *
>  		 * page_base = page offset within aperture
>  		 * page_offset = offset within page
> -		 * page_length = bytes to copy for this page
> +		 * page_length = bytes to copy for the page
>  		 */
>  		page_base = offset & PAGE_MASK;
>  		page_offset = offset_in_page(offset);
> -		page_length = remain;
> -		if ((page_offset + remain) > PAGE_SIZE)
> -			page_length = PAGE_SIZE - page_offset;
> +		while (remain > 0) {
> +			page_length = remain;
> +			if ((page_offset + remain) > PAGE_SIZE)
> +				page_length = PAGE_SIZE - page_offset;
> +
> +			/* If we get a fault while copying data, then (presumably) our
> +			 * source page isn't available.  Return the error and we'll
> +			 * retry in the slow path.
> +			 */
> +			if (fast_user_write(dev_priv->gtt.mappable, page_base,
> +						page_offset, user_data, page_length)) {
> +				ret = -EFAULT;
> +				goto out_flush;
> +			}
>  
> -		/* If we get a fault while copying data, then (presumably) our
> -		 * source page isn't available.  Return the error and we'll
> -		 * retry in the slow path.
> -		 */
> -		if (fast_user_write(dev_priv->gtt.mappable, page_base,
> -				    page_offset, user_data, page_length)) {
> -			ret = -EFAULT;
> -			goto out_flush;
> +			remain -= page_length;
> +			user_data += page_length;
> +			page_offset = 0;
>  		}
> +	} else {
> +		i = args->offset / PAGE_SIZE;
> +		page_offset = offset_in_page(args->offset);
> +		while (remain > 0) {
> +			page_length = remain;
> +			if ((page_offset + remain) > PAGE_SIZE)
> +				page_length = PAGE_SIZE - page_offset;
> +
> +			wmb();
> +			dev_priv->gtt.base.insert_page(&dev_priv->gtt.base,
> +					i915_gem_object_get_dma_address(obj, i),
> +					node.start,
> +					I915_CACHE_NONE,
> +					0);
> +			wmb();
> +
> +			if (fast_user_write(dev_priv->gtt.mappable, node.start,
> +						page_offset, user_data, page_length)) {
> +				ret = -EFAULT;
> +				goto out_flush;
> +			}
>  
> -		remain -= page_length;
> -		user_data += page_length;
> -		offset += page_length;
> +			remain -= page_length;
> +			user_data += page_length;
> +			page_offset = 0;
> +			i++;
> +		}
> +		wmb();
> +		dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> +				node.start, node.size,
> +				true);
> +		drm_mm_remove_node(&node);
>  	}
>  
>  out_flush:
>  	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
>  out_unpin:
> -	i915_gem_object_ggtt_unpin(obj);
> +	if (pinned)
> +		i915_gem_object_ggtt_unpin(obj);
>  out:
>  	return ret;
>  }
> -- 
> 1.9.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-05 14:38           ` Tvrtko Ursulin
@ 2015-11-07 10:13             ` Chris Wilson
  0 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2015-11-07 10:13 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: ankitprasad.r.sharma, intel-gfx, akash.goel

On Thu, Nov 05, 2015 at 02:38:30PM +0000, Tvrtko Ursulin wrote:
> 
> On 05/11/15 12:58, Chris Wilson wrote:
> >On Thu, Nov 05, 2015 at 12:53:20PM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 05/11/15 12:42, Chris Wilson wrote:
> >>>On Thu, Nov 05, 2015 at 12:37:46PM +0000, Tvrtko Ursulin wrote:
> >>>>
> >>>>On 05/11/15 11:45, ankitprasad.r.sharma@intel.com wrote:
> >>>>>From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >>>>>
> >>>>>In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> >>>>>we try a nonblocking pin for the whole object (since that is fastest if
> >>>>>reused), then failing that we try to grab one page in the mappable
> >>>>>aperture. It also allows us to handle objects larger than the mappable
> >>>>>aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> >>>>>to a measely 8MiB or something like that).
> >>>>
> >>>>Aperture in aperture, reminds me of those "Yo dawg I've heard you
> >>>>like X so I've put X in your X so you can Y while you Y" jokes. :D
> >>>>
> >>>>Would using the partial view code be interesting for this? Might be
> >>>>faster due to larger chunks possible, or slower due more expensive
> >>>>set up time, I don't know.
> >>>
> >>>It's the wrong abstraction.
> >>
> >>Looks the same to me, only difference is the size.
> >
> >There are many places that insert-page is used where we cannot do a
> >partial-pin.
> >
> >>Why not just to the page aperture then for simplicity? If there is
> >>any performance gain from trying the full VMA first then why there
> >>wouldn't be some to try with the partial VMA?
> >
> >obj->base.size >> PAGE_SHIFT x partial pages is not even funny.
> 
> Well I did not suggest that but larger chunks so I will repeat my question.
> 
> If going page by page is fine for performance then why have the two
> code paths at all? One which tries top pin the whole object first,
> and second which goes page by page if that fails. Why not just do it
> page by page and avoid having two copy loops etc?

If we already have the vma or can allocate it with impacting upon the
system, using it is best (since we expect to reuse it again). If we
cannot allocate it, our natural iterator size is 4096 bytes and is also
our best chance at allocating that in the aperture.

Partial vma are a high overhead and more importantly a massive impedance
mismatch.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-07  8:02 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
@ 2015-11-07 10:07   ` Chris Wilson
  0 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2015-11-07 10:07 UTC (permalink / raw)
  To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel

On Sat, Nov 07, 2015 at 01:32:15PM +0530, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> 
> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> we try a nonblocking pin for the whole object (since that is fastest if
> reused), then failing that we try to grab one page in the mappable
> aperture. It also allows us to handle objects larger than the mappable
> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> to a measely 8MiB or something like that).
> 
> v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)

Please just use my code as you haven't combined the two paths as well
yet.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-07  8:02 [PATCH v2 0/3] Support for mapping an object page by page ankitprasad.r.sharma
@ 2015-11-07  8:02 ` ankitprasad.r.sharma
  2015-11-07 10:07   ` Chris Wilson
  0 siblings, 1 reply; 26+ messages in thread
From: ankitprasad.r.sharma @ 2015-11-07  8:02 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel

From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>

In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
we try a nonblocking pin for the whole object (since that is fastest if
reused), then failing that we try to grab one page in the mappable
aperture. It also allows us to handle objects larger than the mappable
aperture (e.g. if we need to pwrite with vGPU restricting the aperture
to a measely 8MiB or something like that).

v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)

Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 68 ++++++++++++++++++++++++++++++++---------
 1 file changed, 53 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bf5ef7a..2556936 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -766,14 +766,26 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 			 struct drm_file *file)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_mm_node node;
 	ssize_t remain;
 	loff_t offset, page_base;
 	char __user *user_data;
-	int page_offset, page_length, ret;
+	int page_offset, page_length, ret, i = 0;
+	bool pinned = true;
 
 	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
-	if (ret)
-		goto out;
+	if (ret) {
+		pinned = false;
+		memset(&node, 0, sizeof(node));
+		ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
+							  &node, 4096, 0,
+							  I915_CACHE_NONE, 0,
+							  dev_priv->gtt.mappable_end,
+							  DRM_MM_SEARCH_DEFAULT,
+							  DRM_MM_CREATE_DEFAULT);
+		if (ret)
+			goto out;
+	}
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
@@ -783,26 +795,41 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 	if (ret)
 		goto out_unpin;
 
+	i915_gem_object_pin_pages(obj);
+
 	user_data = to_user_ptr(args->data_ptr);
 	remain = args->size;
 
-	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
-
 	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
 
-	while (remain > 0) {
-		/* Operation in this page
-		 *
-		 * page_base = page offset within aperture
-		 * page_offset = offset within page
-		 * page_length = bytes to copy for this page
-		 */
+	if (pinned) {
+		offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
+		/* page_base = page offset within aperture */
 		page_base = offset & PAGE_MASK;
-		page_offset = offset_in_page(offset);
+	} else {
+		offset = args->offset;
+		page_base = node.start;
+		i = args->offset / PAGE_SIZE;
+	}
+
+	/* page_offset = offset within page */
+	page_offset = offset_in_page(offset);
+	while (remain > 0) {
+		/* page_length = bytes to copy for the page */
 		page_length = remain;
 		if ((page_offset + remain) > PAGE_SIZE)
 			page_length = PAGE_SIZE - page_offset;
 
+		if (!pinned) {
+			wmb();
+			dev_priv->gtt.base.insert_page(&dev_priv->gtt.base,
+					i915_gem_object_get_dma_address(obj, i),
+					node.start,
+					I915_CACHE_NONE,
+					0);
+			wmb();
+			i++;
+		}
 		/* If we get a fault while copying data, then (presumably) our
 		 * source page isn't available.  Return the error and we'll
 		 * retry in the slow path.
@@ -815,13 +842,24 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 
 		remain -= page_length;
 		user_data += page_length;
-		offset += page_length;
+		page_offset = 0;
 	}
 
 out_flush:
+	if (!pinned) {
+		wmb();
+		dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+				node.start, node.size,
+				true);
+	}
+
+	i915_gem_object_unpin_pages(obj);
 	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
 out_unpin:
-	i915_gem_object_ggtt_unpin(obj);
+	if (pinned)
+		i915_gem_object_ggtt_unpin(obj);
+	else
+		drm_mm_remove_node(&node);
 out:
 	return ret;
 }
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-05 12:34   ` Chris Wilson
@ 2015-11-06  6:15     ` Ankitprasad Sharma
  0 siblings, 0 replies; 26+ messages in thread
From: Ankitprasad Sharma @ 2015-11-06  6:15 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, akash.goel

On Thu, 2015-11-05 at 12:34 +0000, Chris Wilson wrote:
> On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > 
> > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > we try a nonblocking pin for the whole object (since that is fastest if
> > reused), then failing that we try to grab one page in the mappable
> > aperture. It also allows us to handle objects larger than the mappable
> > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > to a measely 8MiB or something like that).
> > 
> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c | 92 ++++++++++++++++++++++++++++++-----------
> >  1 file changed, 69 insertions(+), 23 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index bf5ef7a..9132240 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -766,14 +766,26 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> >  			 struct drm_file *file)
> >  {
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> > +	struct drm_mm_node node;
> >  	ssize_t remain;
> >  	loff_t offset, page_base;
> >  	char __user *user_data;
> > -	int page_offset, page_length, ret;
> > +	int page_offset, page_length, ret, i;
> > +	bool pinned = true;
> >  
> >  	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> > -	if (ret)
> > -		goto out;
> > +	if (ret) {
> > +		pinned = false;
> > +		memset(&node, 0, sizeof(node));
> > +		ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
> > +							  &node, 4096, 0,
> > +							  I915_CACHE_NONE, 0,
> > +							  dev_priv->gtt.mappable_end,
> > +							  DRM_MM_SEARCH_DEFAULT,
> > +							  DRM_MM_CREATE_DEFAULT);
> > +		if (ret)
> > +			goto out;
> 
> Prefer to refer to my original patch as to why this wrong.
If you are concerned about pages for the object not getting allocated,
then soon after node insertion we call
i915_gem_object_set_to_gtt_domain() which takes care of page allocations
for the object.
If there is any other concern, please let me know.

Thanks,
Ankit

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-05 12:58         ` Chris Wilson
@ 2015-11-05 14:38           ` Tvrtko Ursulin
  2015-11-07 10:13             ` Chris Wilson
  0 siblings, 1 reply; 26+ messages in thread
From: Tvrtko Ursulin @ 2015-11-05 14:38 UTC (permalink / raw)
  To: Chris Wilson, ankitprasad.r.sharma, intel-gfx, akash.goel


On 05/11/15 12:58, Chris Wilson wrote:
> On Thu, Nov 05, 2015 at 12:53:20PM +0000, Tvrtko Ursulin wrote:
>>
>> On 05/11/15 12:42, Chris Wilson wrote:
>>> On Thu, Nov 05, 2015 at 12:37:46PM +0000, Tvrtko Ursulin wrote:
>>>>
>>>> On 05/11/15 11:45, ankitprasad.r.sharma@intel.com wrote:
>>>>> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>>>>>
>>>>> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
>>>>> we try a nonblocking pin for the whole object (since that is fastest if
>>>>> reused), then failing that we try to grab one page in the mappable
>>>>> aperture. It also allows us to handle objects larger than the mappable
>>>>> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
>>>>> to a measely 8MiB or something like that).
>>>>
>>>> Aperture in aperture, reminds me of those "Yo dawg I've heard you
>>>> like X so I've put X in your X so you can Y while you Y" jokes. :D
>>>>
>>>> Would using the partial view code be interesting for this? Might be
>>>> faster due to larger chunks possible, or slower due more expensive
>>>> set up time, I don't know.
>>>
>>> It's the wrong abstraction.
>>
>> Looks the same to me, only difference is the size.
>
> There are many places that insert-page is used where we cannot do a
> partial-pin.
>
>> Why not just to the page aperture then for simplicity? If there is
>> any performance gain from trying the full VMA first then why there
>> wouldn't be some to try with the partial VMA?
>
> obj->base.size >> PAGE_SHIFT x partial pages is not even funny.

Well I did not suggest that but larger chunks so I will repeat my question.

If going page by page is fine for performance then why have the two code 
paths at all? One which tries top pin the whole object first, and second 
which goes page by page if that fails. Why not just do it page by page 
and avoid having two copy loops etc?

On the other hand, if there is significant performance hit with the page 
by page path, then why not use partial views with 1Mb chunks or something?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-05 12:53       ` Tvrtko Ursulin
@ 2015-11-05 12:58         ` Chris Wilson
  2015-11-05 14:38           ` Tvrtko Ursulin
  0 siblings, 1 reply; 26+ messages in thread
From: Chris Wilson @ 2015-11-05 12:58 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: ankitprasad.r.sharma, intel-gfx, akash.goel

On Thu, Nov 05, 2015 at 12:53:20PM +0000, Tvrtko Ursulin wrote:
> 
> On 05/11/15 12:42, Chris Wilson wrote:
> >On Thu, Nov 05, 2015 at 12:37:46PM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 05/11/15 11:45, ankitprasad.r.sharma@intel.com wrote:
> >>>From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >>>
> >>>In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> >>>we try a nonblocking pin for the whole object (since that is fastest if
> >>>reused), then failing that we try to grab one page in the mappable
> >>>aperture. It also allows us to handle objects larger than the mappable
> >>>aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> >>>to a measely 8MiB or something like that).
> >>
> >>Aperture in aperture, reminds me of those "Yo dawg I've heard you
> >>like X so I've put X in your X so you can Y while you Y" jokes. :D
> >>
> >>Would using the partial view code be interesting for this? Might be
> >>faster due to larger chunks possible, or slower due more expensive
> >>set up time, I don't know.
> >
> >It's the wrong abstraction.
> 
> Looks the same to me, only difference is the size.

There are many places that insert-page is used where we cannot do a
partial-pin.
 
> Why not just to the page aperture then for simplicity? If there is
> any performance gain from trying the full VMA first then why there
> wouldn't be some to try with the partial VMA?

obj->base.size >> PAGE_SHIFT x partial pages is not even funny.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-05 12:42     ` Chris Wilson
@ 2015-11-05 12:53       ` Tvrtko Ursulin
  2015-11-05 12:58         ` Chris Wilson
  0 siblings, 1 reply; 26+ messages in thread
From: Tvrtko Ursulin @ 2015-11-05 12:53 UTC (permalink / raw)
  To: Chris Wilson, ankitprasad.r.sharma, intel-gfx, akash.goel


On 05/11/15 12:42, Chris Wilson wrote:
> On Thu, Nov 05, 2015 at 12:37:46PM +0000, Tvrtko Ursulin wrote:
>>
>> On 05/11/15 11:45, ankitprasad.r.sharma@intel.com wrote:
>>> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>>>
>>> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
>>> we try a nonblocking pin for the whole object (since that is fastest if
>>> reused), then failing that we try to grab one page in the mappable
>>> aperture. It also allows us to handle objects larger than the mappable
>>> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
>>> to a measely 8MiB or something like that).
>>
>> Aperture in aperture, reminds me of those "Yo dawg I've heard you
>> like X so I've put X in your X so you can Y while you Y" jokes. :D
>>
>> Would using the partial view code be interesting for this? Might be
>> faster due to larger chunks possible, or slower due more expensive
>> set up time, I don't know.
>
> It's the wrong abstraction.

Looks the same to me, only difference is the size.

Why not just to the page aperture then for simplicity? If there is any 
performance gain from trying the full VMA first then why there wouldn't 
be some to try with the partial VMA?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-05 12:37   ` Tvrtko Ursulin
@ 2015-11-05 12:42     ` Chris Wilson
  2015-11-05 12:53       ` Tvrtko Ursulin
  0 siblings, 1 reply; 26+ messages in thread
From: Chris Wilson @ 2015-11-05 12:42 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: ankitprasad.r.sharma, intel-gfx, akash.goel

On Thu, Nov 05, 2015 at 12:37:46PM +0000, Tvrtko Ursulin wrote:
> 
> On 05/11/15 11:45, ankitprasad.r.sharma@intel.com wrote:
> >From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> >In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> >we try a nonblocking pin for the whole object (since that is fastest if
> >reused), then failing that we try to grab one page in the mappable
> >aperture. It also allows us to handle objects larger than the mappable
> >aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> >to a measely 8MiB or something like that).
> 
> Aperture in aperture, reminds me of those "Yo dawg I've heard you
> like X so I've put X in your X so you can Y while you Y" jokes. :D
> 
> Would using the partial view code be interesting for this? Might be
> faster due to larger chunks possible, or slower due more expensive
> set up time, I don't know.

It's the wrong abstraction.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-05 11:45 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
  2015-11-05 12:34   ` Chris Wilson
@ 2015-11-05 12:37   ` Tvrtko Ursulin
  2015-11-05 12:42     ` Chris Wilson
  2015-11-18  9:59   ` Daniel Vetter
  2 siblings, 1 reply; 26+ messages in thread
From: Tvrtko Ursulin @ 2015-11-05 12:37 UTC (permalink / raw)
  To: ankitprasad.r.sharma, intel-gfx; +Cc: akash.goel


On 05/11/15 11:45, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> we try a nonblocking pin for the whole object (since that is fastest if
> reused), then failing that we try to grab one page in the mappable
> aperture. It also allows us to handle objects larger than the mappable
> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> to a measely 8MiB or something like that).

Aperture in aperture, reminds me of those "Yo dawg I've heard you like X 
so I've put X in your X so you can Y while you Y" jokes. :D

Would using the partial view code be interesting for this? Might be 
faster due to larger chunks possible, or slower due more expensive set 
up time, I don't know.

Interesting thing is it was added for virtualization use cases so there 
is overlap here.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-05 11:45 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
@ 2015-11-05 12:34   ` Chris Wilson
  2015-11-06  6:15     ` Ankitprasad Sharma
  2015-11-05 12:37   ` Tvrtko Ursulin
  2015-11-18  9:59   ` Daniel Vetter
  2 siblings, 1 reply; 26+ messages in thread
From: Chris Wilson @ 2015-11-05 12:34 UTC (permalink / raw)
  To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel

On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> 
> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> we try a nonblocking pin for the whole object (since that is fastest if
> reused), then failing that we try to grab one page in the mappable
> aperture. It also allows us to handle objects larger than the mappable
> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> to a measely 8MiB or something like that).
> 
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 92 ++++++++++++++++++++++++++++++-----------
>  1 file changed, 69 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index bf5ef7a..9132240 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -766,14 +766,26 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
>  			 struct drm_file *file)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_mm_node node;
>  	ssize_t remain;
>  	loff_t offset, page_base;
>  	char __user *user_data;
> -	int page_offset, page_length, ret;
> +	int page_offset, page_length, ret, i;
> +	bool pinned = true;
>  
>  	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> -	if (ret)
> -		goto out;
> +	if (ret) {
> +		pinned = false;
> +		memset(&node, 0, sizeof(node));
> +		ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
> +							  &node, 4096, 0,
> +							  I915_CACHE_NONE, 0,
> +							  dev_priv->gtt.mappable_end,
> +							  DRM_MM_SEARCH_DEFAULT,
> +							  DRM_MM_CREATE_DEFAULT);
> +		if (ret)
> +			goto out;

Prefer to refer to my original patch as to why this wrong.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
  2015-11-05 11:45 [PATCH 0/3] Support for mapping an object page by page ankitprasad.r.sharma
@ 2015-11-05 11:45 ` ankitprasad.r.sharma
  2015-11-05 12:34   ` Chris Wilson
                     ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: ankitprasad.r.sharma @ 2015-11-05 11:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel

From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>

In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
we try a nonblocking pin for the whole object (since that is fastest if
reused), then failing that we try to grab one page in the mappable
aperture. It also allows us to handle objects larger than the mappable
aperture (e.g. if we need to pwrite with vGPU restricting the aperture
to a measely 8MiB or something like that).

Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 92 ++++++++++++++++++++++++++++++-----------
 1 file changed, 69 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bf5ef7a..9132240 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -766,14 +766,26 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 			 struct drm_file *file)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_mm_node node;
 	ssize_t remain;
 	loff_t offset, page_base;
 	char __user *user_data;
-	int page_offset, page_length, ret;
+	int page_offset, page_length, ret, i;
+	bool pinned = true;
 
 	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
-	if (ret)
-		goto out;
+	if (ret) {
+		pinned = false;
+		memset(&node, 0, sizeof(node));
+		ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
+							  &node, 4096, 0,
+							  I915_CACHE_NONE, 0,
+							  dev_priv->gtt.mappable_end,
+							  DRM_MM_SEARCH_DEFAULT,
+							  DRM_MM_CREATE_DEFAULT);
+		if (ret)
+			goto out;
+	}
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
@@ -786,42 +798,76 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 	user_data = to_user_ptr(args->data_ptr);
 	remain = args->size;
 
-	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
-
 	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
 
-	while (remain > 0) {
-		/* Operation in this page
+	if (likely(pinned)) {
+		offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
+		/* Operation in the page
 		 *
 		 * page_base = page offset within aperture
 		 * page_offset = offset within page
-		 * page_length = bytes to copy for this page
+		 * page_length = bytes to copy for the page
 		 */
 		page_base = offset & PAGE_MASK;
 		page_offset = offset_in_page(offset);
-		page_length = remain;
-		if ((page_offset + remain) > PAGE_SIZE)
-			page_length = PAGE_SIZE - page_offset;
+		while (remain > 0) {
+			page_length = remain;
+			if ((page_offset + remain) > PAGE_SIZE)
+				page_length = PAGE_SIZE - page_offset;
+
+			/* If we get a fault while copying data, then (presumably) our
+			 * source page isn't available.  Return the error and we'll
+			 * retry in the slow path.
+			 */
+			if (fast_user_write(dev_priv->gtt.mappable, page_base,
+						page_offset, user_data, page_length)) {
+				ret = -EFAULT;
+				goto out_flush;
+			}
 
-		/* If we get a fault while copying data, then (presumably) our
-		 * source page isn't available.  Return the error and we'll
-		 * retry in the slow path.
-		 */
-		if (fast_user_write(dev_priv->gtt.mappable, page_base,
-				    page_offset, user_data, page_length)) {
-			ret = -EFAULT;
-			goto out_flush;
+			remain -= page_length;
+			user_data += page_length;
+			page_offset = 0;
 		}
+	} else {
+		i = args->offset / PAGE_SIZE;
+		page_offset = offset_in_page(args->offset);
+		while (remain > 0) {
+			page_length = remain;
+			if ((page_offset + remain) > PAGE_SIZE)
+				page_length = PAGE_SIZE - page_offset;
+
+			wmb();
+			dev_priv->gtt.base.insert_page(&dev_priv->gtt.base,
+					i915_gem_object_get_dma_address(obj, i),
+					node.start,
+					I915_CACHE_NONE,
+					0);
+			wmb();
+
+			if (fast_user_write(dev_priv->gtt.mappable, node.start,
+						page_offset, user_data, page_length)) {
+				ret = -EFAULT;
+				goto out_flush;
+			}
 
-		remain -= page_length;
-		user_data += page_length;
-		offset += page_length;
+			remain -= page_length;
+			user_data += page_length;
+			page_offset = 0;
+			i++;
+		}
+		wmb();
+		dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+				node.start, node.size,
+				true);
+		drm_mm_remove_node(&node);
 	}
 
 out_flush:
 	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
 out_unpin:
-	i915_gem_object_ggtt_unpin(obj);
+	if (pinned)
+		i915_gem_object_ggtt_unpin(obj);
 out:
 	return ret;
 }
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2015-12-14  8:47 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-09 10:56 [PATCH v3 0/3] Support for mapping an object page by page ankitprasad.r.sharma
2015-11-09 10:56 ` [PATCH 1/3] drm/i915: Add support " ankitprasad.r.sharma
2015-11-13 14:57   ` Tvrtko Ursulin
2015-11-09 10:56 ` [PATCH 2/3] drm/i915: Introduce i915_gem_object_get_dma_address() ankitprasad.r.sharma
2015-11-09 10:56 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
2015-11-09 12:20   ` kbuild test robot
2015-11-10  7:55   ` Mika Kuoppala
2015-11-10  8:05     ` Ankitprasad Sharma
2015-11-10  8:44     ` Chris Wilson
2015-11-10 11:51       ` Chris Wilson
  -- strict thread matches above, loose matches on Subject: below --
2015-11-07  8:02 [PATCH v2 0/3] Support for mapping an object page by page ankitprasad.r.sharma
2015-11-07  8:02 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
2015-11-07 10:07   ` Chris Wilson
2015-11-05 11:45 [PATCH 0/3] Support for mapping an object page by page ankitprasad.r.sharma
2015-11-05 11:45 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
2015-11-05 12:34   ` Chris Wilson
2015-11-06  6:15     ` Ankitprasad Sharma
2015-11-05 12:37   ` Tvrtko Ursulin
2015-11-05 12:42     ` Chris Wilson
2015-11-05 12:53       ` Tvrtko Ursulin
2015-11-05 12:58         ` Chris Wilson
2015-11-05 14:38           ` Tvrtko Ursulin
2015-11-07 10:13             ` Chris Wilson
2015-11-18  9:59   ` Daniel Vetter
2015-11-20  9:37     ` Ankitprasad Sharma
2015-11-20 10:06       ` Chris Wilson
2015-11-24 12:22         ` Daniel Vetter
2015-12-14  8:19           ` Ankitprasad Sharma

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.