* [PATCH v3 0/3] Support for mapping an object page by page
@ 2015-11-09 10:56 ankitprasad.r.sharma
2015-11-09 10:56 ` [PATCH 1/3] drm/i915: Add support " ankitprasad.r.sharma
` (2 more replies)
0 siblings, 3 replies; 26+ messages in thread
From: ankitprasad.r.sharma @ 2015-11-09 10:56 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
It is possible that when we want to map an object to the aperture, either
we run out of aperture space or the size of the object is larger than
the mappable aperture. In such cases we might not be able to map the whole
object to the aperture. For cases as such, here we introduce insert_page()
which allows us to map a single page in to the mappable aperture space
(which has a higher probabilty of succeeding). This can be iterated
over to access the whole object by using space as meagre as page size.
Here we try to use insert_page() for pwrite_fast in case a nonblocking
pin for the whole object fails, which helps us to iterate over the whole
object and perform the pwrite without mapping the whole object to the
mappable aperture.
We also introduce i915_gem_object_get_dma_address() to perform fast
sequential lookup of the dma address associated with any page within the
object.
Ankitprasad Sharma (1):
drm/i915: Use insert_page for pwrite_fast
Chris Wilson (2):
drm/i915: Add support for mapping an object page by page
drm/i915: Introduce i915_gem_object_get_dma_address()
drivers/char/agp/intel-gtt.c | 9 +++++
drivers/gpu/drm/i915/i915_drv.h | 17 +++++++++
drivers/gpu/drm/i915/i915_gem.c | 75 ++++++++++++++++++++++++++-----------
drivers/gpu/drm/i915/i915_gem_gtt.c | 49 ++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +++
include/drm/intel-gtt.h | 3 ++
6 files changed, 136 insertions(+), 22 deletions(-)
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 1/3] drm/i915: Add support for mapping an object page by page
2015-11-09 10:56 [PATCH v3 0/3] Support for mapping an object page by page ankitprasad.r.sharma
@ 2015-11-09 10:56 ` ankitprasad.r.sharma
2015-11-13 14:57 ` Tvrtko Ursulin
2015-11-09 10:56 ` [PATCH 2/3] drm/i915: Introduce i915_gem_object_get_dma_address() ankitprasad.r.sharma
2015-11-09 10:56 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
2 siblings, 1 reply; 26+ messages in thread
From: ankitprasad.r.sharma @ 2015-11-09 10:56 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel
From: Chris Wilson <chris@chris-wilson.co.uk>
Introduced a new vm specfic callback insert_page() to program a single pte in
ggtt or ppgtt. This allows us to map a single page in to the mappable aperture
space. This can be iterated over to access the whole object by using space as
meagre as page size.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/char/agp/intel-gtt.c | 9 +++++++
drivers/gpu/drm/i915/i915_gem_gtt.c | 49 +++++++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_gtt.h | 5 ++++
include/drm/intel-gtt.h | 3 +++
4 files changed, 66 insertions(+)
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 1341a94..7c68576 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -838,6 +838,15 @@ static bool i830_check_flags(unsigned int flags)
return false;
}
+void intel_gtt_insert_page(dma_addr_t addr,
+ unsigned int pg,
+ unsigned int flags)
+{
+ intel_private.driver->write_entry(addr, pg, flags);
+ wmb();
+}
+EXPORT_SYMBOL(intel_gtt_insert_page);
+
void intel_gtt_insert_sg_entries(struct sg_table *st,
unsigned int pg_start,
unsigned int flags)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 016739e..9fd1f857 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2345,6 +2345,23 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
#endif
}
+static void gen8_ggtt_insert_page(struct i915_address_space *vm,
+ dma_addr_t addr,
+ uint64_t offset,
+ enum i915_cache_level level,
+ u32 unused)
+{
+ struct drm_i915_private *dev_priv = to_i915(vm->dev);
+ gen8_pte_t __iomem *pte =
+ (gen8_pte_t __iomem *)dev_priv->gtt.gsm +
+ (offset >> PAGE_SHIFT);
+
+ gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
+ wmb();
+
+ I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+}
+
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
struct sg_table *st,
uint64_t start,
@@ -2385,6 +2402,23 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
POSTING_READ(GFX_FLSH_CNTL_GEN6);
}
+static void gen6_ggtt_insert_page(struct i915_address_space *vm,
+ dma_addr_t addr,
+ uint64_t offset,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct drm_i915_private *dev_priv = to_i915(vm->dev);
+ gen6_pte_t __iomem *pte =
+ (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
+ (offset >> PAGE_SHIFT);
+
+ iowrite32(vm->pte_encode(addr, level, true, flags), pte);
+ wmb();
+
+ I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+}
+
/*
* Binds an object into the global gtt with the specified cache level. The object
* will be accessible to the GPU via commands whose operands reference offsets
@@ -2481,6 +2515,18 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
readl(gtt_base);
}
+static void i915_ggtt_insert_page(struct i915_address_space *vm,
+ dma_addr_t addr,
+ uint64_t offset,
+ enum i915_cache_level cache_level,
+ u32 unused)
+{
+ unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+ intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
+}
+
static void i915_ggtt_insert_entries(struct i915_address_space *vm,
struct sg_table *pages,
uint64_t start,
@@ -2996,6 +3042,7 @@ static int gen8_gmch_probe(struct drm_device *dev,
ret = ggtt_probe_common(dev, gtt_size);
dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
+ dev_priv->gtt.base.insert_page = gen8_ggtt_insert_page;
dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
@@ -3038,6 +3085,7 @@ static int gen6_gmch_probe(struct drm_device *dev,
ret = ggtt_probe_common(dev, gtt_size);
dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
+ dev_priv->gtt.base.insert_page = gen6_ggtt_insert_page;
dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
@@ -3072,6 +3120,7 @@ static int i915_gmch_probe(struct drm_device *dev,
intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
+ dev_priv->gtt.base.insert_page = i915_ggtt_insert_page;
dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a216397..aae30df 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -320,6 +320,11 @@ struct i915_address_space {
uint64_t start,
uint64_t length,
bool use_scratch);
+ void (*insert_page)(struct i915_address_space *vm,
+ dma_addr_t addr,
+ uint64_t offset,
+ enum i915_cache_level cache_level,
+ u32 flags);
void (*insert_entries)(struct i915_address_space *vm,
struct sg_table *st,
uint64_t start,
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 9e9bddaa5..f49edec 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -13,6 +13,9 @@ void intel_gmch_remove(void);
bool intel_enable_gtt(void);
void intel_gtt_chipset_flush(void);
+void intel_gtt_insert_page(dma_addr_t addr,
+ unsigned int pg,
+ unsigned int flags);
void intel_gtt_insert_sg_entries(struct sg_table *st,
unsigned int pg_start,
unsigned int flags);
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 2/3] drm/i915: Introduce i915_gem_object_get_dma_address()
2015-11-09 10:56 [PATCH v3 0/3] Support for mapping an object page by page ankitprasad.r.sharma
2015-11-09 10:56 ` [PATCH 1/3] drm/i915: Add support " ankitprasad.r.sharma
@ 2015-11-09 10:56 ` ankitprasad.r.sharma
2015-11-09 10:56 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
2 siblings, 0 replies; 26+ messages in thread
From: ankitprasad.r.sharma @ 2015-11-09 10:56 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel
From: Chris Wilson <chris@chris-wilson.co.uk>
This utility function is a companion to i915_gem_object_get_page() that
uses the same cached iterator for the scatterlist to perform fast
sequential lookup of the dma address associated with any page within the
object.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d2a546a..548a0eb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2862,6 +2862,23 @@ static inline int __sg_page_count(struct scatterlist *sg)
return sg->length >> PAGE_SHIFT;
}
+static inline dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n)
+{
+ if (n < obj->get_page.last) {
+ obj->get_page.sg = obj->pages->sgl;
+ obj->get_page.last = 0;
+ }
+
+ while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) {
+ obj->get_page.last += __sg_page_count(obj->get_page.sg++);
+ if (unlikely(sg_is_chain(obj->get_page.sg)))
+ obj->get_page.sg = sg_chain_ptr(obj->get_page.sg);
+ }
+
+ return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT);
+}
+
static inline struct page *
i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
{
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 26+ messages in thread
* [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-09 10:56 [PATCH v3 0/3] Support for mapping an object page by page ankitprasad.r.sharma
2015-11-09 10:56 ` [PATCH 1/3] drm/i915: Add support " ankitprasad.r.sharma
2015-11-09 10:56 ` [PATCH 2/3] drm/i915: Introduce i915_gem_object_get_dma_address() ankitprasad.r.sharma
@ 2015-11-09 10:56 ` ankitprasad.r.sharma
2015-11-09 12:20 ` kbuild test robot
2015-11-10 7:55 ` Mika Kuoppala
2 siblings, 2 replies; 26+ messages in thread
From: ankitprasad.r.sharma @ 2015-11-09 10:56 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
we try a nonblocking pin for the whole object (since that is fastest if
reused), then failing that we try to grab one page in the mappable
aperture. It also allows us to handle objects larger than the mappable
aperture (e.g. if we need to pwrite with vGPU restricting the aperture
to a measely 8MiB or something like that).
v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
v3: Combined loops based on local patch by Chris (Chris)
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem.c | 75 +++++++++++++++++++++++++++++------------
1 file changed, 53 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f1e3fde..9d2e6e3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -760,20 +760,33 @@ fast_user_write(struct io_mapping *mapping,
* user into the GTT, uncached.
*/
static int
-i915_gem_gtt_pwrite_fast(struct drm_device *dev,
+i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
struct drm_i915_gem_object *obj,
struct drm_i915_gem_pwrite *args,
struct drm_file *file)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
- ssize_t remain;
- loff_t offset, page_base;
+ struct drm_mm_node node;
+ uint64_t remain, offset;
char __user *user_data;
- int page_offset, page_length, ret;
+ int ret;
ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
- if (ret)
- goto out;
+ if (ret) {
+ memset(&node, 0, sizeof(node));
+ ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm,
+ &node, 4096, 0,
+ I915_CACHE_NONE, 0,
+ i915->gtt.mappable_end,
+ DRM_MM_SEARCH_DEFAULT,
+ DRM_MM_CREATE_DEFAULT);
+ if (ret)
+ goto out;
+
+ i915_gem_object_pin_pages(obj);
+ } else {
+ node.start = i915_gem_obj_ggtt_offset(obj);
+ node.allocated = false;
+ }
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
@@ -783,31 +796,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
if (ret)
goto out_unpin;
- user_data = to_user_ptr(args->data_ptr);
- remain = args->size;
-
- offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
-
intel_fb_obj_invalidate(obj, ORIGIN_GTT);
+ obj->dirty = true;
- while (remain > 0) {
+ user_data = to_user_ptr(args->data_ptr);
+ offset = args->offset;
+ remain = args->size;
+ while (remain) {
/* Operation in this page
*
* page_base = page offset within aperture
* page_offset = offset within page
* page_length = bytes to copy for this page
*/
- page_base = offset & PAGE_MASK;
- page_offset = offset_in_page(offset);
- page_length = remain;
- if ((page_offset + remain) > PAGE_SIZE)
- page_length = PAGE_SIZE - page_offset;
-
+ u32 page_base = node.start;
+ unsigned page_offset = offset_in_page(offset);
+ unsigned page_length = PAGE_SIZE - page_offset;
+ page_length = remain < page_length ? remain : page_length;
+ if (node.allocated) {
+ wmb();
+ i915->gtt.base.insert_page(&i915->gtt.base,
+ i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
+ node.start,
+ I915_CACHE_NONE,
+ 0);
+ wmb();
+ } else {
+ page_base += offset & PAGE_MASK;
+ }
/* If we get a fault while copying data, then (presumably) our
* source page isn't available. Return the error and we'll
* retry in the slow path.
*/
- if (fast_user_write(dev_priv->gtt.mappable, page_base,
+ if (fast_user_write(i915->gtt.mappable, page_base,
page_offset, user_data, page_length)) {
ret = -EFAULT;
goto out_flush;
@@ -821,7 +842,17 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
out_flush:
intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
- i915_gem_object_ggtt_unpin(obj);
+ if (node.allocated) {
+ wmb();
+ i915->gtt.base.clear_range(&i915->gtt.base,
+ node.start, node.size,
+ true);
+ drm_mm_remove_node(&node);
+ i915_gem_object_unpin_pages(obj);
+ }
+ else {
+ i915_gem_object_ggtt_unpin(obj);
+ }
out:
return ret;
}
@@ -1086,7 +1117,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (obj->tiling_mode == I915_TILING_NONE &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
cpu_write_needs_clflush(obj)) {
- ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
+ ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
/* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between
* textures). Fallback to the shmem path in that case. */
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-09 10:56 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
@ 2015-11-09 12:20 ` kbuild test robot
2015-11-10 7:55 ` Mika Kuoppala
1 sibling, 0 replies; 26+ messages in thread
From: kbuild test robot @ 2015-11-09 12:20 UTC (permalink / raw)
Cc: akash.goel, Ankitprasad Sharma, intel-gfx, kbuild-all
[-- Attachment #1: Type: text/plain, Size: 19921 bytes --]
Hi Ankitprasad,
[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on v4.3 next-20151109]
url: https://github.com/0day-ci/linux/commits/ankitprasad-r-sharma-intel-com/Support-for-mapping-an-object-page-by-page/20151109-191910
base: git://anongit.freedesktop.org/drm-intel for-linux-next
reproduce: make htmldocs
All warnings (new ones prefixed by >>):
include/drm/drm_crtc.h:875: warning: No description found for parameter 'mutex'
include/drm/drm_crtc.h:875: warning: No description found for parameter 'helper_private'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tile_idr'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'delayed_event'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'edid_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'dpms_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'path_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tile_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'plane_type_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'rotation_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_src_x'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_src_y'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_src_w'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_src_h'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_crtc_x'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_crtc_y'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_crtc_w'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_crtc_h'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_fb_id'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_crtc_id'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_active'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'prop_mode_id'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'dvi_i_subconnector_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'dvi_i_select_subconnector_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_subconnector_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_select_subconnector_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_mode_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_left_margin_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_right_margin_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_top_margin_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_bottom_margin_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_brightness_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_contrast_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_flicker_reduction_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_overscan_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_saturation_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'tv_hue_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'scaling_mode_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'aspect_ratio_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'dirty_info_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'suggested_x_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'suggested_y_property'
include/drm/drm_crtc.h:1160: warning: No description found for parameter 'allow_fb_modifiers'
include/drm/drm_fb_helper.h:148: warning: No description found for parameter 'connector_info'
include/drm/drm_dp_helper.h:709: warning: No description found for parameter 'i2c_nack_count'
include/drm/drm_dp_helper.h:709: warning: No description found for parameter 'i2c_defer_count'
drivers/gpu/drm/drm_dp_mst_topology.c:2211: warning: No description found for parameter 'connector'
include/drm/drm_dp_mst_helper.h:97: warning: No description found for parameter 'cached_edid'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'max_dpcd_transaction_bytes'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'sink_count'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'total_slots'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'avail_slots'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'total_pbn'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'qlock'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'tx_msg_downq'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'tx_msg_upq'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'tx_down_in_progress'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'tx_up_in_progress'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'payload_lock'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'proposed_vcpis'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'payloads'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'payload_mask'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'vcpi_mask'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'tx_waitq'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'work'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'tx_work'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'destroy_connector_list'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'destroy_connector_lock'
include/drm/drm_dp_mst_helper.h:470: warning: No description found for parameter 'destroy_connector_work'
drivers/gpu/drm/drm_dp_mst_topology.c:2211: warning: No description found for parameter 'connector'
drivers/gpu/drm/drm_irq.c:173: warning: No description found for parameter 'flags'
include/drm/drmP.h:164: warning: No description found for parameter 'fmt'
include/drm/drmP.h:180: warning: No description found for parameter 'fmt'
include/drm/drmP.h:198: warning: No description found for parameter 'fmt'
include/drm/drmP.h:238: warning: No description found for parameter 'dev'
include/drm/drmP.h:238: warning: No description found for parameter 'data'
include/drm/drmP.h:238: warning: No description found for parameter 'file_priv'
include/drm/drmP.h:271: warning: No description found for parameter 'ioctl'
include/drm/drmP.h:271: warning: No description found for parameter '_func'
include/drm/drmP.h:271: warning: No description found for parameter '_flags'
include/drm/drmP.h:344: warning: cannot understand function prototype: 'struct drm_lock_data '
include/drm/drmP.h:397: warning: cannot understand function prototype: 'struct drm_driver '
include/drm/drmP.h:648: warning: cannot understand function prototype: 'struct drm_info_list '
include/drm/drmP.h:658: warning: cannot understand function prototype: 'struct drm_info_node '
include/drm/drmP.h:668: warning: cannot understand function prototype: 'struct drm_minor '
include/drm/drmP.h:716: warning: cannot understand function prototype: 'struct drm_device '
drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'wedged'
drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'fmt'
drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'wedged'
drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'fmt'
drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'wedged'
drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'fmt'
drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'wedged'
drivers/gpu/drm/i915/i915_irq.c:2584: warning: No description found for parameter 'fmt'
drivers/gpu/drm/i915/i915_gem.c:421: warning: No description found for parameter 'dev'
drivers/gpu/drm/i915/i915_gem.c:421: warning: No description found for parameter 'data'
drivers/gpu/drm/i915/i915_gem.c:421: warning: No description found for parameter 'file'
drivers/gpu/drm/i915/i915_gem.c:686: warning: No description found for parameter 'dev'
drivers/gpu/drm/i915/i915_gem.c:686: warning: No description found for parameter 'data'
drivers/gpu/drm/i915/i915_gem.c:686: warning: No description found for parameter 'file'
>> drivers/gpu/drm/i915/i915_gem.c:767: warning: No description found for parameter 'i915'
drivers/gpu/drm/i915/i915_gem.c:767: warning: No description found for parameter 'obj'
drivers/gpu/drm/i915/i915_gem.c:767: warning: No description found for parameter 'args'
drivers/gpu/drm/i915/i915_gem.c:767: warning: No description found for parameter 'file'
drivers/gpu/drm/i915/i915_gem.c:1060: warning: No description found for parameter 'dev'
drivers/gpu/drm/i915/i915_gem.c:1060: warning: No description found for parameter 'data'
drivers/gpu/drm/i915/i915_gem.c:1060: warning: No description found for parameter 'file'
drivers/gpu/drm/i915/i915_gem.c:1225: warning: No description found for parameter 'rps'
drivers/gpu/drm/i915/i915_gem.c:1431: warning: No description found for parameter 'req'
drivers/gpu/drm/i915/i915_gem.c:1466: warning: No description found for parameter 'obj'
drivers/gpu/drm/i915/i915_gem.c:1466: warning: No description found for parameter 'readonly'
drivers/gpu/drm/i915/i915_gem.c:1589: warning: No description found for parameter 'dev'
drivers/gpu/drm/i915/i915_gem.c:1589: warning: No description found for parameter 'data'
drivers/gpu/drm/i915/i915_gem.c:1589: warning: No description found for parameter 'file'
drivers/gpu/drm/i915/i915_gem.c:1652: warning: No description found for parameter 'dev'
drivers/gpu/drm/i915/i915_gem.c:1652: warning: No description found for parameter 'data'
drivers/gpu/drm/i915/i915_gem.c:1652: warning: No description found for parameter 'file'
drivers/gpu/drm/i915/i915_gem.c:1697: warning: No description found for parameter 'dev'
drivers/gpu/drm/i915/i915_gem.c:1697: warning: No description found for parameter 'data'
drivers/gpu/drm/i915/i915_gem.c:1697: warning: No description found for parameter 'file'
drivers/gpu/drm/i915/i915_gem.c:1985: warning: No description found for parameter 'dev'
drivers/gpu/drm/i915/i915_gem.c:1985: warning: No description found for parameter 'size'
drivers/gpu/drm/i915/i915_gem.c:1985: warning: No description found for parameter 'tiling_mode'
drivers/gpu/drm/i915/i915_gem.c:1985: warning: No description found for parameter 'fenced'
drivers/gpu/drm/i915/i915_gem.c:1985: warning: Excess function parameter 'obj' description in 'i915_gem_get_gtt_alignment'
drivers/gpu/drm/i915/i915_gem.c:2848: warning: No description found for parameter 'ring'
drivers/gpu/drm/i915/i915_gem.c:2977: warning: No description found for parameter 'obj'
drivers/gpu/drm/i915/i915_gem.c:3027: warning: No description found for parameter 'dev'
drivers/gpu/drm/i915/i915_gem.c:3027: warning: No description found for parameter 'data'
drivers/gpu/drm/i915/i915_gem.c:3027: warning: No description found for parameter 'file'
drivers/gpu/drm/i915/i915_gem.c:3027: warning: Excess function parameter 'DRM_IOCTL_ARGS' description in 'i915_gem_wait_ioctl'
drivers/gpu/drm/i915/i915_gem.c:3396: warning: No description found for parameter 'obj'
drivers/gpu/drm/i915/i915_gem.c:3396: warning: No description found for parameter 'vm'
drivers/gpu/drm/i915/i915_gem.c:3396: warning: No description found for parameter 'ggtt_view'
drivers/gpu/drm/i915/i915_gem.c:3396: warning: No description found for parameter 'alignment'
drivers/gpu/drm/i915/i915_gem.c:3396: warning: No description found for parameter 'flags'
drivers/gpu/drm/i915/i915_gem.c:3631: warning: No description found for parameter 'obj'
drivers/gpu/drm/i915/i915_gem.c:3631: warning: No description found for parameter 'write'
drivers/gpu/drm/i915/i915_gem.c:3706: warning: No description found for parameter 'obj'
drivers/gpu/drm/i915/i915_gem.c:3706: warning: No description found for parameter 'cache_level'
drivers/gpu/drm/i915/i915_gem.c:3980: warning: No description found for parameter 'obj'
drivers/gpu/drm/i915/i915_gem.c:3980: warning: No description found for parameter 'write'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: No description found for parameter 'params'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'dev' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'file' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'ring' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'ctx' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'batch_obj' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'exec_start' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'dispatch_flags' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: No description found for parameter 'params'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'dev' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'file' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'ring' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'ctx' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'batch_obj' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'exec_start' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_lrc.c:869: warning: Excess function parameter 'dispatch_flags' description in 'intel_execlists_submission'
drivers/gpu/drm/i915/intel_guc_loader.c:1: warning: no structured comments found
Was looking for 'GuC-specific firmware loader'.
drivers/gpu/drm/i915/i915_guc_submission.c:598: warning: No description found for parameter 'rq'
drivers/gpu/drm/i915/i915_guc_submission.c:598: warning: Excess function parameter 'ctx' description in 'i915_guc_submit'
drivers/gpu/drm/i915/i915_guc_submission.c:598: warning: Excess function parameter 'ring' description in 'i915_guc_submit'
drivers/gpu/drm/i915/i915_guc_submission.c:741: warning: No description found for parameter 'ctx'
drivers/gpu/drm/i915/i915_guc_submission.c:1: warning: no structured comments found
Was looking for 'GuC-based command submissison'.
drivers/gpu/drm/i915/i915_guc_submission.c:598: warning: No description found for parameter 'rq'
drivers/gpu/drm/i915/i915_guc_submission.c:598: warning: Excess function parameter 'ctx' description in 'i915_guc_submit'
drivers/gpu/drm/i915/i915_guc_submission.c:598: warning: Excess function parameter 'ring' description in 'i915_guc_submit'
drivers/gpu/drm/i915/i915_guc_submission.c:741: warning: No description found for parameter 'ctx'
Warning: didn't use docs for i915_hotplug_interrupt_update
Warning: didn't use docs for ilk_update_display_irq
Warning: didn't use docs for ilk_update_gt_irq
Warning: didn't use docs for snb_update_pm_irq
Warning: didn't use docs for bdw_update_port_irq
Warning: didn't use docs for ibx_display_interrupt_update
Warning: didn't use docs for i915_enable_asle_pipestat
Warning: didn't use docs for ivybridge_parity_work
Warning: didn't use docs for i915_reset_and_wakeup
Warning: didn't use docs for i915_handle_error
Warning: didn't use docs for intel_irq_install
Warning: didn't use docs for intel_irq_uninstall
vim +/i915 +767 drivers/gpu/drm/i915/i915_gem.c
4f0c7cfb Ben Widawsky 2012-04-16 751 vaddr = (void __force*)vaddr_atomic + page_offset;
4f0c7cfb Ben Widawsky 2012-04-16 752 unwritten = __copy_from_user_inatomic_nocache(vaddr,
0839ccb8 Keith Packard 2008-10-30 753 user_data, length);
3e4d3af5 Peter Zijlstra 2010-10-26 754 io_mapping_unmap_atomic(vaddr_atomic);
fbd5a26d Chris Wilson 2010-10-14 755 return unwritten;
0839ccb8 Keith Packard 2008-10-30 756 }
0839ccb8 Keith Packard 2008-10-30 757
3de09aa3 Eric Anholt 2009-03-09 758 /**
3de09aa3 Eric Anholt 2009-03-09 759 * This is the fast pwrite path, where we copy the data directly from the
3de09aa3 Eric Anholt 2009-03-09 760 * user into the GTT, uncached.
3de09aa3 Eric Anholt 2009-03-09 761 */
673a394b Eric Anholt 2008-07-30 762 static int
25ec998a Ankitprasad Sharma 2015-11-09 763 i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
05394f39 Chris Wilson 2010-11-08 764 struct drm_i915_gem_object *obj,
673a394b Eric Anholt 2008-07-30 765 struct drm_i915_gem_pwrite *args,
05394f39 Chris Wilson 2010-11-08 766 struct drm_file *file)
673a394b Eric Anholt 2008-07-30 @767 {
25ec998a Ankitprasad Sharma 2015-11-09 768 struct drm_mm_node node;
25ec998a Ankitprasad Sharma 2015-11-09 769 uint64_t remain, offset;
673a394b Eric Anholt 2008-07-30 770 char __user *user_data;
25ec998a Ankitprasad Sharma 2015-11-09 771 int ret;
935aaa69 Daniel Vetter 2012-03-25 772
1ec9e26d Daniel Vetter 2014-02-14 773 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
25ec998a Ankitprasad Sharma 2015-11-09 774 if (ret) {
25ec998a Ankitprasad Sharma 2015-11-09 775 memset(&node, 0, sizeof(node));
:::::: The code at line 767 was first introduced by commit
:::::: 673a394b1e3b69be886ff24abfd6df97c52e8d08 drm: Add GEM ("graphics execution manager") to i915 driver.
:::::: TO: Eric Anholt <eric@anholt.net>
:::::: CC: Dave Airlie <airlied@linux.ie>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 6062 bytes --]
[-- Attachment #3: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-09 10:56 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
2015-11-09 12:20 ` kbuild test robot
@ 2015-11-10 7:55 ` Mika Kuoppala
2015-11-10 8:05 ` Ankitprasad Sharma
2015-11-10 8:44 ` Chris Wilson
1 sibling, 2 replies; 26+ messages in thread
From: Mika Kuoppala @ 2015-11-10 7:55 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel
ankitprasad.r.sharma@intel.com writes:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> we try a nonblocking pin for the whole object (since that is fastest if
> reused), then failing that we try to grab one page in the mappable
> aperture. It also allows us to handle objects larger than the mappable
> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> to a measely 8MiB or something like that).
>
> v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
>
> v3: Combined loops based on local patch by Chris (Chris)
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 75 +++++++++++++++++++++++++++++------------
> 1 file changed, 53 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f1e3fde..9d2e6e3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -760,20 +760,33 @@ fast_user_write(struct io_mapping *mapping,
> * user into the GTT, uncached.
> */
> static int
> -i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> struct drm_i915_gem_object *obj,
> struct drm_i915_gem_pwrite *args,
> struct drm_file *file)
> {
> - struct drm_i915_private *dev_priv = dev->dev_private;
> - ssize_t remain;
> - loff_t offset, page_base;
> + struct drm_mm_node node;
> + uint64_t remain, offset;
> char __user *user_data;
> - int page_offset, page_length, ret;
> + int ret;
>
> ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> - if (ret)
> - goto out;
> + if (ret) {
> + memset(&node, 0, sizeof(node));
> + ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm,
> + &node, 4096, 0,
> + I915_CACHE_NONE, 0,
> + i915->gtt.mappable_end,
> + DRM_MM_SEARCH_DEFAULT,
> + DRM_MM_CREATE_DEFAULT);
> + if (ret)
> + goto out;
> +
> + i915_gem_object_pin_pages(obj);
> + } else {
> + node.start = i915_gem_obj_ggtt_offset(obj);
> + node.allocated = false;
> + }
>
> ret = i915_gem_object_set_to_gtt_domain(obj, true);
> if (ret)
> @@ -783,31 +796,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> if (ret)
> goto out_unpin;
>
> - user_data = to_user_ptr(args->data_ptr);
> - remain = args->size;
> -
> - offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
> -
> intel_fb_obj_invalidate(obj, ORIGIN_GTT);
> + obj->dirty = true;
>
> - while (remain > 0) {
> + user_data = to_user_ptr(args->data_ptr);
> + offset = args->offset;
> + remain = args->size;
> + while (remain) {
> /* Operation in this page
> *
> * page_base = page offset within aperture
> * page_offset = offset within page
> * page_length = bytes to copy for this page
> */
> - page_base = offset & PAGE_MASK;
> - page_offset = offset_in_page(offset);
> - page_length = remain;
> - if ((page_offset + remain) > PAGE_SIZE)
> - page_length = PAGE_SIZE - page_offset;
> -
> + u32 page_base = node.start;
You truncate here as node.start is 64bit offset into the vm area.
-Mika
> + unsigned page_offset = offset_in_page(offset);
> + unsigned page_length = PAGE_SIZE - page_offset;
> + page_length = remain < page_length ? remain : page_length;
> + if (node.allocated) {
> + wmb();
> + i915->gtt.base.insert_page(&i915->gtt.base,
> + i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
> + node.start,
> + I915_CACHE_NONE,
> + 0);
> + wmb();
> + } else {
> + page_base += offset & PAGE_MASK;
> + }
> /* If we get a fault while copying data, then (presumably) our
> * source page isn't available. Return the error and we'll
> * retry in the slow path.
> */
> - if (fast_user_write(dev_priv->gtt.mappable, page_base,
> + if (fast_user_write(i915->gtt.mappable, page_base,
> page_offset, user_data, page_length)) {
> ret = -EFAULT;
> goto out_flush;
> @@ -821,7 +842,17 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> out_flush:
> intel_fb_obj_flush(obj, false, ORIGIN_GTT);
> out_unpin:
> - i915_gem_object_ggtt_unpin(obj);
> + if (node.allocated) {
> + wmb();
> + i915->gtt.base.clear_range(&i915->gtt.base,
> + node.start, node.size,
> + true);
> + drm_mm_remove_node(&node);
> + i915_gem_object_unpin_pages(obj);
> + }
> + else {
> + i915_gem_object_ggtt_unpin(obj);
> + }
> out:
> return ret;
> }
> @@ -1086,7 +1117,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> if (obj->tiling_mode == I915_TILING_NONE &&
> obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> cpu_write_needs_clflush(obj)) {
> - ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
> + ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
> /* Note that the gtt paths might fail with non-page-backed user
> * pointers (e.g. gtt mappings when moving data between
> * textures). Fallback to the shmem path in that case. */
> --
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-10 7:55 ` Mika Kuoppala
@ 2015-11-10 8:05 ` Ankitprasad Sharma
2015-11-10 8:44 ` Chris Wilson
1 sibling, 0 replies; 26+ messages in thread
From: Ankitprasad Sharma @ 2015-11-10 8:05 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: intel-gfx, akash.goel
On Tue, 2015-11-10 at 09:55 +0200, Mika Kuoppala wrote:
> ankitprasad.r.sharma@intel.com writes:
>
> > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > we try a nonblocking pin for the whole object (since that is fastest if
> > reused), then failing that we try to grab one page in the mappable
> > aperture. It also allows us to handle objects larger than the mappable
> > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > to a measely 8MiB or something like that).
> >
> > v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
> >
> > v3: Combined loops based on local patch by Chris (Chris)
> >
> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > drivers/gpu/drm/i915/i915_gem.c | 75 +++++++++++++++++++++++++++++------------
> > 1 file changed, 53 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index f1e3fde..9d2e6e3 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -760,20 +760,33 @@ fast_user_write(struct io_mapping *mapping,
> > * user into the GTT, uncached.
> > */
> > static int
> > -i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> > +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> > struct drm_i915_gem_object *obj,
> > struct drm_i915_gem_pwrite *args,
> > struct drm_file *file)
> > {
> > - struct drm_i915_private *dev_priv = dev->dev_private;
> > - ssize_t remain;
> > - loff_t offset, page_base;
> > + struct drm_mm_node node;
> > + uint64_t remain, offset;
> > char __user *user_data;
> > - int page_offset, page_length, ret;
> > + int ret;
> >
> > ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> > - if (ret)
> > - goto out;
> > + if (ret) {
> > + memset(&node, 0, sizeof(node));
> > + ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm,
> > + &node, 4096, 0,
> > + I915_CACHE_NONE, 0,
> > + i915->gtt.mappable_end,
> > + DRM_MM_SEARCH_DEFAULT,
> > + DRM_MM_CREATE_DEFAULT);
> > + if (ret)
> > + goto out;
> > +
> > + i915_gem_object_pin_pages(obj);
> > + } else {
> > + node.start = i915_gem_obj_ggtt_offset(obj);
> > + node.allocated = false;
> > + }
> >
> > ret = i915_gem_object_set_to_gtt_domain(obj, true);
> > if (ret)
> > @@ -783,31 +796,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> > if (ret)
> > goto out_unpin;
> >
> > - user_data = to_user_ptr(args->data_ptr);
> > - remain = args->size;
> > -
> > - offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
> > -
> > intel_fb_obj_invalidate(obj, ORIGIN_GTT);
> > + obj->dirty = true;
> >
> > - while (remain > 0) {
> > + user_data = to_user_ptr(args->data_ptr);
> > + offset = args->offset;
> > + remain = args->size;
> > + while (remain) {
> > /* Operation in this page
> > *
> > * page_base = page offset within aperture
> > * page_offset = offset within page
> > * page_length = bytes to copy for this page
> > */
> > - page_base = offset & PAGE_MASK;
> > - page_offset = offset_in_page(offset);
> > - page_length = remain;
> > - if ((page_offset + remain) > PAGE_SIZE)
> > - page_length = PAGE_SIZE - page_offset;
> > -
> > + u32 page_base = node.start;
>
> You truncate here as node.start is 64bit offset into the vm area.
>
True, but it will work here always as node.start is going to be less
than 256 MB for both the cases (whole object pin or page by page pwrite)
Thanks,
Ankit
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-10 7:55 ` Mika Kuoppala
2015-11-10 8:05 ` Ankitprasad Sharma
@ 2015-11-10 8:44 ` Chris Wilson
2015-11-10 11:51 ` Chris Wilson
1 sibling, 1 reply; 26+ messages in thread
From: Chris Wilson @ 2015-11-10 8:44 UTC (permalink / raw)
To: Mika Kuoppala; +Cc: ankitprasad.r.sharma, intel-gfx, akash.goel
On Tue, Nov 10, 2015 at 09:55:18AM +0200, Mika Kuoppala wrote:
> ankitprasad.r.sharma@intel.com writes:
>
> > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > we try a nonblocking pin for the whole object (since that is fastest if
> > reused), then failing that we try to grab one page in the mappable
> > aperture. It also allows us to handle objects larger than the mappable
> > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > to a measely 8MiB or something like that).
> >
> > v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
> >
> > v3: Combined loops based on local patch by Chris (Chris)
> >
> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> > drivers/gpu/drm/i915/i915_gem.c | 75 +++++++++++++++++++++++++++++------------
> > 1 file changed, 53 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index f1e3fde..9d2e6e3 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -760,20 +760,33 @@ fast_user_write(struct io_mapping *mapping,
> > * user into the GTT, uncached.
> > */
> > static int
> > -i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> > +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
> > struct drm_i915_gem_object *obj,
> > struct drm_i915_gem_pwrite *args,
> > struct drm_file *file)
> > {
> > - struct drm_i915_private *dev_priv = dev->dev_private;
> > - ssize_t remain;
> > - loff_t offset, page_base;
> > + struct drm_mm_node node;
> > + uint64_t remain, offset;
> > char __user *user_data;
> > - int page_offset, page_length, ret;
> > + int ret;
> >
> > ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> > - if (ret)
> > - goto out;
> > + if (ret) {
> > + memset(&node, 0, sizeof(node));
> > + ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm,
> > + &node, 4096, 0,
> > + I915_CACHE_NONE, 0,
> > + i915->gtt.mappable_end,
> > + DRM_MM_SEARCH_DEFAULT,
> > + DRM_MM_CREATE_DEFAULT);
> > + if (ret)
> > + goto out;
> > +
> > + i915_gem_object_pin_pages(obj);
> > + } else {
> > + node.start = i915_gem_obj_ggtt_offset(obj);
> > + node.allocated = false;
> > + }
> >
> > ret = i915_gem_object_set_to_gtt_domain(obj, true);
> > if (ret)
> > @@ -783,31 +796,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> > if (ret)
> > goto out_unpin;
> >
> > - user_data = to_user_ptr(args->data_ptr);
> > - remain = args->size;
> > -
> > - offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
> > -
> > intel_fb_obj_invalidate(obj, ORIGIN_GTT);
> > + obj->dirty = true;
> >
> > - while (remain > 0) {
> > + user_data = to_user_ptr(args->data_ptr);
> > + offset = args->offset;
> > + remain = args->size;
> > + while (remain) {
> > /* Operation in this page
> > *
> > * page_base = page offset within aperture
> > * page_offset = offset within page
> > * page_length = bytes to copy for this page
> > */
> > - page_base = offset & PAGE_MASK;
> > - page_offset = offset_in_page(offset);
> > - page_length = remain;
> > - if ((page_offset + remain) > PAGE_SIZE)
> > - page_length = PAGE_SIZE - page_offset;
> > -
> > + u32 page_base = node.start;
>
> You truncate here as node.start is 64bit offset into the vm area.
It's a bit of cheat since it can't be 64bit, but the code is equally
inconsistent. The io-mapping can only handle unsigned long.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-10 8:44 ` Chris Wilson
@ 2015-11-10 11:51 ` Chris Wilson
0 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2015-11-10 11:51 UTC (permalink / raw)
To: Mika Kuoppala, ankitprasad.r.sharma, intel-gfx, akash.goel
On Tue, Nov 10, 2015 at 08:44:59AM +0000, Chris Wilson wrote:
> On Tue, Nov 10, 2015 at 09:55:18AM +0200, Mika Kuoppala wrote:
> > > + while (remain) {
> > > /* Operation in this page
> > > *
> > > * page_base = page offset within aperture
> > > * page_offset = offset within page
> > > * page_length = bytes to copy for this page
> > > */
> > > - page_base = offset & PAGE_MASK;
> > > - page_offset = offset_in_page(offset);
> > > - page_length = remain;
> > > - if ((page_offset + remain) > PAGE_SIZE)
> > > - page_length = PAGE_SIZE - page_offset;
> > > -
> > > + u32 page_base = node.start;
> >
> > You truncate here as node.start is 64bit offset into the vm area.
>
> It's a bit of cheat since it can't be 64bit, but the code is equally
> inconsistent. The io-mapping can only handle unsigned long.
And for reference, it is preserving current behaviour.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 1/3] drm/i915: Add support for mapping an object page by page
2015-11-09 10:56 ` [PATCH 1/3] drm/i915: Add support " ankitprasad.r.sharma
@ 2015-11-13 14:57 ` Tvrtko Ursulin
0 siblings, 0 replies; 26+ messages in thread
From: Tvrtko Ursulin @ 2015-11-13 14:57 UTC (permalink / raw)
To: ankitprasad.r.sharma, intel-gfx; +Cc: akash.goel
Hi,
On 09/11/15 10:56, ankitprasad.r.sharma@intel.com wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> Introduced a new vm specfic callback insert_page() to program a single pte in
> ggtt or ppgtt. This allows us to map a single page in to the mappable aperture
> space. This can be iterated over to access the whole object by using space as
> meagre as page size.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
> drivers/char/agp/intel-gtt.c | 9 +++++++
> drivers/gpu/drm/i915/i915_gem_gtt.c | 49 +++++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_gem_gtt.h | 5 ++++
> include/drm/intel-gtt.h | 3 +++
> 4 files changed, 66 insertions(+)
>
> diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
> index 1341a94..7c68576 100644
> --- a/drivers/char/agp/intel-gtt.c
> +++ b/drivers/char/agp/intel-gtt.c
> @@ -838,6 +838,15 @@ static bool i830_check_flags(unsigned int flags)
> return false;
> }
>
> +void intel_gtt_insert_page(dma_addr_t addr,
> + unsigned int pg,
> + unsigned int flags)
> +{
> + intel_private.driver->write_entry(addr, pg, flags);
> + wmb();
> +}
> +EXPORT_SYMBOL(intel_gtt_insert_page);
> +
> void intel_gtt_insert_sg_entries(struct sg_table *st,
> unsigned int pg_start,
> unsigned int flags)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 016739e..9fd1f857 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2345,6 +2345,23 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
> #endif
> }
>
> +static void gen8_ggtt_insert_page(struct i915_address_space *vm,
> + dma_addr_t addr,
> + uint64_t offset,
> + enum i915_cache_level level,
> + u32 unused)
> +{
I am not sure about the interface consistency. insert_entries works on a
sg and this would now need a DMA address which is encapsulated in the
former.
How about extending insert_pages to take a page_offset and size in
pages? The latter could default to obj size if zero.
> + struct drm_i915_private *dev_priv = to_i915(vm->dev);
> + gen8_pte_t __iomem *pte =
> + (gen8_pte_t __iomem *)dev_priv->gtt.gsm +
> + (offset >> PAGE_SHIFT);
> +
> + gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
> + wmb();
> +
> + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
gen8_ggtt_insert_entries doesn't do the wmb() but does a posting read on
GFX_FLSH_CNTL_GEN6. Why is that difference?
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-24 12:22 ` Daniel Vetter
@ 2015-12-14 8:19 ` Ankitprasad Sharma
0 siblings, 0 replies; 26+ messages in thread
From: Ankitprasad Sharma @ 2015-12-14 8:19 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx, akash.goel
On Tue, 2015-11-24 at 13:22 +0100, Daniel Vetter wrote:
> On Fri, Nov 20, 2015 at 10:06:16AM +0000, Chris Wilson wrote:
> > On Fri, Nov 20, 2015 at 03:07:58PM +0530, Ankitprasad Sharma wrote:
> > > On Wed, 2015-11-18 at 10:59 +0100, Daniel Vetter wrote:
> > > > On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > > > > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > > > >
> > > > > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > > > > we try a nonblocking pin for the whole object (since that is fastest if
> > > > > reused), then failing that we try to grab one page in the mappable
> > > > > aperture. It also allows us to handle objects larger than the mappable
> > > > > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > > > > to a measely 8MiB or something like that).
> > > >
> > > > We already have a fallback to the shmem pwrite. Why do we need this?
> > > This is mainly for the non-shmem backed objects, as we do not have
> > > fallback path for that. Agree for the shmem backed objects, as we
> > > already have a fallback.
> > >
> > > Would like to request Chris, if he can clarify further.
> >
> > Exactly that, with stolen we cannot use the shmem path so there exists
> > no fallback. In order to pwrite to stolen, the GTT path must be fully
> > capable.
>
> Ok, in that case this should probably be part of the stolen obj series,
> just for clarification.
> -Daniel
Daniel,
I have moved this patch to the stolen memory series with the latest
version. Can we please move ahead for the review and merge of the first
2 patches?
Thanks,
Ankit
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-20 10:06 ` Chris Wilson
@ 2015-11-24 12:22 ` Daniel Vetter
2015-12-14 8:19 ` Ankitprasad Sharma
0 siblings, 1 reply; 26+ messages in thread
From: Daniel Vetter @ 2015-11-24 12:22 UTC (permalink / raw)
To: Chris Wilson, Ankitprasad Sharma, Daniel Vetter, intel-gfx, akash.goel
On Fri, Nov 20, 2015 at 10:06:16AM +0000, Chris Wilson wrote:
> On Fri, Nov 20, 2015 at 03:07:58PM +0530, Ankitprasad Sharma wrote:
> > On Wed, 2015-11-18 at 10:59 +0100, Daniel Vetter wrote:
> > > On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > > > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > > >
> > > > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > > > we try a nonblocking pin for the whole object (since that is fastest if
> > > > reused), then failing that we try to grab one page in the mappable
> > > > aperture. It also allows us to handle objects larger than the mappable
> > > > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > > > to a measely 8MiB or something like that).
> > >
> > > We already have a fallback to the shmem pwrite. Why do we need this?
> > This is mainly for the non-shmem backed objects, as we do not have
> > fallback path for that. Agree for the shmem backed objects, as we
> > already have a fallback.
> >
> > Would like to request Chris, if he can clarify further.
>
> Exactly that, with stolen we cannot use the shmem path so there exists
> no fallback. In order to pwrite to stolen, the GTT path must be fully
> capable.
Ok, in that case this should probably be part of the stolen obj series,
just for clarification.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-20 9:37 ` Ankitprasad Sharma
@ 2015-11-20 10:06 ` Chris Wilson
2015-11-24 12:22 ` Daniel Vetter
0 siblings, 1 reply; 26+ messages in thread
From: Chris Wilson @ 2015-11-20 10:06 UTC (permalink / raw)
To: Ankitprasad Sharma; +Cc: intel-gfx, akash.goel
On Fri, Nov 20, 2015 at 03:07:58PM +0530, Ankitprasad Sharma wrote:
> On Wed, 2015-11-18 at 10:59 +0100, Daniel Vetter wrote:
> > On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > >
> > > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > > we try a nonblocking pin for the whole object (since that is fastest if
> > > reused), then failing that we try to grab one page in the mappable
> > > aperture. It also allows us to handle objects larger than the mappable
> > > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > > to a measely 8MiB or something like that).
> >
> > We already have a fallback to the shmem pwrite. Why do we need this?
> This is mainly for the non-shmem backed objects, as we do not have
> fallback path for that. Agree for the shmem backed objects, as we
> already have a fallback.
>
> Would like to request Chris, if he can clarify further.
Exactly that, with stolen we cannot use the shmem path so there exists
no fallback. In order to pwrite to stolen, the GTT path must be fully
capable.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-18 9:59 ` Daniel Vetter
@ 2015-11-20 9:37 ` Ankitprasad Sharma
2015-11-20 10:06 ` Chris Wilson
0 siblings, 1 reply; 26+ messages in thread
From: Ankitprasad Sharma @ 2015-11-20 9:37 UTC (permalink / raw)
To: Daniel Vetter, Chris Wilson; +Cc: intel-gfx, akash.goel
On Wed, 2015-11-18 at 10:59 +0100, Daniel Vetter wrote:
> On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > we try a nonblocking pin for the whole object (since that is fastest if
> > reused), then failing that we try to grab one page in the mappable
> > aperture. It also allows us to handle objects larger than the mappable
> > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > to a measely 8MiB or something like that).
>
> We already have a fallback to the shmem pwrite. Why do we need this?
This is mainly for the non-shmem backed objects, as we do not have
fallback path for that. Agree for the shmem backed objects, as we
already have a fallback.
Would like to request Chris, if he can clarify further.
Thanks,
Ankit
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-05 11:45 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
2015-11-05 12:34 ` Chris Wilson
2015-11-05 12:37 ` Tvrtko Ursulin
@ 2015-11-18 9:59 ` Daniel Vetter
2015-11-20 9:37 ` Ankitprasad Sharma
2 siblings, 1 reply; 26+ messages in thread
From: Daniel Vetter @ 2015-11-18 9:59 UTC (permalink / raw)
To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel
On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> we try a nonblocking pin for the whole object (since that is fastest if
> reused), then failing that we try to grab one page in the mappable
> aperture. It also allows us to handle objects larger than the mappable
> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> to a measely 8MiB or something like that).
We already have a fallback to the shmem pwrite. Why do we need this?
-Daniel
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 92 ++++++++++++++++++++++++++++++-----------
> 1 file changed, 69 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index bf5ef7a..9132240 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -766,14 +766,26 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> struct drm_file *file)
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> + struct drm_mm_node node;
> ssize_t remain;
> loff_t offset, page_base;
> char __user *user_data;
> - int page_offset, page_length, ret;
> + int page_offset, page_length, ret, i;
> + bool pinned = true;
>
> ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> - if (ret)
> - goto out;
> + if (ret) {
> + pinned = false;
> + memset(&node, 0, sizeof(node));
> + ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
> + &node, 4096, 0,
> + I915_CACHE_NONE, 0,
> + dev_priv->gtt.mappable_end,
> + DRM_MM_SEARCH_DEFAULT,
> + DRM_MM_CREATE_DEFAULT);
> + if (ret)
> + goto out;
> + }
>
> ret = i915_gem_object_set_to_gtt_domain(obj, true);
> if (ret)
> @@ -786,42 +798,76 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> user_data = to_user_ptr(args->data_ptr);
> remain = args->size;
>
> - offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
> -
> intel_fb_obj_invalidate(obj, ORIGIN_GTT);
>
> - while (remain > 0) {
> - /* Operation in this page
> + if (likely(pinned)) {
> + offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
> + /* Operation in the page
> *
> * page_base = page offset within aperture
> * page_offset = offset within page
> - * page_length = bytes to copy for this page
> + * page_length = bytes to copy for the page
> */
> page_base = offset & PAGE_MASK;
> page_offset = offset_in_page(offset);
> - page_length = remain;
> - if ((page_offset + remain) > PAGE_SIZE)
> - page_length = PAGE_SIZE - page_offset;
> + while (remain > 0) {
> + page_length = remain;
> + if ((page_offset + remain) > PAGE_SIZE)
> + page_length = PAGE_SIZE - page_offset;
> +
> + /* If we get a fault while copying data, then (presumably) our
> + * source page isn't available. Return the error and we'll
> + * retry in the slow path.
> + */
> + if (fast_user_write(dev_priv->gtt.mappable, page_base,
> + page_offset, user_data, page_length)) {
> + ret = -EFAULT;
> + goto out_flush;
> + }
>
> - /* If we get a fault while copying data, then (presumably) our
> - * source page isn't available. Return the error and we'll
> - * retry in the slow path.
> - */
> - if (fast_user_write(dev_priv->gtt.mappable, page_base,
> - page_offset, user_data, page_length)) {
> - ret = -EFAULT;
> - goto out_flush;
> + remain -= page_length;
> + user_data += page_length;
> + page_offset = 0;
> }
> + } else {
> + i = args->offset / PAGE_SIZE;
> + page_offset = offset_in_page(args->offset);
> + while (remain > 0) {
> + page_length = remain;
> + if ((page_offset + remain) > PAGE_SIZE)
> + page_length = PAGE_SIZE - page_offset;
> +
> + wmb();
> + dev_priv->gtt.base.insert_page(&dev_priv->gtt.base,
> + i915_gem_object_get_dma_address(obj, i),
> + node.start,
> + I915_CACHE_NONE,
> + 0);
> + wmb();
> +
> + if (fast_user_write(dev_priv->gtt.mappable, node.start,
> + page_offset, user_data, page_length)) {
> + ret = -EFAULT;
> + goto out_flush;
> + }
>
> - remain -= page_length;
> - user_data += page_length;
> - offset += page_length;
> + remain -= page_length;
> + user_data += page_length;
> + page_offset = 0;
> + i++;
> + }
> + wmb();
> + dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
> + node.start, node.size,
> + true);
> + drm_mm_remove_node(&node);
> }
>
> out_flush:
> intel_fb_obj_flush(obj, false, ORIGIN_GTT);
> out_unpin:
> - i915_gem_object_ggtt_unpin(obj);
> + if (pinned)
> + i915_gem_object_ggtt_unpin(obj);
> out:
> return ret;
> }
> --
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-05 14:38 ` Tvrtko Ursulin
@ 2015-11-07 10:13 ` Chris Wilson
0 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2015-11-07 10:13 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: ankitprasad.r.sharma, intel-gfx, akash.goel
On Thu, Nov 05, 2015 at 02:38:30PM +0000, Tvrtko Ursulin wrote:
>
> On 05/11/15 12:58, Chris Wilson wrote:
> >On Thu, Nov 05, 2015 at 12:53:20PM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 05/11/15 12:42, Chris Wilson wrote:
> >>>On Thu, Nov 05, 2015 at 12:37:46PM +0000, Tvrtko Ursulin wrote:
> >>>>
> >>>>On 05/11/15 11:45, ankitprasad.r.sharma@intel.com wrote:
> >>>>>From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >>>>>
> >>>>>In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> >>>>>we try a nonblocking pin for the whole object (since that is fastest if
> >>>>>reused), then failing that we try to grab one page in the mappable
> >>>>>aperture. It also allows us to handle objects larger than the mappable
> >>>>>aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> >>>>>to a measely 8MiB or something like that).
> >>>>
> >>>>Aperture in aperture, reminds me of those "Yo dawg I've heard you
> >>>>like X so I've put X in your X so you can Y while you Y" jokes. :D
> >>>>
> >>>>Would using the partial view code be interesting for this? Might be
> >>>>faster due to larger chunks possible, or slower due more expensive
> >>>>set up time, I don't know.
> >>>
> >>>It's the wrong abstraction.
> >>
> >>Looks the same to me, only difference is the size.
> >
> >There are many places that insert-page is used where we cannot do a
> >partial-pin.
> >
> >>Why not just to the page aperture then for simplicity? If there is
> >>any performance gain from trying the full VMA first then why there
> >>wouldn't be some to try with the partial VMA?
> >
> >obj->base.size >> PAGE_SHIFT x partial pages is not even funny.
>
> Well I did not suggest that but larger chunks so I will repeat my question.
>
> If going page by page is fine for performance then why have the two
> code paths at all? One which tries top pin the whole object first,
> and second which goes page by page if that fails. Why not just do it
> page by page and avoid having two copy loops etc?
If we already have the vma or can allocate it with impacting upon the
system, using it is best (since we expect to reuse it again). If we
cannot allocate it, our natural iterator size is 4096 bytes and is also
our best chance at allocating that in the aperture.
Partial vma are a high overhead and more importantly a massive impedance
mismatch.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-07 8:02 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
@ 2015-11-07 10:07 ` Chris Wilson
0 siblings, 0 replies; 26+ messages in thread
From: Chris Wilson @ 2015-11-07 10:07 UTC (permalink / raw)
To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel
On Sat, Nov 07, 2015 at 01:32:15PM +0530, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> we try a nonblocking pin for the whole object (since that is fastest if
> reused), then failing that we try to grab one page in the mappable
> aperture. It also allows us to handle objects larger than the mappable
> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> to a measely 8MiB or something like that).
>
> v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
Please just use my code as you haven't combined the two paths as well
yet.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-07 8:02 [PATCH v2 0/3] Support for mapping an object page by page ankitprasad.r.sharma
@ 2015-11-07 8:02 ` ankitprasad.r.sharma
2015-11-07 10:07 ` Chris Wilson
0 siblings, 1 reply; 26+ messages in thread
From: ankitprasad.r.sharma @ 2015-11-07 8:02 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
we try a nonblocking pin for the whole object (since that is fastest if
reused), then failing that we try to grab one page in the mappable
aperture. It also allows us to handle objects larger than the mappable
aperture (e.g. if we need to pwrite with vGPU restricting the aperture
to a measely 8MiB or something like that).
v2: Pin pages before starting pwrite, Combined duplicate loops (Chris)
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 68 ++++++++++++++++++++++++++++++++---------
1 file changed, 53 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bf5ef7a..2556936 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -766,14 +766,26 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
struct drm_file *file)
{
struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_mm_node node;
ssize_t remain;
loff_t offset, page_base;
char __user *user_data;
- int page_offset, page_length, ret;
+ int page_offset, page_length, ret, i = 0;
+ bool pinned = true;
ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
- if (ret)
- goto out;
+ if (ret) {
+ pinned = false;
+ memset(&node, 0, sizeof(node));
+ ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
+ &node, 4096, 0,
+ I915_CACHE_NONE, 0,
+ dev_priv->gtt.mappable_end,
+ DRM_MM_SEARCH_DEFAULT,
+ DRM_MM_CREATE_DEFAULT);
+ if (ret)
+ goto out;
+ }
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
@@ -783,26 +795,41 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
if (ret)
goto out_unpin;
+ i915_gem_object_pin_pages(obj);
+
user_data = to_user_ptr(args->data_ptr);
remain = args->size;
- offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
-
intel_fb_obj_invalidate(obj, ORIGIN_GTT);
- while (remain > 0) {
- /* Operation in this page
- *
- * page_base = page offset within aperture
- * page_offset = offset within page
- * page_length = bytes to copy for this page
- */
+ if (pinned) {
+ offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
+ /* page_base = page offset within aperture */
page_base = offset & PAGE_MASK;
- page_offset = offset_in_page(offset);
+ } else {
+ offset = args->offset;
+ page_base = node.start;
+ i = args->offset / PAGE_SIZE;
+ }
+
+ /* page_offset = offset within page */
+ page_offset = offset_in_page(offset);
+ while (remain > 0) {
+ /* page_length = bytes to copy for the page */
page_length = remain;
if ((page_offset + remain) > PAGE_SIZE)
page_length = PAGE_SIZE - page_offset;
+ if (!pinned) {
+ wmb();
+ dev_priv->gtt.base.insert_page(&dev_priv->gtt.base,
+ i915_gem_object_get_dma_address(obj, i),
+ node.start,
+ I915_CACHE_NONE,
+ 0);
+ wmb();
+ i++;
+ }
/* If we get a fault while copying data, then (presumably) our
* source page isn't available. Return the error and we'll
* retry in the slow path.
@@ -815,13 +842,24 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
remain -= page_length;
user_data += page_length;
- offset += page_length;
+ page_offset = 0;
}
out_flush:
+ if (!pinned) {
+ wmb();
+ dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+ node.start, node.size,
+ true);
+ }
+
+ i915_gem_object_unpin_pages(obj);
intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
- i915_gem_object_ggtt_unpin(obj);
+ if (pinned)
+ i915_gem_object_ggtt_unpin(obj);
+ else
+ drm_mm_remove_node(&node);
out:
return ret;
}
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-05 12:34 ` Chris Wilson
@ 2015-11-06 6:15 ` Ankitprasad Sharma
0 siblings, 0 replies; 26+ messages in thread
From: Ankitprasad Sharma @ 2015-11-06 6:15 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx, akash.goel
On Thu, 2015-11-05 at 12:34 +0000, Chris Wilson wrote:
> On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> > In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> > we try a nonblocking pin for the whole object (since that is fastest if
> > reused), then failing that we try to grab one page in the mappable
> > aperture. It also allows us to handle objects larger than the mappable
> > aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> > to a measely 8MiB or something like that).
> >
> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_gem.c | 92 ++++++++++++++++++++++++++++++-----------
> > 1 file changed, 69 insertions(+), 23 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index bf5ef7a..9132240 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -766,14 +766,26 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> > struct drm_file *file)
> > {
> > struct drm_i915_private *dev_priv = dev->dev_private;
> > + struct drm_mm_node node;
> > ssize_t remain;
> > loff_t offset, page_base;
> > char __user *user_data;
> > - int page_offset, page_length, ret;
> > + int page_offset, page_length, ret, i;
> > + bool pinned = true;
> >
> > ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> > - if (ret)
> > - goto out;
> > + if (ret) {
> > + pinned = false;
> > + memset(&node, 0, sizeof(node));
> > + ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
> > + &node, 4096, 0,
> > + I915_CACHE_NONE, 0,
> > + dev_priv->gtt.mappable_end,
> > + DRM_MM_SEARCH_DEFAULT,
> > + DRM_MM_CREATE_DEFAULT);
> > + if (ret)
> > + goto out;
>
> Prefer to refer to my original patch as to why this wrong.
If you are concerned about pages for the object not getting allocated,
then soon after node insertion we call
i915_gem_object_set_to_gtt_domain() which takes care of page allocations
for the object.
If there is any other concern, please let me know.
Thanks,
Ankit
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-05 12:58 ` Chris Wilson
@ 2015-11-05 14:38 ` Tvrtko Ursulin
2015-11-07 10:13 ` Chris Wilson
0 siblings, 1 reply; 26+ messages in thread
From: Tvrtko Ursulin @ 2015-11-05 14:38 UTC (permalink / raw)
To: Chris Wilson, ankitprasad.r.sharma, intel-gfx, akash.goel
On 05/11/15 12:58, Chris Wilson wrote:
> On Thu, Nov 05, 2015 at 12:53:20PM +0000, Tvrtko Ursulin wrote:
>>
>> On 05/11/15 12:42, Chris Wilson wrote:
>>> On Thu, Nov 05, 2015 at 12:37:46PM +0000, Tvrtko Ursulin wrote:
>>>>
>>>> On 05/11/15 11:45, ankitprasad.r.sharma@intel.com wrote:
>>>>> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>>>>>
>>>>> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
>>>>> we try a nonblocking pin for the whole object (since that is fastest if
>>>>> reused), then failing that we try to grab one page in the mappable
>>>>> aperture. It also allows us to handle objects larger than the mappable
>>>>> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
>>>>> to a measely 8MiB or something like that).
>>>>
>>>> Aperture in aperture, reminds me of those "Yo dawg I've heard you
>>>> like X so I've put X in your X so you can Y while you Y" jokes. :D
>>>>
>>>> Would using the partial view code be interesting for this? Might be
>>>> faster due to larger chunks possible, or slower due more expensive
>>>> set up time, I don't know.
>>>
>>> It's the wrong abstraction.
>>
>> Looks the same to me, only difference is the size.
>
> There are many places that insert-page is used where we cannot do a
> partial-pin.
>
>> Why not just to the page aperture then for simplicity? If there is
>> any performance gain from trying the full VMA first then why there
>> wouldn't be some to try with the partial VMA?
>
> obj->base.size >> PAGE_SHIFT x partial pages is not even funny.
Well I did not suggest that but larger chunks so I will repeat my question.
If going page by page is fine for performance then why have the two code
paths at all? One which tries top pin the whole object first, and second
which goes page by page if that fails. Why not just do it page by page
and avoid having two copy loops etc?
On the other hand, if there is significant performance hit with the page
by page path, then why not use partial views with 1Mb chunks or something?
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-05 12:53 ` Tvrtko Ursulin
@ 2015-11-05 12:58 ` Chris Wilson
2015-11-05 14:38 ` Tvrtko Ursulin
0 siblings, 1 reply; 26+ messages in thread
From: Chris Wilson @ 2015-11-05 12:58 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: ankitprasad.r.sharma, intel-gfx, akash.goel
On Thu, Nov 05, 2015 at 12:53:20PM +0000, Tvrtko Ursulin wrote:
>
> On 05/11/15 12:42, Chris Wilson wrote:
> >On Thu, Nov 05, 2015 at 12:37:46PM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 05/11/15 11:45, ankitprasad.r.sharma@intel.com wrote:
> >>>From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >>>
> >>>In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> >>>we try a nonblocking pin for the whole object (since that is fastest if
> >>>reused), then failing that we try to grab one page in the mappable
> >>>aperture. It also allows us to handle objects larger than the mappable
> >>>aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> >>>to a measely 8MiB or something like that).
> >>
> >>Aperture in aperture, reminds me of those "Yo dawg I've heard you
> >>like X so I've put X in your X so you can Y while you Y" jokes. :D
> >>
> >>Would using the partial view code be interesting for this? Might be
> >>faster due to larger chunks possible, or slower due more expensive
> >>set up time, I don't know.
> >
> >It's the wrong abstraction.
>
> Looks the same to me, only difference is the size.
There are many places that insert-page is used where we cannot do a
partial-pin.
> Why not just to the page aperture then for simplicity? If there is
> any performance gain from trying the full VMA first then why there
> wouldn't be some to try with the partial VMA?
obj->base.size >> PAGE_SHIFT x partial pages is not even funny.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-05 12:42 ` Chris Wilson
@ 2015-11-05 12:53 ` Tvrtko Ursulin
2015-11-05 12:58 ` Chris Wilson
0 siblings, 1 reply; 26+ messages in thread
From: Tvrtko Ursulin @ 2015-11-05 12:53 UTC (permalink / raw)
To: Chris Wilson, ankitprasad.r.sharma, intel-gfx, akash.goel
On 05/11/15 12:42, Chris Wilson wrote:
> On Thu, Nov 05, 2015 at 12:37:46PM +0000, Tvrtko Ursulin wrote:
>>
>> On 05/11/15 11:45, ankitprasad.r.sharma@intel.com wrote:
>>> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>>>
>>> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
>>> we try a nonblocking pin for the whole object (since that is fastest if
>>> reused), then failing that we try to grab one page in the mappable
>>> aperture. It also allows us to handle objects larger than the mappable
>>> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
>>> to a measely 8MiB or something like that).
>>
>> Aperture in aperture, reminds me of those "Yo dawg I've heard you
>> like X so I've put X in your X so you can Y while you Y" jokes. :D
>>
>> Would using the partial view code be interesting for this? Might be
>> faster due to larger chunks possible, or slower due more expensive
>> set up time, I don't know.
>
> It's the wrong abstraction.
Looks the same to me, only difference is the size.
Why not just to the page aperture then for simplicity? If there is any
performance gain from trying the full VMA first then why there wouldn't
be some to try with the partial VMA?
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-05 12:37 ` Tvrtko Ursulin
@ 2015-11-05 12:42 ` Chris Wilson
2015-11-05 12:53 ` Tvrtko Ursulin
0 siblings, 1 reply; 26+ messages in thread
From: Chris Wilson @ 2015-11-05 12:42 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: ankitprasad.r.sharma, intel-gfx, akash.goel
On Thu, Nov 05, 2015 at 12:37:46PM +0000, Tvrtko Ursulin wrote:
>
> On 05/11/15 11:45, ankitprasad.r.sharma@intel.com wrote:
> >From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> >In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> >we try a nonblocking pin for the whole object (since that is fastest if
> >reused), then failing that we try to grab one page in the mappable
> >aperture. It also allows us to handle objects larger than the mappable
> >aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> >to a measely 8MiB or something like that).
>
> Aperture in aperture, reminds me of those "Yo dawg I've heard you
> like X so I've put X in your X so you can Y while you Y" jokes. :D
>
> Would using the partial view code be interesting for this? Might be
> faster due to larger chunks possible, or slower due more expensive
> set up time, I don't know.
It's the wrong abstraction.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-05 11:45 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
2015-11-05 12:34 ` Chris Wilson
@ 2015-11-05 12:37 ` Tvrtko Ursulin
2015-11-05 12:42 ` Chris Wilson
2015-11-18 9:59 ` Daniel Vetter
2 siblings, 1 reply; 26+ messages in thread
From: Tvrtko Ursulin @ 2015-11-05 12:37 UTC (permalink / raw)
To: ankitprasad.r.sharma, intel-gfx; +Cc: akash.goel
On 05/11/15 11:45, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> we try a nonblocking pin for the whole object (since that is fastest if
> reused), then failing that we try to grab one page in the mappable
> aperture. It also allows us to handle objects larger than the mappable
> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> to a measely 8MiB or something like that).
Aperture in aperture, reminds me of those "Yo dawg I've heard you like X
so I've put X in your X so you can Y while you Y" jokes. :D
Would using the partial view code be interesting for this? Might be
faster due to larger chunks possible, or slower due more expensive set
up time, I don't know.
Interesting thing is it was added for virtualization use cases so there
is overlap here.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* Re: [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-05 11:45 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
@ 2015-11-05 12:34 ` Chris Wilson
2015-11-06 6:15 ` Ankitprasad Sharma
2015-11-05 12:37 ` Tvrtko Ursulin
2015-11-18 9:59 ` Daniel Vetter
2 siblings, 1 reply; 26+ messages in thread
From: Chris Wilson @ 2015-11-05 12:34 UTC (permalink / raw)
To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel
On Thu, Nov 05, 2015 at 05:15:59PM +0530, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
> we try a nonblocking pin for the whole object (since that is fastest if
> reused), then failing that we try to grab one page in the mappable
> aperture. It also allows us to handle objects larger than the mappable
> aperture (e.g. if we need to pwrite with vGPU restricting the aperture
> to a measely 8MiB or something like that).
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 92 ++++++++++++++++++++++++++++++-----------
> 1 file changed, 69 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index bf5ef7a..9132240 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -766,14 +766,26 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
> struct drm_file *file)
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> + struct drm_mm_node node;
> ssize_t remain;
> loff_t offset, page_base;
> char __user *user_data;
> - int page_offset, page_length, ret;
> + int page_offset, page_length, ret, i;
> + bool pinned = true;
>
> ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
> - if (ret)
> - goto out;
> + if (ret) {
> + pinned = false;
> + memset(&node, 0, sizeof(node));
> + ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
> + &node, 4096, 0,
> + I915_CACHE_NONE, 0,
> + dev_priv->gtt.mappable_end,
> + DRM_MM_SEARCH_DEFAULT,
> + DRM_MM_CREATE_DEFAULT);
> + if (ret)
> + goto out;
Prefer to refer to my original patch as to why this wrong.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 26+ messages in thread
* [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast
2015-11-05 11:45 [PATCH 0/3] Support for mapping an object page by page ankitprasad.r.sharma
@ 2015-11-05 11:45 ` ankitprasad.r.sharma
2015-11-05 12:34 ` Chris Wilson
` (2 more replies)
0 siblings, 3 replies; 26+ messages in thread
From: ankitprasad.r.sharma @ 2015-11-05 11:45 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First,
we try a nonblocking pin for the whole object (since that is fastest if
reused), then failing that we try to grab one page in the mappable
aperture. It also allows us to handle objects larger than the mappable
aperture (e.g. if we need to pwrite with vGPU restricting the aperture
to a measely 8MiB or something like that).
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 92 ++++++++++++++++++++++++++++++-----------
1 file changed, 69 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bf5ef7a..9132240 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -766,14 +766,26 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
struct drm_file *file)
{
struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_mm_node node;
ssize_t remain;
loff_t offset, page_base;
char __user *user_data;
- int page_offset, page_length, ret;
+ int page_offset, page_length, ret, i;
+ bool pinned = true;
ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
- if (ret)
- goto out;
+ if (ret) {
+ pinned = false;
+ memset(&node, 0, sizeof(node));
+ ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
+ &node, 4096, 0,
+ I915_CACHE_NONE, 0,
+ dev_priv->gtt.mappable_end,
+ DRM_MM_SEARCH_DEFAULT,
+ DRM_MM_CREATE_DEFAULT);
+ if (ret)
+ goto out;
+ }
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
@@ -786,42 +798,76 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
user_data = to_user_ptr(args->data_ptr);
remain = args->size;
- offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
-
intel_fb_obj_invalidate(obj, ORIGIN_GTT);
- while (remain > 0) {
- /* Operation in this page
+ if (likely(pinned)) {
+ offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
+ /* Operation in the page
*
* page_base = page offset within aperture
* page_offset = offset within page
- * page_length = bytes to copy for this page
+ * page_length = bytes to copy for the page
*/
page_base = offset & PAGE_MASK;
page_offset = offset_in_page(offset);
- page_length = remain;
- if ((page_offset + remain) > PAGE_SIZE)
- page_length = PAGE_SIZE - page_offset;
+ while (remain > 0) {
+ page_length = remain;
+ if ((page_offset + remain) > PAGE_SIZE)
+ page_length = PAGE_SIZE - page_offset;
+
+ /* If we get a fault while copying data, then (presumably) our
+ * source page isn't available. Return the error and we'll
+ * retry in the slow path.
+ */
+ if (fast_user_write(dev_priv->gtt.mappable, page_base,
+ page_offset, user_data, page_length)) {
+ ret = -EFAULT;
+ goto out_flush;
+ }
- /* If we get a fault while copying data, then (presumably) our
- * source page isn't available. Return the error and we'll
- * retry in the slow path.
- */
- if (fast_user_write(dev_priv->gtt.mappable, page_base,
- page_offset, user_data, page_length)) {
- ret = -EFAULT;
- goto out_flush;
+ remain -= page_length;
+ user_data += page_length;
+ page_offset = 0;
}
+ } else {
+ i = args->offset / PAGE_SIZE;
+ page_offset = offset_in_page(args->offset);
+ while (remain > 0) {
+ page_length = remain;
+ if ((page_offset + remain) > PAGE_SIZE)
+ page_length = PAGE_SIZE - page_offset;
+
+ wmb();
+ dev_priv->gtt.base.insert_page(&dev_priv->gtt.base,
+ i915_gem_object_get_dma_address(obj, i),
+ node.start,
+ I915_CACHE_NONE,
+ 0);
+ wmb();
+
+ if (fast_user_write(dev_priv->gtt.mappable, node.start,
+ page_offset, user_data, page_length)) {
+ ret = -EFAULT;
+ goto out_flush;
+ }
- remain -= page_length;
- user_data += page_length;
- offset += page_length;
+ remain -= page_length;
+ user_data += page_length;
+ page_offset = 0;
+ i++;
+ }
+ wmb();
+ dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+ node.start, node.size,
+ true);
+ drm_mm_remove_node(&node);
}
out_flush:
intel_fb_obj_flush(obj, false, ORIGIN_GTT);
out_unpin:
- i915_gem_object_ggtt_unpin(obj);
+ if (pinned)
+ i915_gem_object_ggtt_unpin(obj);
out:
return ret;
}
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 26+ messages in thread
end of thread, other threads:[~2015-12-14 8:47 UTC | newest]
Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-09 10:56 [PATCH v3 0/3] Support for mapping an object page by page ankitprasad.r.sharma
2015-11-09 10:56 ` [PATCH 1/3] drm/i915: Add support " ankitprasad.r.sharma
2015-11-13 14:57 ` Tvrtko Ursulin
2015-11-09 10:56 ` [PATCH 2/3] drm/i915: Introduce i915_gem_object_get_dma_address() ankitprasad.r.sharma
2015-11-09 10:56 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
2015-11-09 12:20 ` kbuild test robot
2015-11-10 7:55 ` Mika Kuoppala
2015-11-10 8:05 ` Ankitprasad Sharma
2015-11-10 8:44 ` Chris Wilson
2015-11-10 11:51 ` Chris Wilson
-- strict thread matches above, loose matches on Subject: below --
2015-11-07 8:02 [PATCH v2 0/3] Support for mapping an object page by page ankitprasad.r.sharma
2015-11-07 8:02 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
2015-11-07 10:07 ` Chris Wilson
2015-11-05 11:45 [PATCH 0/3] Support for mapping an object page by page ankitprasad.r.sharma
2015-11-05 11:45 ` [PATCH 3/3] drm/i915: Use insert_page for pwrite_fast ankitprasad.r.sharma
2015-11-05 12:34 ` Chris Wilson
2015-11-06 6:15 ` Ankitprasad Sharma
2015-11-05 12:37 ` Tvrtko Ursulin
2015-11-05 12:42 ` Chris Wilson
2015-11-05 12:53 ` Tvrtko Ursulin
2015-11-05 12:58 ` Chris Wilson
2015-11-05 14:38 ` Tvrtko Ursulin
2015-11-07 10:13 ` Chris Wilson
2015-11-18 9:59 ` Daniel Vetter
2015-11-20 9:37 ` Ankitprasad Sharma
2015-11-20 10:06 ` Chris Wilson
2015-11-24 12:22 ` Daniel Vetter
2015-12-14 8:19 ` Ankitprasad Sharma
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.