All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
@ 2017-02-17 15:10 Chris Wilson
  2017-02-17 18:22 ` ✓ Fi.CI.BAT: success for drm/i915: Remove temporary allocation of dma addresses when rotating (rev2) Patchwork
  2017-02-21 15:01 ` [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating Joonas Lahtinen
  0 siblings, 2 replies; 14+ messages in thread
From: Chris Wilson @ 2017-02-17 15:10 UTC (permalink / raw)
  To: intel-gfx

The object already stores (computed on the fly) the index to dma address
so use it instead of reallocating a large temporary array every time we
bind a rotated framebuffer.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 77 ++++++++++++-------------------------
 1 file changed, 25 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 47a38272f54c..848dbb926fd1 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3043,27 +3043,32 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 }
 
 static struct scatterlist *
-rotate_pages(const dma_addr_t *in, unsigned int offset,
-	     unsigned int width, unsigned int height,
-	     unsigned int stride,
+rotate_pages(struct drm_i915_gem_object *obj,
+	     const struct intel_rotation_plane_info *p,
 	     struct sg_table *st, struct scatterlist *sg)
 {
 	unsigned int column, row;
-	unsigned int src_idx;
 
-	for (column = 0; column < width; column++) {
-		src_idx = stride * (height - 1) + column;
-		for (row = 0; row < height; row++) {
-			st->nents++;
+	for (column = 0; column < p->width; column++) {
+		unsigned long src_idx =
+			p->stride * (p->height - 1) + column + p->offset;
+		for (row = 0; row < p->height; row++) {
+			struct scatterlist *src;
+			unsigned int n;
+
+			src = i915_gem_object_get_sg(obj, src_idx, &n);
+			src_idx -= p->stride;
+
 			/* We don't need the pages, but need to initialize
 			 * the entries so the sg list can be happily traversed.
 			 * The only thing we need are DMA addresses.
 			 */
 			sg_set_page(sg, NULL, PAGE_SIZE, 0);
-			sg_dma_address(sg) = in[offset + src_idx];
+			sg_dma_address(sg) = sg_dma_address(src) + n*PAGE_SIZE;
 			sg_dma_len(sg) = PAGE_SIZE;
-			sg = sg_next(sg);
-			src_idx -= stride;
+			sg = __sg_next(sg);
+
+			st->nents++;
 		}
 	}
 
@@ -3074,62 +3079,30 @@ static noinline struct sg_table *
 intel_rotate_pages(struct intel_rotation_info *rot_info,
 		   struct drm_i915_gem_object *obj)
 {
-	const unsigned long n_pages = obj->base.size / PAGE_SIZE;
-	unsigned int size = intel_rotation_info_size(rot_info);
-	struct sgt_iter sgt_iter;
-	dma_addr_t dma_addr;
-	unsigned long i;
-	dma_addr_t *page_addr_list;
-	struct sg_table *st;
+	const unsigned int size = intel_rotation_info_size(rot_info);
 	struct scatterlist *sg;
+	struct sg_table *st;
+	unsigned long i;
 	int ret = -ENOMEM;
 
-	/* Allocate a temporary list of source pages for random access. */
-	page_addr_list = drm_malloc_gfp(n_pages,
-					sizeof(dma_addr_t),
-					GFP_TEMPORARY);
-	if (!page_addr_list)
-		return ERR_PTR(ret);
-
-	/* Allocate target SG list. */
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (!st)
-		goto err_st_alloc;
+		goto err;
 
 	ret = sg_alloc_table(st, size, GFP_KERNEL);
 	if (ret)
-		goto err_sg_alloc;
-
-	/* Populate source page list from the object. */
-	i = 0;
-	for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
-		page_addr_list[i++] = dma_addr;
+		goto err;
 
-	GEM_BUG_ON(i != n_pages);
 	st->nents = 0;
 	sg = st->sgl;
-
-	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
-		sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
-				  rot_info->plane[i].width, rot_info->plane[i].height,
-				  rot_info->plane[i].stride, st, sg);
-	}
-
-	DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n",
-		      obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
-
-	drm_free_large(page_addr_list);
+	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++)
+		sg = rotate_pages(obj, &rot_info->plane[i], st, sg);
+	GEM_BUG_ON(st->nents != size);
 
 	return st;
 
-err_sg_alloc:
+err:
 	kfree(st);
-err_st_alloc:
-	drm_free_large(page_addr_list);
-
-	DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
-		      obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
-
 	return ERR_PTR(ret);
 }
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: Remove temporary allocation of dma addresses when rotating (rev2)
  2017-02-17 15:10 [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating Chris Wilson
@ 2017-02-17 18:22 ` Patchwork
  2017-02-21 15:01 ` [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating Joonas Lahtinen
  1 sibling, 0 replies; 14+ messages in thread
From: Patchwork @ 2017-02-17 18:22 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Remove temporary allocation of dma addresses when rotating (rev2)
URL   : https://patchwork.freedesktop.org/series/19850/
State : success

== Summary ==

Series 19850v2 drm/i915: Remove temporary allocation of dma addresses when rotating
https://patchwork.freedesktop.org/api/1.0/series/19850/revisions/2/mbox/

fi-bdw-5557u     total:252  pass:241  dwarn:0   dfail:0   fail:0   skip:11 
fi-bsw-n3050     total:252  pass:213  dwarn:0   dfail:0   fail:0   skip:39 
fi-bxt-j4205     total:252  pass:233  dwarn:0   dfail:0   fail:0   skip:19 
fi-bxt-t5700     total:83   pass:70   dwarn:0   dfail:0   fail:0   skip:12 
fi-byt-j1900     total:252  pass:225  dwarn:0   dfail:0   fail:0   skip:27 
fi-byt-n2820     total:252  pass:221  dwarn:0   dfail:0   fail:0   skip:31 
fi-hsw-4770      total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-hsw-4770r     total:252  pass:236  dwarn:0   dfail:0   fail:0   skip:16 
fi-ilk-650       total:252  pass:202  dwarn:0   dfail:0   fail:0   skip:50 
fi-ivb-3520m     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-ivb-3770      total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-kbl-7500u     total:252  pass:234  dwarn:0   dfail:0   fail:0   skip:18 
fi-skl-6260u     total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-skl-6700hq    total:252  pass:235  dwarn:0   dfail:0   fail:0   skip:17 
fi-skl-6700k     total:252  pass:230  dwarn:4   dfail:0   fail:0   skip:18 
fi-skl-6770hq    total:252  pass:242  dwarn:0   dfail:0   fail:0   skip:10 
fi-snb-2520m     total:252  pass:224  dwarn:0   dfail:0   fail:0   skip:28 
fi-snb-2600      total:252  pass:223  dwarn:0   dfail:0   fail:0   skip:29 

02022d17a5787709617b7897de3906970e2b0721 drm-tip: 2017y-02m-17d-15h-15m-45s UTC integration manifest
1ecf305 drm/i915: Remove temporary allocation of dma addresses when rotating

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_3884/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
  2017-02-17 15:10 [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating Chris Wilson
  2017-02-17 18:22 ` ✓ Fi.CI.BAT: success for drm/i915: Remove temporary allocation of dma addresses when rotating (rev2) Patchwork
@ 2017-02-21 15:01 ` Joonas Lahtinen
  2017-02-22  8:29   ` Tvrtko Ursulin
  1 sibling, 1 reply; 14+ messages in thread
From: Joonas Lahtinen @ 2017-02-21 15:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On pe, 2017-02-17 at 15:10 +0000, Chris Wilson wrote:
> The object already stores (computed on the fly) the index to dma address
> so use it instead of reallocating a large temporary array every time we
> bind a rotated framebuffer.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Matthew Auld <matthew.william.auld@gmail.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

<SNIP>

> +rotate_pages(struct drm_i915_gem_object *obj,
> +	     const struct intel_rotation_plane_info *p,
>  	     struct sg_table *st, struct scatterlist *sg)
>  {
>  	unsigned int column, row;
> -	unsigned int src_idx;
>  
> -	for (column = 0; column < width; column++) {
> -		src_idx = stride * (height - 1) + column;
> -		for (row = 0; row < height; row++) {
> -			st->nents++;
> +	for (column = 0; column < p->width; column++) {
> +		unsigned long src_idx =
> +			p->stride * (p->height - 1) + column + p->offset;
> +		for (row = 0; row < p->height; row++) {
> +			struct scatterlist *src;
> +			unsigned int n;
> +
> +			src = i915_gem_object_get_sg(obj, src_idx, &n);

i915_gem_object_get_sg has variable names obj, n, *offset, so I'd be
little concerned of sidetracking reader. Rename n into offset?

> +			src_idx -= p->stride;
> +
>  			/* We don't need the pages, but need to initialize
>  			 * the entries so the sg list can be happily traversed.
>  			 * The only thing we need are DMA addresses.
>  			 */
>  			sg_set_page(sg, NULL, PAGE_SIZE, 0);
> -			sg_dma_address(sg) = in[offset + src_idx];
> +			sg_dma_address(sg) = sg_dma_address(src) + n*PAGE_SIZE;
>  			sg_dma_len(sg) = PAGE_SIZE;
> -			sg = sg_next(sg);
> -			src_idx -= stride;

I'm not sure why moving this line, might as well hoist all these to the
for() line.

> +			sg = __sg_next(sg);
> +
> +			st->nents++;
>  		}
>  	}
>  
> @@ -3074,62 +3079,30 @@ static noinline struct sg_table *
>  intel_rotate_pages(struct intel_rotation_info *rot_info,
>  		   struct drm_i915_gem_object *obj)
>  {
> -	const unsigned long n_pages = obj->base.size / PAGE_SIZE;
> -	unsigned int size = intel_rotation_info_size(rot_info);
> -	struct sgt_iter sgt_iter;
> -	dma_addr_t dma_addr;
> -	unsigned long i;
> -	dma_addr_t *page_addr_list;
> -	struct sg_table *st;
> +	const unsigned int size = intel_rotation_info_size(rot_info);

This is only used once, just inline it.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Could use an A-b from Tvrtko.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
  2017-02-21 15:01 ` [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating Joonas Lahtinen
@ 2017-02-22  8:29   ` Tvrtko Ursulin
  2017-02-22  8:44     ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Tvrtko Ursulin @ 2017-02-22  8:29 UTC (permalink / raw)
  To: Joonas Lahtinen, Chris Wilson, intel-gfx


On 21/02/2017 15:01, Joonas Lahtinen wrote:
> On pe, 2017-02-17 at 15:10 +0000, Chris Wilson wrote:
>> The object already stores (computed on the fly) the index to dma address
>> so use it instead of reallocating a large temporary array every time we
>> bind a rotated framebuffer.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Matthew Auld <matthew.william.auld@gmail.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> <SNIP>
>
>> +rotate_pages(struct drm_i915_gem_object *obj,
>> +	     const struct intel_rotation_plane_info *p,
>>  	     struct sg_table *st, struct scatterlist *sg)
>>  {
>>  	unsigned int column, row;
>> -	unsigned int src_idx;
>>
>> -	for (column = 0; column < width; column++) {
>> -		src_idx = stride * (height - 1) + column;
>> -		for (row = 0; row < height; row++) {
>> -			st->nents++;
>> +	for (column = 0; column < p->width; column++) {
>> +		unsigned long src_idx =
>> +			p->stride * (p->height - 1) + column + p->offset;
>> +		for (row = 0; row < p->height; row++) {
>> +			struct scatterlist *src;
>> +			unsigned int n;
>> +
>> +			src = i915_gem_object_get_sg(obj, src_idx, &n);
>
> i915_gem_object_get_sg has variable names obj, n, *offset, so I'd be
> little concerned of sidetracking reader. Rename n into offset?
>
>> +			src_idx -= p->stride;
>> +
>>  			/* We don't need the pages, but need to initialize
>>  			 * the entries so the sg list can be happily traversed.
>>  			 * The only thing we need are DMA addresses.
>>  			 */
>>  			sg_set_page(sg, NULL, PAGE_SIZE, 0);
>> -			sg_dma_address(sg) = in[offset + src_idx];
>> +			sg_dma_address(sg) = sg_dma_address(src) + n*PAGE_SIZE;
>>  			sg_dma_len(sg) = PAGE_SIZE;
>> -			sg = sg_next(sg);
>> -			src_idx -= stride;
>
> I'm not sure why moving this line, might as well hoist all these to the
> for() line.
>
>> +			sg = __sg_next(sg);
>> +
>> +			st->nents++;
>>  		}
>>  	}
>>
>> @@ -3074,62 +3079,30 @@ static noinline struct sg_table *
>>  intel_rotate_pages(struct intel_rotation_info *rot_info,
>>  		   struct drm_i915_gem_object *obj)
>>  {
>> -	const unsigned long n_pages = obj->base.size / PAGE_SIZE;
>> -	unsigned int size = intel_rotation_info_size(rot_info);
>> -	struct sgt_iter sgt_iter;
>> -	dma_addr_t dma_addr;
>> -	unsigned long i;
>> -	dma_addr_t *page_addr_list;
>> -	struct sg_table *st;
>> +	const unsigned int size = intel_rotation_info_size(rot_info);
>
> This is only used once, just inline it.
>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>
> Could use an A-b from Tvrtko.

I did not like it in another thread, well, better say I was concerned 
about the increased memory use by the radix tree which would then stick 
around for the obj->pages lifetime (long time for a framebuffer I 
thought). While the temporary array allocations here are not that big 
and very temporary.

I guess someone needs to bite the bullet and try and figure out how 
exactly big is the radix tree for some mixes of more or less coalesced 
sg entries.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
  2017-02-22  8:29   ` Tvrtko Ursulin
@ 2017-02-22  8:44     ` Chris Wilson
  2017-02-27  9:55       ` Tvrtko Ursulin
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2017-02-22  8:44 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Wed, Feb 22, 2017 at 08:29:06AM +0000, Tvrtko Ursulin wrote:
> 
> On 21/02/2017 15:01, Joonas Lahtinen wrote:
> >On pe, 2017-02-17 at 15:10 +0000, Chris Wilson wrote:
> >>The object already stores (computed on the fly) the index to dma address
> >>so use it instead of reallocating a large temporary array every time we
> >>bind a rotated framebuffer.
> >>
> >>Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>Cc: Matthew Auld <matthew.william.auld@gmail.com>
> >>Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >>Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >
> ><SNIP>
> >
> >>+rotate_pages(struct drm_i915_gem_object *obj,
> >>+	     const struct intel_rotation_plane_info *p,
> >> 	     struct sg_table *st, struct scatterlist *sg)
> >> {
> >> 	unsigned int column, row;
> >>-	unsigned int src_idx;
> >>
> >>-	for (column = 0; column < width; column++) {
> >>-		src_idx = stride * (height - 1) + column;
> >>-		for (row = 0; row < height; row++) {
> >>-			st->nents++;
> >>+	for (column = 0; column < p->width; column++) {
> >>+		unsigned long src_idx =
> >>+			p->stride * (p->height - 1) + column + p->offset;
> >>+		for (row = 0; row < p->height; row++) {
> >>+			struct scatterlist *src;
> >>+			unsigned int n;
> >>+
> >>+			src = i915_gem_object_get_sg(obj, src_idx, &n);
> >
> >i915_gem_object_get_sg has variable names obj, n, *offset, so I'd be
> >little concerned of sidetracking reader. Rename n into offset?
> >
> >>+			src_idx -= p->stride;
> >>+
> >> 			/* We don't need the pages, but need to initialize
> >> 			 * the entries so the sg list can be happily traversed.
> >> 			 * The only thing we need are DMA addresses.
> >> 			 */
> >> 			sg_set_page(sg, NULL, PAGE_SIZE, 0);
> >>-			sg_dma_address(sg) = in[offset + src_idx];
> >>+			sg_dma_address(sg) = sg_dma_address(src) + n*PAGE_SIZE;
> >> 			sg_dma_len(sg) = PAGE_SIZE;
> >>-			sg = sg_next(sg);
> >>-			src_idx -= stride;
> >
> >I'm not sure why moving this line, might as well hoist all these to the
> >for() line.
> >
> >>+			sg = __sg_next(sg);
> >>+
> >>+			st->nents++;
> >> 		}
> >> 	}
> >>
> >>@@ -3074,62 +3079,30 @@ static noinline struct sg_table *
> >> intel_rotate_pages(struct intel_rotation_info *rot_info,
> >> 		   struct drm_i915_gem_object *obj)
> >> {
> >>-	const unsigned long n_pages = obj->base.size / PAGE_SIZE;
> >>-	unsigned int size = intel_rotation_info_size(rot_info);
> >>-	struct sgt_iter sgt_iter;
> >>-	dma_addr_t dma_addr;
> >>-	unsigned long i;
> >>-	dma_addr_t *page_addr_list;
> >>-	struct sg_table *st;
> >>+	const unsigned int size = intel_rotation_info_size(rot_info);
> >
> >This is only used once, just inline it.
> >
> >Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> >
> >Could use an A-b from Tvrtko.
> 
> I did not like it in another thread, well, better say I was
> concerned about the increased memory use by the radix tree which
> would then stick around for the obj->pages lifetime (long time for a
> framebuffer I thought). While the temporary array allocations here
> are not that big and very temporary.
> 
> I guess someone needs to bite the bullet and try and figure out how
> exactly big is the radix tree for some mixes of more or less
> coalesced sg entries.

I also think that's an argument for improving the general cache rather
than arguing against using it.
-Chris
> 

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
  2017-02-22  8:44     ` Chris Wilson
@ 2017-02-27  9:55       ` Tvrtko Ursulin
  2017-02-27 10:06         ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Tvrtko Ursulin @ 2017-02-27  9:55 UTC (permalink / raw)
  To: Chris Wilson, Joonas Lahtinen, intel-gfx


On 22/02/2017 08:44, Chris Wilson wrote:
> On Wed, Feb 22, 2017 at 08:29:06AM +0000, Tvrtko Ursulin wrote:
>>
>> On 21/02/2017 15:01, Joonas Lahtinen wrote:
>>> On pe, 2017-02-17 at 15:10 +0000, Chris Wilson wrote:
>>>> The object already stores (computed on the fly) the index to dma address
>>>> so use it instead of reallocating a large temporary array every time we
>>>> bind a rotated framebuffer.
>>>>
>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Cc: Matthew Auld <matthew.william.auld@gmail.com>
>>>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>
>>> <SNIP>
>>>
>>>> +rotate_pages(struct drm_i915_gem_object *obj,
>>>> +	     const struct intel_rotation_plane_info *p,
>>>> 	     struct sg_table *st, struct scatterlist *sg)
>>>> {
>>>> 	unsigned int column, row;
>>>> -	unsigned int src_idx;
>>>>
>>>> -	for (column = 0; column < width; column++) {
>>>> -		src_idx = stride * (height - 1) + column;
>>>> -		for (row = 0; row < height; row++) {
>>>> -			st->nents++;
>>>> +	for (column = 0; column < p->width; column++) {
>>>> +		unsigned long src_idx =
>>>> +			p->stride * (p->height - 1) + column + p->offset;
>>>> +		for (row = 0; row < p->height; row++) {
>>>> +			struct scatterlist *src;
>>>> +			unsigned int n;
>>>> +
>>>> +			src = i915_gem_object_get_sg(obj, src_idx, &n);
>>>
>>> i915_gem_object_get_sg has variable names obj, n, *offset, so I'd be
>>> little concerned of sidetracking reader. Rename n into offset?

Or use i915_gem_object_get_dma_address in the sg_dma_adress_assignment 
directly.

>>>
>>>> +			src_idx -= p->stride;
>>>> +
>>>> 			/* We don't need the pages, but need to initialize
>>>> 			 * the entries so the sg list can be happily traversed.
>>>> 			 * The only thing we need are DMA addresses.
>>>> 			 */
>>>> 			sg_set_page(sg, NULL, PAGE_SIZE, 0);
>>>> -			sg_dma_address(sg) = in[offset + src_idx];
>>>> +			sg_dma_address(sg) = sg_dma_address(src) + n*PAGE_SIZE;
>>>> 			sg_dma_len(sg) = PAGE_SIZE;
>>>> -			sg = sg_next(sg);
>>>> -			src_idx -= stride;
>>>
>>> I'm not sure why moving this line, might as well hoist all these to the
>>> for() line.
>>>
>>>> +			sg = __sg_next(sg);
>>>> +
>>>> +			st->nents++;
>>>> 		}
>>>> 	}
>>>>
>>>> @@ -3074,62 +3079,30 @@ static noinline struct sg_table *
>>>> intel_rotate_pages(struct intel_rotation_info *rot_info,
>>>> 		   struct drm_i915_gem_object *obj)
>>>> {
>>>> -	const unsigned long n_pages = obj->base.size / PAGE_SIZE;
>>>> -	unsigned int size = intel_rotation_info_size(rot_info);
>>>> -	struct sgt_iter sgt_iter;
>>>> -	dma_addr_t dma_addr;
>>>> -	unsigned long i;
>>>> -	dma_addr_t *page_addr_list;
>>>> -	struct sg_table *st;
>>>> +	const unsigned int size = intel_rotation_info_size(rot_info);
>>>
>>> This is only used once, just inline it.
>>>
>>> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>>>
>>> Could use an A-b from Tvrtko.
>>
>> I did not like it in another thread, well, better say I was
>> concerned about the increased memory use by the radix tree which
>> would then stick around for the obj->pages lifetime (long time for a
>> framebuffer I thought). While the temporary array allocations here
>> are not that big and very temporary.
>>
>> I guess someone needs to bite the bullet and try and figure out how
>> exactly big is the radix tree for some mixes of more or less
>> coalesced sg entries.
>
> I also think that's an argument for improving the general cache rather
> than arguing against using it.

Well I wasn't concerned about the cache per se, but about whether it is 
completely appropriate (best choice) to use it in this particular case.

Because as I said before, for 1920x1080x32 we are talking about a 16KiB 
extremely short lived temporary allocation, vs the similar size for the 
sg radix cache. But radix cache sticks around the the lifetime of 
obj->mm.pages and it wouldn't otherwise be there since AFAICS in 
practice no one really touches frame buffers in a way to trigger its 
creation.

Those amounts of memory are not a concern, but again, is the 
simplification of the code worth the conceptual downsides mentioned 
above? Even if we considered 4K frame buffers, when both allocations go 
to ~64KiB, would that change anything? I am not sure, probably not for me.

So I am still unsure that we should go with this change.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
  2017-02-27  9:55       ` Tvrtko Ursulin
@ 2017-02-27 10:06         ` Chris Wilson
  2017-02-27 10:14           ` Tvrtko Ursulin
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2017-02-27 10:06 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Mon, Feb 27, 2017 at 09:55:10AM +0000, Tvrtko Ursulin wrote:
> 
> On 22/02/2017 08:44, Chris Wilson wrote:
> >I also think that's an argument for improving the general cache rather
> >than arguing against using it.
> 
> Well I wasn't concerned about the cache per se, but about whether it
> is completely appropriate (best choice) to use it in this particular
> case.
> 
> Because as I said before, for 1920x1080x32 we are talking about a
> 16KiB extremely short lived temporary allocation, vs the similar
> size for the sg radix cache. But radix cache sticks around the the
> lifetime of obj->mm.pages and it wouldn't otherwise be there since
> AFAICS in practice no one really touches frame buffers in a way to
> trigger its creation.
> 
> Those amounts of memory are not a concern, but again, is the
> simplification of the code worth the conceptual downsides mentioned
> above? Even if we considered 4K frame buffers, when both allocations
> go to ~64KiB, would that change anything? I am not sure, probably
> not for me.
> 
> So I am still unsure that we should go with this change.

Again, the complaint you have here are general concerns about caching
the mapping. Avoiding using the cache instead of improving the cache
seems the wrong approach.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
  2017-02-27 10:06         ` Chris Wilson
@ 2017-02-27 10:14           ` Tvrtko Ursulin
  2017-02-27 10:21             ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Tvrtko Ursulin @ 2017-02-27 10:14 UTC (permalink / raw)
  To: Chris Wilson, Joonas Lahtinen, intel-gfx


On 27/02/2017 10:06, Chris Wilson wrote:
> On Mon, Feb 27, 2017 at 09:55:10AM +0000, Tvrtko Ursulin wrote:
>>
>> On 22/02/2017 08:44, Chris Wilson wrote:
>>> I also think that's an argument for improving the general cache rather
>>> than arguing against using it.
>>
>> Well I wasn't concerned about the cache per se, but about whether it
>> is completely appropriate (best choice) to use it in this particular
>> case.
>>
>> Because as I said before, for 1920x1080x32 we are talking about a
>> 16KiB extremely short lived temporary allocation, vs the similar
>> size for the sg radix cache. But radix cache sticks around the the
>> lifetime of obj->mm.pages and it wouldn't otherwise be there since
>> AFAICS in practice no one really touches frame buffers in a way to
>> trigger its creation.
>>
>> Those amounts of memory are not a concern, but again, is the
>> simplification of the code worth the conceptual downsides mentioned
>> above? Even if we considered 4K frame buffers, when both allocations
>> go to ~64KiB, would that change anything? I am not sure, probably
>> not for me.
>>
>> So I am still unsure that we should go with this change.
>
> Again, the complaint you have here are general concerns about caching
> the mapping. Avoiding using the cache instead of improving the cache
> seems the wrong approach.

Depends what kind of improvments to the cache you have in mind. If you 
are thinking about size then I disagree, I think it is efficient enough 
already. But if you are thinking about the lifetime management then it 
is obvious from all that I have written so far that I would agree with 
that. Since the core of my "complaint" is the lifetime mismatch, and not 
the size.

For lifetime I am not sure what you could do. Exposing the size of it, 
with maybe some other bits attached the the object, to the shrinker I 
think doesn't make much sense since the sizes are so small compared to 
the backing store sizes.

Perhaps you could add an explicit reset of the cache after the rotation 
is done with it, but then the only remaining benefit will be avoiding 
greater than zero order allocations. I say the only one.. it would still 
be a good one. Just have no idea if this level of cache usage would 
satisfy you!

Perhaps you could say what kind of optimisation you have in mind to save 
me guessing? :)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
  2017-02-27 10:14           ` Tvrtko Ursulin
@ 2017-02-27 10:21             ` Chris Wilson
  2017-02-27 12:52               ` Joonas Lahtinen
  2017-02-27 14:31               ` Tvrtko Ursulin
  0 siblings, 2 replies; 14+ messages in thread
From: Chris Wilson @ 2017-02-27 10:21 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Mon, Feb 27, 2017 at 10:14:12AM +0000, Tvrtko Ursulin wrote:
> 
> On 27/02/2017 10:06, Chris Wilson wrote:
> >On Mon, Feb 27, 2017 at 09:55:10AM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 22/02/2017 08:44, Chris Wilson wrote:
> >>>I also think that's an argument for improving the general cache rather
> >>>than arguing against using it.
> >>
> >>Well I wasn't concerned about the cache per se, but about whether it
> >>is completely appropriate (best choice) to use it in this particular
> >>case.
> >>
> >>Because as I said before, for 1920x1080x32 we are talking about a
> >>16KiB extremely short lived temporary allocation, vs the similar
> >>size for the sg radix cache. But radix cache sticks around the the
> >>lifetime of obj->mm.pages and it wouldn't otherwise be there since
> >>AFAICS in practice no one really touches frame buffers in a way to
> >>trigger its creation.
> >>
> >>Those amounts of memory are not a concern, but again, is the
> >>simplification of the code worth the conceptual downsides mentioned
> >>above? Even if we considered 4K frame buffers, when both allocations
> >>go to ~64KiB, would that change anything? I am not sure, probably
> >>not for me.
> >>
> >>So I am still unsure that we should go with this change.
> >
> >Again, the complaint you have here are general concerns about caching
> >the mapping. Avoiding using the cache instead of improving the cache
> >seems the wrong approach.
> 
> Depends what kind of improvments to the cache you have in mind. If
> you are thinking about size then I disagree, I think it is efficient
> enough already. But if you are thinking about the lifetime
> management then it is obvious from all that I have written so far
> that I would agree with that. Since the core of my "complaint" is
> the lifetime mismatch, and not the size.
> 
> For lifetime I am not sure what you could do. Exposing the size of
> it, with maybe some other bits attached the the object, to the
> shrinker I think doesn't make much sense since the sizes are so
> small compared to the backing store sizes.
> 
> Perhaps you could add an explicit reset of the cache after the
> rotation is done with it, but then the only remaining benefit will
> be avoiding greater than zero order allocations. I say the only
> one.. it would still be a good one. Just have no idea if this level
> of cache usage would satisfy you!
> 
> Perhaps you could say what kind of optimisation you have in mind to
> save me guessing? :)

I was thinking you would like an inactivity timer. Or we could have a
separate shrinker, as that's the principal cache management system.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
  2017-02-27 10:21             ` Chris Wilson
@ 2017-02-27 12:52               ` Joonas Lahtinen
  2017-02-27 14:31               ` Tvrtko Ursulin
  1 sibling, 0 replies; 14+ messages in thread
From: Joonas Lahtinen @ 2017-02-27 12:52 UTC (permalink / raw)
  To: Chris Wilson, Tvrtko Ursulin; +Cc: intel-gfx

On ma, 2017-02-27 at 10:21 +0000, Chris Wilson wrote:
> On Mon, Feb 27, 2017 at 10:14:12AM +0000, Tvrtko Ursulin wrote:
> > Perhaps you could say what kind of optimisation you have in mind to
> > save me guessing? :)
> 
> I was thinking you would like an inactivity timer. Or we could have a
> separate shrinker, as that's the principal cache management system.

My vote goes for merging and addressing the seen problems in the
caching logic.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
  2017-02-27 10:21             ` Chris Wilson
  2017-02-27 12:52               ` Joonas Lahtinen
@ 2017-02-27 14:31               ` Tvrtko Ursulin
  2017-11-14 18:14                 ` Chris Wilson
  1 sibling, 1 reply; 14+ messages in thread
From: Tvrtko Ursulin @ 2017-02-27 14:31 UTC (permalink / raw)
  To: Chris Wilson, Joonas Lahtinen, intel-gfx


On 27/02/2017 10:21, Chris Wilson wrote:
> On Mon, Feb 27, 2017 at 10:14:12AM +0000, Tvrtko Ursulin wrote:
>>
>> On 27/02/2017 10:06, Chris Wilson wrote:
>>> On Mon, Feb 27, 2017 at 09:55:10AM +0000, Tvrtko Ursulin wrote:
>>>>
>>>> On 22/02/2017 08:44, Chris Wilson wrote:
>>>>> I also think that's an argument for improving the general cache rather
>>>>> than arguing against using it.
>>>>
>>>> Well I wasn't concerned about the cache per se, but about whether it
>>>> is completely appropriate (best choice) to use it in this particular
>>>> case.
>>>>
>>>> Because as I said before, for 1920x1080x32 we are talking about a
>>>> 16KiB extremely short lived temporary allocation, vs the similar
>>>> size for the sg radix cache. But radix cache sticks around the the
>>>> lifetime of obj->mm.pages and it wouldn't otherwise be there since
>>>> AFAICS in practice no one really touches frame buffers in a way to
>>>> trigger its creation.
>>>>
>>>> Those amounts of memory are not a concern, but again, is the
>>>> simplification of the code worth the conceptual downsides mentioned
>>>> above? Even if we considered 4K frame buffers, when both allocations
>>>> go to ~64KiB, would that change anything? I am not sure, probably
>>>> not for me.
>>>>
>>>> So I am still unsure that we should go with this change.
>>>
>>> Again, the complaint you have here are general concerns about caching
>>> the mapping. Avoiding using the cache instead of improving the cache
>>> seems the wrong approach.
>>
>> Depends what kind of improvments to the cache you have in mind. If
>> you are thinking about size then I disagree, I think it is efficient
>> enough already. But if you are thinking about the lifetime
>> management then it is obvious from all that I have written so far
>> that I would agree with that. Since the core of my "complaint" is
>> the lifetime mismatch, and not the size.
>>
>> For lifetime I am not sure what you could do. Exposing the size of
>> it, with maybe some other bits attached the the object, to the
>> shrinker I think doesn't make much sense since the sizes are so
>> small compared to the backing store sizes.
>>
>> Perhaps you could add an explicit reset of the cache after the
>> rotation is done with it, but then the only remaining benefit will
>> be avoiding greater than zero order allocations. I say the only
>> one.. it would still be a good one. Just have no idea if this level
>> of cache usage would satisfy you!
>>
>> Perhaps you could say what kind of optimisation you have in mind to
>> save me guessing? :)
>
> I was thinking you would like an inactivity timer. Or we could have a
> separate shrinker, as that's the principal cache management system.

I thought about the shrinker myself. Even wrote some code to more 
accurately size the objects as part of the existing passes. But as I 
said the contribution of anything object and not backing store is so 
small that, even though it would conceptually be more correct and 
perhaps avoid some marginal over-shrinking, I am not sure it is worth 
doing it. Assuming of course that I got the sizing of the radix tree 
correct! I just hacked something up based on some debug dumping code 
from radix-tree.c.

So the complication is there is no API to get the size of the radix tree 
(or the scatter list table) and we would have to add something, either 
internally to i915, or try and upstream it.

Or we avoid that with your timer idea and just purge all caches which 
haven't been used in a while. Maybe from idle work or something.

But for this immediate patch, would you be happy with adding and 
exporting i915_gem_object_reset_page_iter and calling it after rotation 
is done with accessing the pages? Benefit would be avoidance of 
drm_malloc_gfp if that bothers you most.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
  2017-02-27 14:31               ` Tvrtko Ursulin
@ 2017-11-14 18:14                 ` Chris Wilson
  2017-11-15  9:18                   ` Tvrtko Ursulin
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2017-11-14 18:14 UTC (permalink / raw)
  To: Tvrtko Ursulin, Joonas Lahtinen, intel-gfx

Quoting Tvrtko Ursulin (2017-02-27 14:31:17)
> 
> On 27/02/2017 10:21, Chris Wilson wrote:
> > On Mon, Feb 27, 2017 at 10:14:12AM +0000, Tvrtko Ursulin wrote:
> >>
> >> On 27/02/2017 10:06, Chris Wilson wrote:
> >>> On Mon, Feb 27, 2017 at 09:55:10AM +0000, Tvrtko Ursulin wrote:
> >>>>
> >>>> On 22/02/2017 08:44, Chris Wilson wrote:
> >>>>> I also think that's an argument for improving the general cache rather
> >>>>> than arguing against using it.
> >>>>
> >>>> Well I wasn't concerned about the cache per se, but about whether it
> >>>> is completely appropriate (best choice) to use it in this particular
> >>>> case.
> >>>>
> >>>> Because as I said before, for 1920x1080x32 we are talking about a
> >>>> 16KiB extremely short lived temporary allocation, vs the similar
> >>>> size for the sg radix cache. But radix cache sticks around the the
> >>>> lifetime of obj->mm.pages and it wouldn't otherwise be there since
> >>>> AFAICS in practice no one really touches frame buffers in a way to
> >>>> trigger its creation.
> >>>>
> >>>> Those amounts of memory are not a concern, but again, is the
> >>>> simplification of the code worth the conceptual downsides mentioned
> >>>> above? Even if we considered 4K frame buffers, when both allocations
> >>>> go to ~64KiB, would that change anything? I am not sure, probably
> >>>> not for me.
> >>>>
> >>>> So I am still unsure that we should go with this change.
> >>>
> >>> Again, the complaint you have here are general concerns about caching
> >>> the mapping. Avoiding using the cache instead of improving the cache
> >>> seems the wrong approach.
> >>
> >> Depends what kind of improvments to the cache you have in mind. If
> >> you are thinking about size then I disagree, I think it is efficient
> >> enough already. But if you are thinking about the lifetime
> >> management then it is obvious from all that I have written so far
> >> that I would agree with that. Since the core of my "complaint" is
> >> the lifetime mismatch, and not the size.
> >>
> >> For lifetime I am not sure what you could do. Exposing the size of
> >> it, with maybe some other bits attached the the object, to the
> >> shrinker I think doesn't make much sense since the sizes are so
> >> small compared to the backing store sizes.
> >>
> >> Perhaps you could add an explicit reset of the cache after the
> >> rotation is done with it, but then the only remaining benefit will
> >> be avoiding greater than zero order allocations. I say the only
> >> one.. it would still be a good one. Just have no idea if this level
> >> of cache usage would satisfy you!
> >>
> >> Perhaps you could say what kind of optimisation you have in mind to
> >> save me guessing? :)
> >
> > I was thinking you would like an inactivity timer. Or we could have a
> > separate shrinker, as that's the principal cache management system.
> 
> I thought about the shrinker myself. Even wrote some code to more 
> accurately size the objects as part of the existing passes. But as I 
> said the contribution of anything object and not backing store is so 
> small that, even though it would conceptually be more correct and 
> perhaps avoid some marginal over-shrinking, I am not sure it is worth 
> doing it. Assuming of course that I got the sizing of the radix tree 
> correct! I just hacked something up based on some debug dumping code 
> from radix-tree.c.
> 
> So the complication is there is no API to get the size of the radix tree 
> (or the scatter list table) and we would have to add something, either 
> internally to i915, or try and upstream it.
> 
> Or we avoid that with your timer idea and just purge all caches which 
> haven't been used in a while. Maybe from idle work or something.

Tempting. I like hooking into mark_idle/park more than adding a new
timer, and we already have the precedent of using that to initiate a
cache flush.

What's the impact of us keeping pages pinned when idle -- (a lot) more
work in the shrinker. Let's see where the cost-beneift lies.
 
> But for this immediate patch, would you be happy with adding and 
> exporting i915_gem_object_reset_page_iter and calling it after rotation 
> is done with accessing the pages? Benefit would be avoidance of 
> drm_malloc_gfp if that bothers you most.

Honestly I think the page_iter cache is useful and likely to already
exist or be used shortly after a portion of the object is rotated.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
  2017-11-14 18:14                 ` Chris Wilson
@ 2017-11-15  9:18                   ` Tvrtko Ursulin
  2017-11-15 10:03                     ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Tvrtko Ursulin @ 2017-11-15  9:18 UTC (permalink / raw)
  To: Chris Wilson, Joonas Lahtinen, intel-gfx


On 14/11/2017 18:14, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2017-02-27 14:31:17)
>>
>> On 27/02/2017 10:21, Chris Wilson wrote:
>>> On Mon, Feb 27, 2017 at 10:14:12AM +0000, Tvrtko Ursulin wrote:
>>>>
>>>> On 27/02/2017 10:06, Chris Wilson wrote:
>>>>> On Mon, Feb 27, 2017 at 09:55:10AM +0000, Tvrtko Ursulin wrote:
>>>>>>
>>>>>> On 22/02/2017 08:44, Chris Wilson wrote:
>>>>>>> I also think that's an argument for improving the general cache rather
>>>>>>> than arguing against using it.
>>>>>>
>>>>>> Well I wasn't concerned about the cache per se, but about whether it
>>>>>> is completely appropriate (best choice) to use it in this particular
>>>>>> case.
>>>>>>
>>>>>> Because as I said before, for 1920x1080x32 we are talking about a
>>>>>> 16KiB extremely short lived temporary allocation, vs the similar
>>>>>> size for the sg radix cache. But radix cache sticks around the the
>>>>>> lifetime of obj->mm.pages and it wouldn't otherwise be there since
>>>>>> AFAICS in practice no one really touches frame buffers in a way to
>>>>>> trigger its creation.
>>>>>>
>>>>>> Those amounts of memory are not a concern, but again, is the
>>>>>> simplification of the code worth the conceptual downsides mentioned
>>>>>> above? Even if we considered 4K frame buffers, when both allocations
>>>>>> go to ~64KiB, would that change anything? I am not sure, probably
>>>>>> not for me.
>>>>>>
>>>>>> So I am still unsure that we should go with this change.
>>>>>
>>>>> Again, the complaint you have here are general concerns about caching
>>>>> the mapping. Avoiding using the cache instead of improving the cache
>>>>> seems the wrong approach.
>>>>
>>>> Depends what kind of improvments to the cache you have in mind. If
>>>> you are thinking about size then I disagree, I think it is efficient
>>>> enough already. But if you are thinking about the lifetime
>>>> management then it is obvious from all that I have written so far
>>>> that I would agree with that. Since the core of my "complaint" is
>>>> the lifetime mismatch, and not the size.
>>>>
>>>> For lifetime I am not sure what you could do. Exposing the size of
>>>> it, with maybe some other bits attached the the object, to the
>>>> shrinker I think doesn't make much sense since the sizes are so
>>>> small compared to the backing store sizes.
>>>>
>>>> Perhaps you could add an explicit reset of the cache after the
>>>> rotation is done with it, but then the only remaining benefit will
>>>> be avoiding greater than zero order allocations. I say the only
>>>> one.. it would still be a good one. Just have no idea if this level
>>>> of cache usage would satisfy you!
>>>>
>>>> Perhaps you could say what kind of optimisation you have in mind to
>>>> save me guessing? :)
>>>
>>> I was thinking you would like an inactivity timer. Or we could have a
>>> separate shrinker, as that's the principal cache management system.
>>
>> I thought about the shrinker myself. Even wrote some code to more
>> accurately size the objects as part of the existing passes. But as I
>> said the contribution of anything object and not backing store is so
>> small that, even though it would conceptually be more correct and
>> perhaps avoid some marginal over-shrinking, I am not sure it is worth
>> doing it. Assuming of course that I got the sizing of the radix tree
>> correct! I just hacked something up based on some debug dumping code
>> from radix-tree.c.
>>
>> So the complication is there is no API to get the size of the radix tree
>> (or the scatter list table) and we would have to add something, either
>> internally to i915, or try and upstream it.
>>
>> Or we avoid that with your timer idea and just purge all caches which
>> haven't been used in a while. Maybe from idle work or something.
> 
> Tempting. I like hooking into mark_idle/park more than adding a new
> timer, and we already have the precedent of using that to initiate a
> cache flush.
> 
> What's the impact of us keeping pages pinned when idle -- (a lot) more
> work in the shrinker. Let's see where the cost-beneift lies.
>   
>> But for this immediate patch, would you be happy with adding and
>> exporting i915_gem_object_reset_page_iter and calling it after rotation
>> is done with accessing the pages? Benefit would be avoidance of
>> drm_malloc_gfp if that bothers you most.
> 
> Honestly I think the page_iter cache is useful and likely to already
> exist or be used shortly after a portion of the object is rotated.

How come? I thought CPU access to framebuffers is atypical nowadays.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating
  2017-11-15  9:18                   ` Tvrtko Ursulin
@ 2017-11-15 10:03                     ` Chris Wilson
  0 siblings, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-11-15 10:03 UTC (permalink / raw)
  To: Tvrtko Ursulin, Joonas Lahtinen, intel-gfx

Quoting Tvrtko Ursulin (2017-11-15 09:18:00)
> 
> On 14/11/2017 18:14, Chris Wilson wrote:
> > Honestly I think the page_iter cache is useful and likely to already
> > exist or be used shortly after a portion of the object is rotated.
> 
> How come? I thought CPU access to framebuffers is atypical nowadays.

But not entirely forgotten. It's the first, and sometimes, last thing a
client wires up. (Just yesterday mesa gained a patch for yet another
access path. Great.)

I also think it is of wider utility for the when the buffer gets reused
or new views. Your argument applies better to the wider aspect of the
futility of keeping caches around that may never be reused. And that is
worth improving, even more so if we can measure an impact.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-11-15 10:03 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-17 15:10 [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating Chris Wilson
2017-02-17 18:22 ` ✓ Fi.CI.BAT: success for drm/i915: Remove temporary allocation of dma addresses when rotating (rev2) Patchwork
2017-02-21 15:01 ` [PATCH] drm/i915: Remove temporary allocation of dma addresses when rotating Joonas Lahtinen
2017-02-22  8:29   ` Tvrtko Ursulin
2017-02-22  8:44     ` Chris Wilson
2017-02-27  9:55       ` Tvrtko Ursulin
2017-02-27 10:06         ` Chris Wilson
2017-02-27 10:14           ` Tvrtko Ursulin
2017-02-27 10:21             ` Chris Wilson
2017-02-27 12:52               ` Joonas Lahtinen
2017-02-27 14:31               ` Tvrtko Ursulin
2017-11-14 18:14                 ` Chris Wilson
2017-11-15  9:18                   ` Tvrtko Ursulin
2017-11-15 10:03                     ` Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.