All of lore.kernel.org
 help / color / mirror / Atom feed
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
To: Matthew Auld <matthew.auld@intel.com>
Cc: matthew.brost@intel.com, paulo.r.zanoni@intel.com,
	tvrtko.ursulin@intel.com, jani.nikula@intel.com,
	intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org,
	thomas.hellstrom@intel.com, lionel.g.landwerlin@intel.com,
	jason@jlekstrand.net, andi.shyti@linux.intel.com,
	daniel.vetter@intel.com, christian.koenig@amd.com
Subject: Re: [PATCH v8 21/22] drm/i915/vm_bind: Properly build persistent map sg table
Date: Tue, 13 Dec 2022 20:58:34 -0800	[thread overview]
Message-ID: <Y5lX+rKcXkV9YoPI@nvishwa1-DESK> (raw)
In-Reply-To: <b6f14794-caa4-d19e-e61a-2778dad3f57b@intel.com>

On Mon, Dec 12, 2022 at 06:17:01PM +0000, Matthew Auld wrote:
>On 29/11/2022 07:26, Niranjana Vishwanathapura wrote:
>>Properly build the sg table for persistent mapping which can
>>be partial map of the underlying object. Ensure the sg pages
>>are properly set for page backed regions. The dump capture
>>support requires this for page backed regions.
>>
>>Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
>>---
>>  drivers/gpu/drm/i915/i915_vma.c | 120 +++++++++++++++++++++++++++++++-
>>  1 file changed, 119 insertions(+), 1 deletion(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
>>index 1b9033865768..68a9ac77b4f2 100644
>>--- a/drivers/gpu/drm/i915/i915_vma.c
>>+++ b/drivers/gpu/drm/i915/i915_vma.c
>>@@ -1298,6 +1298,120 @@ intel_partial_pages(const struct i915_gtt_view *view,
>>  	return ERR_PTR(ret);
>>  }
>>+static unsigned int
>>+intel_copy_dma_sg(struct sg_table *src_st, struct sg_table *dst_st,
>>+		  u64 offset, u64 length, bool dry_run)
>>+{
>>+	struct scatterlist *dst_sg, *src_sg;
>>+	unsigned int i, len, nents = 0;
>>+
>>+	dst_sg = dst_st->sgl;
>>+	for_each_sgtable_dma_sg(src_st, src_sg, i) {
>>+		if (sg_dma_len(src_sg) <= offset) {
>>+			offset -= sg_dma_len(src_sg);
>>+			continue;
>>+		}
>>+
>>+		nents++;
>>+		len = min(sg_dma_len(src_sg) - offset, length);
>>+		if (!dry_run) {
>>+			sg_dma_address(dst_sg) = sg_dma_address(src_sg) + offset;
>>+			sg_dma_len(dst_sg) = len;
>>+			dst_sg = sg_next(dst_sg);
>>+		}
>>+
>>+		length -= len;
>>+		offset = 0;
>>+		if (!length)
>>+			break;
>>+	}
>>+	WARN_ON_ONCE(length);
>>+
>>+	return nents;
>>+}
>>+
>>+static unsigned int
>>+intel_copy_sg(struct sg_table *src_st, struct sg_table *dst_st,
>>+	      u64 offset, u64 length, bool dry_run)
>>+{
>>+	struct scatterlist *dst_sg, *src_sg;
>>+	unsigned int i, len, nents = 0;
>>+
>>+	dst_sg = dst_st->sgl;
>>+	for_each_sgtable_sg(src_st, src_sg, i) {
>>+		if (src_sg->length <= offset) {
>>+			offset -= src_sg->length;
>>+			continue;
>>+		}
>>+
>>+		nents++;
>>+		len = min(src_sg->length - offset, length);
>>+		if (!dry_run) {
>>+			unsigned long pfn;
>>+
>>+			pfn = page_to_pfn(sg_page(src_sg)) + offset / PAGE_SIZE;
>>+			sg_set_page(dst_sg, pfn_to_page(pfn), len, 0);
>>+			dst_sg = sg_next(dst_sg);
>>+		}
>>+
>>+		length -= len;
>>+		offset = 0;
>>+		if (!length)
>>+			break;
>>+	}
>>+	WARN_ON_ONCE(length);
>>+
>>+	return nents;
>>+}
>>+
>>+static noinline struct sg_table *
>>+intel_persistent_partial_pages(const struct i915_gtt_view *view,
>>+			       struct drm_i915_gem_object *obj)
>>+{
>>+	u64 offset = view->partial.offset << PAGE_SHIFT;
>>+	struct sg_table *st, *obj_st = obj->mm.pages;
>>+	u64 length = view->partial.size << PAGE_SHIFT;
>>+	struct scatterlist *sg;
>>+	unsigned int nents;
>>+	int ret = -ENOMEM;
>>+
>>+	st = kmalloc(sizeof(*st), GFP_KERNEL);
>>+	if (!st)
>>+		goto err_st_alloc;
>>+
>>+	/* Get required sg_table size */
>>+	nents = intel_copy_dma_sg(obj_st, st, offset, length, true);
>>+	if (i915_gem_object_has_struct_page(obj)) {
>>+		unsigned int pg_nents;
>>+
>>+		pg_nents = intel_copy_sg(obj_st, st, offset, length, true);
>>+		if (nents < pg_nents)
>>+			nents = pg_nents;
>>+	}
>>+
>>+	ret = sg_alloc_table(st, nents, GFP_KERNEL);
>>+	if (ret)
>>+		goto err_sg_alloc;
>>+
>>+	/* Build sg_table for specified <offset, length> section */
>>+	intel_copy_dma_sg(obj_st, st, offset, length, false);
>>+	if (i915_gem_object_has_struct_page(obj))
>>+		intel_copy_sg(obj_st, st, offset, length, false);
>>+
>>+	/* Mark last sg */
>>+	sg = st->sgl;
>>+	while (sg_next(sg))
>>+		sg = sg_next(sg);
>>+	sg_mark_end(sg);
>
>Do we need this bit? The nents is exactly orig_nents, and 
>sg_alloc_table will already mark the end for you.
>

Ok, looks like we don't need it as sg_alloc_table() sets it.
While looking at sg_alloc_table(), it looks like it is possible for it
to return -ENOMEM with a partially built table, but we are not
cleaning that up anywhere. Something to consider later, maybe.
https://elixir.bootlin.com/linux/latest/source/lib/scatterlist.c#L330

>Is it not possible to re-use remap_contiguous_pages() somehow? Also do 
>we need the dry_run bit if we use sg_trim()? Maybe something like:
>
>dst = sg_alloc_table(partial.size);
>
>remap_contigious_pages_sg(dst, src);
>i915_sg_trim(dst);
>
>dst->nents = 0;
>sg = remap_contigious_pages_dma_sg(dst, src);
>

But then those remap_contiguous_pages[_dma]_sg would look just like
our intel_copy[_dma]_sg().
And the problem I have with i915_sg_trim() is that it uses the _sg iterator
only and not the _dma_sg iterator. I think, at least in theory, it is possible
to have a greater number of dma_sg elements than sg (page) elements. Right?
That is why I am doing a dry run of both above and taking the max of the two.

Niranjana

>>+
>>+	return st;
>>+
>>+err_sg_alloc:
>>+	kfree(st);
>>+err_st_alloc:
>>+	return ERR_PTR(ret);
>>+}
>>+
>>  static int
>>  __i915_vma_get_pages(struct i915_vma *vma)
>>  {
>>@@ -1330,7 +1444,11 @@ __i915_vma_get_pages(struct i915_vma *vma)
>>  		break;
>>  	case I915_GTT_VIEW_PARTIAL:
>>-		pages = intel_partial_pages(&vma->gtt_view, vma->obj);
>>+		if (i915_vma_is_persistent(vma))
>>+			pages = intel_persistent_partial_pages(&vma->gtt_view,
>>+							       vma->obj);
>>+		else
>>+			pages = intel_partial_pages(&vma->gtt_view, vma->obj);
>>  		break;
>>  	}

WARNING: multiple messages have this Message-ID (diff)
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
To: Matthew Auld <matthew.auld@intel.com>
Cc: paulo.r.zanoni@intel.com, jani.nikula@intel.com,
	intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org,
	thomas.hellstrom@intel.com, daniel.vetter@intel.com,
	christian.koenig@amd.com
Subject: Re: [Intel-gfx] [PATCH v8 21/22] drm/i915/vm_bind: Properly build persistent map sg table
Date: Tue, 13 Dec 2022 20:58:34 -0800	[thread overview]
Message-ID: <Y5lX+rKcXkV9YoPI@nvishwa1-DESK> (raw)
In-Reply-To: <b6f14794-caa4-d19e-e61a-2778dad3f57b@intel.com>

On Mon, Dec 12, 2022 at 06:17:01PM +0000, Matthew Auld wrote:
>On 29/11/2022 07:26, Niranjana Vishwanathapura wrote:
>>Properly build the sg table for persistent mapping which can
>>be partial map of the underlying object. Ensure the sg pages
>>are properly set for page backed regions. The dump capture
>>support requires this for page backed regions.
>>
>>Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
>>---
>>  drivers/gpu/drm/i915/i915_vma.c | 120 +++++++++++++++++++++++++++++++-
>>  1 file changed, 119 insertions(+), 1 deletion(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
>>index 1b9033865768..68a9ac77b4f2 100644
>>--- a/drivers/gpu/drm/i915/i915_vma.c
>>+++ b/drivers/gpu/drm/i915/i915_vma.c
>>@@ -1298,6 +1298,120 @@ intel_partial_pages(const struct i915_gtt_view *view,
>>  	return ERR_PTR(ret);
>>  }
>>+static unsigned int
>>+intel_copy_dma_sg(struct sg_table *src_st, struct sg_table *dst_st,
>>+		  u64 offset, u64 length, bool dry_run)
>>+{
>>+	struct scatterlist *dst_sg, *src_sg;
>>+	unsigned int i, len, nents = 0;
>>+
>>+	dst_sg = dst_st->sgl;
>>+	for_each_sgtable_dma_sg(src_st, src_sg, i) {
>>+		if (sg_dma_len(src_sg) <= offset) {
>>+			offset -= sg_dma_len(src_sg);
>>+			continue;
>>+		}
>>+
>>+		nents++;
>>+		len = min(sg_dma_len(src_sg) - offset, length);
>>+		if (!dry_run) {
>>+			sg_dma_address(dst_sg) = sg_dma_address(src_sg) + offset;
>>+			sg_dma_len(dst_sg) = len;
>>+			dst_sg = sg_next(dst_sg);
>>+		}
>>+
>>+		length -= len;
>>+		offset = 0;
>>+		if (!length)
>>+			break;
>>+	}
>>+	WARN_ON_ONCE(length);
>>+
>>+	return nents;
>>+}
>>+
>>+static unsigned int
>>+intel_copy_sg(struct sg_table *src_st, struct sg_table *dst_st,
>>+	      u64 offset, u64 length, bool dry_run)
>>+{
>>+	struct scatterlist *dst_sg, *src_sg;
>>+	unsigned int i, len, nents = 0;
>>+
>>+	dst_sg = dst_st->sgl;
>>+	for_each_sgtable_sg(src_st, src_sg, i) {
>>+		if (src_sg->length <= offset) {
>>+			offset -= src_sg->length;
>>+			continue;
>>+		}
>>+
>>+		nents++;
>>+		len = min(src_sg->length - offset, length);
>>+		if (!dry_run) {
>>+			unsigned long pfn;
>>+
>>+			pfn = page_to_pfn(sg_page(src_sg)) + offset / PAGE_SIZE;
>>+			sg_set_page(dst_sg, pfn_to_page(pfn), len, 0);
>>+			dst_sg = sg_next(dst_sg);
>>+		}
>>+
>>+		length -= len;
>>+		offset = 0;
>>+		if (!length)
>>+			break;
>>+	}
>>+	WARN_ON_ONCE(length);
>>+
>>+	return nents;
>>+}
>>+
>>+static noinline struct sg_table *
>>+intel_persistent_partial_pages(const struct i915_gtt_view *view,
>>+			       struct drm_i915_gem_object *obj)
>>+{
>>+	u64 offset = view->partial.offset << PAGE_SHIFT;
>>+	struct sg_table *st, *obj_st = obj->mm.pages;
>>+	u64 length = view->partial.size << PAGE_SHIFT;
>>+	struct scatterlist *sg;
>>+	unsigned int nents;
>>+	int ret = -ENOMEM;
>>+
>>+	st = kmalloc(sizeof(*st), GFP_KERNEL);
>>+	if (!st)
>>+		goto err_st_alloc;
>>+
>>+	/* Get required sg_table size */
>>+	nents = intel_copy_dma_sg(obj_st, st, offset, length, true);
>>+	if (i915_gem_object_has_struct_page(obj)) {
>>+		unsigned int pg_nents;
>>+
>>+		pg_nents = intel_copy_sg(obj_st, st, offset, length, true);
>>+		if (nents < pg_nents)
>>+			nents = pg_nents;
>>+	}
>>+
>>+	ret = sg_alloc_table(st, nents, GFP_KERNEL);
>>+	if (ret)
>>+		goto err_sg_alloc;
>>+
>>+	/* Build sg_table for specified <offset, length> section */
>>+	intel_copy_dma_sg(obj_st, st, offset, length, false);
>>+	if (i915_gem_object_has_struct_page(obj))
>>+		intel_copy_sg(obj_st, st, offset, length, false);
>>+
>>+	/* Mark last sg */
>>+	sg = st->sgl;
>>+	while (sg_next(sg))
>>+		sg = sg_next(sg);
>>+	sg_mark_end(sg);
>
>Do we need this bit? The nents is exactly orig_nents, and 
>sg_alloc_table will already mark the end for you.
>

Ok, looks like we don't need it as sg_alloc_table() sets it.
While looking at sg_alloc_table(), it looks like it is possible for it
to return -ENOMEM with a partially built table, but we are not
cleaning that up anywhere. Something to consider later, maybe.
https://elixir.bootlin.com/linux/latest/source/lib/scatterlist.c#L330

>Is it not possible to re-use remap_contiguous_pages() somehow? Also do 
>we need the dry_run bit if we use sg_trim()? Maybe something like:
>
>dst = sg_alloc_table(partial.size);
>
>remap_contigious_pages_sg(dst, src);
>i915_sg_trim(dst);
>
>dst->nents = 0;
>sg = remap_contigious_pages_dma_sg(dst, src);
>

But then those remap_contiguous_pages[_dma]_sg would look just like
our intel_copy[_dma]_sg().
And the problem I have with i915_sg_trim() is that it uses the _sg iterator
only and not the _dma_sg iterator. I think, at least in theory, it is possible
to have a greater number of dma_sg elements than sg (page) elements. Right?
That is why I am doing a dry run of both above and taking the max of the two.

Niranjana

>>+
>>+	return st;
>>+
>>+err_sg_alloc:
>>+	kfree(st);
>>+err_st_alloc:
>>+	return ERR_PTR(ret);
>>+}
>>+
>>  static int
>>  __i915_vma_get_pages(struct i915_vma *vma)
>>  {
>>@@ -1330,7 +1444,11 @@ __i915_vma_get_pages(struct i915_vma *vma)
>>  		break;
>>  	case I915_GTT_VIEW_PARTIAL:
>>-		pages = intel_partial_pages(&vma->gtt_view, vma->obj);
>>+		if (i915_vma_is_persistent(vma))
>>+			pages = intel_persistent_partial_pages(&vma->gtt_view,
>>+							       vma->obj);
>>+		else
>>+			pages = intel_partial_pages(&vma->gtt_view, vma->obj);
>>  		break;
>>  	}

  reply	other threads:[~2022-12-14  4:59 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-29  7:26 [PATCH v8 00/22] drm/i915/vm_bind: Add VM_BIND functionality Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] " Niranjana Vishwanathapura
2022-11-29  7:26 ` [PATCH v8 01/22] drm/i915/vm_bind: Expose vm lookup function Niranjana Vishwanathapura
2022-11-29  7:26   ` [Intel-gfx] " Niranjana Vishwanathapura
2022-11-29  7:26 ` [PATCH v8 02/22] drm/i915/vm_bind: Add __i915_sw_fence_await_reservation() Niranjana Vishwanathapura
2022-11-29  7:26   ` [Intel-gfx] " Niranjana Vishwanathapura
2022-11-29  7:26 ` [PATCH v8 03/22] drm/i915/vm_bind: Expose i915_gem_object_max_page_size() Niranjana Vishwanathapura
2022-11-29  7:26   ` [Intel-gfx] " Niranjana Vishwanathapura
2022-11-29  7:26 ` [PATCH v8 04/22] drm/i915/vm_bind: Add support to create persistent vma Niranjana Vishwanathapura
2022-11-29  7:26   ` [Intel-gfx] " Niranjana Vishwanathapura
2022-11-29  7:26 ` [PATCH v8 05/22] drm/i915/vm_bind: Implement bind and unbind of object Niranjana Vishwanathapura
2022-11-29  7:26   ` [Intel-gfx] " Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 06/22] drm/i915/vm_bind: Support for VM private BOs Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 07/22] drm/i915/vm_bind: Add support to handle object evictions Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 08/22] drm/i915/vm_bind: Support persistent vma activeness tracking Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 09/22] drm/i915/vm_bind: Add out fence support Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-11-29  7:26 ` [PATCH v8 10/22] drm/i915/vm_bind: Abstract out common execbuf functions Niranjana Vishwanathapura
2022-11-29  7:26   ` [Intel-gfx] " Niranjana Vishwanathapura
2022-11-29  7:26 ` [PATCH v8 11/22] drm/i915/vm_bind: Use common execbuf functions in execbuf path Niranjana Vishwanathapura
2022-11-29  7:26   ` [Intel-gfx] " Niranjana Vishwanathapura
2022-11-29  7:26 ` [PATCH v8 12/22] drm/i915/vm_bind: Implement I915_GEM_EXECBUFFER3 ioctl Niranjana Vishwanathapura
2022-11-29  7:26   ` [Intel-gfx] " Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 13/22] drm/i915/vm_bind: Update i915_vma_verify_bind_complete() Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 14/22] drm/i915/vm_bind: Expose i915_request_await_bind() Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 15/22] drm/i915/vm_bind: Handle persistent vmas in execbuf3 Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-11-29  7:26 ` [PATCH v8 16/22] drm/i915/vm_bind: userptr dma-resv changes Niranjana Vishwanathapura
2022-11-29  7:26   ` [Intel-gfx] " Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 17/22] drm/i915/vm_bind: Limit vm_bind mode to non-recoverable contexts Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 18/22] drm/i915/vm_bind: Add uapi for user to enable vm_bind_mode Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 19/22] drm/i915/vm_bind: Render VM_BIND documentation Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 20/22] drm/i915/vm_bind: Async vm_unbind support Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 21/22] drm/i915/vm_bind: Properly build persistent map sg table Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-12-12 18:17   ` Matthew Auld
2022-12-12 18:17     ` [Intel-gfx] " Matthew Auld
2022-12-14  4:58     ` Niranjana Vishwanathapura [this message]
2022-12-14  4:58       ` Niranjana Vishwanathapura
2022-11-29  7:26 ` [Intel-gfx] [PATCH v8 22/22] drm/i915/vm_bind: Support capture of persistent mappings Niranjana Vishwanathapura
2022-11-29  7:26   ` Niranjana Vishwanathapura
2022-12-01 10:49   ` Matthew Auld
2022-12-01 10:49     ` [Intel-gfx] " Matthew Auld
2022-12-01 15:27     ` Niranjana Vishwanathapura
2022-12-01 15:27       ` [Intel-gfx] " Niranjana Vishwanathapura
2022-12-01 18:43       ` Niranjana Vishwanathapura
2022-12-06 17:40         ` Matthew Auld
2022-12-08 13:54           ` Niranjana Vishwanathapura
2022-11-29  8:24 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915/vm_bind: Add VM_BIND functionality (rev11) Patchwork
2022-11-29  8:24 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2022-11-29  8:46 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2022-11-29 11:29 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y5lX+rKcXkV9YoPI@nvishwa1-DESK \
    --to=niranjana.vishwanathapura@intel.com \
    --cc=andi.shyti@linux.intel.com \
    --cc=christian.koenig@amd.com \
    --cc=daniel.vetter@intel.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=jani.nikula@intel.com \
    --cc=jason@jlekstrand.net \
    --cc=lionel.g.landwerlin@intel.com \
    --cc=matthew.auld@intel.com \
    --cc=matthew.brost@intel.com \
    --cc=paulo.r.zanoni@intel.com \
    --cc=thomas.hellstrom@intel.com \
    --cc=tvrtko.ursulin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.