All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: Tvrtko Ursulin <tursulin@ursulin.net>, Intel-gfx@lists.freedesktop.org
Cc: "Ben Widawsky" <benjamin.widawsky@intel.com>,
	"Jason Ekstrand" <jason@jlekstrand.net>,
	"Tvrtko Ursulin" <tvrtko.ursulin@intel.com>,
	linux-kernel@vger.kernel.org,
	"Joonas Lahtinen" <joonas.lahtinen@linux.intel.com>
Subject: Re: [PATCH 4/4] drm/i915: Use __sg_alloc_table_from_pages for userptr allocations
Date: Fri, 28 Jul 2017 12:06:58 +0100	[thread overview]
Message-ID: <150124001887.27803.7479747269963365633@mail.alporthouse.com> (raw)
In-Reply-To: <20170727090504.15812-5-tvrtko.ursulin@linux.intel.com>

Quoting Tvrtko Ursulin (2017-07-27 10:05:04)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> With the addition of __sg_alloc_table_from_pages we can control
> the maximum coallescing size and eliminate a separate path for
> allocating backing store here.
> 
> Similar to 871dfbd67d4e ("drm/i915: Allow compaction upto
> SWIOTLB max segment size") this enables more compact sg lists to
> be created and so has a beneficial effect on workloads with many
> and/or large objects of this class.
> 
> v2:
>  * Rename helper to i915_sg_segment_size and fix swiotlb override.
>  * Commit message update.
> 
> v3:
>  * Actually include the swiotlb override fix.
> 
> v4:
>  * Regroup parameters a bit. (Chris Wilson)
> 
> v5:
>  * Rebase for swiotlb_max_segment.
>  * Add DMA map failure handling as in abb0deacb5a6
>    ("drm/i915: Fallback to single PAGE_SIZE segments for DMA remapping").
> 
> v6: Handle swiotlb_max_segment() returning 1. (Joonas Lahtinen)
> 
> v7: Rebase.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: linux-kernel@vger.kernel.org
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         | 15 +++++++
>  drivers/gpu/drm/i915/i915_gem.c         |  6 +--
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 79 ++++++++++++---------------------
>  3 files changed, 45 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2c7456f4ed38..6383940e8d55 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2749,6 +2749,21 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
>              (((__iter).curr += PAGE_SIZE) < (__iter).max) ||           \
>              ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
>  
> +static inline unsigned int i915_sg_segment_size(void)
> +{
> +       unsigned int size = swiotlb_max_segment();
> +
> +       if (size == 0)
> +               return SCATTERLIST_MAX_SEGMENT;
> +
> +       size = rounddown(size, PAGE_SIZE);

Looks like swiotbl_max_seqment() is always page aligned when not 1.
And it returns bytes, ok.

Given that you are using a pot, you can use round_down().

> +       /* swiotlb_max_segment_size can return 1 byte when it means one page. */
> +       if (size < PAGE_SIZE)
> +               size = PAGE_SIZE;
> +
> +       return size;
> +}
> +
>  static inline const struct intel_device_info *
>  intel_info(const struct drm_i915_private *dev_priv)
>  {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 6faabf34f142..a60885d6231b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2339,7 +2339,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>         struct sgt_iter sgt_iter;
>         struct page *page;
>         unsigned long last_pfn = 0;     /* suppress gcc warning */
> -       unsigned int max_segment;
> +       unsigned int max_segment = i915_sg_segment_size();
>         gfp_t noreclaim;
>         int ret;
>  
> @@ -2350,10 +2350,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
>         GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
>         GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
>  
> -       max_segment = swiotlb_max_segment();
> -       if (!max_segment)
> -               max_segment = rounddown(UINT_MAX, PAGE_SIZE);

Conversion to i915_sg_segment_size(), ok.

> -
>         st = kmalloc(sizeof(*st), GFP_KERNEL);
>         if (st == NULL)
>                 return ERR_PTR(-ENOMEM);
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index ccd09e8419f5..60c10d4118ae 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -399,64 +399,42 @@ struct get_pages_work {
>         struct task_struct *task;
>  };
>  
> -#if IS_ENABLED(CONFIG_SWIOTLB)
> -#define swiotlb_active() swiotlb_nr_tbl()
> -#else
> -#define swiotlb_active() 0
> -#endif

Converted to i915_sg_segment_size(), nice.

> -static int
> -st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
> -{
> -       struct scatterlist *sg;
> -       int ret, n;
> -
> -       *st = kmalloc(sizeof(**st), GFP_KERNEL);
> -       if (*st == NULL)
> -               return -ENOMEM;
> -
> -       if (swiotlb_active()) {
> -               ret = sg_alloc_table(*st, num_pages, GFP_KERNEL);
> -               if (ret)
> -                       goto err;
> -
> -               for_each_sg((*st)->sgl, sg, num_pages, n)
> -                       sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> -       } else {
> -               ret = sg_alloc_table_from_pages(*st, pvec, num_pages,
> -                                               0, num_pages << PAGE_SHIFT,
> -                                               GFP_KERNEL);
> -               if (ret)
> -                       goto err;
> -       }
> -
> -       return 0;
> -
> -err:
> -       kfree(*st);
> -       *st = NULL;
> -       return ret;
> -}
> -
>  static struct sg_table *
> -__i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj,
> -                            struct page **pvec, int num_pages)
> +__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
> +                              struct page **pvec, int num_pages)
>  {
> -       struct sg_table *pages;
> +       unsigned int max_segment = i915_sg_segment_size();
> +       struct sg_table *st;
>         int ret;
>  
> -       ret = st_set_pages(&pages, pvec, num_pages);
> -       if (ret)
> +       st = kmalloc(sizeof(*st), GFP_KERNEL);
> +       if (!st)
> +               return ERR_PTR(-ENOMEM);
> +
> +alloc_table:
> +       ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
> +                                         0, num_pages << PAGE_SHIFT,
> +                                         max_segment,
> +                                         GFP_KERNEL);
> +       if (ret) {
> +               kfree(st);
>                 return ERR_PTR(ret);
> +       }
>  
> -       ret = i915_gem_gtt_prepare_pages(obj, pages);
> +       ret = i915_gem_gtt_prepare_pages(obj, st);
>         if (ret) {
> -               sg_free_table(pages);
> -               kfree(pages);
> +               sg_free_table(st);
> +
> +               if (max_segment > PAGE_SIZE) {
> +                       max_segment = PAGE_SIZE;
> +                       goto alloc_table;
> +               }
> +
> +               kfree(st);
>                 return ERR_PTR(ret);
>         }

Much neater.

>  
> -       return pages;
> +       return st;
>  }
>  
>  static int
> @@ -540,7 +518,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>                 struct sg_table *pages = ERR_PTR(ret);
>  
>                 if (pinned == npages) {
> -                       pages = __i915_gem_userptr_set_pages(obj, pvec, npages);
> +                       pages = __i915_gem_userptr_alloc_pages(obj, pvec,
> +                                                              npages);
>                         if (!IS_ERR(pages)) {
>                                 __i915_gem_object_set_pages(obj, pages);
>                                 pinned = 0;
> @@ -661,7 +640,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>                 pages = __i915_gem_userptr_get_pages_schedule(obj);
>                 active = pages == ERR_PTR(-EAGAIN);
>         } else {
> -               pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
> +               pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
>                 active = !IS_ERR(pages);
>         }
>         if (active)

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris

  reply	other threads:[~2017-07-28 11:07 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-27  9:05 [PATCH 0/4] Userptr bo slab use optimization Tvrtko Ursulin
2017-07-27  9:05 ` [PATCH 1/4] lib/scatterlist: Fix offset type in sg_alloc_table_from_pages Tvrtko Ursulin
2017-07-27  9:05   ` Tvrtko Ursulin
2017-07-27  9:05 ` [PATCH 2/4] lib/scatterlist: Avoid potential scatterlist entry overflow Tvrtko Ursulin
2017-07-27  9:05   ` Tvrtko Ursulin
2017-07-28 10:53   ` [Intel-gfx] " Chris Wilson
2017-07-27  9:05 ` [PATCH 3/4] lib/scatterlist: Introduce and export __sg_alloc_table_from_pages Tvrtko Ursulin
2017-07-27  9:05   ` Tvrtko Ursulin
2017-07-28 11:07   ` Chris Wilson
2017-08-02 13:06   ` [Intel-gfx] " Tvrtko Ursulin
2017-08-02 23:01     ` Andrew Morton
2017-08-03  9:21       ` Tvrtko Ursulin
2017-08-03  9:21         ` Tvrtko Ursulin
2017-07-27  9:05 ` [PATCH 4/4] drm/i915: Use __sg_alloc_table_from_pages for userptr allocations Tvrtko Ursulin
2017-07-27  9:05   ` Tvrtko Ursulin
2017-07-28 11:06   ` Chris Wilson [this message]
2017-07-27  9:25 ` [PATCH 0/4] Userptr bo slab use optimization Chris Wilson
2017-07-27 10:46   ` Tvrtko Ursulin
2017-07-27 10:53     ` Chris Wilson
2017-08-01 18:16   ` Ben Widawsky
2017-07-27  9:39 ` ✓ Fi.CI.BAT: success for " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2017-05-04 15:54 [PATCH 1/4] lib/scatterlist: Fix offset type in sg_alloc_table_from_pages Tvrtko Ursulin
2017-05-04 15:54 ` [PATCH 4/4] drm/i915: Use __sg_alloc_table_from_pages for userptr allocations Tvrtko Ursulin
2017-05-04 15:54   ` Tvrtko Ursulin
2017-01-16 14:12 [PATCH 1/4] lib/scatterlist: Fix offset type in sg_alloc_table_from_pages Tvrtko Ursulin
2017-01-16 14:12 ` [PATCH 4/4] drm/i915: Use __sg_alloc_table_from_pages for userptr allocations Tvrtko Ursulin
2017-01-11  9:00 [PATCH 1/4] lib/scatterlist: Fix offset type in sg_alloc_table_from_pages Tvrtko Ursulin
2017-01-11  9:00 ` [PATCH 4/4] drm/i915: Use __sg_alloc_table_from_pages for userptr allocations Tvrtko Ursulin
2017-01-11  9:00   ` Tvrtko Ursulin
2016-11-11  8:50 [PATCH 0/4] Compact userptr object backing store allocation Tvrtko Ursulin
2016-11-11  8:50 ` [PATCH 4/4] drm/i915: Use __sg_alloc_table_from_pages for userptr allocations Tvrtko Ursulin
2016-11-11  8:50   ` Tvrtko Ursulin
2016-11-11 10:23   ` Chris Wilson
2016-11-11 10:23     ` Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=150124001887.27803.7479747269963365633@mail.alporthouse.com \
    --to=chris@chris-wilson.co.uk \
    --cc=Intel-gfx@lists.freedesktop.org \
    --cc=benjamin.widawsky@intel.com \
    --cc=jason@jlekstrand.net \
    --cc=joonas.lahtinen@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tursulin@ursulin.net \
    --cc=tvrtko.ursulin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.