All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: Dave Gordon <david.s.gordon@intel.com>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH v4 3/4] Introduce & use new lightweight SGL iterators
Date: Fri, 20 May 2016 10:56:31 +0100	[thread overview]
Message-ID: <20160520095631.GI3590@nuc-i3427.alporthouse.com> (raw)
In-Reply-To: <20160520081506.GF3590@nuc-i3427.alporthouse.com>

On Fri, May 20, 2016 at 09:15:06AM +0100, Chris Wilson wrote:
> On Fri, May 20, 2016 at 01:31:29AM +0100, Dave Gordon wrote:
> > The existing for_each_sg_page() iterator is somewhat heavyweight, and is
> > limiting i915 driver performance in a few benchmarks. So here we
> > introduce somewhat lighter weight iterators, primarily for use with GEM
> > objects or other case where we need only deal with whole aligned pages.
> > 
> > Unlike the old iterator, the new iterators use an internal state
> > structure which is not intended to be accessed by the caller; instead
> > each takes as a parameter an output variable which is set before each
> > iteration. This makes them particularly simple to use :)
> > 
> > One of the new iterators provides the caller with the DMA address of
> > each page in turn; the other provides the 'struct page' pointer required
> > by many memory management operations.
> > 
> > Various uses of for_each_sg_page() are then converted to the new macros.
> > 
> > Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h         | 62 +++++++++++++++++++++++++--
> >  drivers/gpu/drm/i915/i915_gem.c         | 20 ++++-----
> >  drivers/gpu/drm/i915/i915_gem_fence.c   | 14 +++---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c     | 76 ++++++++++++++++-----------------
> >  drivers/gpu/drm/i915/i915_gem_userptr.c |  7 ++-
> >  5 files changed, 116 insertions(+), 63 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 6894d8e..01cde0f 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2120,6 +2120,10 @@ struct drm_i915_gem_object_ops {
> >  #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
> >  	(0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
> >  
> > +void i915_gem_track_fb(struct drm_i915_gem_object *old,
> > +		       struct drm_i915_gem_object *new,
> > +		       unsigned frontbuffer_bits);
> > +
> 
> That's not a much better spot, since this is now before the object it
> acts upon. One day we'll get our types separated from their actors.
> 
> >  struct drm_i915_gem_object {
> >  	struct drm_gem_object base;
> >  
> > @@ -2251,9 +2255,61 @@ struct drm_i915_gem_object {
> >  };
> >  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
> >  
> > -void i915_gem_track_fb(struct drm_i915_gem_object *old,
> > -		       struct drm_i915_gem_object *new,
> > -		       unsigned frontbuffer_bits);
> > +/*
> > + * New optimised SGL iterator for GEM objects
> > + */
> > +struct sgt_iter {
> > +	struct scatterlist *sgp;
> > +	union {
> > +		unsigned long pfn;
> > +		unsigned long dma;
> > +	} ix;
> 
> An anonymous union here would be slightly more pleasant. And we should
> be using the correct types.
> 
> > +	unsigned int curr;
> > +	unsigned int max;
> > +};
> > +
> > +/* Constructor for new iterator */
> > +static inline struct sgt_iter
> > +__sgt_iter(struct scatterlist *sgl, bool dma)
> > +{
> > +	struct sgt_iter s = { .sgp = sgl };
> > +
> > +	if (s.sgp) {
> 
> Not acceptable just to GPF out if passed in a NULL? It's a BUG for all
> our use cases. And would save a conditional every chain step.

Oh, it's how we stop after __sg_next().

Ok, played a bit and the only thing that sticks is

 /*
- * New optimised SGL iterator for GEM objects
+ * Optimised SGL iterator for GEM objects
  */
 struct sgt_iter {
        struct scatterlist *sgp;
        union {
                unsigned long pfn;
-               unsigned long dma;
-       } ix;
+               dma_addr_t dma;
+       };
        unsigned int curr;
        unsigned int max;
 };
 
-/* Constructor for new iterator */
-static inline struct sgt_iter
+static __always_inline struct sgt_iter

Nothing is ever new past the commit that introduces it!

Final diff (a few % in micro, consistently above the noise):

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 53ff4993f5b8..586f06646630 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2256,31 +2256,26 @@ struct drm_i915_gem_object {
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /*
- * New optimised SGL iterator for GEM objects
+ * Optimised SGL iterator for GEM objects
  */
-struct sgt_iter {
+static __always_inline struct sgt_iter {
        struct scatterlist *sgp;
        union {
                unsigned long pfn;
-               unsigned long dma;
-       } ix;
+               dma_addr_t dma;
+       };
        unsigned int curr;
        unsigned int max;
-};
-
-/* Constructor for new iterator */
-static inline struct sgt_iter
-__sgt_iter(struct scatterlist *sgl, bool dma)
-{
+} __sgt_iter(struct scatterlist *sgl, bool dma) {
        struct sgt_iter s = { .sgp = sgl };
 
        if (s.sgp) {
                s.max = s.curr = s.sgp->offset;
                s.max += s.sgp->length;
                if (dma)
-                       s.ix.dma = sg_dma_address(s.sgp);
+                       s.dma = sg_dma_address(s.sgp);
                else
-                       s.ix.pfn = page_to_pfn(sg_page(s.sgp));
+                       s.pfn = page_to_pfn(sg_page(s.sgp));
        }
 
        return s;
@@ -2313,9 +2308,9 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
  */
 #define for_each_sgt_dma(__dmap, __iter, __sgt)                                \
        for ((__iter) = __sgt_iter((__sgt)->sgl, true);                 \
-            ((__dmap) = (__iter).ix.dma + (__iter).curr);              \
+            ((__dmap) = (__iter).dma + (__iter).curr);                 \
             (((__iter).curr += PAGE_SIZE) < (__iter).max) ||           \
-               ((__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0))
+            ((__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0))
 
 /**
  * for_each_sgt_page - iterate over the pages of the given sg_table
@@ -2325,10 +2320,10 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
  */
 #define for_each_sgt_page(__pp, __iter, __sgt)                         \
        for ((__iter) = __sgt_iter((__sgt)->sgl, false);                \
-            ((__pp) = (__iter).ix.pfn == 0 ? NULL :                    \
-                  pfn_to_page((__iter).ix.pfn + ((__iter).curr >> PAGE_SHIFT)));\
+            ((__pp) = (__iter).pfn == 0 ? NULL :                       \
+             pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
             (((__iter).curr += PAGE_SIZE) < (__iter).max) ||           \
-               ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
+            ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))


And with that,
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2016-05-20  9:56 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-05-20  0:31 [PATCH v4 1/4] drm/i915: refactor i915_gem_object_pin_map() Dave Gordon
2016-05-20  0:31 ` [PATCH v4 2/4] drm/i915: optimise i915_gem_object_map() for small objects Dave Gordon
2016-05-20  0:31 ` [PATCH v4 3/4] Introduce & use new lightweight SGL iterators Dave Gordon
2016-05-20  8:15   ` Chris Wilson
2016-05-20  9:56     ` Chris Wilson [this message]
2016-05-20  0:31 ` [PATCH v4 4/4] Try inlining sg_next() Dave Gordon
2016-05-20  7:35   ` Chris Wilson
2016-05-20  9:18 ` ✗ Ro.CI.BAT: failure for series starting with [v4,1/4] drm/i915: refactor i915_gem_object_pin_map() Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160520095631.GI3590@nuc-i3427.alporthouse.com \
    --to=chris@chris-wilson.co.uk \
    --cc=david.s.gordon@intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.