All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Vetter <daniel@ffwll.ch>
To: Chris Wilson <chris@chris-wilson.co.uk>
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 13/38] drm/i915: Move obj->active:5 to obj->flags
Date: Wed, 8 Jun 2016 11:53:08 +0200	[thread overview]
Message-ID: <20160608095308.GV3363@phenom.ffwll.local> (raw)
In-Reply-To: <1464972953-2726-14-git-send-email-chris@chris-wilson.co.uk>

On Fri, Jun 03, 2016 at 05:55:28PM +0100, Chris Wilson wrote:
> We are motivated to avoid using a bitfield for obj->active for a couple
> of reasons. Firstly, we wish to document our lockless read of obj->active
> using READ_ONCE inside i915_gem_busy_ioctl() and that requires an
> integral type (i.e. not a bitfield). Secondly, gcc produces abysmal code
> when presented with a bitfield and that shows up high on the profiles of
> request tracking (mainly due to excess memory traffic as it converts
> the bitfield to a register and back and generates frequent AGI in the
> process).

AGI = address generator interlock, I guess gcc first dynamically computes
the address, then loads the right blocks, frobs them and stores the
blocks? Please elaborate (in the commit message), but if it's indeed this
silly this makes sense.
-Daniel
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |  2 +-
>  drivers/gpu/drm/i915/i915_drv.h            | 31 +++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_gem.c            | 16 +++++++--------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +++++-----
>  drivers/gpu/drm/i915/i915_gem_shrinker.c   |  5 +++--
>  drivers/gpu/drm/i915/i915_gem_userptr.c    |  2 +-
>  6 files changed, 48 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 355bbf895c22..9154919fdd56 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -91,7 +91,7 @@ static int i915_capabilities(struct seq_file *m, void *data)
>  
>  static char get_active_flag(struct drm_i915_gem_object *obj)
>  {
> -	return obj->active ? '*' : ' ';
> +	return i915_gem_object_is_active(obj) ? '*' : ' ';
>  }
>  
>  static char get_pin_flag(struct drm_i915_gem_object *obj)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 236ade61cade..e72b7f35a98e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2136,12 +2136,16 @@ struct drm_i915_gem_object {
>  
>  	struct list_head batch_pool_link;
>  
> +	unsigned long flags;
>  	/**
>  	 * This is set if the object is on the active lists (has pending
>  	 * rendering and so a non-zero seqno), and is not set if it i s on
>  	 * inactive (ready to be unbound) list.
>  	 */
> -	unsigned int active:I915_NUM_ENGINES;
> +#define I915_BO_ACTIVE_SHIFT 0
> +#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
> +#define __I915_BO_ACTIVE(bo) \
> +	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
>  
>  	/**
>  	 * This is set if the object has been written to since last bound
> @@ -2288,6 +2292,31 @@ i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj)
>  }
>  __deprecated extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
>  
> +static inline unsigned long
> +i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
> +{
> +	return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
> +}
> +
> +static inline void
> +i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
> +{
> +	obj->flags |= 1 << (engine + I915_BO_ACTIVE_SHIFT);
> +}
> +
> +static inline void
> +i915_gem_object_unset_active(struct drm_i915_gem_object *obj, int engine)
> +{
> +	obj->flags &= ~(1 << (engine + I915_BO_ACTIVE_SHIFT));
> +}
> +
> +static inline bool
> +i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
> +				  int engine)
> +{
> +	return obj->flags & (1 << (engine + I915_BO_ACTIVE_SHIFT));
> +}
> +
>  /*
>   * Optimised SGL iterator for GEM objects
>   */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 05425ae7c8a8..a8279a598c4b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1126,7 +1126,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  
>  	lockdep_assert_held(&obj->base.dev->struct_mutex);
>  
> -	active_mask = obj->active;
> +	active_mask = i915_gem_object_is_active(obj);
>  	if (!active_mask)
>  		return 0;
>  
> @@ -1165,7 +1165,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>  	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
>  	BUG_ON(!dev_priv->mm.interruptible);
>  
> -	active_mask = obj->active;
> +	active_mask = i915_gem_object_is_active(obj);
>  	if (!active_mask)
>  		return 0;
>  
> @@ -2109,10 +2109,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
>  	struct drm_i915_gem_object *obj =
>  		container_of(active, struct drm_i915_gem_object, last_read[ring]);
>  
> -	GEM_BUG_ON((obj->active & (1 << ring)) == 0);
> +	GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, ring));
>  
> -	obj->active &= ~(1 << ring);
> -	if (obj->active)
> +	i915_gem_object_unset_active(obj, ring);
> +	if (i915_gem_object_is_active(obj))
>  		return;
>  
>  	/* Bump our place on the bound list to keep it roughly in LRU order
> @@ -2383,7 +2383,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  		return -ENOENT;
>  	}
>  
> -	if (!obj->active)
> +	if (!i915_gem_object_is_active(obj))
>  		goto out;
>  
>  	for (i = 0; i < I915_NUM_ENGINES; i++) {
> @@ -2472,7 +2472,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  
>  	lockdep_assert_held(&obj->base.dev->struct_mutex);
>  
> -	active_mask = obj->active;
> +	active_mask = i915_gem_object_is_active(obj);
>  	if (!active_mask)
>  		return 0;
>  
> @@ -3516,7 +3516,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  	 * become non-busy without any further actions.
>  	 */
>  	args->busy = 0;
> -	if (obj->active) {
> +	if (i915_gem_object_is_active(obj)) {
>  		struct drm_i915_gem_request *req;
>  		int i;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 69bf73b51df9..224265619f00 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -432,7 +432,7 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj,
>  
>  static bool object_is_idle(struct drm_i915_gem_object *obj)
>  {
> -	unsigned long active = obj->active;
> +	unsigned long active = i915_gem_object_is_active(obj);
>  	int idx;
>  
>  	for_each_active(active, idx) {
> @@ -991,7 +991,7 @@ static int
>  i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  				struct list_head *vmas)
>  {
> -	const unsigned other_rings = ~intel_engine_flag(req->engine);
> +	const unsigned other_rings = (~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK) << I915_BO_ACTIVE_SHIFT;
>  	struct i915_vma *vma;
>  	uint32_t flush_domains = 0;
>  	bool flush_chipset = false;
> @@ -1000,7 +1000,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_object *obj = vma->obj;
>  
> -		if (obj->active & other_rings) {
> +		if (obj->flags & other_rings) {
>  			ret = i915_gem_object_sync(obj, req);
>  			if (ret)
>  				return ret;
> @@ -1159,9 +1159,9 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>  	 * add the active reference first and queue for it to be dropped
>  	 * *last*.
>  	 */
> -	if (obj->active == 0)
> +	if (!i915_gem_object_is_active(obj))
>  		i915_gem_object_get(obj);
> -	obj->active |= 1 << idx;
> +	i915_gem_object_set_active(obj, idx);
>  	i915_gem_active_set(&obj->last_read[idx], req);
>  
>  	if (flags & EXEC_OBJECT_WRITE) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 71ad58836f48..5cbc4ee52c6d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -168,7 +168,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>  			    !is_vmalloc_addr(obj->mapping))
>  				continue;
>  
> -			if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active)
> +			if ((flags & I915_SHRINK_ACTIVE) == 0 &&
> +			    i915_gem_object_is_active(obj))
>  				continue;
>  
>  			if (!can_release_pages(obj))
> @@ -253,7 +254,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
>  			count += obj->base.size >> PAGE_SHIFT;
>  
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> -		if (!obj->active && can_release_pages(obj))
> +		if (!i915_gem_object_is_active(obj) && can_release_pages(obj))
>  			count += obj->base.size >> PAGE_SHIFT;
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index e57521dbddc6..221792632290 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -67,7 +67,7 @@ static void wait_rendering(struct drm_i915_gem_object *obj)
>  	struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
>  	int i, n;
>  
> -	if (!obj->active)
> +	if (!i915_gem_object_is_active(obj))
>  		return;
>  
>  	n = 0;
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2016-06-08  9:53 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-03 16:55 Tracking VMA Chris Wilson
2016-06-03 16:55 ` [PATCH 01/38] drm/i915: Combine loops within i915_gem_evict_something Chris Wilson
2016-06-03 16:55 ` [PATCH 02/38] drm/i915: Remove surplus drm_device parameter to i915_gem_evict_something() Chris Wilson
2016-06-03 16:55 ` [PATCH 03/38] drm/i915: Double check the active status on the batch pool Chris Wilson
2016-06-03 16:55 ` [PATCH 04/38] drm/i915: Remove request retirement before each batch Chris Wilson
2016-06-06 13:40   ` Mika Kuoppala
2016-06-03 16:55 ` [PATCH 05/38] drm/i915: Remove i915_gem_execbuffer_retire_commands() Chris Wilson
2016-06-06 14:26   ` Mika Kuoppala
2016-06-03 16:55 ` [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size Chris Wilson
2016-06-08  9:41   ` Daniel Vetter
2016-06-08 10:08     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 07/38] drm/i915: Split insertion/binding of an object into the VM Chris Wilson
2016-06-03 16:55 ` [PATCH 08/38] drm/i915: Record allocated vma size Chris Wilson
2016-06-03 16:55 ` [PATCH 09/38] drm/i915: Start passing around i915_vma from execbuffer Chris Wilson
2016-06-03 16:55 ` [PATCH 10/38] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin() Chris Wilson
2016-06-08  9:43   ` Daniel Vetter
2016-06-08 12:58     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 11/38] drm/i915: Make fb_tracking.lock a spinlock Chris Wilson
2016-06-03 16:55 ` [PATCH 12/38] drm/i915: Use atomics to manipulate obj->frontbuffer_bits Chris Wilson
2016-06-03 16:55 ` [PATCH 13/38] drm/i915: Move obj->active:5 to obj->flags Chris Wilson
2016-06-08  9:53   ` Daniel Vetter [this message]
2016-06-03 16:55 ` [PATCH 14/38] drm/i915: Move i915_gem_object_wait_rendering() Chris Wilson
2016-06-03 16:55 ` [PATCH 15/38] drm/i915: Mark all current requests as complete before resetting them Chris Wilson
2016-06-03 16:55 ` [PATCH 16/38] drm/i915: Enable lockless lookup of request tracking via RCU Chris Wilson
2016-06-03 16:55 ` [PATCH 17/38] drm/i915: Introduce i915_gem_active_wait_unlocked() Chris Wilson
2016-06-03 16:55 ` [PATCH 18/38] drm/i915: Convert non-blocking waits for requests over to using RCU Chris Wilson
2016-06-03 16:55 ` [PATCH 19/38] drm/i915: Convert non-blocking userptr " Chris Wilson
2016-06-03 16:55 ` [PATCH 20/38] drm/i915/userptr: Remove superfluous interruptible=false on waiting Chris Wilson
2016-06-03 16:55 ` [PATCH 21/38] drm/i915: Avoid requiring struct_mutex during suspend Chris Wilson
2016-06-03 16:55 ` [PATCH 22/38] drm/gem/shrinker: Wait before acquiring struct_mutex under oom Chris Wilson
2016-06-08  9:57   ` Daniel Vetter
2016-06-08 10:04     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 23/38] suspend Chris Wilson
2016-06-03 16:55 ` [PATCH 24/38] drm/i915: Do a nonblocking wait first in pread/pwrite Chris Wilson
2016-06-03 16:55 ` [PATCH 25/38] drm/i915: Remove (struct_mutex) locking for wait-ioctl Chris Wilson
2016-06-03 16:55 ` [PATCH 26/38] drm/i915: Remove (struct_mutex) locking for busy-ioctl Chris Wilson
2016-06-03 16:55 ` [PATCH 27/38] drm/i915: Reduce locking inside swfinish ioctl Chris Wilson
2016-06-08  9:59   ` Daniel Vetter
2016-06-08 10:03     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 28/38] drm/i915: Remove pinned check from madvise ioctl Chris Wilson
2016-06-08 10:01   ` Daniel Vetter
2016-06-03 16:55 ` [PATCH 29/38] drm/i915: Remove locking for get_tiling Chris Wilson
2016-06-08 10:02   ` Daniel Vetter
2016-06-08 10:11     ` Chris Wilson
2016-06-13 14:19       ` Daniel Vetter
2016-06-03 16:55 ` [PATCH 30/38] drm/i915: Assert that the request hasn't been retired Chris Wilson
2016-06-03 16:55 ` [PATCH 31/38] drm/i915: Reduce amount of duplicate buffer information captured on error Chris Wilson
2016-06-03 16:55 ` [PATCH 32/38] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
2016-06-08 10:06   ` Daniel Vetter
2016-06-08 11:37     ` Chris Wilson
2016-06-03 16:55 ` [PATCH 33/38] drm/i915: Scan GGTT active list for context object Chris Wilson
2016-06-03 16:55 ` [PATCH 34/38] drm/i915: Move setting of request->batch into its single callsite Chris Wilson
2016-06-03 16:55 ` [PATCH 35/38] drm/i915: Mark unmappable GGTT entries as PIN_HIGH Chris Wilson
2016-06-03 16:55 ` [PATCH 36/38] drm/i915: Track pinned vma inside guc Chris Wilson
2016-06-03 16:55 ` [PATCH 37/38] drm/i915: Track pinned VMA Chris Wilson
2016-06-08 10:08   ` Daniel Vetter
2016-06-03 16:55 ` [PATCH 38/38] drm/i915/overlay: Use VMA as the primary tracker for images Chris Wilson
2016-06-06 10:42 ` ✗ Ro.CI.BAT: failure for series starting with [01/38] drm/i915: Combine loops within i915_gem_evict_something Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160608095308.GV3363@phenom.ffwll.local \
    --to=daniel@ffwll.ch \
    --cc=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.