All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ian Romanick <idr@freedesktop.org>
To: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org, miku@iki.fi
Subject: Re: [PATCH v2 10/16] drm/i915: add struct ctx_reset_state
Date: Mon, 18 Mar 2013 13:18:30 -0700	[thread overview]
Message-ID: <51477696.4040807@freedesktop.org> (raw)
In-Reply-To: <1363276337-12509-11-git-send-email-mika.kuoppala@intel.com>

On 03/14/2013 08:52 AM, Mika Kuoppala wrote:
> To count context losses, add struct ctx_reset_state for
> both i915_hw_context and drm_i915_file_private.
> drm_i915_file_private is used when there is no context.
>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_dma.c         |    4 +++-
>   drivers/gpu/drm/i915/i915_drv.h         |   19 +++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_context.c |   11 +++++++++++
>   3 files changed, 33 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index e16099b..7902d97 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1792,7 +1792,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file)
>   	struct drm_i915_file_private *file_priv;
>
>   	DRM_DEBUG_DRIVER("\n");
> -	file_priv = kmalloc(sizeof(*file_priv), GFP_KERNEL);
> +	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
>   	if (!file_priv)
>   		return -ENOMEM;
>
> @@ -1801,6 +1801,8 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file)
>   	spin_lock_init(&file_priv->mm.lock);
>   	INIT_LIST_HEAD(&file_priv->mm.request_list);
>
> +	i915_gem_context_init_reset_state(dev, &file_priv->reset_state);
> +
>   	idr_init(&file_priv->context_idr);
>
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a54c507..d004548 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -433,6 +433,19 @@ struct i915_hw_ppgtt {
>   	void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
>   };
>
> +struct ctx_reset_state {
> +	/* guilty and reset counts when context initialized */
> +	unsigned long guilty_cnt;
> +	unsigned long reset_cnt;

I think we can afford to spell out "count."  The first time I saw cnt, 
it looked like a dirty word. :)

I think this structure could you some better description of the overall 
architecture.  It's not completely obvious from the individual pieces... 
and that makes it really hard to evaluate.

reset_cnt is the number of resets since start-up.  What is guilty_cnt? 
What are innocent and guilty (below)?

All of this makes it difficult for me to tell whether or not the logic 
in patch 16 is correct... and I don't think it is.

> +
> +	unsigned innocent;
> +	unsigned guilty;
> +

         /* Time when this context was last blamed for a GPU reset.
          */
> +	unsigned long last_guilty_reset;
> +
> +	/* banned to submit more work */
> +	bool banned;
> +};
>
>   /* This must match up with the value previously used for execbuf2.rsvd1. */
>   #define DEFAULT_CONTEXT_ID 0
> @@ -443,6 +456,7 @@ struct i915_hw_context {
>   	struct drm_i915_file_private *file_priv;
>   	struct intel_ring_buffer *ring;
>   	struct drm_i915_gem_object *obj;
> +	struct ctx_reset_state reset_state;
>   };
>
>   enum no_fbc_reason {
> @@ -805,6 +819,7 @@ struct i915_gpu_error {
>
>   	unsigned long last_reset;
>
> +	unsigned long guilty_cnt;
>   	/**
>   	 * State variable and reset counter controlling the reset flow
>   	 *
> @@ -1257,6 +1272,8 @@ struct drm_i915_file_private {
>   		struct list_head request_list;
>   	} mm;
>   	struct idr context_idr;
> +
> +	struct ctx_reset_state reset_state;
>   };
>
>   #define INTEL_INFO(dev)	(((struct drm_i915_private *) (dev)->dev_private)->info)
> @@ -1677,6 +1694,8 @@ struct i915_hw_context * __must_check
>   i915_switch_context(struct intel_ring_buffer *ring,
>   		    struct drm_file *file, int to_id);
>   void i915_gem_context_free(struct kref *ctx_ref);
> +void i915_gem_context_init_reset_state(struct drm_device *dev,
> +				       struct ctx_reset_state *rs);
>   int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>   				  struct drm_file *file);
>   int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 8fb4d3c..dbd14b8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -145,6 +145,15 @@ static void do_destroy(struct i915_hw_context *ctx)
>   	kfree(ctx);
>   }
>
> +void i915_gem_context_init_reset_state(struct drm_device *dev,
> +				       struct ctx_reset_state *rs)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	rs->reset_cnt = atomic_read(&dev_priv->gpu_error.reset_counter);
> +	rs->guilty_cnt = dev_priv->gpu_error.guilty_cnt;
> +}
> +
>   static struct i915_hw_context *
>   create_hw_context(struct drm_device *dev,
>   		  struct drm_i915_file_private *file_priv)
> @@ -177,6 +186,8 @@ create_hw_context(struct drm_device *dev,
>
>   	ctx->file_priv = file_priv;
>
> +	i915_gem_context_init_reset_state(dev, &ctx->reset_state);
> +
>   again:
>   	if (idr_pre_get(&file_priv->context_idr, GFP_KERNEL) == 0) {
>   		ret = -ENOMEM;
>

  parent reply	other threads:[~2013-03-18 20:19 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-14 15:52 [PATCH v2 00/16] arb robustness enablers v2 Mika Kuoppala
2013-03-14 15:52 ` [PATCH v2 01/16] drm/i915: return context from i915_switch_context() Mika Kuoppala
2013-03-14 15:52 ` [PATCH v2 02/16] drm/i915: cleanup i915_add_request Mika Kuoppala
2013-03-15  9:43   ` Chris Wilson
2013-03-14 15:52 ` [PATCH v2 03/16] drm/i915: reference count for i915_hw_contexts Mika Kuoppala
2013-03-15  9:44   ` Chris Wilson
2013-04-02 22:45   ` [PATCH 1/2] [v3] " Ben Widawsky
2013-04-02 22:45     ` [PATCH 2/2] drm/i915: Print all contexts in debugfs Ben Widawsky
2013-04-03  1:27     ` [PATCH 1/2] [v3] drm/i915: reference count for i915_hw_contexts Ben Widawsky
2013-04-03 11:56       ` Chris Wilson
2013-04-03 16:37         ` Daniel Vetter
2013-03-14 15:52 ` [PATCH v2 04/16] drm/i915: Resurrect ring kicking for semaphores, selectively Mika Kuoppala
2013-03-17 21:53   ` Daniel Vetter
2013-03-14 15:52 ` [PATCH v2 05/16] drm/i915: pass seqno to i915_hangcheck_ring_idle Mika Kuoppala
2013-03-14 15:52 ` [PATCH v2 06/16] drm/i915: track ring progression using seqnos Mika Kuoppala
2013-03-15  9:48   ` Chris Wilson
2013-03-14 15:52 ` [PATCH v2 07/16] drm/i915: introduce i915_hangcheck_ring_hung Mika Kuoppala
2013-03-14 15:52 ` [PATCH v2 08/16] drm/i915: detect hang using per ring hangcheck_score Mika Kuoppala
2013-03-14 15:52 ` [PATCH v2 09/16] drm/i915: remove i915_hangcheck_hung Mika Kuoppala
2013-03-14 15:52 ` [PATCH v2 10/16] drm/i915: add struct ctx_reset_state Mika Kuoppala
2013-03-15  9:52   ` Chris Wilson
2013-03-17 21:51     ` Daniel Vetter
2013-03-18 20:18   ` Ian Romanick [this message]
2013-03-14 15:52 ` [PATCH v2 11/16] drm/i915: add reset_state for hw_contexts Mika Kuoppala
2013-03-15  9:53   ` Chris Wilson
2013-03-14 15:52 ` [PATCH v2 12/16] drm/i915: add batch object and context to i915_add_request() Mika Kuoppala
2013-03-14 15:52 ` [PATCH v2 13/16] drm/i915: mark rings which were waiting when hang happened Mika Kuoppala
2013-03-14 15:52 ` [PATCH v2 14/16] drm/i915: find guilty batch buffer on ring resets Mika Kuoppala
2013-03-14 15:52 ` [PATCH v2 15/16] drm/i915: refuse to submit more batchbuffers from guilty context Mika Kuoppala
2013-03-14 15:52 ` [PATCH v2 16/16] drm/i915: add i915_gem_context_get_reset_status_ioctl Mika Kuoppala
2013-03-15 10:01   ` Chris Wilson
2013-03-18 20:26   ` Ian Romanick
2013-03-19 12:58     ` Mika Kuoppala
2013-03-19 19:02       ` Ian Romanick
2013-03-19 19:21         ` Daniel Vetter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=51477696.4040807@freedesktop.org \
    --to=idr@freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=mika.kuoppala@linux.intel.com \
    --cc=miku@iki.fi \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.