Re: [Intel-gfx] [PATCH 6/7] drm/i915/guc: Copy new GuC error capture logs upon G2H notification.

From: "Teres Alexis, Alan Previn" <alan.previn.teres.alexis@intel.com>
To: "intel-gfx@lists.freedesktop.org" <intel-gfx@lists.freedesktop.org>
Subject: Re: [Intel-gfx] [PATCH 6/7] drm/i915/guc: Copy new GuC error capture logs upon G2H notification.
Date: Wed, 19 Jan 2022 01:36:18 +0000	[thread overview]
Message-ID: <304ed4cc83bf360a2d8c7b89abe1fdb63a88bc10.camel@intel.com> (raw)
In-Reply-To: <20220118100358.1329655-7-alan.previn.teres.alexis@intel.com>

A fresh round of offline design discussions with internal team has decided 
that:

 - we dont want to use an interim circular buffer to copy all of the GuC
   generated register dumps of one or more 'engine-capture-groups'.
 - instead, we shall dynamically allocate multiple nodes, each node being
   a single "engine-capture-dump". A link list of one or many engine-capture-
   dumps would result from a single engine-capture-group.
 - this dynamic allocation should happen during the G2H error-capture
   notification event which happens before the corresponding G2H context-
   reset that triggers the i915_gpu_coredump (where we want to avoid
   memory allocation moving forward).
 - we also realize that during the link-list allocation we would need
   a first-parse of the guc-log-error-state-capture head-to-tail entries
   in order to duplicate global and engine-class register dumps for each
   each engine instance register dump if we find dependent-engine resets
   in a engine-capture-group.
 - later when i915_gpu_coredump calls into capture_engine, we finally
   attach the corresponding node from the link list above (detaching it
   from that link list) into i915_gpu_coredump's intel_engine_coredump
   structure when have matching LRCA/guc-id/engine-instace.
 - we would also have to add a flag through i915_gpu_coredump top level
   trigger through to capture_engine to indicate if the capture was triggered
   via a guc context reset or a forced user reset or gt-reset. In the latter
   case (user/gt reset), we should capture the register values directly
   from the HW, i.e. the pre-guc behavior without matching against GuC.

...alan

On Tue, 2022-01-18 at 02:03 -0800, Alan Previn wrote:

> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> index b637628905ec..fc80c5f31915 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c+static void __guc_capture_store_snapshot_work(struct intel_guc *guc)
..
..
> +{
> +	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
> +	unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_count;
> +	struct guc_log_buffer_state *log_buf_state;
> +	struct guc_log_buffer_state log_buf_state_local;
> +	void *src_data, *dst_data = NULL;
> +	bool new_overflow;
> +
> +	/* Lock to get the pointer to GuC capture-log-buffer-state */
> +	mutex_lock(&guc->log_state[GUC_CAPTURE_LOG_BUFFER].lock);
> +	log_buf_state = guc->log.buf_addr +
> +			(sizeof(struct guc_log_buffer_state) * GUC_CAPTURE_LOG_BUFFER);
> +	src_data = guc->log.buf_addr + intel_guc_get_log_buffer_offset(GUC_CAPTURE_LOG_BUFFER);
> +
> +	/*
> +	 * Make a copy of the state structure, inside GuC log buffer
> +	 * (which is uncached mapped), on the stack to avoid reading
> +	 * from it multiple times.
> +	 */
> +	memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state));
> +	buffer_size = intel_guc_get_log_buffer_size(GUC_CAPTURE_LOG_BUFFER);
> +	read_offset = log_buf_state_local.read_ptr;
> +	write_offset = log_buf_state_local.sampled_write_ptr;
> +	full_count = log_buf_state_local.buffer_full_cnt;
> +
> +	/* Bookkeeping stuff */
> +	guc->log_state[GUC_CAPTURE_LOG_BUFFER].flush += log_buf_state_local.flush_to_file;
> +	new_overflow = intel_guc_check_log_buf_overflow(guc,
> +							&guc->log_state[GUC_CAPTURE_LOG_BUFFER],
> +							full_count);
> +
> +	/* Update the state of shared log buffer */
> +	log_buf_state->read_ptr = write_offset;
> +	log_buf_state->flush_to_file = 0;
> +
> +	mutex_unlock(&guc->log_state[GUC_CAPTURE_LOG_BUFFER].lock);
> +
> +	dst_data = guc->capture.priv->out_store.addr;
> +	if (dst_data) {
> +		mutex_lock(&guc->capture.priv->out_store.lock);
> +
> +		/* Now copy the actual logs. */
> +		if (unlikely(new_overflow)) {
> +			/* copy the whole buffer in case of overflow */
> +			read_offset = 0;
> +			write_offset = buffer_size;
> +		} else if (unlikely((read_offset > buffer_size) ||
> +			   (write_offset > buffer_size))) {
> +			drm_err(&i915->drm, "invalid GuC log capture buffer state!\n");
> +			/* copy whole buffer as offsets are unreliable */
> +			read_offset = 0;
> +			write_offset = buffer_size;
> +		}
> +
> +		/* first copy from the tail end of the GuC log capture buffer */
> +		if (read_offset > write_offset) {
> +			guc_capture_store_insert(guc, &guc->capture.priv->out_store, src_data,
> +						 write_offset);
> +			bytes_to_copy = buffer_size - read_offset;
> +		} else {
> +			bytes_to_copy = write_offset - read_offset;
> +		}
> +		guc_capture_store_insert(guc, &guc->capture.priv->out_store, src_data + read_offset,
> +					 bytes_to_copy);
> +
> +		mutex_unlock(&guc->capture.priv->out_store.lock);
> +	}
> +}
> +
> +void intel_guc_capture_store_snapshot(struct intel_guc *guc)
> +{
> +	if (guc->capture.priv)
> +		__guc_capture_store_snapshot_work(guc);
> +}
> +
> +static void guc_capture_store_destroy(struct intel_guc *guc)
> +{
> +	mutex_destroy(&guc->capture.priv->out_store.lock);
> +	guc->capture.priv->out_store.size = 0;
> +	kfree(guc->capture.priv->out_store.addr);
> +	guc->capture.priv->out_store.addr = NULL;
> +}
> +
> +static int guc_capture_store_create(struct intel_guc *guc)
> +{
> +	/*
> +	 * Make this interim buffer larger than GuC capture output buffer so that we can absorb
> +	 * a little delay when processing the raw capture dumps into text friendly logs
> +	 * for the i915_gpu_coredump output
> +	 */
> +	size_t max_dump_size;
> +
> +	GEM_BUG_ON(guc->capture.priv->out_store.addr);
> +
> +	max_dump_size = PAGE_ALIGN(intel_guc_capture_output_min_size_est(guc));
> +	max_dump_size = roundup_pow_of_two(max_dump_size);
> +
> +	guc->capture.priv->out_store.addr = kzalloc(max_dump_size, GFP_KERNEL);
> +	if (!guc->capture.priv->out_store.addr)
> +		return -ENOMEM;
> +
> +	guc->capture.priv->out_store.size = max_dump_size;
> +	mutex_init(&guc->capture.priv->out_store.lock);
> +
> +	return 0;
> +}
> +