All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v12 00/11] drm/i915: Vulkan performance query support
@ 2019-08-30 14:47 Lionel Landwerlin
  2019-08-30 14:47 ` [PATCH v12 01/11] drm/i915: introduce a mechanism to extend execbuf2 Lionel Landwerlin
                   ` (11 more replies)
  0 siblings, 12 replies; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 14:47 UTC (permalink / raw)
  To: intel-gfx

Hi all,

This moves locking a bit, hopefully in the right direction so that it
helps dropping the use of struct_mutex.

Cheers,

Lionel Landwerlin (11):
  drm/i915: introduce a mechanism to extend execbuf2
  drm/i915: add syncobj timeline support
  drm/i915/perf: drop list of streams
  drm/i915/perf: store the associated engine of a stream
  drm/i915/perf: introduce a versioning of the i915-perf uapi
  drm/i915/perf: allow for CS OA configs to be created lazily
  drm/i915/perf: implement active wait for noa configurations
  drm/i915/perf: execute OA configuration from command stream
  drm/i915: add a new perf configuration execbuf parameter
  drm/i915/perf: allow holding preemption on filtered ctx
  drm/i915: add support for perf configuration queries

 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 505 +++++++++++--
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |  25 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h      |   5 +
 drivers/gpu/drm/i915/i915_debugfs.c           |  31 +
 drivers/gpu/drm/i915/i915_drv.c               |   3 +-
 drivers/gpu/drm/i915/i915_drv.h               |  63 +-
 drivers/gpu/drm/i915/i915_getparam.c          |   9 +
 drivers/gpu/drm/i915/i915_perf.c              | 713 +++++++++++++++---
 drivers/gpu/drm/i915/i915_perf.h              |  27 +
 drivers/gpu/drm/i915/i915_query.c             | 283 +++++++
 drivers/gpu/drm/i915/i915_reg.h               |   4 +-
 include/uapi/drm/i915_drm.h                   | 196 ++++-
 12 files changed, 1683 insertions(+), 181 deletions(-)

--
2.23.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v12 01/11] drm/i915: introduce a mechanism to extend execbuf2
  2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
@ 2019-08-30 14:47 ` Lionel Landwerlin
  2019-08-30 14:47 ` [PATCH v12 02/11] drm/i915: add syncobj timeline support Lionel Landwerlin
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 14:47 UTC (permalink / raw)
  To: intel-gfx

We're planning to use this for a couple of new feature where we need
to provide additional parameters to execbuf.

v2: Check for invalid flags in execbuffer2 (Lionel)

v3: Rename I915_EXEC_EXT -> I915_EXEC_USE_EXTENSIONS (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 39 ++++++++++++++++++-
 include/uapi/drm/i915_drm.h                   | 26 +++++++++++--
 2 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 27dbcb508055..4f5fd946ab28 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -25,6 +25,7 @@
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 
 enum {
 	FORCE_CPU_RELOC = 1,
@@ -272,6 +273,10 @@ struct i915_execbuffer {
 	 */
 	int lut_size;
 	struct hlist_head *buckets; /** ht for relocation handles */
+
+	struct {
+		u64 flags; /** Available extensions parameters */
+	} extensions;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1940,7 +1945,8 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 		return false;
 
 	/* Kernel clipping was a DRI1 misfeature */
-	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
+	if (!(exec->flags & (I915_EXEC_FENCE_ARRAY |
+			     I915_EXEC_USE_EXTENSIONS))) {
 		if (exec->num_cliprects || exec->cliprects_ptr)
 			return false;
 	}
@@ -2442,6 +2448,33 @@ signal_fence_array(struct i915_execbuffer *eb,
 	}
 }
 
+static const i915_user_extension_fn execbuf_extensions[] = {
+};
+
+static int
+parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
+			  struct i915_execbuffer *eb)
+{
+	eb->extensions.flags = 0;
+
+	if (!(args->flags & I915_EXEC_USE_EXTENSIONS))
+		return 0;
+
+	/* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
+	 * have another flag also using it at the same time.
+	 */
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return -EINVAL;
+
+	if (args->num_cliprects != 0)
+		return -EINVAL;
+
+	return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
+				    execbuf_extensions,
+				    ARRAY_SIZE(execbuf_extensions),
+				    eb);
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev,
 		       struct drm_file *file,
@@ -2488,6 +2521,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (args->flags & I915_EXEC_IS_PINNED)
 		eb.batch_flags |= I915_DISPATCH_PINNED;
 
+	err = parse_execbuf2_extensions(args, &eb);
+	if (err)
+		return err;
+
 	if (args->flags & I915_EXEC_FENCE_IN) {
 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
 		if (!in_fence)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 469dc512cca3..0a99c26730e1 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1007,6 +1007,10 @@ struct drm_i915_gem_exec_fence {
 	__u32 flags;
 };
 
+enum drm_i915_gem_execbuffer_ext {
+	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
@@ -1023,8 +1027,15 @@ struct drm_i915_gem_execbuffer2 {
 	__u32 num_cliprects;
 	/**
 	 * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
-	 * is not set.  If I915_EXEC_FENCE_ARRAY is set, then this is a
-	 * struct drm_i915_gem_exec_fence *fences.
+	 * & I915_EXEC_USE_EXTENSIONS are not set.
+	 *
+	 * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array
+	 * of struct drm_i915_gem_exec_fence and num_cliprects is the length
+	 * of the array.
+	 *
+	 * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a
+	 * single struct drm_i915_gem_base_execbuffer_ext and num_cliprects is
+	 * 0.
 	 */
 	__u64 cliprects_ptr;
 #define I915_EXEC_RING_MASK              (0x3f)
@@ -1142,7 +1153,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_SUBMIT		(1 << 20)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
+/*
+ * Setting I915_EXEC_USE_EXTENSIONS implies that
+ * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked
+ * list of i915_user_extension. Each i915_user_extension node is the base of a
+ * larger structure. The list of supported structures are listed in the
+ * drm_i915_gem_execbuffer_ext enum.
+ */
+#define I915_EXEC_USE_EXTENSIONS	(1 << 21)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v12 02/11] drm/i915: add syncobj timeline support
  2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
  2019-08-30 14:47 ` [PATCH v12 01/11] drm/i915: introduce a mechanism to extend execbuf2 Lionel Landwerlin
@ 2019-08-30 14:47 ` Lionel Landwerlin
  2019-08-30 14:47 ` [PATCH v12 03/11] drm/i915/perf: drop list of streams Lionel Landwerlin
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 14:47 UTC (permalink / raw)
  To: intel-gfx

Introduces a new parameters to execbuf so that we can specify syncobj
handles as well as timeline points.

v2: Reuse i915_user_extension_fn

v3: Check that the chained extension is only present once (Chris)

v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel)

v5: Use BIT_ULL (Chris)

v6: Fix issue with already signaled timeline points,
    dma_fence_chain_find_seqno() setting fence to NULL (Chris)

v7: Report ENOENT with invalid syncobj handle (Lionel)

v8: Check for out of order timeline point insertion (Chris)

v9: After explanations on
    https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
    drop the ordering check from v8 (Lionel)

v10: Set first extension enum item to 1 (Jason)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 307 ++++++++++++++----
 drivers/gpu/drm/i915/i915_drv.c               |   3 +-
 drivers/gpu/drm/i915/i915_getparam.c          |   1 +
 include/uapi/drm/i915_drm.h                   |  39 +++
 4 files changed, 293 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 4f5fd946ab28..46ad8d9642d1 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -214,6 +214,13 @@ enum {
  * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
  */
 
+struct i915_eb_fences {
+	struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
+	struct dma_fence *dma_fence;
+	u64 value;
+	struct dma_fence_chain *chain_fence;
+};
+
 struct i915_execbuffer {
 	struct drm_i915_private *i915; /** i915 backpointer */
 	struct drm_file *file; /** per-file lookup tables and limits */
@@ -276,6 +283,7 @@ struct i915_execbuffer {
 
 	struct {
 		u64 flags; /** Available extensions parameters */
+		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
 	} extensions;
 };
 
@@ -2320,67 +2328,217 @@ eb_pin_engine(struct i915_execbuffer *eb,
 }
 
 static void
-__free_fence_array(struct drm_syncobj **fences, unsigned int n)
+__free_fence_array(struct i915_eb_fences *fences, unsigned int n)
 {
-	while (n--)
-		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
+	while (n--) {
+		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
+		dma_fence_put(fences[n].dma_fence);
+		kfree(fences[n].chain_fence);
+	}
 	kvfree(fences);
 }
 
-static struct drm_syncobj **
-get_fence_array(struct drm_i915_gem_execbuffer2 *args,
-		struct drm_file *file)
+static struct i915_eb_fences *
+get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+	struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
+		&eb->extensions.timeline_fences;
+	struct drm_i915_gem_exec_fence __user *user_fences;
+	struct i915_eb_fences *fences;
+	u64 __user *user_values;
+	u64 num_fences, num_user_fences = timeline_fences->fence_count;
+	unsigned long n;
+	int err;
+
+	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
+	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+	if (num_user_fences > min_t(unsigned long,
+				    ULONG_MAX / sizeof(*user_fences),
+				    SIZE_MAX / sizeof(*fences)))
+		return ERR_PTR(-EINVAL);
+
+	user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
+	if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
+		return ERR_PTR(-EFAULT);
+
+	user_values = u64_to_user_ptr(timeline_fences->values_ptr);
+	if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
+		return ERR_PTR(-EFAULT);
+
+	fences = kvmalloc_array(num_user_fences, sizeof(*fences),
+				__GFP_NOWARN | GFP_KERNEL);
+	if (!fences)
+		return ERR_PTR(-ENOMEM);
+
+	BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+		     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+	for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
+		struct drm_i915_gem_exec_fence user_fence;
+		struct drm_syncobj *syncobj;
+		struct dma_fence *fence = NULL;
+		u64 point;
+
+		if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
+			err = -EFAULT;
+			goto err;
+		}
+
+		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+			err = -EINVAL;
+			goto err;
+		}
+
+		if (__get_user(point, user_values++)) {
+			err = -EFAULT;
+			goto err;
+		}
+
+		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
+		if (!syncobj) {
+			DRM_DEBUG("Invalid syncobj handle provided\n");
+			err = -ENOENT;
+			goto err;
+		}
+
+		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+			fence = drm_syncobj_fence_get(syncobj);
+			if (!fence) {
+				DRM_DEBUG("Syncobj handle has no fence\n");
+				drm_syncobj_put(syncobj);
+				err = -EINVAL;
+				goto err;
+			}
+
+			err = dma_fence_chain_find_seqno(&fence, point);
+			if (err) {
+				DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
+				drm_syncobj_put(syncobj);
+				goto err;
+			}
+
+			/* A point might have been signaled already and
+			 * garbage collected from the timeline. In this case
+			 * just ignore the point and carry on.
+			 */
+			if (!fence) {
+				drm_syncobj_put(syncobj);
+				continue;
+			}
+		}
+
+		/*
+		 * For timeline syncobjs we need to preallocate chains for
+		 * later signaling.
+		 */
+		if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
+			/*
+			 * Waiting and signaling the same point (when point !=
+			 * 0) would break the timeline.
+			 */
+			if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+				DRM_DEBUG("Tring to wait & signal the same timeline point.\n");
+				err = -EINVAL;
+				drm_syncobj_put(syncobj);
+				goto err;
+			}
+
+			fences[num_fences].chain_fence =
+				kmalloc(sizeof(*fences[num_fences].chain_fence),
+					GFP_KERNEL);
+			if (!fences[num_fences].chain_fence) {
+				drm_syncobj_put(syncobj);
+				err = -ENOMEM;
+				DRM_DEBUG("Unable to alloc chain_fence\n");
+				goto err;
+			}
+		} else {
+			fences[num_fences].chain_fence = NULL;
+		}
+
+		fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+		fences[num_fences].dma_fence = fence;
+		fences[num_fences].value = point;
+		num_fences++;
+	}
+
+	*out_n_fences = num_fences;
+
+	return fences;
+
+err:
+	__free_fence_array(fences, num_fences);
+	return ERR_PTR(err);
+}
+
+static struct i915_eb_fences *
+get_legacy_fence_array(struct i915_execbuffer *eb,
+		       int *out_n_fences)
 {
-	const unsigned long nfences = args->num_cliprects;
+	struct drm_i915_gem_execbuffer2 *args = eb->args;
 	struct drm_i915_gem_exec_fence __user *user;
-	struct drm_syncobj **fences;
+	struct i915_eb_fences *fences;
+	const u32 num_fences = args->num_cliprects;
 	unsigned long n;
 	int err;
 
-	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
-		return NULL;
+	*out_n_fences = num_fences;
 
 	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
 	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
-	if (nfences > min_t(unsigned long,
-			    ULONG_MAX / sizeof(*user),
-			    SIZE_MAX / sizeof(*fences)))
+	if (*out_n_fences > min_t(unsigned long,
+				  ULONG_MAX / sizeof(*user),
+				  SIZE_MAX / sizeof(*fences)))
 		return ERR_PTR(-EINVAL);
 
 	user = u64_to_user_ptr(args->cliprects_ptr);
-	if (!access_ok(user, nfences * sizeof(*user)))
+	if (!access_ok(user, *out_n_fences * sizeof(*user)))
 		return ERR_PTR(-EFAULT);
 
-	fences = kvmalloc_array(nfences, sizeof(*fences),
+	fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
 				__GFP_NOWARN | GFP_KERNEL);
 	if (!fences)
 		return ERR_PTR(-ENOMEM);
 
-	for (n = 0; n < nfences; n++) {
-		struct drm_i915_gem_exec_fence fence;
+	for (n = 0; n < *out_n_fences; n++) {
+		struct drm_i915_gem_exec_fence user_fence;
 		struct drm_syncobj *syncobj;
+		struct dma_fence *fence = NULL;
 
-		if (__copy_from_user(&fence, user++, sizeof(fence))) {
+		if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) {
 			err = -EFAULT;
 			goto err;
 		}
 
-		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
 			err = -EINVAL;
 			goto err;
 		}
 
-		syncobj = drm_syncobj_find(file, fence.handle);
+		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
 		if (!syncobj) {
 			DRM_DEBUG("Invalid syncobj handle provided\n");
 			err = -ENOENT;
 			goto err;
 		}
 
+		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+			fence = drm_syncobj_fence_get(syncobj);
+			if (!fence) {
+				DRM_DEBUG("Syncobj handle has no fence\n");
+				drm_syncobj_put(syncobj);
+				err = -EINVAL;
+				goto err;
+			}
+		}
+
 		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
 			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
 
-		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+		fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+		fences[n].dma_fence = fence;
+		fences[n].value = 0;
+		fences[n].chain_fence = NULL;
 	}
 
 	return fences;
@@ -2390,37 +2548,44 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
 	return ERR_PTR(err);
 }
 
+static struct i915_eb_fences *
+get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return get_legacy_fence_array(eb, out_n_fences);
+
+	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+		return get_timeline_fence_array(eb, out_n_fences);
+
+	*out_n_fences = 0;
+	return NULL;
+}
+
 static void
-put_fence_array(struct drm_i915_gem_execbuffer2 *args,
-		struct drm_syncobj **fences)
+put_fence_array(struct i915_eb_fences *fences, int nfences)
 {
 	if (fences)
-		__free_fence_array(fences, args->num_cliprects);
+		__free_fence_array(fences, nfences);
 }
 
 static int
 await_fence_array(struct i915_execbuffer *eb,
-		  struct drm_syncobj **fences)
+		  struct i915_eb_fences *fences,
+		  int nfences)
 {
-	const unsigned int nfences = eb->args->num_cliprects;
 	unsigned int n;
 	int err;
 
 	for (n = 0; n < nfences; n++) {
 		struct drm_syncobj *syncobj;
-		struct dma_fence *fence;
 		unsigned int flags;
 
-		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
 		if (!(flags & I915_EXEC_FENCE_WAIT))
 			continue;
 
-		fence = drm_syncobj_fence_get(syncobj);
-		if (!fence)
-			return -EINVAL;
-
-		err = i915_request_await_dma_fence(eb->request, fence);
-		dma_fence_put(fence);
+		err = i915_request_await_dma_fence(eb->request,
+						   fences[n].dma_fence);
 		if (err < 0)
 			return err;
 	}
@@ -2430,9 +2595,9 @@ await_fence_array(struct i915_execbuffer *eb,
 
 static void
 signal_fence_array(struct i915_execbuffer *eb,
-		   struct drm_syncobj **fences)
+		   struct i915_eb_fences *fences,
+		   int nfences)
 {
-	const unsigned int nfences = eb->args->num_cliprects;
 	struct dma_fence * const fence = &eb->request->fence;
 	unsigned int n;
 
@@ -2440,15 +2605,46 @@ signal_fence_array(struct i915_execbuffer *eb,
 		struct drm_syncobj *syncobj;
 		unsigned int flags;
 
-		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
 		if (!(flags & I915_EXEC_FENCE_SIGNAL))
 			continue;
 
-		drm_syncobj_replace_fence(syncobj, fence);
+		if (fences[n].chain_fence) {
+			drm_syncobj_add_point(syncobj, fences[n].chain_fence,
+					      fence, fences[n].value);
+			/*
+			 * The chain's ownership is transfered to the
+			 * timeline.
+			 */
+			fences[n].chain_fence = NULL;
+		} else {
+			drm_syncobj_replace_fence(syncobj, fence);
+		}
 	}
 }
 
+static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
+{
+	struct i915_execbuffer *eb = data;
+
+	/* Timeline fences are incompatible with the fence array flag. */
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return -EINVAL;
+
+	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+		return -EINVAL;
+
+	if (copy_from_user(&eb->extensions.timeline_fences, ext,
+			   sizeof(eb->extensions.timeline_fences)))
+		return -EFAULT;
+
+	eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
+
+	return 0;
+}
+
 static const i915_user_extension_fn execbuf_extensions[] = {
+        [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
 };
 
 static int
@@ -2479,14 +2675,15 @@ static int
 i915_gem_do_execbuffer(struct drm_device *dev,
 		       struct drm_file *file,
 		       struct drm_i915_gem_execbuffer2 *args,
-		       struct drm_i915_gem_exec_object2 *exec,
-		       struct drm_syncobj **fences)
+		       struct drm_i915_gem_exec_object2 *exec)
 {
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
 	struct dma_fence *exec_fence = NULL;
 	struct sync_file *out_fence = NULL;
+	struct i915_eb_fences *fences = NULL;
 	int out_fence_fd = -1;
+	int nfences = 0;
 	int err;
 
 	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
@@ -2525,10 +2722,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (err)
 		return err;
 
+	fences = get_fence_array(&eb, &nfences);
+	if (IS_ERR(fences))
+		return PTR_ERR(fences);
+
 	if (args->flags & I915_EXEC_FENCE_IN) {
 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
-		if (!in_fence)
-			return -EINVAL;
+		if (!in_fence) {
+			err = -EINVAL;
+			goto err_fences;
+		}
 	}
 
 	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
@@ -2673,7 +2876,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	}
 
 	if (fences) {
-		err = await_fence_array(&eb, fences);
+		err = await_fence_array(&eb, fences, nfences);
 		if (err)
 			goto err_request;
 	}
@@ -2704,7 +2907,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	i915_request_add(eb.request);
 
 	if (fences)
-		signal_fence_array(&eb, fences);
+		signal_fence_array(&eb, fences, nfences);
 
 	if (out_fence) {
 		if (err == 0) {
@@ -2739,6 +2942,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	dma_fence_put(exec_fence);
 err_in_fence:
 	dma_fence_put(in_fence);
+err_fences:
+	put_fence_array(fences, nfences);
 	return err;
 }
 
@@ -2832,7 +3037,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
 			exec2_list[i].flags = 0;
 	}
 
-	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
+	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
 	if (exec2.flags & __EXEC_HAS_RELOC) {
 		struct drm_i915_gem_exec_object __user *user_exec_list =
 			u64_to_user_ptr(args->buffers_ptr);
@@ -2863,7 +3068,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_execbuffer2 *args = data;
 	struct drm_i915_gem_exec_object2 *exec2_list;
-	struct drm_syncobj **fences = NULL;
 	const size_t count = args->buffer_count;
 	int err;
 
@@ -2891,15 +3095,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		return -EFAULT;
 	}
 
-	if (args->flags & I915_EXEC_FENCE_ARRAY) {
-		fences = get_fence_array(args, file);
-		if (IS_ERR(fences)) {
-			kvfree(exec2_list);
-			return PTR_ERR(fences);
-		}
-	}
-
-	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
+	err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
 
 	/*
 	 * Now that we have begun execution of the batchbuffer, we ignore
@@ -2939,7 +3135,6 @@ end:;
 	}
 
 	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
-	put_fence_array(args, fences);
 	kvfree(exec2_list);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index bec25942d77d..4e21858fe110 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2820,7 +2820,8 @@ static struct drm_driver driver = {
 	 */
 	.driver_features =
 	    DRIVER_GEM |
-	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
+	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
+	    DRIVER_SYNCOBJ_TIMELINE,
 	.release = i915_driver_release,
 	.open = i915_driver_open,
 	.lastclose = i915_driver_lastclose,
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index 5d9101376a3d..da6faa84e5b8 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -130,6 +130,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
 	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
+	case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 0a99c26730e1..3d031e81648b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -611,6 +611,13 @@ typedef struct drm_i915_irq_wait {
  * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
  */
 #define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
+
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See
+ * I915_EXEC_USE_EXTENSIONS.
+ */
+#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 54
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1008,9 +1015,41 @@ struct drm_i915_gem_exec_fence {
 };
 
 enum drm_i915_gem_execbuffer_ext {
+	/**
+	 * See drm_i915_gem_execbuf_ext_timeline_fences.
+	 */
+	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
+
 	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
 };
 
+/**
+ * This structure describes an array of drm_syncobj and associated points for
+ * timeline variants of drm_syncobj. It is invalid to append this structure to
+ * the execbuf if I915_EXEC_FENCE_ARRAY is set.
+ */
+struct drm_i915_gem_execbuffer_ext_timeline_fences {
+	struct i915_user_extension base;
+
+	/**
+	 * Number of element in the handles_ptr & value_ptr arrays.
+	 */
+	__u64 fence_count;
+
+	/**
+	 * Pointer to an array of struct drm_i915_gem_exec_fence of length
+	 * fence_count.
+	 */
+	__u64 handles_ptr;
+
+	/**
+	 * Pointer to an array of u64 values of length fence_count. Values
+	 * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline
+	 * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
+	 */
+	__u64 values_ptr;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v12 03/11] drm/i915/perf: drop list of streams
  2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
  2019-08-30 14:47 ` [PATCH v12 01/11] drm/i915: introduce a mechanism to extend execbuf2 Lionel Landwerlin
  2019-08-30 14:47 ` [PATCH v12 02/11] drm/i915: add syncobj timeline support Lionel Landwerlin
@ 2019-08-30 14:47 ` Lionel Landwerlin
  2019-08-30 14:47 ` [PATCH v12 04/11] drm/i915/perf: store the associated engine of a stream Lionel Landwerlin
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 14:47 UTC (permalink / raw)
  To: intel-gfx

At some point in time there was the idea that we could have multiple
stream from the same piece of HW but that never materialized and given
the hard time we already have making everything work with the
submission side, there is no real point having this list of 1 element
around.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  6 ------
 drivers/gpu/drm/i915/i915_perf.c | 16 +---------------
 2 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index db7480831e52..75607450ba00 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1082,11 +1082,6 @@ struct i915_perf_stream {
 	 */
 	struct drm_i915_private *dev_priv;
 
-	/**
-	 * @link: Links the stream into ``&drm_i915_private->streams``
-	 */
-	struct list_head link;
-
 	/**
 	 * @wakeref: As we keep the device awake while the perf stream is
 	 * active, we track our runtime pm reference for later release.
@@ -1671,7 +1666,6 @@ struct drm_i915_private {
 		 * except exclusive_stream.
 		 */
 		struct mutex lock;
-		struct list_head streams;
 
 		/*
 		 * The stream currently using the OA unit. If accessed
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2c9f46e12622..5fd270e77a88 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1435,9 +1435,6 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
 	 */
 	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
 
-	/* Maybe make ->pollin per-stream state if we support multiple
-	 * concurrent streams in the future.
-	 */
 	stream->pollin = false;
 }
 
@@ -1494,10 +1491,6 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
 	 */
 	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
 
-	/*
-	 * Maybe make ->pollin per-stream state if we support multiple
-	 * concurrent streams in the future.
-	 */
 	stream->pollin = false;
 }
 
@@ -2630,8 +2623,6 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
 	if (stream->ops->destroy)
 		stream->ops->destroy(stream);
 
-	list_del(&stream->link);
-
 	if (stream->ctx)
 		i915_gem_context_put(stream->ctx);
 
@@ -2780,8 +2771,6 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 		goto err_flags;
 	}
 
-	list_add(&stream->link, &dev_priv->perf.streams);
-
 	if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
 		f_flags |= O_CLOEXEC;
 	if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
@@ -2790,7 +2779,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
 	if (stream_fd < 0) {
 		ret = stream_fd;
-		goto err_open;
+		goto err_flags;
 	}
 
 	if (!(param->flags & I915_PERF_FLAG_DISABLED))
@@ -2803,8 +2792,6 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 
 	return stream_fd;
 
-err_open:
-	list_del(&stream->link);
 err_flags:
 	if (stream->ops->destroy)
 		stream->ops->destroy(stream);
@@ -3640,7 +3627,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 	}
 
 	if (dev_priv->perf.ops.enable_metric_set) {
-		INIT_LIST_HEAD(&dev_priv->perf.streams);
 		mutex_init(&dev_priv->perf.lock);
 
 		oa_sample_rate_hard_limit = 1000 *
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v12 04/11] drm/i915/perf: store the associated engine of a stream
  2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
                   ` (2 preceding siblings ...)
  2019-08-30 14:47 ` [PATCH v12 03/11] drm/i915/perf: drop list of streams Lionel Landwerlin
@ 2019-08-30 14:47 ` Lionel Landwerlin
  2019-08-30 20:59   ` kbuild test robot
  2019-08-30 14:47 ` [PATCH v12 05/11] drm/i915/perf: introduce a versioning of the i915-perf uapi Lionel Landwerlin
                   ` (7 subsequent siblings)
  11 siblings, 1 reply; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 14:47 UTC (permalink / raw)
  To: intel-gfx

We'll use this information later to verify that a client trying to
reconfigure the stream does so on the right engine.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  | 5 +++++
 drivers/gpu/drm/i915/i915_perf.c | 7 +++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 75607450ba00..274a1193d4f0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1088,6 +1088,11 @@ struct i915_perf_stream {
 	 */
 	intel_wakeref_t wakeref;
 
+	/**
+	 * @engine: Engine associated with this performance stream.
+	 */
+	struct intel_engine_cs *engine;
+
 	/**
 	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
 	 * properties given when opening a stream, representing the contents
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5fd270e77a88..221307822ab2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -363,6 +363,8 @@ struct perf_open_properties {
 	int oa_format;
 	bool oa_periodic;
 	int oa_period_exponent;
+
+	struct intel_engine_cs *engine;
 };
 
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
@@ -2201,6 +2203,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 
 	format_size = dev_priv->perf.oa_formats[props->oa_format].size;
 
+	stream->engine = props->engine;
+
 	stream->sample_flags |= SAMPLE_OA_REPORT;
 	stream->sample_size += format_size;
 
@@ -2840,6 +2844,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		return -EINVAL;
 	}
 
+	/* At the moment we only support using i915-perf on the RCS. */
+	props->engine = dev_priv->engine[RCS0];
+
 	/* Considering that ID = 0 is reserved and assuming that we don't
 	 * (currently) expect any configurations to ever specify duplicate
 	 * values for a particular property ID then the last _PROP_MAX value is
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v12 05/11] drm/i915/perf: introduce a versioning of the i915-perf uapi
  2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
                   ` (3 preceding siblings ...)
  2019-08-30 14:47 ` [PATCH v12 04/11] drm/i915/perf: store the associated engine of a stream Lionel Landwerlin
@ 2019-08-30 14:47 ` Lionel Landwerlin
  2019-08-30 14:47 ` [PATCH v12 06/11] drm/i915/perf: allow for CS OA configs to be created lazily Lionel Landwerlin
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 14:47 UTC (permalink / raw)
  To: intel-gfx

Reporting this version will help application figure out what level of
the support the running kernel provides.

v2: Add i915_perf_ioctl_version() (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_getparam.c |  4 ++++
 drivers/gpu/drm/i915/i915_perf.c     | 10 ++++++++++
 drivers/gpu/drm/i915/i915_perf.h     |  1 +
 include/uapi/drm/i915_drm.h          | 20 ++++++++++++++++++++
 4 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index da6faa84e5b8..bd41cc5ce906 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -5,6 +5,7 @@
 #include "gt/intel_engine_user.h"
 
 #include "i915_drv.h"
+#include "i915_perf.h"
 
 int i915_getparam_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
@@ -157,6 +158,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_MMAP_GTT_COHERENT:
 		value = INTEL_INFO(i915)->has_coherent_ggtt;
 		break;
+	case I915_PARAM_PERF_REVISION:
+		value = i915_perf_ioctl_version();
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 221307822ab2..e8f4ebcc9bbc 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3694,3 +3694,13 @@ void i915_perf_fini(struct drm_i915_private *dev_priv)
 
 	dev_priv->perf.initialized = false;
 }
+
+/**
+ * i915_perf_ioctl_version - Version of the i915-perf subsystem
+ *
+ * This version number is used by userspace to detect available features.
+ */
+int i915_perf_ioctl_version(void)
+{
+	return 1;
+}
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index a412b16d9ffc..95549de65212 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -18,6 +18,7 @@ void i915_perf_init(struct drm_i915_private *i915);
 void i915_perf_fini(struct drm_i915_private *i915);
 void i915_perf_register(struct drm_i915_private *i915);
 void i915_perf_unregister(struct drm_i915_private *i915);
+int i915_perf_ioctl_version(void);
 
 int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3d031e81648b..e98c9a7baa91 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -618,6 +618,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 54
 
+/*
+ * Revision of the i915-perf uAPI. The value returned helps determine what
+ * i915-perf features are available. See drm_i915_perf_property_id.
+ */
+#define I915_PARAM_PERF_REVISION	55
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1903,23 +1909,31 @@ enum drm_i915_perf_property_id {
 	 * Open the stream for a specific context handle (as used with
 	 * execbuffer2). A stream opened for a specific context this way
 	 * won't typically require root privileges.
+	 *
+	 * This property is available in perf revision 1.
 	 */
 	DRM_I915_PERF_PROP_CTX_HANDLE = 1,
 
 	/**
 	 * A value of 1 requests the inclusion of raw OA unit reports as
 	 * part of stream samples.
+	 *
+	 * This property is available in perf revision 1.
 	 */
 	DRM_I915_PERF_PROP_SAMPLE_OA,
 
 	/**
 	 * The value specifies which set of OA unit metrics should be
 	 * be configured, defining the contents of any OA unit reports.
+	 *
+	 * This property is available in perf revision 1.
 	 */
 	DRM_I915_PERF_PROP_OA_METRICS_SET,
 
 	/**
 	 * The value specifies the size and layout of OA unit reports.
+	 *
+	 * This property is available in perf revision 1.
 	 */
 	DRM_I915_PERF_PROP_OA_FORMAT,
 
@@ -1929,6 +1943,8 @@ enum drm_i915_perf_property_id {
 	 * from this exponent as follows:
 	 *
 	 *   80ns * 2^(period_exponent + 1)
+	 *
+	 * This property is available in perf revision 1.
 	 */
 	DRM_I915_PERF_PROP_OA_EXPONENT,
 
@@ -1960,6 +1976,8 @@ struct drm_i915_perf_open_param {
  * to close and re-open a stream with the same configuration.
  *
  * It's undefined whether any pending data for the stream will be lost.
+ *
+ * This ioctl is available in perf revision 1.
  */
 #define I915_PERF_IOCTL_ENABLE	_IO('i', 0x0)
 
@@ -1967,6 +1985,8 @@ struct drm_i915_perf_open_param {
  * Disable data capture for a stream.
  *
  * It is an error to try and read a stream that is disabled.
+ *
+ * This ioctl is available in perf revision 1.
  */
 #define I915_PERF_IOCTL_DISABLE	_IO('i', 0x1)
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v12 06/11] drm/i915/perf: allow for CS OA configs to be created lazily
  2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
                   ` (4 preceding siblings ...)
  2019-08-30 14:47 ` [PATCH v12 05/11] drm/i915/perf: introduce a versioning of the i915-perf uapi Lionel Landwerlin
@ 2019-08-30 14:47 ` Lionel Landwerlin
  2019-08-30 21:39   ` kbuild test robot
  2019-08-30 14:47 ` [PATCH v12 07/11] drm/i915/perf: implement active wait for noa configurations Lionel Landwerlin
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 14:47 UTC (permalink / raw)
  To: intel-gfx

Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer be
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configuration without having to open/close the i915/perf
stream.

v2: No need for locking on object OA config object creation (Chris)
    Flush cpu mapping of OA config (Chris)

v3: Properly deal with the perf_metric lock (Chris/Lionel)

v4: Fix oa config unref/put when not found (Lionel)

v5: Allocate BOs for configurations on the stream instead of globally
    (Lionel)

v6: Fix 64bit division (Chris)

v7: Store allocated config BOs into the stream (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
 drivers/gpu/drm/i915/i915_drv.h              |  19 +-
 drivers/gpu/drm/i915/i915_perf.c             | 270 ++++++++++++++++---
 drivers/gpu/drm/i915/i915_perf.h             |  26 ++
 4 files changed, 274 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 86e00a2db8a4..a7f1377a54a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -133,6 +133,7 @@
  */
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
 #define   MI_LRI_FORCE_POSTED		(1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
 #define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 274a1193d4f0..c0ff6f0fd33e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -990,6 +990,8 @@ struct i915_oa_reg {
 };
 
 struct i915_oa_config {
+	struct drm_i915_private *i915;
+
 	char uuid[UUID_STRING_LEN + 1];
 	int id;
 
@@ -1004,7 +1006,7 @@ struct i915_oa_config {
 	struct attribute *attrs[2];
 	struct device_attribute sysfs_metric_id;
 
-	atomic_t ref_count;
+	struct kref ref;
 };
 
 struct i915_perf_stream;
@@ -1126,11 +1128,22 @@ struct i915_perf_stream {
 	 */
 	const struct i915_perf_stream_ops *ops;
 
+	/**
+	 * @active_config_mutex: Protects access to @oa_config & @oa_config_bos.
+	 */
+	struct mutex config_mutex;
+
 	/**
 	 * @oa_config: The OA configuration used by the stream.
 	 */
 	struct i915_oa_config *oa_config;
 
+	/**
+	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
+	 * each time @oa_config changes.
+	 */
+	struct list_head oa_config_bos;
+
 	/**
 	 * The OA context specific information.
 	 */
@@ -1661,8 +1674,8 @@ struct drm_i915_private {
 		struct mutex metrics_lock;
 
 		/*
-		 * List of dynamic configurations, you need to hold
-		 * dev_priv->perf.metrics_lock to access it.
+		 * List of dynamic configurations (struct i915_oa_config), you
+		 * need to hold dev_priv->perf.metrics_lock to access it.
 		 */
 		struct idr metrics_idr;
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e8f4ebcc9bbc..08869660f1f2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -367,11 +367,19 @@ struct perf_open_properties {
 	struct intel_engine_cs *engine;
 };
 
+struct i915_oa_config_bo {
+	struct list_head link;
+
+	struct i915_oa_config *oa_config;
+	struct drm_i915_gem_object *bo;
+};
+
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
 
-static void free_oa_config(struct drm_i915_private *dev_priv,
-			   struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
+	struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref);
+
 	if (!PTR_ERR(oa_config->flex_regs))
 		kfree(oa_config->flex_regs);
 	if (!PTR_ERR(oa_config->b_counter_regs))
@@ -381,40 +389,194 @@ static void free_oa_config(struct drm_i915_private *dev_priv,
 	kfree(oa_config);
 }
 
-static void put_oa_config(struct drm_i915_private *dev_priv,
-			  struct i915_oa_config *oa_config)
+static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs)
 {
-	if (!atomic_dec_and_test(&oa_config->ref_count))
-		return;
+	u32 i;
+
+	for (i = 0; i < n_regs; i++) {
+		if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+			u32 n_lri = min(n_regs - i,
+					(u32) MI_LOAD_REGISTER_IMM_MAX_REGS);
 
-	free_oa_config(dev_priv, oa_config);
+			*cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+		}
+		*cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+		*cs++ = reg_data[i].value;
+	}
+
+	return cs;
 }
 
-static int get_oa_config(struct drm_i915_private *dev_priv,
-			 int metrics_set,
-			 struct i915_oa_config **out_config)
+static struct i915_oa_config_bo* alloc_oa_config_buffer(struct drm_i915_private *i915,
+							struct i915_oa_config *oa_config)
 {
-	int ret;
+	struct i915_oa_config_bo *oa_bo;
+	size_t config_length = 0;
+	u32 *cs;
+	int err;
+
+	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
+	if (!oa_bo)
+		return ERR_PTR(-ENOMEM);
+
+	oa_bo->oa_config = i915_oa_config_get(oa_config);
+
+	if (oa_config->mux_regs_len > 0) {
+		config_length += DIV_ROUND_UP(oa_config->mux_regs_len,
+					      MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+		config_length += oa_config->mux_regs_len * 8;
+	}
+	if (oa_config->b_counter_regs_len > 0) {
+		config_length += DIV_ROUND_UP(oa_config->b_counter_regs_len,
+					      MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+		config_length += oa_config->b_counter_regs_len * 8;
+	}
+	if (oa_config->flex_regs_len > 0) {
+		config_length += DIV_ROUND_UP(oa_config->flex_regs_len,
+					      MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
+		config_length += oa_config->flex_regs_len * 8;
+	}
+	config_length += 4; /* MI_BATCH_BUFFER_END */
+	config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE);
+
+	oa_bo->bo = i915_gem_object_create_shmem(i915, config_length);
+	if (IS_ERR(oa_bo->bo)) {
+		err = PTR_ERR(oa_bo->bo);
+		goto err_oa_config;
+	}
+
+	cs = i915_gem_object_pin_map(oa_bo->bo, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_oa_bo;
+	}
+
+	cs = write_cs_mi_lri(cs, oa_config->mux_regs, oa_config->mux_regs_len);
+	cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len);
+	cs = write_cs_mi_lri(cs, oa_config->flex_regs, oa_config->flex_regs_len);
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(oa_bo->bo);
+	i915_gem_object_unpin_map(oa_bo->bo);
+
+	return oa_bo;
+
+err_oa_bo:
+	i915_gem_object_put(oa_bo->bo);
+err_oa_config:
+	i915_oa_config_put(oa_bo->oa_config);
+	kfree(oa_bo);
+
+	return ERR_PTR(err);
+}
+
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+			    int metrics_set,
+			    struct i915_oa_config **out_config)
+{
+	struct i915_oa_config *oa_config;
+	int err;
+
+	if (!i915->perf.initialized)
+		return -ENODEV;
+
+	err = mutex_lock_interruptible(&i915->perf.metrics_lock);
+	if (err)
+		return err;
 
 	if (metrics_set == 1) {
-		*out_config = &dev_priv->perf.test_config;
-		atomic_inc(&dev_priv->perf.test_config.ref_count);
-		return 0;
+		oa_config = &i915->perf.test_config;
+	} else {
+		oa_config = idr_find(&i915->perf.metrics_idr, metrics_set);
+		if (!oa_config)
+			err = -EINVAL;
 	}
 
-	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
-	if (ret)
-		return ret;
+	if (!err)
+		*out_config = i915_oa_config_get(oa_config);
 
-	*out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
-	if (!*out_config)
-		ret = -EINVAL;
-	else
-		atomic_inc(&(*out_config)->ref_count);
+	mutex_unlock(&i915->perf.metrics_lock);
 
-	mutex_unlock(&dev_priv->perf.metrics_lock);
+	return err;
+}
 
-	return ret;
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+	i915_oa_config_put(oa_bo->oa_config);
+	i915_gem_object_put(oa_bo->bo);
+	kfree(oa_bo);
+}
+
+int i915_perf_get_oa_config_and_bo(struct i915_perf_stream *stream,
+				   int metrics_set,
+				   struct i915_oa_config **out_config,
+				   struct drm_i915_gem_object **out_obj)
+{
+	struct drm_i915_private *i915 = stream->dev_priv;
+	struct i915_oa_config *oa_config;
+	int err = 0;
+
+	if (!i915->perf.initialized)
+		return -ENODEV;
+
+	err = i915_perf_get_oa_config(i915, metrics_set, &oa_config);
+	if (err)
+		return err;
+
+	if (out_config)
+		*out_config = oa_config;
+
+	if (out_obj) {
+		struct i915_oa_config_bo *oa_bo = NULL, *oa_bo_iter;
+
+		/* Look for the buffer in the already allocated BOs attached
+		 * to the stream.
+		 */
+		err = mutex_lock_interruptible(&stream->config_mutex);
+		if (err)
+			goto err;
+
+		list_for_each_entry(oa_bo_iter, &stream->oa_config_bos, link) {
+			if (oa_bo_iter->oa_config == oa_config &&
+			    memcmp(oa_bo_iter->oa_config->uuid,
+				   oa_config->uuid,
+				   sizeof(oa_config->uuid)) == 0) {
+				oa_bo = oa_bo_iter;
+				break;
+			}
+		}
+
+		mutex_unlock(&stream->config_mutex);
+
+		if (!oa_bo) {
+			oa_bo = alloc_oa_config_buffer(i915, oa_config);
+			if (IS_ERR(oa_bo)) {
+				err = PTR_ERR(oa_bo);
+				goto err;
+			}
+
+			err = mutex_lock_interruptible(&stream->config_mutex);
+			if (err) {
+				free_oa_config_bo(oa_bo);
+				goto err;
+			}
+
+			list_add(&oa_bo->link, &stream->oa_config_bos);
+
+			mutex_unlock(&stream->config_mutex);
+		}
+
+		*out_obj = i915_gem_object_get(oa_bo->bo);
+	}
+
+err:
+	if (err) {
+		i915_oa_config_put(oa_config);
+		*out_config = NULL;
+	}
+
+	return err;
 }
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
@@ -1362,6 +1524,18 @@ free_oa_buffer(struct i915_perf_stream *stream)
 	stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+	struct i915_oa_config_bo *oa_bo, *tmp;
+
+	i915_oa_config_put(stream->oa_config);
+	list_for_each_entry_safe(oa_bo, tmp, &stream->oa_config_bos, link) {
+		list_del(&oa_bo->link);
+		free_oa_config_bo(oa_bo);
+	}
+}
+
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
@@ -1385,7 +1559,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
-	put_oa_config(dev_priv, stream->oa_config);
+	free_oa_configs(stream);
 
 	if (dev_priv->perf.spurious_report_rs.missed) {
 		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
@@ -2199,6 +2373,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		return -EINVAL;
 	}
 
+	mutex_init(&stream->config_mutex);
+
 	stream->sample_size = sizeof(struct drm_i915_perf_record_header);
 
 	format_size = dev_priv->perf.oa_formats[props->oa_format].size;
@@ -2227,7 +2403,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		}
 	}
 
-	ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
+	ret = i915_perf_get_oa_config(dev_priv, props->metrics_set,
+				      &stream->oa_config);
 	if (ret) {
 		DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
 		goto err_config;
@@ -2265,6 +2442,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		goto err_enable;
 	}
 
+	DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
+
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	hrtimer_init(&stream->poll_check_timer,
@@ -2284,11 +2463,13 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	free_oa_buffer(stream);
 
 err_oa_buf_alloc:
-	put_oa_config(dev_priv, stream->oa_config);
+	i915_oa_config_put(stream->oa_config);
 
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
 
+	free_oa_configs(stream);
+
 err_config:
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
@@ -2306,8 +2487,12 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 		return;
 
 	stream = engine->i915->perf.exclusive_stream;
-	if (stream)
+
+	if (stream) {
+		mutex_lock(&stream->config_mutex);
 		gen8_update_reg_state_unlocked(stream, ce, regs, stream->oa_config);
+		mutex_unlock(&stream->config_mutex);
+	}
 }
 
 /**
@@ -2650,7 +2835,9 @@ static int i915_perf_release(struct inode *inode, struct file *file)
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
 	mutex_lock(&dev_priv->perf.lock);
+
 	i915_perf_destroy_locked(stream);
+
 	mutex_unlock(&dev_priv->perf.lock);
 
 	/* Release the reference the perf stream kept on the driver. */
@@ -2759,6 +2946,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 		goto err_ctx;
 	}
 
+	INIT_LIST_HEAD(&stream->oa_config_bos);
 	stream->dev_priv = dev_priv;
 	stream->ctx = specific_ctx;
 
@@ -3085,7 +3273,8 @@ void i915_perf_register(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto sysfs_error;
 
-	atomic_set(&dev_priv->perf.test_config.ref_count, 1);
+	dev_priv->perf.test_config.i915 = dev_priv;
+	kref_init(&dev_priv->perf.test_config.ref);
 
 	goto exit;
 
@@ -3341,7 +3530,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 		return -ENOMEM;
 	}
 
-	atomic_set(&oa_config->ref_count, 1);
+	oa_config->i915 = dev_priv;
+	kref_init(&oa_config->ref);
 
 	if (!uuid_is_valid(args->uuid)) {
 		DRM_DEBUG("Invalid uuid format for OA config\n");
@@ -3440,7 +3630,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 sysfs_err:
 	mutex_unlock(&dev_priv->perf.metrics_lock);
 reg_err:
-	put_oa_config(dev_priv, oa_config);
+	i915_oa_config_put(oa_config);
 	DRM_DEBUG("Failed to add new OA config\n");
 	return err;
 }
@@ -3476,13 +3666,13 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
 	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
 	if (ret)
-		goto lock_err;
+		return ret;
 
 	oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
 	if (!oa_config) {
 		DRM_DEBUG("Failed to remove unknown OA config\n");
 		ret = -ENOENT;
-		goto config_err;
+		goto err_unlock;
 	}
 
 	GEM_BUG_ON(*arg != oa_config->id);
@@ -3492,13 +3682,16 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 
 	idr_remove(&dev_priv->perf.metrics_idr, *arg);
 
+	mutex_unlock(&dev_priv->perf.metrics_lock);
+
 	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
 
-	put_oa_config(dev_priv, oa_config);
+	i915_oa_config_put(oa_config);
+
+	return 0;
 
-config_err:
+err_unlock:
 	mutex_unlock(&dev_priv->perf.metrics_lock);
-lock_err:
 	return ret;
 }
 
@@ -3668,10 +3861,9 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 
 static int destroy_config(int id, void *p, void *data)
 {
-	struct drm_i915_private *dev_priv = data;
 	struct i915_oa_config *oa_config = p;
 
-	put_oa_config(dev_priv, oa_config);
+	i915_oa_config_put(oa_config);
 
 	return 0;
 }
@@ -3685,7 +3877,7 @@ void i915_perf_fini(struct drm_i915_private *dev_priv)
 	if (!dev_priv->perf.initialized)
 		return;
 
-	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
+	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, NULL);
 	idr_destroy(&dev_priv->perf.metrics_idr);
 
 	unregister_sysctl_table(dev_priv->perf.sysctl_header);
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index 95549de65212..a216e9e2de15 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -7,10 +7,14 @@
 #define __I915_PERF_H__
 
 #include <linux/types.h>
+#include <linux/kref.h>
 
 struct drm_device;
 struct drm_file;
+struct drm_i915_gem_object;
 struct drm_i915_private;
+struct i915_oa_config;
+struct i915_perf_stream;
 struct intel_context;
 struct intel_engine_cs;
 
@@ -29,5 +33,27 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 			    struct intel_context *ce,
 			    u32 *reg_state);
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+			    int metrics_set,
+			    struct i915_oa_config **out_config);
+int i915_perf_get_oa_config_and_bo(struct i915_perf_stream *stream,
+				   int metrics_set,
+				   struct i915_oa_config **out_config,
+				   struct drm_i915_gem_object **out_obj);
+void i915_oa_config_release(struct kref *ref);
+
+static inline struct i915_oa_config *i915_oa_config_get(struct i915_oa_config *oa_config)
+{
+	kref_get(&oa_config->ref);
+	return oa_config;
+}
+
+static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
+{
+	if (!oa_config)
+		return;
+
+	kref_put(&oa_config->ref, i915_oa_config_release);
+}
 
 #endif /* __I915_PERF_H__ */
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v12 07/11] drm/i915/perf: implement active wait for noa configurations
  2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
                   ` (5 preceding siblings ...)
  2019-08-30 14:47 ` [PATCH v12 06/11] drm/i915/perf: allow for CS OA configs to be created lazily Lionel Landwerlin
@ 2019-08-30 14:47 ` Lionel Landwerlin
  2019-08-30 14:47 ` [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream Lionel Landwerlin
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 14:47 UTC (permalink / raw)
  To: intel-gfx

NOA configuration take some amount of time to apply. That amount of
time depends on the size of the GT. There is no documented time for
this. For example, past experimentations with powergating
configuration changes seem to indicate a 60~70us delay. We go with
500us as default for now which should be over the required amount of
time (according to HW architects).

v2: Don't forget to save/restore registers used for the wait (Chris)

v3: Name used CS_GPR registers (Chris)
    Fix compile issue due to rebase (Lionel)

v4: Fix save/restore helpers (Umesh)

v5: Move noa_wait from drm_i915_private to i915_perf_stream (Lionel)

v6: Add missing struct declarations in i915_perf.h

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  24 ++
 drivers/gpu/drm/i915/gt/intel_gt_types.h     |   5 +
 drivers/gpu/drm/i915/i915_debugfs.c          |  31 +++
 drivers/gpu/drm/i915/i915_drv.h              |   8 +
 drivers/gpu/drm/i915/i915_perf.c             | 234 ++++++++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h              |   4 +-
 6 files changed, 302 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index a7f1377a54a2..f133f8dbacb1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -158,6 +158,7 @@
 #define   MI_BATCH_GTT		    (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8	MI_INSTR(0x31, 1)
 #define   MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define   MI_BATCH_PREDICATE         (1 << 15) /* HSW+ on RCS only*/
 
 /*
  * 3D instructions used by the kernel
@@ -236,6 +237,29 @@
 #define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
 #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
 
+#define MI_MATH(x) MI_INSTR(0x1a, (x)-1)
+#define   MI_ALU_OP(op, src1, src2) (((op) << 20) | ((src1) << 10) | (src2))
+/* operands */
+#define   MI_ALU_OP_NOOP     0
+#define   MI_ALU_OP_LOAD     128
+#define   MI_ALU_OP_LOADINV  1152
+#define   MI_ALU_OP_LOAD0    129
+#define   MI_ALU_OP_LOAD1    1153
+#define   MI_ALU_OP_ADD      256
+#define   MI_ALU_OP_SUB      257
+#define   MI_ALU_OP_AND      258
+#define   MI_ALU_OP_OR       259
+#define   MI_ALU_OP_XOR      260
+#define   MI_ALU_OP_STORE    384
+#define   MI_ALU_OP_STOREINV 1408
+/* sources */
+#define   MI_ALU_SRC_REG(x)  (x) /* 0 -> 15 */
+#define   MI_ALU_SRC_SRCA    32
+#define   MI_ALU_SRC_SRCB    33
+#define   MI_ALU_SRC_ACCU    49
+#define   MI_ALU_SRC_ZF      50
+#define   MI_ALU_SRC_CF      51
+
 /*
  * Commands used only by the command parser
  */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..f752b6cf9ea1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -97,6 +97,11 @@ enum intel_gt_scratch_field {
 	/* 8 bytes */
 	INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
 
+	/* 6 * 8 bytes */
+	INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
+
+	/* 4 bytes */
+	INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
 };
 
 #endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5e81c4fc13ae..4234bf133903 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3577,6 +3577,36 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
 			i915_wedged_get, i915_wedged_set,
 			"%llu\n");
 
+static int
+i915_perf_noa_delay_set(void *data, u64 val)
+{
+	struct drm_i915_private *i915 = data;
+
+	/* This would lead to infinite waits as we're doing timestamp
+	 * difference on the CS with only 32bits.
+	 */
+	if (val > mul_u32_u32(U32_MAX, RUNTIME_INFO(i915)->cs_timestamp_frequency_khz))
+		return -EINVAL;
+
+	atomic64_set(&i915->perf.noa_programming_delay, val);
+	return 0;
+}
+
+static int
+i915_perf_noa_delay_get(void *data, u64 *val)
+{
+	struct drm_i915_private *i915 = data;
+
+	*val = atomic64_read(&i915->perf.noa_programming_delay);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
+			i915_perf_noa_delay_get,
+			i915_perf_noa_delay_set,
+			"%llu\n");
+
+
 #define DROP_UNBOUND	BIT(0)
 #define DROP_BOUND	BIT(1)
 #define DROP_RETIRE	BIT(2)
@@ -4353,6 +4383,7 @@ static const struct i915_debugfs_files {
 	const char *name;
 	const struct file_operations *fops;
 } i915_debugfs_files[] = {
+	{"i915_perf_noa_delay", &i915_perf_noa_delay_fops},
 	{"i915_wedged", &i915_wedged_fops},
 	{"i915_cache_sharing", &i915_cache_sharing_fops},
 	{"i915_gem_drop_caches", &i915_drop_caches_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c0ff6f0fd33e..20b6a7023097 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1223,6 +1223,12 @@ struct i915_perf_stream {
 		 */
 		u32 head;
 	} oa_buffer;
+
+	/**
+	 * A batch buffer doing a wait on the GPU for the NOA logic to be
+	 * reprogrammed.
+	 */
+	struct i915_vma *noa_wait;
 };
 
 /**
@@ -1714,6 +1720,8 @@ struct drm_i915_private {
 
 		struct i915_oa_ops ops;
 		const struct i915_oa_format *oa_formats;
+
+		atomic64_t noa_programming_delay;
 	} perf;
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 08869660f1f2..d0764852b347 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -197,6 +197,7 @@
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_gt.h"
 #include "gt/intel_lrc_reg.h"
 
 #include "i915_drv.h"
@@ -408,6 +409,7 @@ static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_r
 }
 
 static struct i915_oa_config_bo* alloc_oa_config_buffer(struct drm_i915_private *i915,
+							struct i915_vma *noa_wait,
 							struct i915_oa_config *oa_config)
 {
 	struct i915_oa_config_bo *oa_bo;
@@ -436,7 +438,7 @@ static struct i915_oa_config_bo* alloc_oa_config_buffer(struct drm_i915_private
 					      MI_LOAD_REGISTER_IMM_MAX_REGS) * 4;
 		config_length += oa_config->flex_regs_len * 8;
 	}
-	config_length += 4; /* MI_BATCH_BUFFER_END */
+	config_length += 12; /* MI_BATCH_BUFFER_START into noa_wait loop */
 	config_length = ALIGN(config_length, I915_GTT_PAGE_SIZE);
 
 	oa_bo->bo = i915_gem_object_create_shmem(i915, config_length);
@@ -455,7 +457,12 @@ static struct i915_oa_config_bo* alloc_oa_config_buffer(struct drm_i915_private
 	cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len);
 	cs = write_cs_mi_lri(cs, oa_config->flex_regs, oa_config->flex_regs_len);
 
-	*cs++ = MI_BATCH_BUFFER_END;
+
+	/* Jump into the NOA wait busy loop. */
+	*cs++ = (INTEL_GEN(i915) < 8 ?
+		 MI_BATCH_BUFFER_START : MI_BATCH_BUFFER_START_GEN8);
+	*cs++ = i915_ggtt_offset(noa_wait);
+	*cs++ = 0;
 
 	i915_gem_object_flush_map(oa_bo->bo);
 	i915_gem_object_unpin_map(oa_bo->bo);
@@ -550,7 +557,9 @@ int i915_perf_get_oa_config_and_bo(struct i915_perf_stream *stream,
 		mutex_unlock(&stream->config_mutex);
 
 		if (!oa_bo) {
-			oa_bo = alloc_oa_config_buffer(i915, oa_config);
+			oa_bo = alloc_oa_config_buffer(i915,
+						       stream->noa_wait,
+						       oa_config);
 			if (IS_ERR(oa_bo)) {
 				err = PTR_ERR(oa_bo);
 				goto err;
@@ -1524,6 +1533,16 @@ free_oa_buffer(struct i915_perf_stream *stream)
 	stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_noa_wait(struct i915_perf_stream *stream)
+{
+	struct drm_i915_private *i915 = stream->dev_priv;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	i915_vma_unpin_and_release(&stream->noa_wait, 0);
+	mutex_unlock(&i915->drm.struct_mutex);
+}
+
 static void
 free_oa_configs(struct i915_perf_stream *stream)
 {
@@ -1552,6 +1571,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	free_oa_buffer(stream);
+	free_noa_wait(stream);
 
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
@@ -1731,6 +1751,202 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
 	return ret;
 }
 
+static u32 *save_restore_register(struct drm_i915_private *i915, u32 *cs,
+				  bool save, i915_reg_t reg, u32 offset,
+				  u32 dword_count)
+{
+	uint32_t d;
+
+	for (d = 0; d < dword_count; d++) {
+		if (save) {
+			*cs++ = INTEL_GEN(i915) >= 8 ?
+				MI_STORE_REGISTER_MEM_GEN8 :
+				MI_STORE_REGISTER_MEM;
+		} else  {
+			*cs++ = INTEL_GEN(i915) >= 8 ?
+				MI_LOAD_REGISTER_MEM_GEN8 :
+				MI_LOAD_REGISTER_MEM;
+		}
+		*cs++ = i915_mmio_reg_offset(reg) + 4 * d;
+		*cs++ = intel_gt_scratch_offset(&i915->gt, offset) + 4 * d;
+		*cs++ = 0;
+	}
+
+	return cs;
+}
+
+static int alloc_noa_wait(struct i915_perf_stream *stream)
+{
+	struct drm_i915_private *i915 = stream->dev_priv;
+	struct drm_i915_gem_object *bo;
+	struct i915_vma *vma;
+	const u64 delay_ticks = 0xffffffffffffffff -
+		DIV64_U64_ROUND_UP(
+			atomic64_read(&i915->perf.noa_programming_delay) *
+			RUNTIME_INFO(i915)->cs_timestamp_frequency_khz,
+			1000000ull);
+	u32 *batch, *ts0, *cs, *jump;
+	int ret, i;
+	enum { START_TS, NOW_TS, DELTA_TS, JUMP_PREDICATE, DELTA_TARGET, N_CS_GPR };
+
+	bo = i915_gem_object_create_internal(i915, 4096);
+	if (IS_ERR(bo)) {
+		DRM_ERROR("Failed to allocate NOA wait batchbuffer\n");
+		return PTR_ERR(bo);
+	}
+
+	/*
+	 * We pin in GGTT because we jump into this buffer now because
+	 * multiple OA config BOs will have a jump to this address and it
+	 * needs to be fixed during the lifetime of the i915/perf stream.
+	 */
+	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 4096, 0);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unref;
+	}
+
+	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
+	if (IS_ERR(batch)) {
+		ret = PTR_ERR(batch);
+		goto err_unpin;
+	}
+
+	/* Save registers. */
+	for (i = 0; i < N_CS_GPR; i++) {
+		cs = save_restore_register(
+			i915, cs, true /* save */, HSW_CS_GPR(i),
+			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+	}
+	cs = save_restore_register(
+		i915, cs, true /* save */, MI_PREDICATE_RESULT_1,
+		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+
+	/* First timestamp snapshot location. */
+	ts0 = cs;
+
+	/*
+	 * Initial snapshot of the timestamp register to implement the wait.
+	 * We work with 32b values, so clear out the top 32b bits of the
+	 * register because the ALU works 64bits.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(HSW_CS_GPR(START_TS)) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(RENDER_RING_BASE));
+	*cs++ = i915_mmio_reg_offset(HSW_CS_GPR(START_TS));
+
+	/*
+	 * This is the location we're going to jump back into until the
+	 * required amount of time has passed.
+	 */
+	jump = cs;
+
+	/*
+	 * Take another snapshot of the timestamp register. Take care to clear
+	 * up the top 32bits of CS_GPR(1) as we're using it for other
+	 * operations below.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(HSW_CS_GPR(NOW_TS)) + 4;
+	*cs++ = 0;
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(RENDER_RING_BASE));
+	*cs++ = i915_mmio_reg_offset(HSW_CS_GPR(NOW_TS));
+
+	/*
+	 * Do a diff between the 2 timestamps and store the result back into
+	 * CS_GPR(1).
+	 */
+	*cs++ = MI_MATH(5);
+	*cs++ = MI_ALU_OP(MI_ALU_OP_LOAD, MI_ALU_SRC_SRCA, MI_ALU_SRC_REG(NOW_TS));
+	*cs++ = MI_ALU_OP(MI_ALU_OP_LOAD, MI_ALU_SRC_SRCB, MI_ALU_SRC_REG(START_TS));
+	*cs++ = MI_ALU_OP(MI_ALU_OP_SUB, 0, 0);
+	*cs++ = MI_ALU_OP(MI_ALU_OP_STORE, MI_ALU_SRC_REG(DELTA_TS), MI_ALU_SRC_ACCU);
+	*cs++ = MI_ALU_OP(MI_ALU_OP_STORE, MI_ALU_SRC_REG(JUMP_PREDICATE), MI_ALU_SRC_CF);
+
+	/*
+	 * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the
+	 * timestamp have rolled over the 32bits) into the predicate register
+	 * to be used for the predicated jump.
+	 */
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(HSW_CS_GPR(JUMP_PREDICATE));
+	*cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+
+	/* Restart from the beginning if we had timestamps roll over. */
+	*cs++ = (INTEL_GEN(i915) < 8 ?
+		 MI_BATCH_BUFFER_START : MI_BATCH_BUFFER_START_GEN8) |
+		MI_BATCH_PREDICATE;
+	*cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
+	*cs++ = 0;
+
+	/*
+	 * Now add the diff between to previous timestamps and add it to :
+	 *      (((1 * << 64) - 1) - delay_ns)
+	 *
+	 * When the Carry Flag contains 1 this means the elapsed time is
+	 * longer than the expected delay, and we can exit the wait loop.
+	 */
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
+	*cs++ = i915_mmio_reg_offset(HSW_CS_GPR(DELTA_TARGET));
+	*cs++ = lower_32_bits(delay_ticks);
+	*cs++ = i915_mmio_reg_offset(HSW_CS_GPR(DELTA_TARGET)) + 4;
+	*cs++ = upper_32_bits(delay_ticks);
+
+	*cs++ = MI_MATH(4);
+	*cs++ = MI_ALU_OP(MI_ALU_OP_LOAD, MI_ALU_SRC_SRCA, MI_ALU_SRC_REG(DELTA_TS));
+	*cs++ = MI_ALU_OP(MI_ALU_OP_LOAD, MI_ALU_SRC_SRCB, MI_ALU_SRC_REG(DELTA_TARGET));
+	*cs++ = MI_ALU_OP(MI_ALU_OP_ADD, 0, 0);
+	*cs++ = MI_ALU_OP(MI_ALU_OP_STOREINV, MI_ALU_SRC_REG(JUMP_PREDICATE), MI_ALU_SRC_CF);
+
+	/*
+	 * Transfer the result into the predicate register to be used for the
+	 * predicated jump.
+	 */
+	*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+	*cs++ = i915_mmio_reg_offset(HSW_CS_GPR(JUMP_PREDICATE));
+	*cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+
+	/* Predicate the jump.  */
+	*cs++ = (INTEL_GEN(i915) < 8 ?
+		 MI_BATCH_BUFFER_START : MI_BATCH_BUFFER_START_GEN8) |
+		MI_BATCH_PREDICATE;
+	*cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
+	*cs++ = 0;
+
+	/* Restore registers. */
+	for (i = 0; i < N_CS_GPR; i++) {
+		cs = save_restore_register(
+			i915, cs, false /* save */, HSW_CS_GPR(i),
+			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+	}
+	cs = save_restore_register(
+		i915, cs, false /* save */, MI_PREDICATE_RESULT_1,
+		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+
+	/* And return to the ring. */
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	GEM_BUG_ON((cs - batch) > (PAGE_SIZE / sizeof(*batch)));
+
+	i915_gem_object_flush_map(bo);
+	i915_gem_object_unpin_map(bo);
+
+	stream->noa_wait = vma;
+
+	return 0;
+
+err_unpin:
+	__i915_vma_unpin(vma);
+
+err_unref:
+	i915_gem_object_put(bo);
+
+	return ret;
+}
+
 static void config_oa_regs(struct drm_i915_private *dev_priv,
 			   const struct i915_oa_reg *regs,
 			   u32 n_regs)
@@ -2403,6 +2619,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		}
 	}
 
+	ret = alloc_noa_wait(stream);
+	if (ret) {
+		DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
+		goto err_noa_wait_alloc;
+	}
+
 	ret = i915_perf_get_oa_config(dev_priv, props->metrics_set,
 				      &stream->oa_config);
 	if (ret) {
@@ -2471,6 +2693,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	free_oa_configs(stream);
 
 err_config:
+	free_noa_wait(stream);
+
+err_noa_wait_alloc:
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
 
@@ -3855,6 +4080,9 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 		ratelimit_set_flags(&dev_priv->perf.spurious_report_rs,
 				    RATELIMIT_MSG_ON_RELEASE);
 
+		atomic64_set(&dev_priv->perf.noa_programming_delay,
+			     500 * 1000 /* 500us */);
+
 		dev_priv->perf.initialized = true;
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3cfdab18c0cf..ddb73678b2b7 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -545,7 +545,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define MI_PREDICATE_SRC0_UDW	_MMIO(0x2400 + 4)
 #define MI_PREDICATE_SRC1	_MMIO(0x2408)
 #define MI_PREDICATE_SRC1_UDW	_MMIO(0x2408 + 4)
-
+#define MI_PREDICATE_DATA       _MMIO(0x2410)
+#define MI_PREDICATE_RESULT     _MMIO(0x2418)
+#define MI_PREDICATE_RESULT_1   _MMIO(0x241c)
 #define MI_PREDICATE_RESULT_2	_MMIO(0x2214)
 #define  LOWER_SLICE_ENABLED	(1 << 0)
 #define  LOWER_SLICE_DISABLED	(0 << 0)
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream
  2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
                   ` (6 preceding siblings ...)
  2019-08-30 14:47 ` [PATCH v12 07/11] drm/i915/perf: implement active wait for noa configurations Lionel Landwerlin
@ 2019-08-30 14:47 ` Lionel Landwerlin
  2019-08-30 15:48   ` Chris Wilson
                     ` (2 more replies)
  2019-08-30 14:47 ` [PATCH v12 09/11] drm/i915: add a new perf configuration execbuf parameter Lionel Landwerlin
                   ` (3 subsequent siblings)
  11 siblings, 3 replies; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 14:47 UTC (permalink / raw)
  To: intel-gfx

We haven't run into issues with programming the global OA/NOA
registers configuration from CPU so far, but HW engineers actually
recommend doing this from the command streamer. On TGL in particular
one of the clock domain in which some of that programming goes might
not be powered when we poke things from the CPU.

Since we have a command buffer prepared for the execbuffer side of
things, we can reuse that approach here too.

This also allows us to significantly reduce the amount of time we hold
the main lock.

v2: Drop the global lock as much as possible

v3: Take global lock to pin global

v4: Create i915 request in emit_oa_config() to avoid deadlocks (Lionel)

v5: Move locking to the stream (Lionel)

v6: Move active reconfiguration request into i915_perf_stream (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  13 ++-
 drivers/gpu/drm/i915/i915_perf.c | 166 ++++++++++++++++++++-----------
 2 files changed, 118 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 20b6a7023097..084cdd115d5a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1129,7 +1129,8 @@ struct i915_perf_stream {
 	const struct i915_perf_stream_ops *ops;
 
 	/**
-	 * @active_config_mutex: Protects access to @oa_config & @oa_config_bos.
+	 * @active_config_mutex: Protects access to @active_config_rq,
+	 * @oa_config & @oa_config_bos.
 	 */
 	struct mutex config_mutex;
 
@@ -1144,6 +1145,16 @@ struct i915_perf_stream {
 	 */
 	struct list_head oa_config_bos;
 
+	/**
+	 * @active_config_rq: Last request reconfiguring the HW.
+	 */
+	struct i915_active_request active_config_rq;
+
+	/**
+	 * @initial_oa_config_bo: First OA configuration BO to be run.
+	 */
+	struct drm_i915_gem_object *initial_oa_config_bo;
+
 	/**
 	 * The OA context specific information.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index d0764852b347..2bc24a82f897 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1558,18 +1558,23 @@ free_oa_configs(struct i915_perf_stream *stream)
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
+	int err;
 
 	BUG_ON(stream != dev_priv->perf.exclusive_stream);
 
-	/*
-	 * Unset exclusive_stream first, it will be checked while disabling
-	 * the metric set on gen8+.
-	 */
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	dev_priv->perf.exclusive_stream = NULL;
+	mutex_lock(&stream->config_mutex);
 	dev_priv->perf.ops.disable_metric_set(stream);
+	err = i915_active_request_retire(&stream->active_config_rq,
+					 &stream->config_mutex);
+	mutex_unlock(&stream->config_mutex);
+	dev_priv->perf.exclusive_stream = NULL;
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	if (err)
+		DRM_ERROR("Failed to disable perf stream\n");
+
+
 	free_oa_buffer(stream);
 	free_noa_wait(stream);
 
@@ -1795,6 +1800,10 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
 		return PTR_ERR(bo);
 	}
 
+	ret = i915_mutex_lock_interruptible(&i915->drm);
+	if (ret)
+		goto err_unref;
+
 	/*
 	 * We pin in GGTT because we jump into this buffer now because
 	 * multiple OA config BOs will have a jump to this address and it
@@ -1802,10 +1811,13 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
 	 */
 	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 4096, 0);
 	if (IS_ERR(vma)) {
+		mutex_unlock(&i915->drm.struct_mutex);
 		ret = PTR_ERR(vma);
 		goto err_unref;
 	}
 
+	mutex_unlock(&i915->drm.struct_mutex);
+
 	batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
 	if (IS_ERR(batch)) {
 		ret = PTR_ERR(batch);
@@ -1939,7 +1951,9 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
 	return 0;
 
 err_unpin:
-	__i915_vma_unpin(vma);
+	mutex_lock(&i915->drm.struct_mutex);
+	i915_vma_unpin_and_release(&vma, 0);
+	mutex_unlock(&i915->drm.struct_mutex);
 
 err_unref:
 	i915_gem_object_put(bo);
@@ -1947,50 +1961,69 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
 	return ret;
 }
 
-static void config_oa_regs(struct drm_i915_private *dev_priv,
-			   const struct i915_oa_reg *regs,
-			   u32 n_regs)
+static int emit_oa_config(struct drm_i915_private *i915,
+			  struct i915_perf_stream *stream)
 {
-	u32 i;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	u32 *cs;
+	int err;
 
-	for (i = 0; i < n_regs; i++) {
-		const struct i915_oa_reg *reg = regs + i;
+	lockdep_assert_held(&stream->config_mutex);
 
-		I915_WRITE(reg->addr, reg->value);
+	rq = i915_request_create(stream->engine->kernel_context);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	err = i915_active_request_set(&stream->active_config_rq,
+				      rq);
+	if (err)
+		goto err_add_request;
+
+	vma = i915_vma_instance(stream->initial_oa_config_bo,
+				&i915->ggtt.vm, NULL);
+	if (unlikely(IS_ERR(vma))) {
+		err = PTR_ERR(vma);
+		goto err_add_request;
 	}
-}
 
-static void delay_after_mux(void)
-{
-	/*
-	 * It apparently takes a fairly long time for a new MUX
-	 * configuration to be be applied after these register writes.
-	 * This delay duration was derived empirically based on the
-	 * render_basic config but hopefully it covers the maximum
-	 * configuration latency.
-	 *
-	 * As a fallback, the checks in _append_oa_reports() to skip
-	 * invalid OA reports do also seem to work to discard reports
-	 * generated before this config has completed - albeit not
-	 * silently.
-	 *
-	 * Unfortunately this is essentially a magic number, since we
-	 * don't currently know of a reliable mechanism for predicting
-	 * how long the MUX config will take to apply and besides
-	 * seeing invalid reports we don't know of a reliable way to
-	 * explicitly check that the MUX config has landed.
-	 *
-	 * It's even possible we've miss characterized the underlying
-	 * problem - it just seems like the simplest explanation why
-	 * a delay at this location would mitigate any invalid reports.
-	 */
-	usleep_range(15000, 20000);
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+	if (err)
+		goto err_add_request;
+
+	err = i915_vma_move_to_active(vma, rq, 0);
+	if (err)
+		goto err_vma_unpin;
+
+	cs = intel_ring_begin(rq, INTEL_GEN(i915) >= 8 ? 4 : 2);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_vma_unpin;
+	}
+
+	if (INTEL_GEN(i915) > 8) {
+		*cs++ = MI_BATCH_BUFFER_START_GEN8;
+		*cs++ = lower_32_bits(vma->node.start);
+		*cs++ = upper_32_bits(vma->node.start);
+		*cs++ = MI_NOOP;
+	} else {
+		*cs++ = MI_BATCH_BUFFER_START;
+		*cs++ = vma->node.start;
+	}
+
+	intel_ring_advance(rq, cs);
+
+err_vma_unpin:
+	i915_vma_unpin(vma);
+err_add_request:
+	i915_request_add(rq);
+
+	return err;
 }
 
 static int hsw_enable_metric_set(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
-	const struct i915_oa_config *oa_config = stream->oa_config;
 
 	/*
 	 * PRM:
@@ -2007,13 +2040,7 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream)
 	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
 				  GEN6_CSUNIT_CLOCK_GATE_DISABLE));
 
-	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
-	delay_after_mux();
-
-	config_oa_regs(dev_priv, oa_config->b_counter_regs,
-		       oa_config->b_counter_regs_len);
-
-	return 0;
+	return emit_oa_config(dev_priv, stream);
 }
 
 static void hsw_disable_metric_set(struct i915_perf_stream *stream)
@@ -2372,13 +2399,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
 	if (ret)
 		return ret;
 
-	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
-	delay_after_mux();
-
-	config_oa_regs(dev_priv, oa_config->b_counter_regs,
-		       oa_config->b_counter_regs_len);
-
-	return 0;
+	return emit_oa_config(dev_priv, stream);
 }
 
 static void gen8_disable_metric_set(struct i915_perf_stream *stream)
@@ -2597,6 +2618,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 
 	stream->engine = props->engine;
 
+	mutex_init(&stream->config_mutex);
+	INIT_ACTIVE_REQUEST(&stream->active_config_rq,
+			    &stream->config_mutex);
+
 	stream->sample_flags |= SAMPLE_OA_REPORT;
 	stream->sample_size += format_size;
 
@@ -2625,8 +2650,9 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		goto err_noa_wait_alloc;
 	}
 
-	ret = i915_perf_get_oa_config(dev_priv, props->metrics_set,
-				      &stream->oa_config);
+	ret = i915_perf_get_oa_config_and_bo(stream, props->metrics_set,
+					     &stream->oa_config,
+					     &stream->initial_oa_config_bo);
 	if (ret) {
 		DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
 		goto err_config;
@@ -2658,16 +2684,31 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	stream->ops = &i915_oa_stream_ops;
 	dev_priv->perf.exclusive_stream = stream;
 
+	mutex_lock(&stream->config_mutex);
 	ret = dev_priv->perf.ops.enable_metric_set(stream);
 	if (ret) {
 		DRM_DEBUG("Unable to enable metric set\n");
-		goto err_enable;
+		/*
+		 * Ignore the return value since we already have an error from
+		 * the enable vfunc.
+		 */
+		i915_active_request_retire(&stream->active_config_rq,
+					   &stream->config_mutex);
+	} else {
+		ret = i915_active_request_retire(&stream->active_config_rq,
+						 &stream->config_mutex);
 	}
 
-	DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
-
+	mutex_unlock(&stream->config_mutex);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	i915_gem_object_put(stream->initial_oa_config_bo);
+	stream->initial_oa_config_bo = NULL;
+	if (ret)
+		goto err_enable;
+
+	DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
+
 	hrtimer_init(&stream->poll_check_timer,
 		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	stream->poll_check_timer.function = oa_poll_check_timer_cb;
@@ -2677,8 +2718,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	return 0;
 
 err_enable:
-	dev_priv->perf.exclusive_stream = NULL;
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	mutex_lock(&stream->config_mutex);
 	dev_priv->perf.ops.disable_metric_set(stream);
+	mutex_unlock(&stream->config_mutex);
+	dev_priv->perf.exclusive_stream = NULL;
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 err_lock:
@@ -2692,6 +2736,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 
 	free_oa_configs(stream);
 
+	i915_gem_object_put(stream->initial_oa_config_bo);
+
 err_config:
 	free_noa_wait(stream);
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v12 09/11] drm/i915: add a new perf configuration execbuf parameter
  2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
                   ` (7 preceding siblings ...)
  2019-08-30 14:47 ` [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream Lionel Landwerlin
@ 2019-08-30 14:47 ` Lionel Landwerlin
  2019-08-30 15:32   ` Chris Wilson
  2019-08-30 14:47 ` [PATCH v12 10/11] drm/i915/perf: allow holding preemption on filtered ctx Lionel Landwerlin
                   ` (2 subsequent siblings)
  11 siblings, 1 reply; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 14:47 UTC (permalink / raw)
  To: intel-gfx

We want the ability to dispatch a set of command buffer to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
    Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
    Move OA config vma to active before writing the ringbuffer (Chris)

v4: Reuse i915_user_extension_fn
    Serialize requests with OA config updates

v5: Check that the chained extension is only present once (Chris)
    Unpin oa_vma in main path (Chris)

v6: Use BIT_ULL (Chris)

v7: Hold drm.struct_mutex when serializing the request with OA config (Chris)

v8: Remove active request from engine (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 151 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_getparam.c          |   4 +
 include/uapi/drm/i915_drm.h                   |  39 +++++
 3 files changed, 193 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 46ad8d9642d1..0037bf8c2e6f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -24,6 +24,7 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
+#include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
@@ -284,7 +285,13 @@ struct i915_execbuffer {
 	struct {
 		u64 flags; /** Available extensions parameters */
 		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+		struct drm_i915_gem_execbuffer_ext_perf perf_config;
 	} extensions;
+
+	struct file *perf_file;
+	struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. */
+	struct drm_i915_gem_object *oa_bo;
+	struct i915_vma *oa_vma;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1152,6 +1159,46 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
 	return err;
 }
 
+
+static int
+get_execbuf_oa_config(struct i915_execbuffer *eb)
+{
+	int err = 0;
+
+	eb->perf_file = NULL;
+	eb->oa_config = NULL;
+	eb->oa_vma = NULL;
+	eb->oa_bo = NULL;
+
+	if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0)
+		return 0;
+
+	eb->perf_file = fget(eb->extensions.perf_config.perf_fd);
+	if (!eb->perf_file)
+		return -EINVAL;
+
+	err = i915_mutex_lock_interruptible(&eb->i915->drm);
+	if (err) {
+		fput(eb->perf_file);
+		eb->perf_file = NULL;
+		return err;
+	}
+
+	if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream)
+		err = -EINVAL;
+
+	mutex_unlock(&eb->i915->drm.struct_mutex);
+
+	if (!err) {
+		err = i915_perf_get_oa_config_and_bo(
+			eb->i915->perf.exclusive_stream,
+			eb->extensions.perf_config.oa_config,
+			&eb->oa_config, &eb->oa_bo);
+	}
+
+	return err;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct i915_vma *vma,
 			     unsigned int len)
@@ -2051,6 +2098,50 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
 	spin_unlock(&file_priv->mm.lock);
 }
 
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+	struct i915_perf_stream *perf_stream;
+	int err;
+
+	if (!eb->oa_config)
+		return 0;
+
+	perf_stream = eb->perf_file->private_data;
+
+	err = mutex_lock_interruptible(&perf_stream->config_mutex);
+	if (err)
+		return err;
+
+	err = i915_active_request_set(&perf_stream->active_config_rq,
+				      eb->request);
+	if (err)
+		goto out;
+
+	/*
+	 * If the config hasn't changed, skip reconfiguring the HW (this is
+	 * subject to a delay we want to avoid has much as possible).
+	 */
+	if (eb->oa_config == perf_stream->oa_config)
+		goto out;
+
+	err = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
+	if (err)
+		goto out;
+
+	err = eb->engine->emit_bb_start(eb->request,
+					eb->oa_vma->node.start,
+					0, I915_DISPATCH_SECURE);
+	if (err)
+		goto out;
+
+	swap(eb->oa_config, perf_stream->oa_config);
+
+out:
+	mutex_unlock(&perf_stream->config_mutex);
+
+	return err;
+}
+
 static int eb_submit(struct i915_execbuffer *eb)
 {
 	int err;
@@ -2077,6 +2168,10 @@ static int eb_submit(struct i915_execbuffer *eb)
 			return err;
 	}
 
+	err = eb_oa_config(eb);
+	if (err)
+		return err;
+
 	err = eb->engine->emit_bb_start(eb->request,
 					eb->batch->node.start +
 					eb->batch_start_offset,
@@ -2643,8 +2738,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
 	return 0;
 }
 
+static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
+{
+	struct i915_execbuffer *eb = data;
+
+	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
+		return -EINVAL;
+
+	if (copy_from_user(&eb->extensions.perf_config, ext,
+			   sizeof(eb->extensions.perf_config)))
+		return -EFAULT;
+
+	eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
+
+	return 0;
+}
+
 static const i915_user_extension_fn execbuf_extensions[] = {
         [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
+        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
 };
 
 static int
@@ -2755,10 +2867,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
-	err = eb_create(&eb);
+	err = get_execbuf_oa_config(&eb);
 	if (err)
 		goto err_out_fence;
 
+	err = eb_create(&eb);
+	if (err)
+		goto err_oa_config;
+
 	GEM_BUG_ON(!eb.lut_size);
 
 	err = eb_select_context(&eb);
@@ -2769,6 +2885,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_context;
 
+	if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+		struct i915_perf_stream *perf_stream =
+			eb.perf_file->private_data;
+
+		if (perf_stream->engine != eb.engine) {
+			err = -EINVAL;
+			goto err_engine;
+		}
+	}
+
 	err = i915_mutex_lock_interruptible(dev);
 	if (err)
 		goto err_engine;
@@ -2889,6 +3015,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		}
 	}
 
+	if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+		eb.oa_vma = i915_vma_instance(eb.oa_bo,
+					      &eb.engine->i915->ggtt.vm, NULL);
+		if (unlikely(IS_ERR(eb.oa_vma))) {
+			err = PTR_ERR(eb.oa_vma);
+			eb.oa_vma = NULL;
+			goto err_request;
+		}
+
+		err = i915_vma_pin(eb.oa_vma, 0, 0, PIN_GLOBAL);
+		if (err)
+			goto err_request;
+	}
+
 	/*
 	 * Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
@@ -2935,6 +3075,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	i915_gem_context_put(eb.gem_context);
 err_destroy:
 	eb_destroy(&eb);
+err_oa_config:
+	if (eb.perf_file)
+		fput(eb.perf_file);
+	if (eb.oa_config) {
+		i915_gem_object_put(eb.oa_bo);
+		i915_oa_config_put(eb.oa_config);
+	}
+	if (eb.oa_vma)
+		i915_vma_unpin(eb.oa_vma);
 err_out_fence:
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index bd41cc5ce906..39d4c2c2e0f4 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -161,6 +161,10 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_PERF_REVISION:
 		value = i915_perf_ioctl_version();
 		break;
+	case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+		/* Obviously requires perf support. */
+		value = i915->perf.initialized;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e98c9a7baa91..3166c9ca85f3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -624,6 +624,16 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PERF_REVISION	55
 
+/*
+ * Request an i915/perf performance configuration change before running the
+ * commands given in an execbuf.
+ *
+ * Performance configuration ID and the file descriptor of the i915 perf
+ * stream are given through drm_i915_gem_execbuffer_ext_perf. See
+ * I915_EXEC_EXT.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext {
 	 */
 	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
 
+	/**
+	 * This identifier is associated with
+	 * drm_i915_gem_execbuffer_perf_ext.
+	 */
+	DRM_I915_GEM_EXECBUFFER_EXT_PERF,
+
 	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
 };
 
@@ -1056,6 +1072,29 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences {
 	__u64 values_ptr;
 };
 
+struct drm_i915_gem_execbuffer_ext_perf {
+	struct i915_user_extension base;
+
+	/**
+	 * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
+	 * This is used to identify that the application requesting a HW
+	 * performance configuration change actually has a right to do so
+	 * because it also has access the i915-perf stream.
+	 */
+	__s32 perf_fd;
+
+	/**
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 pad;
+
+	/**
+	 * OA configuration ID to switch to before executing the commands
+	 * associated to the execbuf.
+	 */
+	__u64 oa_config;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v12 10/11] drm/i915/perf: allow holding preemption on filtered ctx
  2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
                   ` (8 preceding siblings ...)
  2019-08-30 14:47 ` [PATCH v12 09/11] drm/i915: add a new perf configuration execbuf parameter Lionel Landwerlin
@ 2019-08-30 14:47 ` Lionel Landwerlin
  2019-08-30 14:47 ` [PATCH v12 11/11] drm/i915: add support for perf configuration queries Lionel Landwerlin
  2019-08-30 16:23 ` ✗ Fi.CI.BUILD: failure for drm/i915: Vulkan performance query support (rev13) Patchwork
  11 siblings, 0 replies; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 14:47 UTC (permalink / raw)
  To: intel-gfx

We would like to make use of perf in Vulkan. The Vulkan API is much
lower level than OpenGL, with applications directly exposed to the
concept of command buffers (pretty much equivalent to our batch
buffers). In Vulkan, queries are always limited in scope to a command
buffer. In OpenGL, the lack of command buffer concept meant that
queries' duration could span multiple command buffers.

With that restriction gone in Vulkan, we would like to simplify
measuring performance just by measuring the deltas between the counter
snapshots written by 2 MI_RECORD_PERF_COUNT commands, rather than the
more complex scheme we currently have in the GL driver, using 2
MI_RECORD_PERF_COUNT commands and doing some post processing on the
stream of OA reports, coming from the global OA buffer, to remove any
unrelated deltas in between the 2 MI_RECORD_PERF_COUNT.

Disabling preemption only apply to a single context with which want to
query performance counters for and is considered a privileged
operation, by default protected by CAP_SYS_ADMIN. It is possible to
enable it for a normal user by disabling the paranoid stream setting.

v2: Store preemption setting in intel_context (Chris)

v3: Use priorities to avoid preemption rather than the HW mechanism

v4: Just modify the port priority reporting function

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  8 +++++
 drivers/gpu/drm/i915/i915_drv.h               |  8 +++++
 drivers/gpu/drm/i915/i915_perf.c              | 31 +++++++++++++++++--
 include/uapi/drm/i915_drm.h                   | 11 +++++++
 4 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 0037bf8c2e6f..3deb3e336f50 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2117,6 +2117,14 @@ static int eb_oa_config(struct i915_execbuffer *eb)
 	if (err)
 		goto out;
 
+	/*
+	 * If the perf stream was opened with hold preemption, flag the
+	 * request properly so that the priority of the request is bumped once
+	 * it reaches the execlist ports.
+	 */
+	if (eb->i915->perf.exclusive_stream->hold_preemption)
+		eb->request->flags |= I915_REQUEST_NOPREEMPT;
+
 	/*
 	 * If the config hasn't changed, skip reconfiguring the HW (this is
 	 * subject to a delay we want to avoid has much as possible).
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 084cdd115d5a..d18e12ada4e1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1122,6 +1122,14 @@ struct i915_perf_stream {
 	 */
 	bool enabled;
 
+	/**
+	 * @hold_preemption: Whether preemption is put on hold for command
+	 * submissions done on the @ctx. This is useful for some drivers that
+	 * cannot easily post process the OA buffer context to subtract delta
+	 * of performance counters not associated with @ctx.
+	 */
+	bool hold_preemption;
+
 	/**
 	 * @ops: The callbacks providing the implementation of this specific
 	 * type of configured stream.
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2bc24a82f897..39681fb43034 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -343,6 +343,8 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
  * struct perf_open_properties - for validated properties given to open a stream
  * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
  * @single_context: Whether a single or all gpu contexts should be monitored
+ * @hold_preemption: Whether the preemption is disabled for the filtered
+ *                   context
  * @ctx_handle: A gem ctx handle for use with @single_context
  * @metrics_set: An ID for an OA unit metric set advertised via sysfs
  * @oa_format: An OA unit HW report format
@@ -357,6 +359,7 @@ struct perf_open_properties {
 	u32 sample_flags;
 
 	u64 single_context:1;
+	u64 hold_preemption:1;
 	u64 ctx_handle;
 
 	/* OA sampling state */
@@ -2629,6 +2632,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	if (WARN_ON(stream->oa_buffer.format_size == 0))
 		return -EINVAL;
 
+	stream->hold_preemption = props->hold_preemption;
+
 	stream->oa_buffer.format =
 		dev_priv->perf.oa_formats[props->oa_format].format;
 
@@ -3182,6 +3187,15 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 		}
 	}
 
+	if (props->hold_preemption) {
+		if (!props->single_context) {
+			DRM_DEBUG("preemption disable with no context\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		privileged_op = true;
+	}
+
 	/*
 	 * On Haswell the OA unit supports clock gating off for a specific
 	 * context and in this mode there's no visibility of metrics for the
@@ -3196,8 +3210,9 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
 	 * enable the OA unit by default.
 	 */
-	if (IS_HASWELL(dev_priv) && specific_ctx)
+	if (IS_HASWELL(dev_priv) && specific_ctx && !props->hold_preemption) {
 		privileged_op = false;
+	}
 
 	/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
 	 * we check a dev.i915.perf_stream_paranoid sysctl option
@@ -3206,7 +3221,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	 */
 	if (privileged_op &&
 	    i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
-		DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n");
+		DRM_DEBUG("Insufficient privileges to open i915 perf stream\n");
 		ret = -EACCES;
 		goto err_ctx;
 	}
@@ -3403,6 +3418,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 			props->oa_periodic = true;
 			props->oa_period_exponent = value;
 			break;
+		case DRM_I915_PERF_PROP_HOLD_PREEMPTION:
+			props->hold_preemption = !!value;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
 			return -EINVAL;
@@ -4168,5 +4186,12 @@ void i915_perf_fini(struct drm_i915_private *dev_priv)
  */
 int i915_perf_ioctl_version(void)
 {
-	return 1;
+	/* 1: initial version
+	 *
+	 * 2: Add DRM_I915_PERF_PROP_HOLD_PREEMPTION parameter to hold
+	 *    preemption on a particular context so that performance data is
+	 *    accessible from a delta of MI_RPC reports without looking at the
+	 *    OA buffer.
+	 */
+	return 2;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3166c9ca85f3..5850d68327ec 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1987,6 +1987,17 @@ enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_OA_EXPONENT,
 
+	/**
+	 * Specifying this property is only valid when specify a context to
+	 * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property
+	 * will hold preemption of the particular context we want to gather
+	 * performance data about. The execbuf2 submissions must include a
+	 * drm_i915_gem_execbuffer_ext_perf parameter for this to apply.
+	 *
+	 * This property is available in perf revision 2.
+	 */
+	DRM_I915_PERF_PROP_HOLD_PREEMPTION,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v12 11/11] drm/i915: add support for perf configuration queries
  2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
                   ` (9 preceding siblings ...)
  2019-08-30 14:47 ` [PATCH v12 10/11] drm/i915/perf: allow holding preemption on filtered ctx Lionel Landwerlin
@ 2019-08-30 14:47 ` Lionel Landwerlin
  2019-09-02 20:08   ` Dan Carpenter
  2019-08-30 16:23 ` ✗ Fi.CI.BUILD: failure for drm/i915: Vulkan performance query support (rev13) Patchwork
  11 siblings, 1 reply; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 14:47 UTC (permalink / raw)
  To: intel-gfx

Listing configurations at the moment is supported only through sysfs.
This might cause issues for applications wanting to list
configurations from a container where sysfs isn't available.

This change adds a way to query the number of configurations and their
content through the i915 query uAPI.

v2: Fix sparse warnings (Lionel)
    Add support to query configuration using uuid (Lionel)

v3: Fix some inconsistency in uapi header (Lionel)
    Fix unlocking when not locked issue (Lionel)
    Add debug messages (Lionel)

v4: Fix missing unlock (Dan)

v5: Drop lock when copying config content to userspace (Chris)

v6: Drop lock when copying config list to userspace (Chris)
    Fix deadlock when calling i915_perf_get_oa_config() under
    perf.metrics_lock (Lionel)
    Add i915_oa_config_get() (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h   |   6 +
 drivers/gpu/drm/i915/i915_perf.c  |   3 +
 drivers/gpu/drm/i915/i915_query.c | 283 ++++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h       |  65 ++++++-
 4 files changed, 354 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d18e12ada4e1..04d538dcc3c1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1704,6 +1704,12 @@ struct drm_i915_private {
 		 */
 		struct idr metrics_idr;
 
+		/*
+		 * Number of dynamic configurations, you need to hold
+		 * dev_priv->perf.metrics_lock to access it.
+		 */
+		u32 n_metrics;
+
 		/*
 		 * Lock associated with anything below within this structure
 		 * except exclusive_stream.
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 39681fb43034..54dba3487dfe 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3910,6 +3910,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
 		goto sysfs_err;
 	}
 
+	dev_priv->perf.n_metrics++;
+
 	mutex_unlock(&dev_priv->perf.metrics_lock);
 
 	DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);
@@ -3970,6 +3972,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 			   &oa_config->sysfs_metric);
 
 	idr_remove(&dev_priv->perf.metrics_idr, *arg);
+	dev_priv->perf.n_metrics--;
 
 	mutex_unlock(&dev_priv->perf.metrics_lock);
 
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index abac5042da2b..89b2821be4a0 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -7,6 +7,7 @@
 #include <linux/nospec.h>
 
 #include "i915_drv.h"
+#include "i915_perf.h"
 #include "i915_query.h"
 #include <uapi/drm/i915_drm.h>
 
@@ -140,10 +141,292 @@ query_engine_info(struct drm_i915_private *i915,
 	return len;
 }
 
+static int can_copy_perf_config_registers_or_number(u32 user_n_regs,
+						    u64 user_regs_ptr,
+						    u32 kernel_n_regs)
+{
+	/*
+	 * We'll just put the number of registers, and won't copy the
+	 * register.
+	 */
+	if (user_n_regs == 0)
+		return 0;
+
+	if (user_n_regs < kernel_n_regs)
+		return -EINVAL;
+
+	if (!access_ok(u64_to_user_ptr(user_regs_ptr),
+		       2 * sizeof(u32) * kernel_n_regs))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int copy_perf_config_registers_or_number(const struct i915_oa_reg *kernel_regs,
+						u32 kernel_n_regs,
+						u64 user_regs_ptr,
+						u32 *user_n_regs)
+{
+	u32 r;
+
+	if (*user_n_regs == 0) {
+		*user_n_regs = kernel_n_regs;
+		return 0;
+	}
+
+	*user_n_regs = kernel_n_regs;
+
+	for (r = 0; r < kernel_n_regs; r++) {
+		u32 __user *user_reg_ptr =
+			u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2);
+		u32 __user *user_val_ptr =
+			u64_to_user_ptr(user_regs_ptr + sizeof(u32) * r * 2 +
+					sizeof(u32));
+		int ret;
+
+		ret = __put_user(i915_mmio_reg_offset(kernel_regs[r].addr),
+				 user_reg_ptr);
+		if (ret)
+			return -EFAULT;
+
+		ret = __put_user(kernel_regs[r].value, user_val_ptr);
+		if (ret)
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int query_perf_config_data(struct drm_i915_private *i915,
+				  struct drm_i915_query_item *query_item,
+				  bool use_uuid)
+{
+	struct drm_i915_query_perf_config __user *user_query_config_ptr =
+		u64_to_user_ptr(query_item->data_ptr);
+	struct drm_i915_perf_oa_config __user *user_config_ptr =
+		u64_to_user_ptr(query_item->data_ptr +
+				sizeof(struct drm_i915_query_perf_config));
+	struct drm_i915_perf_oa_config user_config;
+	struct i915_oa_config *oa_config = NULL;
+	char uuid[UUID_STRING_LEN + 1];
+	u64 config_id;
+	u32 flags, total_size;
+	int ret;
+
+	if (!i915->perf.initialized)
+		return -ENODEV;
+
+	total_size = sizeof(struct drm_i915_query_perf_config) +
+		sizeof(struct drm_i915_perf_oa_config);
+
+	if (query_item->length == 0)
+		return total_size;
+
+	if (query_item->length < total_size) {
+		DRM_DEBUG("Invalid query config data item size=%u expected=%u\n",
+			  query_item->length, total_size);
+		return -EINVAL;
+	}
+
+	if (!access_ok(user_query_config_ptr, total_size))
+		return -EFAULT;
+
+	if (__get_user(flags, &user_query_config_ptr->flags))
+		return -EFAULT;
+
+	if (flags != 0)
+		return -EINVAL;
+
+	if (use_uuid) {
+		BUILD_BUG_ON(sizeof(user_query_config_ptr->uuid) >= sizeof(uuid));
+
+		memset(&uuid, 0, sizeof(uuid));
+		if (__copy_from_user(uuid, user_query_config_ptr->uuid,
+				     sizeof(user_query_config_ptr->uuid)))
+			return -EFAULT;
+	} else {
+		if (__get_user(config_id, &user_query_config_ptr->config)) {
+			return -EFAULT;
+		}
+	}
+
+	if (use_uuid) {
+		struct i915_oa_config *tmp;
+		int id;
+
+		ret = mutex_lock_interruptible(&i915->perf.metrics_lock);
+		if (ret)
+			return ret;
+
+		idr_for_each_entry(&i915->perf.metrics_idr, tmp, id) {
+			if (!strcmp(tmp->uuid, uuid)) {
+				oa_config = i915_oa_config_get(tmp);
+				break;
+			}
+		}
+
+		mutex_unlock(&i915->perf.metrics_lock);
+	} else {
+		ret = i915_perf_get_oa_config(i915, config_id, &oa_config);
+	}
+
+	if (ret || !oa_config)
+		return -ENOENT;
+
+	if (__copy_from_user(&user_config, user_config_ptr,
+			     sizeof(user_config))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = can_copy_perf_config_registers_or_number(user_config.n_boolean_regs,
+						       user_config.boolean_regs_ptr,
+						       oa_config->b_counter_regs_len);
+	if (ret)
+		goto out;
+
+	ret = can_copy_perf_config_registers_or_number(user_config.n_flex_regs,
+						       user_config.flex_regs_ptr,
+						       oa_config->flex_regs_len);
+	if (ret)
+		goto out;
+
+	ret = can_copy_perf_config_registers_or_number(user_config.n_mux_regs,
+						       user_config.mux_regs_ptr,
+						       oa_config->mux_regs_len);
+	if (ret)
+		goto out;
+
+	ret = copy_perf_config_registers_or_number(oa_config->b_counter_regs,
+						   oa_config->b_counter_regs_len,
+						   user_config.boolean_regs_ptr,
+						   &user_config.n_boolean_regs);
+	if (ret)
+		goto out;
+
+	ret = copy_perf_config_registers_or_number(oa_config->flex_regs,
+						   oa_config->flex_regs_len,
+						   user_config.flex_regs_ptr,
+						   &user_config.n_flex_regs);
+	if (ret)
+		goto out;
+
+	ret = copy_perf_config_registers_or_number(oa_config->mux_regs,
+						   oa_config->mux_regs_len,
+						   user_config.mux_regs_ptr,
+						   &user_config.n_mux_regs);
+	if (ret)
+		goto out;
+
+	memcpy(user_config.uuid, oa_config->uuid, sizeof(user_config.uuid));
+
+	if (__copy_to_user(user_config_ptr, &user_config,
+			   sizeof(user_config))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = total_size;
+
+out:
+	i915_oa_config_put(oa_config);
+
+	return ret;
+}
+
+static int query_perf_config_list(struct drm_i915_private *i915,
+				  struct drm_i915_query_item *query_item)
+{
+	struct drm_i915_query_perf_config __user *user_query_config_ptr =
+		u64_to_user_ptr(query_item->data_ptr);
+	struct i915_oa_config *oa_config;
+	u32 flags, total_size;
+	u64 i, n_configs, *oa_config_ids;
+	int ret, id;
+
+	if (!i915->perf.initialized)
+		return -ENODEV;
+
+	/* Count the default test configuration */
+	n_configs = i915->perf.n_metrics + 1;
+	total_size = sizeof(struct drm_i915_query_perf_config) +
+		sizeof(u64) * n_configs;
+
+	if (query_item->length == 0)
+		return total_size;
+
+	if (query_item->length < total_size) {
+		DRM_DEBUG("Invalid query config list item size=%u expected=%u\n",
+			  query_item->length, total_size);
+		return -EINVAL;
+	}
+
+	if (!access_ok(user_query_config_ptr, total_size))
+		return -EFAULT;
+
+	if (__get_user(flags, &user_query_config_ptr->flags))
+		return -EFAULT;
+
+	if (flags != 0)
+		return -EINVAL;
+
+	if (__put_user(n_configs, &user_query_config_ptr->config))
+		return -EFAULT;
+
+	ret = mutex_lock_interruptible(&i915->perf.metrics_lock);
+	if (ret)
+		return ret;
+
+	/* Count the configs. */
+	n_configs = 1;
+	idr_for_each_entry(&i915->perf.metrics_idr, oa_config, id)
+		n_configs++;
+
+	oa_config_ids =
+		kmalloc_array(n_configs, sizeof(*oa_config_ids), GFP_KERNEL);
+	if (!oa_config_ids) {
+		mutex_unlock(&i915->perf.metrics_lock);
+		return -ENOMEM;
+	}
+
+	i = 0;
+	oa_config_ids[i++] = 1ull;
+	idr_for_each_entry(&i915->perf.metrics_idr, oa_config, id)
+		oa_config_ids[i++] = id;
+
+	mutex_unlock(&i915->perf.metrics_lock);
+
+	ret = copy_to_user(u64_to_user_ptr(query_item->data_ptr +
+					   sizeof(struct drm_i915_query_perf_config)),
+			   oa_config_ids,
+			   n_configs * sizeof(*oa_config_ids));
+	kfree(oa_config_ids);
+	if (ret)
+		return ret;
+
+	return total_size;
+}
+
+static int query_perf_config(struct drm_i915_private *i915,
+			     struct drm_i915_query_item *query_item)
+{
+	switch (query_item->flags) {
+	case DRM_I915_QUERY_PERF_CONFIG_LIST:
+		return query_perf_config_list(i915, query_item);
+	case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID:
+		return query_perf_config_data(i915, query_item, true);
+	case DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID:
+		return query_perf_config_data(i915, query_item, false);
+	default:
+		return -EINVAL;
+	}
+}
+
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
 					struct drm_i915_query_item *query_item) = {
 	query_topology_info,
 	query_engine_info,
+	query_perf_config,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5850d68327ec..2e7215989df2 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1037,8 +1037,7 @@ enum drm_i915_gem_execbuffer_ext {
 	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
 
 	/**
-	 * This identifier is associated with
-	 * drm_i915_gem_execbuffer_perf_ext.
+	 * See drm_i915_gem_execbuffer_perf_ext.
 	 */
 	DRM_I915_GEM_EXECBUFFER_EXT_PERF,
 
@@ -2113,6 +2112,7 @@ struct drm_i915_query_item {
 	__u64 query_id;
 #define DRM_I915_QUERY_TOPOLOGY_INFO    1
 #define DRM_I915_QUERY_ENGINE_INFO	2
+#define DRM_I915_QUERY_PERF_CONFIG      3
 /* Must be kept compact -- no holes and well documented */
 
 	/*
@@ -2124,9 +2124,18 @@ struct drm_i915_query_item {
 	__s32 length;
 
 	/*
-	 * Unused for now. Must be cleared to zero.
+	 * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0.
+	 *
+	 * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the
+	 * following :
+	 *         - DRM_I915_QUERY_PERF_CONFIG_LIST
+	 *         - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
+	 *         - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
 	 */
 	__u32 flags;
+#define DRM_I915_QUERY_PERF_CONFIG_LIST          1
+#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2
+#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID   3
 
 	/*
 	 * Data will be written at the location pointed by data_ptr when the
@@ -2252,6 +2261,56 @@ struct drm_i915_query_engine_info {
 	struct drm_i915_engine_info engines[];
 };
 
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_PERF_CONFIG.
+ */
+struct drm_i915_query_perf_config {
+	union {
+		/*
+		 * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets
+		 * this fields to the number of configurations available.
+		 */
+		__u64 n_configs;
+
+		/*
+		 * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID,
+		 * i915 will use the value in this field as configuration
+		 * identifier to decide what data to write into config_ptr.
+		 */
+		__u64 config;
+
+		/*
+		 * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID,
+		 * i915 will use the value in this field as configuration
+		 * identifier to decide what data to write into config_ptr.
+		 *
+		 * String formatted like "%08x-%04x-%04x-%04x-%012x"
+		 */
+		char uuid[36];
+	};
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 will
+	 * write an array of __u64 of configuration identifiers.
+	 *
+	 * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA, i915 will
+	 * write a struct drm_i915_perf_oa_config. If the following fields of
+	 * drm_i915_perf_oa_config are set not set to 0, i915 will write into
+	 * the associated pointers the values of submitted when the
+	 * configuration was created :
+	 *
+	 *         - n_mux_regs
+	 *         - n_boolean_regs
+	 *         - n_flex_regs
+	 */
+	__u8 data[];
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v12 09/11] drm/i915: add a new perf configuration execbuf parameter
  2019-08-30 14:47 ` [PATCH v12 09/11] drm/i915: add a new perf configuration execbuf parameter Lionel Landwerlin
@ 2019-08-30 15:32   ` Chris Wilson
  2019-08-30 21:03     ` Lionel Landwerlin
  0 siblings, 1 reply; 23+ messages in thread
From: Chris Wilson @ 2019-08-30 15:32 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

Quoting Lionel Landwerlin (2019-08-30 15:47:24)
> +static int
> +get_execbuf_oa_config(struct i915_execbuffer *eb)
> +{
> +       int err = 0;
> +
> +       eb->perf_file = NULL;
> +       eb->oa_config = NULL;
> +       eb->oa_vma = NULL;
> +       eb->oa_bo = NULL;
> +
> +       if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0)
> +               return 0;
> +
> +       eb->perf_file = fget(eb->extensions.perf_config.perf_fd);
> +       if (!eb->perf_file)
> +               return -EINVAL;
> +
> +       err = i915_mutex_lock_interruptible(&eb->i915->drm);
> +       if (err) {
> +               fput(eb->perf_file);
> +               eb->perf_file = NULL;
> +               return err;
> +       }
> +
> +       if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream)
> +               err = -EINVAL;
> +
> +       mutex_unlock(&eb->i915->drm.struct_mutex);

So why can i915->perf.execlusive_stream not change after this point?

> +
> +       if (!err) {
> +               err = i915_perf_get_oa_config_and_bo(
> +                       eb->i915->perf.exclusive_stream,
> +                       eb->extensions.perf_config.oa_config,
> +                       &eb->oa_config, &eb->oa_bo);
> +       }
> +
> +       return err;
> +}
> +
>  static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>                              struct i915_vma *vma,
>                              unsigned int len)
> @@ -2051,6 +2098,50 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
>         spin_unlock(&file_priv->mm.lock);
>  }
>  
> +static int eb_oa_config(struct i915_execbuffer *eb)
> +{
> +       struct i915_perf_stream *perf_stream;
> +       int err;
> +
> +       if (!eb->oa_config)
> +               return 0;
> +
> +       perf_stream = eb->perf_file->private_data;
> +
> +       err = mutex_lock_interruptible(&perf_stream->config_mutex);
> +       if (err)
> +               return err;
> +
> +       err = i915_active_request_set(&perf_stream->active_config_rq,
> +                                     eb->request);
> +       if (err)
> +               goto out;
> +
> +       /*
> +        * If the config hasn't changed, skip reconfiguring the HW (this is
> +        * subject to a delay we want to avoid has much as possible).
> +        */
> +       if (eb->oa_config == perf_stream->oa_config)
> +               goto out;
> +

We need to wait for any exclusive fences in case we migrate this object
in future:

i915_vma_lock(eb->oa_vma);
err = i915_request_await_object(eb->request, eb->oa_vma->obj, false); /* await_resv already! */
if (err = 0)
	err = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
i915_vma_unlock(eb->oa_vma);

Though this raises an interesting point, we do not actually want to emit
a semaphore here (albeit the engine is fixed atm) as we are after the
point where we have declared all semaphore waits completed. (Not an
issue to worry about right now!)

> +       if (err)
> +               goto out;
> +
> +       err = eb->engine->emit_bb_start(eb->request,
> +                                       eb->oa_vma->node.start,
> +                                       0, I915_DISPATCH_SECURE);
> +       if (err)
> +               goto out;
> +
> +       swap(eb->oa_config, perf_stream->oa_config);
> +
> +out:
> +       mutex_unlock(&perf_stream->config_mutex);
> +
> +       return err;
> +}
> +
>  static int eb_submit(struct i915_execbuffer *eb)
>  {
>         int err;
> @@ -2077,6 +2168,10 @@ static int eb_submit(struct i915_execbuffer *eb)
>                         return err;
>         }
>  
> +       err = eb_oa_config(eb);
> +       if (err)
> +               return err;

Ok, definitely needs to be after the waits!

> +
>         err = eb->engine->emit_bb_start(eb->request,
>                                         eb->batch->node.start +
>                                         eb->batch_start_offset,
> @@ -2643,8 +2738,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
>         return 0;
>  }
>  
> +static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
> +{
> +       struct i915_execbuffer *eb = data;
> +
> +       if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
> +               return -EINVAL;
> +
> +       if (copy_from_user(&eb->extensions.perf_config, ext,
> +                          sizeof(eb->extensions.perf_config)))
> +               return -EFAULT;
> +
> +       eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
> +
> +       return 0;
> +}
> +
>  static const i915_user_extension_fn execbuf_extensions[] = {
>          [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
> +        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
>  };
>  
>  static int
> @@ -2755,10 +2867,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>                 }
>         }
>  
> -       err = eb_create(&eb);
> +       err = get_execbuf_oa_config(&eb);
>         if (err)
>                 goto err_out_fence;
>  
> +       err = eb_create(&eb);
> +       if (err)
> +               goto err_oa_config;
> +
>         GEM_BUG_ON(!eb.lut_size);
>  
>         err = eb_select_context(&eb);
> @@ -2769,6 +2885,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>         if (unlikely(err))
>                 goto err_context;
>  
> +       if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
> +               struct i915_perf_stream *perf_stream =
> +                       eb.perf_file->private_data;
> +
> +               if (perf_stream->engine != eb.engine) {
> +                       err = -EINVAL;
> +                       goto err_engine;
> +               }
> +       }
> +
>         err = i915_mutex_lock_interruptible(dev);
>         if (err)
>                 goto err_engine;
> @@ -2889,6 +3015,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>                 }
>         }
>  
> +       if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
> +               eb.oa_vma = i915_vma_instance(eb.oa_bo,
> +                                             &eb.engine->i915->ggtt.vm, NULL);

eb.engine->gt->ggtt.vm

The i915_vma_instance() can be created outside of the struct_mutex, and
once we have the oa_vma, we don't need to keep a oa_bo pointer.

Later we can do the i915_vma_pin() before struct_mutex as well.

Thanks, that's looking a better with regard the plan to eliminate
struct_mutex.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream
  2019-08-30 14:47 ` [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream Lionel Landwerlin
@ 2019-08-30 15:48   ` Chris Wilson
  2019-08-30 21:08     ` Lionel Landwerlin
  2019-09-02 11:17     ` Lionel Landwerlin
  2019-08-30 21:28   ` kbuild test robot
  2019-08-31  4:04   ` kbuild test robot
  2 siblings, 2 replies; 23+ messages in thread
From: Chris Wilson @ 2019-08-30 15:48 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

Quoting Lionel Landwerlin (2019-08-30 15:47:23)
>  err_unpin:
> -       __i915_vma_unpin(vma);
> +       mutex_lock(&i915->drm.struct_mutex);
> +       i915_vma_unpin_and_release(&vma, 0);
> +       mutex_unlock(&i915->drm.struct_mutex);

Strangely unpin_and_release() doesn't need the mutex. But I can clean
that up later.

>  
>  err_unref:
>         i915_gem_object_put(bo);
> @@ -1947,50 +1961,69 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>         return ret;
>  }
>  
> -static void config_oa_regs(struct drm_i915_private *dev_priv,
> -                          const struct i915_oa_reg *regs,
> -                          u32 n_regs)
> +static int emit_oa_config(struct drm_i915_private *i915,
> +                         struct i915_perf_stream *stream)
>  {
> -       u32 i;
> +       struct i915_request *rq;
> +       struct i915_vma *vma;
> +       u32 *cs;
> +       int err;
>  
> -       for (i = 0; i < n_regs; i++) {
> -               const struct i915_oa_reg *reg = regs + i;
> +       lockdep_assert_held(&stream->config_mutex);
>  
> -               I915_WRITE(reg->addr, reg->value);
> +       rq = i915_request_create(stream->engine->kernel_context);
> +       if (IS_ERR(rq))
> +               return PTR_ERR(rq);
> +
> +       err = i915_active_request_set(&stream->active_config_rq,
> +                                     rq);
> +       if (err)
> +               goto err_add_request;
> +
> +       vma = i915_vma_instance(stream->initial_oa_config_bo,
> +                               &i915->ggtt.vm, NULL);

Safer with stream->engine->gt->ggtt.vm

> +       if (unlikely(IS_ERR(vma))) {
> +               err = PTR_ERR(vma);
> +               goto err_add_request;
>         }
> -}
>  
> -static void delay_after_mux(void)
> -{
> -       /*
> -        * It apparently takes a fairly long time for a new MUX
> -        * configuration to be be applied after these register writes.
> -        * This delay duration was derived empirically based on the
> -        * render_basic config but hopefully it covers the maximum
> -        * configuration latency.
> -        *
> -        * As a fallback, the checks in _append_oa_reports() to skip
> -        * invalid OA reports do also seem to work to discard reports
> -        * generated before this config has completed - albeit not
> -        * silently.
> -        *
> -        * Unfortunately this is essentially a magic number, since we
> -        * don't currently know of a reliable mechanism for predicting
> -        * how long the MUX config will take to apply and besides
> -        * seeing invalid reports we don't know of a reliable way to
> -        * explicitly check that the MUX config has landed.
> -        *
> -        * It's even possible we've miss characterized the underlying
> -        * problem - it just seems like the simplest explanation why
> -        * a delay at this location would mitigate any invalid reports.
> -        */
> -       usleep_range(15000, 20000);
> +       err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
> +       if (err)
> +               goto err_add_request;

Don't pin inside a request. do the pining before i915_request_create().
One day, we may need to allocate a request to do the pin.

Be safe,

i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, 0);
(yes, we need a better wrapper here)
if (err)
> +       err = i915_vma_move_to_active(vma, rq, 0);
i915_vma_unlock(vma);
> +       if (err)
> +               goto err_vma_unpin;
> +


> @@ -2658,16 +2684,31 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>         stream->ops = &i915_oa_stream_ops;
>         dev_priv->perf.exclusive_stream = stream;
>  
> +       mutex_lock(&stream->config_mutex);
>         ret = dev_priv->perf.ops.enable_metric_set(stream);
>         if (ret) {
>                 DRM_DEBUG("Unable to enable metric set\n");
> -               goto err_enable;
> +               /*
> +                * Ignore the return value since we already have an error from
> +                * the enable vfunc.
> +                */
> +               i915_active_request_retire(&stream->active_config_rq,
> +                                          &stream->config_mutex);
> +       } else {
> +               ret = i915_active_request_retire(&stream->active_config_rq,
> +                                                &stream->config_mutex);

This function doesn't exist anymore. It's basically just waiting for the
old request. Why do you need it? (Your request flow is otherwise ordered.)

> -       DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
> -
> +       mutex_unlock(&stream->config_mutex);
>         mutex_unlock(&dev_priv->drm.struct_mutex);
>  
> +       i915_gem_object_put(stream->initial_oa_config_bo);
> +       stream->initial_oa_config_bo = NULL;
> +       if (ret)
> +               goto err_enable;

Is it because of this err that may end up freeing the stream? I would
expect a similar wait-before-free on stream destroy. In which case I
would have the active request hold a reference on the stream. (And you
might find it more convenient to use i915_active rather than the raw
i915_active_request. i915_active is geared to tracking multiple
timelines, so definitely overkill for you use case, just has more
utility/mid-layer! built in)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* ✗ Fi.CI.BUILD: failure for drm/i915: Vulkan performance query support (rev13)
  2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
                   ` (10 preceding siblings ...)
  2019-08-30 14:47 ` [PATCH v12 11/11] drm/i915: add support for perf configuration queries Lionel Landwerlin
@ 2019-08-30 16:23 ` Patchwork
  11 siblings, 0 replies; 23+ messages in thread
From: Patchwork @ 2019-08-30 16:23 UTC (permalink / raw)
  To: Zhou, David(ChunMing); +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Vulkan performance query support (rev13)
URL   : https://patchwork.freedesktop.org/series/60916/
State : failure

== Summary ==

CALL    scripts/checksyscalls.sh
  CALL    scripts/atomic/check-atomics.sh
  DESCEND  objtool
  CHK     include/generated/compile.h
  AR      drivers/gpu/drm/i915/built-in.a
  CC      drivers/gpu/drm/i915/i915_perf.h.s
In file included from <command-line>:0:0:
./drivers/gpu/drm/i915/i915_perf.h: In function ‘i915_oa_config_get’:
./drivers/gpu/drm/i915/i915_perf.h:47:21: error: dereferencing pointer to incomplete type ‘struct i915_oa_config’
  kref_get(&oa_config->ref);
                     ^~
scripts/Makefile.build:308: recipe for target 'drivers/gpu/drm/i915/i915_perf.h.s' failed
make[4]: *** [drivers/gpu/drm/i915/i915_perf.h.s] Error 1
scripts/Makefile.build:497: recipe for target 'drivers/gpu/drm/i915' failed
make[3]: *** [drivers/gpu/drm/i915] Error 2
scripts/Makefile.build:497: recipe for target 'drivers/gpu/drm' failed
make[2]: *** [drivers/gpu/drm] Error 2
scripts/Makefile.build:497: recipe for target 'drivers/gpu' failed
make[1]: *** [drivers/gpu] Error 2
Makefile:1083: recipe for target 'drivers' failed
make: *** [drivers] Error 2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v12 04/11] drm/i915/perf: store the associated engine of a stream
  2019-08-30 14:47 ` [PATCH v12 04/11] drm/i915/perf: store the associated engine of a stream Lionel Landwerlin
@ 2019-08-30 20:59   ` kbuild test robot
  0 siblings, 0 replies; 23+ messages in thread
From: kbuild test robot @ 2019-08-30 20:59 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 20641 bytes --]

Hi Lionel,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[cannot apply to v5.3-rc6 next-20190830]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Lionel-Landwerlin/drm-i915-Vulkan-performance-query-support/20190831-033234
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
reproduce: make htmldocs

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   include/linux/netdevice.h:2040: warning: Function parameter or member 'xdp_prog' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'gro_flush_timeout' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'nf_hooks_ingress' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member '____cacheline_aligned_in_smp' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'qdisc_hash' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'xps_cpus_map' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'xps_rxqs_map' not described in 'net_device'
   include/linux/phylink.h:56: warning: Function parameter or member '__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising' not described in 'phylink_link_state'
   include/linux/phylink.h:56: warning: Function parameter or member '__ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising' not described in 'phylink_link_state'
   drivers/net/phy/phylink.c:593: warning: Function parameter or member 'config' not described in 'phylink_create'
   drivers/net/phy/phylink.c:593: warning: Excess function parameter 'ndev' description in 'phylink_create'
   include/net/mac80211.h:2006: warning: Function parameter or member 'txpwr' not described in 'ieee80211_sta'
   Error: Cannot open file drivers/dma-buf/reservation.c
   Error: Cannot open file drivers/dma-buf/reservation.c
   Error: Cannot open file drivers/dma-buf/reservation.c
   Error: Cannot open file include/linux/reservation.h
   Error: Cannot open file include/linux/reservation.h
   mm/util.c:1: warning: 'get_user_pages_fast' not found
   mm/slab.c:4215: warning: Function parameter or member 'objp' not described in '__ksize'
   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c:335: warning: Excess function parameter 'dev' description in 'amdgpu_gem_prime_export'
   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c:336: warning: Excess function parameter 'dev' description in 'amdgpu_gem_prime_export'
   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c:142: warning: Function parameter or member 'blockable' not described in 'amdgpu_mn_read_lock'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:347: warning: cannot understand function prototype: 'struct amdgpu_vm_pt_cursor '
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:348: warning: cannot understand function prototype: 'struct amdgpu_vm_pt_cursor '
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:494: warning: Function parameter or member 'start' not described in 'amdgpu_vm_pt_first_dfs'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:546: warning: Function parameter or member 'adev' not described in 'for_each_amdgpu_vm_pt_dfs_safe'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:546: warning: Function parameter or member 'vm' not described in 'for_each_amdgpu_vm_pt_dfs_safe'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:546: warning: Function parameter or member 'start' not described in 'for_each_amdgpu_vm_pt_dfs_safe'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:546: warning: Function parameter or member 'cursor' not described in 'for_each_amdgpu_vm_pt_dfs_safe'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:546: warning: Function parameter or member 'entry' not described in 'for_each_amdgpu_vm_pt_dfs_safe'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:823: warning: Function parameter or member 'level' not described in 'amdgpu_vm_bo_param'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'params' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'bo' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'level' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'pe' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'addr' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'count' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'incr' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'flags' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:2823: warning: Function parameter or member 'pasid' not described in 'amdgpu_vm_make_compute'
   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c:378: warning: Excess function parameter 'entry' description in 'amdgpu_irq_dispatch'
   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c:379: warning: Function parameter or member 'ih' not described in 'amdgpu_irq_dispatch'
   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c:379: warning: Excess function parameter 'entry' description in 'amdgpu_irq_dispatch'
   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c:1: warning: no structured comments found
   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:1: warning: no structured comments found
   drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c:1: warning: 'pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie' not found
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:132: warning: Incorrect use of kernel-doc format:          * @atomic_obj
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:238: warning: Incorrect use of kernel-doc format:          * gpu_info FW provided soc bounding box struct or 0 if not
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'atomic_obj' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'backlight_link' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'backlight_caps' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'freesync_module' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'fw_dmcu' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'dmcu_fw_version' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'soc_bounding_box' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c:1: warning: 'register_hpd_handlers' not found
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c:1: warning: 'dm_crtc_high_irq' not found
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c:1: warning: 'dm_pflip_high_irq' not found
   include/drm/drm_modeset_helper_vtables.h:1053: warning: Function parameter or member 'prepare_writeback_job' not described in 'drm_connector_helper_funcs'
   include/drm/drm_modeset_helper_vtables.h:1053: warning: Function parameter or member 'cleanup_writeback_job' not described in 'drm_connector_helper_funcs'
   include/drm/drm_atomic_state_helper.h:1: warning: no structured comments found
   include/drm/drm_gem_shmem_helper.h:87: warning: Function parameter or member 'madv' not described in 'drm_gem_shmem_object'
   include/drm/drm_gem_shmem_helper.h:87: warning: Function parameter or member 'madv_list' not described in 'drm_gem_shmem_object'
   drivers/gpu/drm/i915/display/intel_dpll_mgr.h:158: warning: Enum value 'DPLL_ID_TGL_MGPLL5' not described in enum 'intel_dpll_id'
   drivers/gpu/drm/i915/display/intel_dpll_mgr.h:158: warning: Enum value 'DPLL_ID_TGL_MGPLL6' not described in enum 'intel_dpll_id'
   drivers/gpu/drm/i915/display/intel_dpll_mgr.h:158: warning: Excess enum value 'DPLL_ID_TGL_TCPLL6' description in 'intel_dpll_id'
   drivers/gpu/drm/i915/display/intel_dpll_mgr.h:158: warning: Excess enum value 'DPLL_ID_TGL_TCPLL5' description in 'intel_dpll_id'
   drivers/gpu/drm/i915/display/intel_dpll_mgr.h:342: warning: Function parameter or member 'wakeref' not described in 'intel_shared_dpll'
   drivers/gpu/drm/i915/i915_drv.h:1135: warning: Incorrect use of kernel-doc format:          * The OA context specific information.
   drivers/gpu/drm/i915/i915_drv.h:1149: warning: Incorrect use of kernel-doc format:          * State of the OA buffer.
   drivers/gpu/drm/i915/i915_drv.h:1160: warning: Incorrect use of kernel-doc format:                  * Locks reads and writes to all head/tail state
   drivers/gpu/drm/i915/i915_drv.h:1182: warning: Incorrect use of kernel-doc format:                  * One 'aging' tail pointer and one 'aged' tail pointer ready to
   drivers/gpu/drm/i915/i915_drv.h:1194: warning: Incorrect use of kernel-doc format:                  * Index for the aged tail ready to read() data up to.
   drivers/gpu/drm/i915/i915_drv.h:1199: warning: Incorrect use of kernel-doc format:                  * A monotonic timestamp for when the current aging tail pointer
   drivers/gpu/drm/i915/i915_drv.h:1205: warning: Incorrect use of kernel-doc format:                  * Although we can always read back the head pointer register,
   drivers/gpu/drm/i915/i915_drv.h:1213: warning: Function parameter or member 'pinned_ctx' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1213: warning: Function parameter or member 'specific_ctx_id' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1213: warning: Function parameter or member 'specific_ctx_id_mask' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1213: warning: Function parameter or member 'poll_check_timer' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1213: warning: Function parameter or member 'poll_wq' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1213: warning: Function parameter or member 'pollin' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1213: warning: Function parameter or member 'periodic' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1213: warning: Function parameter or member 'period_exponent' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1213: warning: Function parameter or member 'oa_buffer' not described in 'i915_perf_stream'
   include/net/cfg80211.h:1092: warning: Function parameter or member 'txpwr' not described in 'station_parameters'
   include/net/mac80211.h:4043: warning: Function parameter or member 'sta_set_txpwr' not described in 'ieee80211_ops'
   drivers/gpu/drm/i915/i915_drv.h:1135: warning: Incorrect use of kernel-doc format:          * The OA context specific information.
   drivers/gpu/drm/i915/i915_drv.h:1149: warning: Incorrect use of kernel-doc format:          * State of the OA buffer.
   drivers/gpu/drm/i915/i915_drv.h:1160: warning: Incorrect use of kernel-doc format:                  * Locks reads and writes to all head/tail state
   drivers/gpu/drm/i915/i915_drv.h:1182: warning: Incorrect use of kernel-doc format:                  * One 'aging' tail pointer and one 'aged' tail pointer ready to
   drivers/gpu/drm/i915/i915_drv.h:1194: warning: Incorrect use of kernel-doc format:                  * Index for the aged tail ready to read() data up to.
   drivers/gpu/drm/i915/i915_drv.h:1199: warning: Incorrect use of kernel-doc format:                  * A monotonic timestamp for when the current aging tail pointer
   drivers/gpu/drm/i915/i915_drv.h:1205: warning: Incorrect use of kernel-doc format:                  * Although we can always read back the head pointer register,
   drivers/gpu/drm/i915/i915_drv.h:1135: warning: Incorrect use of kernel-doc format:          * The OA context specific information.
   drivers/gpu/drm/i915/i915_drv.h:1149: warning: Incorrect use of kernel-doc format:          * State of the OA buffer.
   drivers/gpu/drm/i915/i915_drv.h:1160: warning: Incorrect use of kernel-doc format:                  * Locks reads and writes to all head/tail state
   drivers/gpu/drm/i915/i915_drv.h:1182: warning: Incorrect use of kernel-doc format:                  * One 'aging' tail pointer and one 'aged' tail pointer ready to
   drivers/gpu/drm/i915/i915_drv.h:1194: warning: Incorrect use of kernel-doc format:                  * Index for the aged tail ready to read() data up to.
   drivers/gpu/drm/i915/i915_drv.h:1199: warning: Incorrect use of kernel-doc format:                  * A monotonic timestamp for when the current aging tail pointer
   drivers/gpu/drm/i915/i915_drv.h:1205: warning: Incorrect use of kernel-doc format:                  * Although we can always read back the head pointer register,
>> drivers/gpu/drm/i915/i915_perf.c:369: warning: Function parameter or member 'engine' not described in 'perf_open_properties'
   drivers/gpu/drm/mcde/mcde_drv.c:1: warning: 'ST-Ericsson MCDE DRM Driver' not found
   Documentation/admin-guide/xfs.rst:257: WARNING: Block quote ends without a blank line; unexpected unindent.
   include/uapi/linux/firewire-cdev.h:312: WARNING: Inline literal start-string without end-string.
   drivers/firewire/core-transaction.c:606: WARNING: Inline strong start-string without end-string.
   Documentation/trace/kprobetrace.rst:99: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   Documentation/translations/it_IT/process/maintainer-pgp-guide.rst:458: WARNING: Unknown target name: "nitrokey pro".
   Documentation/security/keys/core.rst:1110: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1110: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1108: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1108: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1108: WARNING: Inline emphasis start-string without end-string.
   drivers/message/fusion/mptbase.c:5057: WARNING: Definition list ends without a blank line; unexpected unindent.
   drivers/tty/serial/serial_core.c:1964: WARNING: Definition list ends without a blank line; unexpected unindent.
   include/linux/i2c.h:522: WARNING: Inline strong start-string without end-string.
   include/linux/spi/spi.h:382: WARNING: Unexpected indentation.
   include/linux/regulator/driver.h:284: WARNING: Unknown target name: "regulator_regmap_x_voltage".
   fs/seq_file.c:40: WARNING: Inline strong start-string without end-string.
   fs/seq_file.c:40: WARNING: Inline strong start-string without end-string.
   fs/seq_file.c:40: WARNING: Inline strong start-string without end-string.
   fs/seq_file.c:40: WARNING: Inline strong start-string without end-string.
   fs/posix_acl.c:636: WARNING: Inline emphasis start-string without end-string.
   fs/debugfs/inode.c:399: WARNING: Inline literal start-string without end-string.
   fs/debugfs/inode.c:478: WARNING: Inline literal start-string without end-string.
   fs/debugfs/inode.c:510: WARNING: Inline literal start-string without end-string.
   fs/debugfs/inode.c:603: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:394: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:400: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:439: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:445: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:484: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:490: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:530: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:536: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:578: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:584: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:845: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:851: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:898: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:904: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:1090: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:1096: WARNING: Inline literal start-string without end-string.
   drivers/ata/libata-core.c:5945: WARNING: Unknown target name: "hw".
   include/linux/netdevice.h:3482: WARNING: Inline emphasis start-string without end-string.
   include/linux/netdevice.h:3482: WARNING: Inline emphasis start-string without end-string.
   net/core/dev.c:5008: WARNING: Unknown target name: "page_is".
   WARNING: kernel-doc 'scripts/kernel-doc -rst -enable-lineno -function Reservation Object Overview drivers/dma-buf/reservation.c' failed with return code 1
   WARNING: kernel-doc 'scripts/kernel-doc -rst -enable-lineno -export drivers/dma-buf/reservation.c' failed with return code 2
   WARNING: kernel-doc 'scripts/kernel-doc -rst -enable-lineno -internal include/linux/reservation.h' failed with return code 2
   Documentation/kbuild/makefiles.rst:1142: WARNING: Inline emphasis start-string without end-string.
   Documentation/kbuild/makefiles.rst:1152: WARNING: Inline emphasis start-string without end-string.
   Documentation/kbuild/makefiles.rst:1154: WARNING: Inline emphasis start-string without end-string.
   Documentation/index.rst:94: WARNING: toctree contains reference to nonexisting document 'virtual/index'
   include/linux/xarray.h:232: WARNING: Unexpected indentation.
   Documentation/crypto/crypto_engine.rst:2: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:245: WARNING: Unexpected indentation.
   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:248: WARNING: Block quote ends without a blank line; unexpected unindent.
   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:211: WARNING: Unexpected indentation.
   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:222: WARNING: Unexpected indentation.
   drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c:2199: WARNING: Inline emphasis start-string without end-string.
   drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c:2201: WARNING: Inline emphasis start-string without end-string.
   Documentation/virt/index.rst: WARNING: document isn't included in any toctree
   include/linux/slab.h:500: WARNING: undefined label: memory-allocation (if the link has no caption the label must precede a section header)
   Documentation/gpu/drm-internals.rst:302: WARNING: Could not lex literal_block as "c". Highlighting skipped.
   WARNING: LaTeX command 'latex' cannot be run (needed for math display), check the imgmath_latex setting
   WARNING: LaTeX command 'latex' cannot be run (needed for math display), check the imgmath_latex setting
   Documentation/trace/kprobetrace.rst:68: WARNING: undefined label: user_mem_access (if the link has no caption the label must precede a section header)
   WARNING: LaTeX command 'latex' cannot be run (needed for math display), check the imgmath_latex setting

vim +369 drivers/gpu/drm/i915/i915_perf.c

eec688e1420da5 Robert Bragg 2016-11-07 @369  

:::::: The code at line 369 was first introduced by commit
:::::: eec688e1420da584afb36ffa5f0cad75f53cf286 drm/i915: Add i915 perf infrastructure

:::::: TO: Robert Bragg <robert@sixbynine.org>
:::::: CC: Daniel Vetter <daniel.vetter@ffwll.ch>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 7282 bytes --]

[-- Attachment #3: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v12 09/11] drm/i915: add a new perf configuration execbuf parameter
  2019-08-30 15:32   ` Chris Wilson
@ 2019-08-30 21:03     ` Lionel Landwerlin
  0 siblings, 0 replies; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 21:03 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 30/08/2019 18:32, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-08-30 15:47:24)
>> +static int
>> +get_execbuf_oa_config(struct i915_execbuffer *eb)
>> +{
>> +       int err = 0;
>> +
>> +       eb->perf_file = NULL;
>> +       eb->oa_config = NULL;
>> +       eb->oa_vma = NULL;
>> +       eb->oa_bo = NULL;
>> +
>> +       if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0)
>> +               return 0;
>> +
>> +       eb->perf_file = fget(eb->extensions.perf_config.perf_fd);
>> +       if (!eb->perf_file)
>> +               return -EINVAL;
>> +
>> +       err = i915_mutex_lock_interruptible(&eb->i915->drm);
>> +       if (err) {
>> +               fput(eb->perf_file);
>> +               eb->perf_file = NULL;
>> +               return err;
>> +       }
>> +
>> +       if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream)
>> +               err = -EINVAL;
>> +
>> +       mutex_unlock(&eb->i915->drm.struct_mutex);
> So why can i915->perf.execlusive_stream not change after this point?


It changes only when the last FD reference is dropped, so if we got it 
here there is at least one ref from the app doing the ioctl and we grab 
another ref on it which hold onto until we return.


>
>> +
>> +       if (!err) {
>> +               err = i915_perf_get_oa_config_and_bo(
>> +                       eb->i915->perf.exclusive_stream,
>> +                       eb->extensions.perf_config.oa_config,
>> +                       &eb->oa_config, &eb->oa_bo);
>> +       }
>> +
>> +       return err;
>> +}
>> +
>>   static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
>>                               struct i915_vma *vma,
>>                               unsigned int len)
>> @@ -2051,6 +2098,50 @@ add_to_client(struct i915_request *rq, struct drm_file *file)
>>          spin_unlock(&file_priv->mm.lock);
>>   }
>>   
>> +static int eb_oa_config(struct i915_execbuffer *eb)
>> +{
>> +       struct i915_perf_stream *perf_stream;
>> +       int err;
>> +
>> +       if (!eb->oa_config)
>> +               return 0;
>> +
>> +       perf_stream = eb->perf_file->private_data;
>> +
>> +       err = mutex_lock_interruptible(&perf_stream->config_mutex);
>> +       if (err)
>> +               return err;
>> +
>> +       err = i915_active_request_set(&perf_stream->active_config_rq,
>> +                                     eb->request);
>> +       if (err)
>> +               goto out;
>> +
>> +       /*
>> +        * If the config hasn't changed, skip reconfiguring the HW (this is
>> +        * subject to a delay we want to avoid has much as possible).
>> +        */
>> +       if (eb->oa_config == perf_stream->oa_config)
>> +               goto out;
>> +
> We need to wait for any exclusive fences in case we migrate this object
> in future:
>
> i915_vma_lock(eb->oa_vma);
> err = i915_request_await_object(eb->request, eb->oa_vma->obj, false); /* await_resv already! */
> if (err = 0)
> 	err = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
> i915_vma_unlock(eb->oa_vma);
>
> Though this raises an interesting point, we do not actually want to emit
> a semaphore here (albeit the engine is fixed atm) as we are after the
> point where we have declared all semaphore waits completed. (Not an
> issue to worry about right now!)


If we do that for this VMA, don't we have to do it for any?

If that's happening in a different function, I could put it there.


>
>> +       if (err)
>> +               goto out;
>> +
>> +       err = eb->engine->emit_bb_start(eb->request,
>> +                                       eb->oa_vma->node.start,
>> +                                       0, I915_DISPATCH_SECURE);
>> +       if (err)
>> +               goto out;
>> +
>> +       swap(eb->oa_config, perf_stream->oa_config);
>> +
>> +out:
>> +       mutex_unlock(&perf_stream->config_mutex);
>> +
>> +       return err;
>> +}
>> +
>>   static int eb_submit(struct i915_execbuffer *eb)
>>   {
>>          int err;
>> @@ -2077,6 +2168,10 @@ static int eb_submit(struct i915_execbuffer *eb)
>>                          return err;
>>          }
>>   
>> +       err = eb_oa_config(eb);
>> +       if (err)
>> +               return err;
> Ok, definitely needs to be after the waits!
>
>> +
>>          err = eb->engine->emit_bb_start(eb->request,
>>                                          eb->batch->node.start +
>>                                          eb->batch_start_offset,
>> @@ -2643,8 +2738,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d
>>          return 0;
>>   }
>>   
>> +static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
>> +{
>> +       struct i915_execbuffer *eb = data;
>> +
>> +       if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
>> +               return -EINVAL;
>> +
>> +       if (copy_from_user(&eb->extensions.perf_config, ext,
>> +                          sizeof(eb->extensions.perf_config)))
>> +               return -EFAULT;
>> +
>> +       eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
>> +
>> +       return 0;
>> +}
>> +
>>   static const i915_user_extension_fn execbuf_extensions[] = {
>>           [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
>> +        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
>>   };
>>   
>>   static int
>> @@ -2755,10 +2867,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>                  }
>>          }
>>   
>> -       err = eb_create(&eb);
>> +       err = get_execbuf_oa_config(&eb);
>>          if (err)
>>                  goto err_out_fence;
>>   
>> +       err = eb_create(&eb);
>> +       if (err)
>> +               goto err_oa_config;
>> +
>>          GEM_BUG_ON(!eb.lut_size);
>>   
>>          err = eb_select_context(&eb);
>> @@ -2769,6 +2885,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>          if (unlikely(err))
>>                  goto err_context;
>>   
>> +       if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
>> +               struct i915_perf_stream *perf_stream =
>> +                       eb.perf_file->private_data;
>> +
>> +               if (perf_stream->engine != eb.engine) {
>> +                       err = -EINVAL;
>> +                       goto err_engine;
>> +               }
>> +       }
>> +
>>          err = i915_mutex_lock_interruptible(dev);
>>          if (err)
>>                  goto err_engine;
>> @@ -2889,6 +3015,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>>                  }
>>          }
>>   
>> +       if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
>> +               eb.oa_vma = i915_vma_instance(eb.oa_bo,
>> +                                             &eb.engine->i915->ggtt.vm, NULL);
> eb.engine->gt->ggtt.vm


Thanks!


>
> The i915_vma_instance() can be created outside of the struct_mutex, and
> once we have the oa_vma, we don't need to keep a oa_bo pointer.
>
> Later we can do the i915_vma_pin() before struct_mutex as well.
>
> Thanks, that's looking a better with regard the plan to eliminate
> struct_mutex.
> -Chris
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream
  2019-08-30 15:48   ` Chris Wilson
@ 2019-08-30 21:08     ` Lionel Landwerlin
  2019-09-02 11:17     ` Lionel Landwerlin
  1 sibling, 0 replies; 23+ messages in thread
From: Lionel Landwerlin @ 2019-08-30 21:08 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 30/08/2019 18:48, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-08-30 15:47:23)
>>   err_unpin:
>> -       __i915_vma_unpin(vma);
>> +       mutex_lock(&i915->drm.struct_mutex);
>> +       i915_vma_unpin_and_release(&vma, 0);
>> +       mutex_unlock(&i915->drm.struct_mutex);
> Strangely unpin_and_release() doesn't need the mutex. But I can clean
> that up later.
>
>>   
>>   err_unref:
>>          i915_gem_object_put(bo);
>> @@ -1947,50 +1961,69 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>>          return ret;
>>   }
>>   
>> -static void config_oa_regs(struct drm_i915_private *dev_priv,
>> -                          const struct i915_oa_reg *regs,
>> -                          u32 n_regs)
>> +static int emit_oa_config(struct drm_i915_private *i915,
>> +                         struct i915_perf_stream *stream)
>>   {
>> -       u32 i;
>> +       struct i915_request *rq;
>> +       struct i915_vma *vma;
>> +       u32 *cs;
>> +       int err;
>>   
>> -       for (i = 0; i < n_regs; i++) {
>> -               const struct i915_oa_reg *reg = regs + i;
>> +       lockdep_assert_held(&stream->config_mutex);
>>   
>> -               I915_WRITE(reg->addr, reg->value);
>> +       rq = i915_request_create(stream->engine->kernel_context);
>> +       if (IS_ERR(rq))
>> +               return PTR_ERR(rq);
>> +
>> +       err = i915_active_request_set(&stream->active_config_rq,
>> +                                     rq);
>> +       if (err)
>> +               goto err_add_request;
>> +
>> +       vma = i915_vma_instance(stream->initial_oa_config_bo,
>> +                               &i915->ggtt.vm, NULL);
> Safer with stream->engine->gt->ggtt.vm
>
>> +       if (unlikely(IS_ERR(vma))) {
>> +               err = PTR_ERR(vma);
>> +               goto err_add_request;
>>          }
>> -}
>>   
>> -static void delay_after_mux(void)
>> -{
>> -       /*
>> -        * It apparently takes a fairly long time for a new MUX
>> -        * configuration to be be applied after these register writes.
>> -        * This delay duration was derived empirically based on the
>> -        * render_basic config but hopefully it covers the maximum
>> -        * configuration latency.
>> -        *
>> -        * As a fallback, the checks in _append_oa_reports() to skip
>> -        * invalid OA reports do also seem to work to discard reports
>> -        * generated before this config has completed - albeit not
>> -        * silently.
>> -        *
>> -        * Unfortunately this is essentially a magic number, since we
>> -        * don't currently know of a reliable mechanism for predicting
>> -        * how long the MUX config will take to apply and besides
>> -        * seeing invalid reports we don't know of a reliable way to
>> -        * explicitly check that the MUX config has landed.
>> -        *
>> -        * It's even possible we've miss characterized the underlying
>> -        * problem - it just seems like the simplest explanation why
>> -        * a delay at this location would mitigate any invalid reports.
>> -        */
>> -       usleep_range(15000, 20000);
>> +       err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
>> +       if (err)
>> +               goto err_add_request;
> Don't pin inside a request. do the pining before i915_request_create().
> One day, we may need to allocate a request to do the pin.
>
> Be safe,
>
> i915_vma_lock(vma);
> err = i915_request_await_object(rq, vma->obj, 0);
> (yes, we need a better wrapper here)
> if (err)
>> +       err = i915_vma_move_to_active(vma, rq, 0);
> i915_vma_unlock(vma);
>> +       if (err)
>> +               goto err_vma_unpin;
>> +
>
>> @@ -2658,16 +2684,31 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>          stream->ops = &i915_oa_stream_ops;
>>          dev_priv->perf.exclusive_stream = stream;
>>   
>> +       mutex_lock(&stream->config_mutex);
>>          ret = dev_priv->perf.ops.enable_metric_set(stream);
>>          if (ret) {
>>                  DRM_DEBUG("Unable to enable metric set\n");
>> -               goto err_enable;
>> +               /*
>> +                * Ignore the return value since we already have an error from
>> +                * the enable vfunc.
>> +                */
>> +               i915_active_request_retire(&stream->active_config_rq,
>> +                                          &stream->config_mutex);
>> +       } else {
>> +               ret = i915_active_request_retire(&stream->active_config_rq,
>> +                                                &stream->config_mutex);
> This function doesn't exist anymore. It's basically just waiting for the
> old request. Why do you need it? (Your request flow is otherwise ordered.)
>
>> -       DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
>> -
>> +       mutex_unlock(&stream->config_mutex);
>>          mutex_unlock(&dev_priv->drm.struct_mutex);
>>   
>> +       i915_gem_object_put(stream->initial_oa_config_bo);
>> +       stream->initial_oa_config_bo = NULL;
>> +       if (ret)
>> +               goto err_enable;
> Is it because of this err that may end up freeing the stream? I would
> expect a similar wait-before-free on stream destroy.


There meant to be a wait-before-free at destroy. Looks like I screw up 
somewhere...


>   In which case I
> would have the active request hold a reference on the stream.


There is already a refcounting at the FD level.

I need to think about it.


>   (And you
> might find it more convenient to use i915_active rather than the raw
> i915_active_request. i915_active is geared to tracking multiple
> timelines, so definitely overkill for you use case, just has more
> utility/mid-layer! built in)
> -Chris
>
Thanks a lot,


-Lionel

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream
  2019-08-30 14:47 ` [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream Lionel Landwerlin
  2019-08-30 15:48   ` Chris Wilson
@ 2019-08-30 21:28   ` kbuild test robot
  2019-08-31  4:04   ` kbuild test robot
  2 siblings, 0 replies; 23+ messages in thread
From: kbuild test robot @ 2019-08-30 21:28 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 8369 bytes --]

Hi Lionel,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[cannot apply to v5.3-rc6 next-20190830]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Lionel-Landwerlin/drm-i915-Vulkan-performance-query-support/20190831-033234
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-7 (Debian 7.4.0-11) 7.4.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/i915/i915_perf.c: In function 'i915_oa_stream_init':
>> drivers/gpu/drm/i915/i915_perf.c:2695:3: warning: ignoring return value of 'i915_active_request_retire', declared with attribute warn_unused_result [-Wunused-result]
      i915_active_request_retire(&stream->active_config_rq,
      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            &stream->config_mutex);
            ~~~~~~~~~~~~~~~~~~~~~~

vim +/i915_active_request_retire +2695 drivers/gpu/drm/i915/i915_perf.c

  2553	
  2554	/**
  2555	 * i915_oa_stream_init - validate combined props for OA stream and init
  2556	 * @stream: An i915 perf stream
  2557	 * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
  2558	 * @props: The property state that configures stream (individually validated)
  2559	 *
  2560	 * While read_properties_unlocked() validates properties in isolation it
  2561	 * doesn't ensure that the combination necessarily makes sense.
  2562	 *
  2563	 * At this point it has been determined that userspace wants a stream of
  2564	 * OA metrics, but still we need to further validate the combined
  2565	 * properties are OK.
  2566	 *
  2567	 * If the configuration makes sense then we can allocate memory for
  2568	 * a circular OA buffer and apply the requested metric set configuration.
  2569	 *
  2570	 * Returns: zero on success or a negative error code.
  2571	 */
  2572	static int i915_oa_stream_init(struct i915_perf_stream *stream,
  2573				       struct drm_i915_perf_open_param *param,
  2574				       struct perf_open_properties *props)
  2575	{
  2576		struct drm_i915_private *dev_priv = stream->dev_priv;
  2577		int format_size;
  2578		int ret;
  2579	
  2580		/* If the sysfs metrics/ directory wasn't registered for some
  2581		 * reason then don't let userspace try their luck with config
  2582		 * IDs
  2583		 */
  2584		if (!dev_priv->perf.metrics_kobj) {
  2585			DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
  2586			return -EINVAL;
  2587		}
  2588	
  2589		if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
  2590			DRM_DEBUG("Only OA report sampling supported\n");
  2591			return -EINVAL;
  2592		}
  2593	
  2594		if (!dev_priv->perf.ops.enable_metric_set) {
  2595			DRM_DEBUG("OA unit not supported\n");
  2596			return -ENODEV;
  2597		}
  2598	
  2599		/* To avoid the complexity of having to accurately filter
  2600		 * counter reports and marshal to the appropriate client
  2601		 * we currently only allow exclusive access
  2602		 */
  2603		if (dev_priv->perf.exclusive_stream) {
  2604			DRM_DEBUG("OA unit already in use\n");
  2605			return -EBUSY;
  2606		}
  2607	
  2608		if (!props->oa_format) {
  2609			DRM_DEBUG("OA report format not specified\n");
  2610			return -EINVAL;
  2611		}
  2612	
  2613		mutex_init(&stream->config_mutex);
  2614	
  2615		stream->sample_size = sizeof(struct drm_i915_perf_record_header);
  2616	
  2617		format_size = dev_priv->perf.oa_formats[props->oa_format].size;
  2618	
  2619		stream->engine = props->engine;
  2620	
  2621		mutex_init(&stream->config_mutex);
  2622		INIT_ACTIVE_REQUEST(&stream->active_config_rq,
  2623				    &stream->config_mutex);
  2624	
  2625		stream->sample_flags |= SAMPLE_OA_REPORT;
  2626		stream->sample_size += format_size;
  2627	
  2628		stream->oa_buffer.format_size = format_size;
  2629		if (WARN_ON(stream->oa_buffer.format_size == 0))
  2630			return -EINVAL;
  2631	
  2632		stream->oa_buffer.format =
  2633			dev_priv->perf.oa_formats[props->oa_format].format;
  2634	
  2635		stream->periodic = props->oa_periodic;
  2636		if (stream->periodic)
  2637			stream->period_exponent = props->oa_period_exponent;
  2638	
  2639		if (stream->ctx) {
  2640			ret = oa_get_render_ctx_id(stream);
  2641			if (ret) {
  2642				DRM_DEBUG("Invalid context id to filter with\n");
  2643				return ret;
  2644			}
  2645		}
  2646	
  2647		ret = alloc_noa_wait(stream);
  2648		if (ret) {
  2649			DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
  2650			goto err_noa_wait_alloc;
  2651		}
  2652	
  2653		ret = i915_perf_get_oa_config_and_bo(stream, props->metrics_set,
  2654						     &stream->oa_config,
  2655						     &stream->initial_oa_config_bo);
  2656		if (ret) {
  2657			DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
  2658			goto err_config;
  2659		}
  2660	
  2661		/* PRM - observability performance counters:
  2662		 *
  2663		 *   OACONTROL, performance counter enable, note:
  2664		 *
  2665		 *   "When this bit is set, in order to have coherent counts,
  2666		 *   RC6 power state and trunk clock gating must be disabled.
  2667		 *   This can be achieved by programming MMIO registers as
  2668		 *   0xA094=0 and 0xA090[31]=1"
  2669		 *
  2670		 *   In our case we are expecting that taking pm + FORCEWAKE
  2671		 *   references will effectively disable RC6.
  2672		 */
  2673		stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
  2674		intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
  2675	
  2676		ret = alloc_oa_buffer(stream);
  2677		if (ret)
  2678			goto err_oa_buf_alloc;
  2679	
  2680		ret = i915_mutex_lock_interruptible(&dev_priv->drm);
  2681		if (ret)
  2682			goto err_lock;
  2683	
  2684		stream->ops = &i915_oa_stream_ops;
  2685		dev_priv->perf.exclusive_stream = stream;
  2686	
  2687		mutex_lock(&stream->config_mutex);
  2688		ret = dev_priv->perf.ops.enable_metric_set(stream);
  2689		if (ret) {
  2690			DRM_DEBUG("Unable to enable metric set\n");
  2691			/*
  2692			 * Ignore the return value since we already have an error from
  2693			 * the enable vfunc.
  2694			 */
> 2695			i915_active_request_retire(&stream->active_config_rq,
  2696						   &stream->config_mutex);
  2697		} else {
  2698			ret = i915_active_request_retire(&stream->active_config_rq,
  2699							 &stream->config_mutex);
  2700		}
  2701	
  2702		mutex_unlock(&stream->config_mutex);
  2703		mutex_unlock(&dev_priv->drm.struct_mutex);
  2704	
  2705		i915_gem_object_put(stream->initial_oa_config_bo);
  2706		stream->initial_oa_config_bo = NULL;
  2707		if (ret)
  2708			goto err_enable;
  2709	
  2710		DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
  2711	
  2712		hrtimer_init(&stream->poll_check_timer,
  2713			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  2714		stream->poll_check_timer.function = oa_poll_check_timer_cb;
  2715		init_waitqueue_head(&stream->poll_wq);
  2716		spin_lock_init(&stream->oa_buffer.ptr_lock);
  2717	
  2718		return 0;
  2719	
  2720	err_enable:
  2721		mutex_lock(&dev_priv->drm.struct_mutex);
  2722		mutex_lock(&stream->config_mutex);
  2723		dev_priv->perf.ops.disable_metric_set(stream);
  2724		mutex_unlock(&stream->config_mutex);
  2725		dev_priv->perf.exclusive_stream = NULL;
  2726		mutex_unlock(&dev_priv->drm.struct_mutex);
  2727	
  2728	err_lock:
  2729		free_oa_buffer(stream);
  2730	
  2731	err_oa_buf_alloc:
  2732		i915_oa_config_put(stream->oa_config);
  2733	
  2734		intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
  2735		intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
  2736	
  2737		free_oa_configs(stream);
  2738	
  2739		i915_gem_object_put(stream->initial_oa_config_bo);
  2740	
  2741	err_config:
  2742		free_noa_wait(stream);
  2743	
  2744	err_noa_wait_alloc:
  2745		if (stream->ctx)
  2746			oa_put_render_ctx_id(stream);
  2747	
  2748		return ret;
  2749	}
  2750	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 69533 bytes --]

[-- Attachment #3: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v12 06/11] drm/i915/perf: allow for CS OA configs to be created lazily
  2019-08-30 14:47 ` [PATCH v12 06/11] drm/i915/perf: allow for CS OA configs to be created lazily Lionel Landwerlin
@ 2019-08-30 21:39   ` kbuild test robot
  0 siblings, 0 replies; 23+ messages in thread
From: kbuild test robot @ 2019-08-30 21:39 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 35859 bytes --]

Hi Lionel,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[cannot apply to v5.3-rc6 next-20190830]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Lionel-Landwerlin/drm-i915-Vulkan-performance-query-support/20190831-033234
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
reproduce: make htmldocs

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   include/net/sock.h:233: warning: Function parameter or member 'skc_v6_daddr' not described in 'sock_common'
   include/net/sock.h:233: warning: Function parameter or member 'skc_v6_rcv_saddr' not described in 'sock_common'
   include/net/sock.h:233: warning: Function parameter or member 'skc_cookie' not described in 'sock_common'
   include/net/sock.h:233: warning: Function parameter or member 'skc_listener' not described in 'sock_common'
   include/net/sock.h:233: warning: Function parameter or member 'skc_tw_dr' not described in 'sock_common'
   include/net/sock.h:233: warning: Function parameter or member 'skc_rcv_wnd' not described in 'sock_common'
   include/net/sock.h:233: warning: Function parameter or member 'skc_tw_rcv_nxt' not described in 'sock_common'
   include/net/sock.h:515: warning: Function parameter or member 'sk_rx_skb_cache' not described in 'sock'
   include/net/sock.h:515: warning: Function parameter or member 'sk_wq_raw' not described in 'sock'
   include/net/sock.h:515: warning: Function parameter or member 'tcp_rtx_queue' not described in 'sock'
   include/net/sock.h:515: warning: Function parameter or member 'sk_tx_skb_cache' not described in 'sock'
   include/net/sock.h:515: warning: Function parameter or member 'sk_route_forced_caps' not described in 'sock'
   include/net/sock.h:515: warning: Function parameter or member 'sk_txtime_report_errors' not described in 'sock'
   include/net/sock.h:515: warning: Function parameter or member 'sk_validate_xmit_skb' not described in 'sock'
   include/net/sock.h:515: warning: Function parameter or member 'sk_bpf_storage' not described in 'sock'
   include/net/sock.h:2439: warning: Function parameter or member 'tcp_rx_skb_cache_key' not described in 'DECLARE_STATIC_KEY_FALSE'
   include/net/sock.h:2439: warning: Excess function parameter 'sk' description in 'DECLARE_STATIC_KEY_FALSE'
   include/net/sock.h:2439: warning: Excess function parameter 'skb' description in 'DECLARE_STATIC_KEY_FALSE'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'gso_partial_features' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'l3mdev_ops' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'xfrmdev_ops' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'tlsdev_ops' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'name_assign_type' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'ieee802154_ptr' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'mpls_ptr' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'xdp_prog' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'gro_flush_timeout' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'nf_hooks_ingress' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member '____cacheline_aligned_in_smp' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'qdisc_hash' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'xps_cpus_map' not described in 'net_device'
   include/linux/netdevice.h:2040: warning: Function parameter or member 'xps_rxqs_map' not described in 'net_device'
   include/linux/phylink.h:56: warning: Function parameter or member '__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising' not described in 'phylink_link_state'
   include/linux/phylink.h:56: warning: Function parameter or member '__ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising' not described in 'phylink_link_state'
   drivers/net/phy/phylink.c:593: warning: Function parameter or member 'config' not described in 'phylink_create'
   drivers/net/phy/phylink.c:593: warning: Excess function parameter 'ndev' description in 'phylink_create'
   include/net/mac80211.h:2006: warning: Function parameter or member 'txpwr' not described in 'ieee80211_sta'
   Error: Cannot open file drivers/dma-buf/reservation.c
   Error: Cannot open file drivers/dma-buf/reservation.c
   Error: Cannot open file drivers/dma-buf/reservation.c
   Error: Cannot open file include/linux/reservation.h
   Error: Cannot open file include/linux/reservation.h
   mm/util.c:1: warning: 'get_user_pages_fast' not found
   mm/slab.c:4215: warning: Function parameter or member 'objp' not described in '__ksize'
   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c:335: warning: Excess function parameter 'dev' description in 'amdgpu_gem_prime_export'
   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c:336: warning: Excess function parameter 'dev' description in 'amdgpu_gem_prime_export'
   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c:142: warning: Function parameter or member 'blockable' not described in 'amdgpu_mn_read_lock'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:347: warning: cannot understand function prototype: 'struct amdgpu_vm_pt_cursor '
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:348: warning: cannot understand function prototype: 'struct amdgpu_vm_pt_cursor '
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:494: warning: Function parameter or member 'start' not described in 'amdgpu_vm_pt_first_dfs'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:546: warning: Function parameter or member 'adev' not described in 'for_each_amdgpu_vm_pt_dfs_safe'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:546: warning: Function parameter or member 'vm' not described in 'for_each_amdgpu_vm_pt_dfs_safe'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:546: warning: Function parameter or member 'start' not described in 'for_each_amdgpu_vm_pt_dfs_safe'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:546: warning: Function parameter or member 'cursor' not described in 'for_each_amdgpu_vm_pt_dfs_safe'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:546: warning: Function parameter or member 'entry' not described in 'for_each_amdgpu_vm_pt_dfs_safe'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:823: warning: Function parameter or member 'level' not described in 'amdgpu_vm_bo_param'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'params' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'bo' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'level' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'pe' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'addr' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'count' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'incr' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:1285: warning: Function parameter or member 'flags' not described in 'amdgpu_vm_update_flags'
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c:2823: warning: Function parameter or member 'pasid' not described in 'amdgpu_vm_make_compute'
   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c:378: warning: Excess function parameter 'entry' description in 'amdgpu_irq_dispatch'
   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c:379: warning: Function parameter or member 'ih' not described in 'amdgpu_irq_dispatch'
   drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c:379: warning: Excess function parameter 'entry' description in 'amdgpu_irq_dispatch'
   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c:1: warning: no structured comments found
   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:1: warning: no structured comments found
   drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c:1: warning: 'pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie' not found
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:132: warning: Incorrect use of kernel-doc format:          * @atomic_obj
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:238: warning: Incorrect use of kernel-doc format:          * gpu_info FW provided soc bounding box struct or 0 if not
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'atomic_obj' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'backlight_link' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'backlight_caps' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'freesync_module' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'fw_dmcu' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'dmcu_fw_version' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h:243: warning: Function parameter or member 'soc_bounding_box' not described in 'amdgpu_display_manager'
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c:1: warning: 'dm_crtc_high_irq' not found
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c:1: warning: 'register_hpd_handlers' not found
   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c:1: warning: 'dm_pflip_high_irq' not found
   include/drm/drm_modeset_helper_vtables.h:1053: warning: Function parameter or member 'prepare_writeback_job' not described in 'drm_connector_helper_funcs'
   include/drm/drm_modeset_helper_vtables.h:1053: warning: Function parameter or member 'cleanup_writeback_job' not described in 'drm_connector_helper_funcs'
   include/drm/drm_atomic_state_helper.h:1: warning: no structured comments found
   include/drm/drm_gem_shmem_helper.h:87: warning: Function parameter or member 'madv' not described in 'drm_gem_shmem_object'
   include/drm/drm_gem_shmem_helper.h:87: warning: Function parameter or member 'madv_list' not described in 'drm_gem_shmem_object'
   drivers/gpu/drm/i915/display/intel_dpll_mgr.h:158: warning: Enum value 'DPLL_ID_TGL_MGPLL5' not described in enum 'intel_dpll_id'
   drivers/gpu/drm/i915/display/intel_dpll_mgr.h:158: warning: Enum value 'DPLL_ID_TGL_MGPLL6' not described in enum 'intel_dpll_id'
   drivers/gpu/drm/i915/display/intel_dpll_mgr.h:158: warning: Excess enum value 'DPLL_ID_TGL_TCPLL5' description in 'intel_dpll_id'
   drivers/gpu/drm/i915/display/intel_dpll_mgr.h:158: warning: Excess enum value 'DPLL_ID_TGL_TCPLL6' description in 'intel_dpll_id'
   drivers/gpu/drm/i915/display/intel_dpll_mgr.h:342: warning: Function parameter or member 'wakeref' not described in 'intel_shared_dpll'
   drivers/gpu/drm/i915/i915_drv.h:1148: warning: Incorrect use of kernel-doc format:          * The OA context specific information.
   drivers/gpu/drm/i915/i915_drv.h:1162: warning: Incorrect use of kernel-doc format:          * State of the OA buffer.
   drivers/gpu/drm/i915/i915_drv.h:1173: warning: Incorrect use of kernel-doc format:                  * Locks reads and writes to all head/tail state
   drivers/gpu/drm/i915/i915_drv.h:1195: warning: Incorrect use of kernel-doc format:                  * One 'aging' tail pointer and one 'aged' tail pointer ready to
   drivers/gpu/drm/i915/i915_drv.h:1207: warning: Incorrect use of kernel-doc format:                  * Index for the aged tail ready to read() data up to.
   drivers/gpu/drm/i915/i915_drv.h:1212: warning: Incorrect use of kernel-doc format:                  * A monotonic timestamp for when the current aging tail pointer
   drivers/gpu/drm/i915/i915_drv.h:1218: warning: Incorrect use of kernel-doc format:                  * Although we can always read back the head pointer register,
>> drivers/gpu/drm/i915/i915_drv.h:1226: warning: Function parameter or member 'config_mutex' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1226: warning: Function parameter or member 'pinned_ctx' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1226: warning: Function parameter or member 'specific_ctx_id' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1226: warning: Function parameter or member 'specific_ctx_id_mask' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1226: warning: Function parameter or member 'poll_check_timer' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1226: warning: Function parameter or member 'poll_wq' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1226: warning: Function parameter or member 'pollin' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1226: warning: Function parameter or member 'periodic' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1226: warning: Function parameter or member 'period_exponent' not described in 'i915_perf_stream'
   drivers/gpu/drm/i915/i915_drv.h:1226: warning: Function parameter or member 'oa_buffer' not described in 'i915_perf_stream'
   include/net/cfg80211.h:1092: warning: Function parameter or member 'txpwr' not described in 'station_parameters'
   include/net/mac80211.h:4043: warning: Function parameter or member 'sta_set_txpwr' not described in 'ieee80211_ops'
   drivers/gpu/drm/i915/i915_drv.h:1148: warning: Incorrect use of kernel-doc format:          * The OA context specific information.
   drivers/gpu/drm/i915/i915_drv.h:1162: warning: Incorrect use of kernel-doc format:          * State of the OA buffer.
   drivers/gpu/drm/i915/i915_drv.h:1173: warning: Incorrect use of kernel-doc format:                  * Locks reads and writes to all head/tail state
   drivers/gpu/drm/i915/i915_drv.h:1195: warning: Incorrect use of kernel-doc format:                  * One 'aging' tail pointer and one 'aged' tail pointer ready to
   drivers/gpu/drm/i915/i915_drv.h:1207: warning: Incorrect use of kernel-doc format:                  * Index for the aged tail ready to read() data up to.
   drivers/gpu/drm/i915/i915_drv.h:1212: warning: Incorrect use of kernel-doc format:                  * A monotonic timestamp for when the current aging tail pointer
   drivers/gpu/drm/i915/i915_drv.h:1218: warning: Incorrect use of kernel-doc format:                  * Although we can always read back the head pointer register,
   drivers/gpu/drm/i915/i915_drv.h:1148: warning: Incorrect use of kernel-doc format:          * The OA context specific information.
   drivers/gpu/drm/i915/i915_drv.h:1162: warning: Incorrect use of kernel-doc format:          * State of the OA buffer.
   drivers/gpu/drm/i915/i915_drv.h:1173: warning: Incorrect use of kernel-doc format:                  * Locks reads and writes to all head/tail state
   drivers/gpu/drm/i915/i915_drv.h:1195: warning: Incorrect use of kernel-doc format:                  * One 'aging' tail pointer and one 'aged' tail pointer ready to
   drivers/gpu/drm/i915/i915_drv.h:1207: warning: Incorrect use of kernel-doc format:                  * Index for the aged tail ready to read() data up to.
   drivers/gpu/drm/i915/i915_drv.h:1212: warning: Incorrect use of kernel-doc format:                  * A monotonic timestamp for when the current aging tail pointer
   drivers/gpu/drm/i915/i915_drv.h:1218: warning: Incorrect use of kernel-doc format:                  * Although we can always read back the head pointer register,
   drivers/gpu/drm/i915/i915_perf.c:369: warning: Function parameter or member 'engine' not described in 'perf_open_properties'
   drivers/gpu/drm/mcde/mcde_drv.c:1: warning: 'ST-Ericsson MCDE DRM Driver' not found
   Documentation/admin-guide/xfs.rst:257: WARNING: Block quote ends without a blank line; unexpected unindent.
   include/uapi/linux/firewire-cdev.h:312: WARNING: Inline literal start-string without end-string.
   drivers/firewire/core-transaction.c:606: WARNING: Inline strong start-string without end-string.
   Documentation/trace/kprobetrace.rst:99: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   Documentation/translations/it_IT/process/maintainer-pgp-guide.rst:458: WARNING: Unknown target name: "nitrokey pro".
   Documentation/security/keys/core.rst:1110: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1110: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1108: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1108: WARNING: Inline emphasis start-string without end-string.
   Documentation/security/keys/core.rst:1108: WARNING: Inline emphasis start-string without end-string.
   drivers/message/fusion/mptbase.c:5057: WARNING: Definition list ends without a blank line; unexpected unindent.
   drivers/tty/serial/serial_core.c:1964: WARNING: Definition list ends without a blank line; unexpected unindent.
   include/linux/i2c.h:522: WARNING: Inline strong start-string without end-string.
   include/linux/regulator/driver.h:284: WARNING: Unknown target name: "regulator_regmap_x_voltage".
   fs/seq_file.c:40: WARNING: Inline strong start-string without end-string.
   fs/seq_file.c:40: WARNING: Inline strong start-string without end-string.
   fs/seq_file.c:40: WARNING: Inline strong start-string without end-string.
   fs/seq_file.c:40: WARNING: Inline strong start-string without end-string.
   fs/posix_acl.c:636: WARNING: Inline emphasis start-string without end-string.
   fs/debugfs/inode.c:399: WARNING: Inline literal start-string without end-string.
   fs/debugfs/inode.c:478: WARNING: Inline literal start-string without end-string.
   fs/debugfs/inode.c:510: WARNING: Inline literal start-string without end-string.
   fs/debugfs/inode.c:603: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:394: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:400: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:439: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:445: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:484: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:490: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:530: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:536: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:578: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:584: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:845: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:851: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:898: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:904: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:1090: WARNING: Inline literal start-string without end-string.
   fs/debugfs/file.c:1096: WARNING: Inline literal start-string without end-string.
   drivers/ata/libata-core.c:5945: WARNING: Unknown target name: "hw".
   include/linux/spi/spi.h:382: WARNING: Unexpected indentation.
   include/linux/netdevice.h:3482: WARNING: Inline emphasis start-string without end-string.
   include/linux/netdevice.h:3482: WARNING: Inline emphasis start-string without end-string.
   net/core/dev.c:5008: WARNING: Unknown target name: "page_is".
   Documentation/kbuild/makefiles.rst:1142: WARNING: Inline emphasis start-string without end-string.
   Documentation/kbuild/makefiles.rst:1152: WARNING: Inline emphasis start-string without end-string.
   Documentation/kbuild/makefiles.rst:1154: WARNING: Inline emphasis start-string without end-string.
   WARNING: kernel-doc 'scripts/kernel-doc -rst -enable-lineno -function Reservation Object Overview drivers/dma-buf/reservation.c' failed with return code 1
   WARNING: kernel-doc 'scripts/kernel-doc -rst -enable-lineno -export drivers/dma-buf/reservation.c' failed with return code 2
   WARNING: kernel-doc 'scripts/kernel-doc -rst -enable-lineno -internal include/linux/reservation.h' failed with return code 2
   Documentation/index.rst:94: WARNING: toctree contains reference to nonexisting document 'virtual/index'
   include/linux/xarray.h:232: WARNING: Unexpected indentation.
   Documentation/crypto/crypto_engine.rst:2: WARNING: Explicit markup ends without a blank line; unexpected unindent.
   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:245: WARNING: Unexpected indentation.
   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:248: WARNING: Block quote ends without a blank line; unexpected unindent.
   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:211: WARNING: Unexpected indentation.
   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:222: WARNING: Unexpected indentation.
   drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c:2199: WARNING: Inline emphasis start-string without end-string.
   drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c:2201: WARNING: Inline emphasis start-string without end-string.
   Documentation/virt/index.rst: WARNING: document isn't included in any toctree
   include/linux/slab.h:500: WARNING: undefined label: memory-allocation (if the link has no caption the label must precede a section header)
   Documentation/gpu/drm-internals.rst:302: WARNING: Could not lex literal_block as "c". Highlighting skipped.
   WARNING: LaTeX command 'latex' cannot be run (needed for math display), check the imgmath_latex setting
   WARNING: LaTeX command 'latex' cannot be run (needed for math display), check the imgmath_latex setting
   Documentation/trace/kprobetrace.rst:68: WARNING: undefined label: user_mem_access (if the link has no caption the label must precede a section header)
   WARNING: LaTeX command 'latex' cannot be run (needed for math display), check the imgmath_latex setting

vim +1226 drivers/gpu/drm/i915/i915_drv.h

eec688e1420da5 Robert Bragg          2016-11-07  1077  
16d98b31f80775 Robert Bragg          2016-12-07  1078  /**
16d98b31f80775 Robert Bragg          2016-12-07  1079   * struct i915_perf_stream - state for a single open stream FD
16d98b31f80775 Robert Bragg          2016-12-07  1080   */
eec688e1420da5 Robert Bragg          2016-11-07  1081  struct i915_perf_stream {
16d98b31f80775 Robert Bragg          2016-12-07  1082  	/**
16d98b31f80775 Robert Bragg          2016-12-07  1083  	 * @dev_priv: i915 drm device
16d98b31f80775 Robert Bragg          2016-12-07  1084  	 */
eec688e1420da5 Robert Bragg          2016-11-07  1085  	struct drm_i915_private *dev_priv;
eec688e1420da5 Robert Bragg          2016-11-07  1086  
16d98b31f80775 Robert Bragg          2016-12-07  1087  	/**
6d2438c8233bd0 Chris Wilson          2019-01-15  1088  	 * @wakeref: As we keep the device awake while the perf stream is
6d2438c8233bd0 Chris Wilson          2019-01-15  1089  	 * active, we track our runtime pm reference for later release.
6d2438c8233bd0 Chris Wilson          2019-01-15  1090  	 */
6619c0075f784d Chris Wilson          2019-01-14  1091  	intel_wakeref_t wakeref;
6619c0075f784d Chris Wilson          2019-01-14  1092  
16d98b31f80775 Robert Bragg          2016-12-07  1093  	/**
b078378630a2d3 Lionel Landwerlin     2019-08-30  1094  	 * @engine: Engine associated with this performance stream.
b078378630a2d3 Lionel Landwerlin     2019-08-30  1095  	 */
b078378630a2d3 Lionel Landwerlin     2019-08-30  1096  	struct intel_engine_cs *engine;
b078378630a2d3 Lionel Landwerlin     2019-08-30  1097  
b078378630a2d3 Lionel Landwerlin     2019-08-30  1098  	/**
16d98b31f80775 Robert Bragg          2016-12-07  1099  	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
16d98b31f80775 Robert Bragg          2016-12-07  1100  	 * properties given when opening a stream, representing the contents
16d98b31f80775 Robert Bragg          2016-12-07  1101  	 * of a single sample as read() by userspace.
16d98b31f80775 Robert Bragg          2016-12-07  1102  	 */
eec688e1420da5 Robert Bragg          2016-11-07  1103  	u32 sample_flags;
16d98b31f80775 Robert Bragg          2016-12-07  1104  
16d98b31f80775 Robert Bragg          2016-12-07  1105  	/**
16d98b31f80775 Robert Bragg          2016-12-07  1106  	 * @sample_size: Considering the configured contents of a sample
16d98b31f80775 Robert Bragg          2016-12-07  1107  	 * combined with the required header size, this is the total size
16d98b31f80775 Robert Bragg          2016-12-07  1108  	 * of a single sample record.
16d98b31f80775 Robert Bragg          2016-12-07  1109  	 */
d79651522e89c4 Robert Bragg          2016-11-07  1110  	int sample_size;
eec688e1420da5 Robert Bragg          2016-11-07  1111  
16d98b31f80775 Robert Bragg          2016-12-07  1112  	/**
16d98b31f80775 Robert Bragg          2016-12-07  1113  	 * @ctx: %NULL if measuring system-wide across all contexts or a
16d98b31f80775 Robert Bragg          2016-12-07  1114  	 * specific context that is being monitored.
16d98b31f80775 Robert Bragg          2016-12-07  1115  	 */
eec688e1420da5 Robert Bragg          2016-11-07  1116  	struct i915_gem_context *ctx;
16d98b31f80775 Robert Bragg          2016-12-07  1117  
16d98b31f80775 Robert Bragg          2016-12-07  1118  	/**
16d98b31f80775 Robert Bragg          2016-12-07  1119  	 * @enabled: Whether the stream is currently enabled, considering
16d98b31f80775 Robert Bragg          2016-12-07  1120  	 * whether the stream was opened in a disabled state and based
16d98b31f80775 Robert Bragg          2016-12-07  1121  	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
16d98b31f80775 Robert Bragg          2016-12-07  1122  	 */
eec688e1420da5 Robert Bragg          2016-11-07  1123  	bool enabled;
eec688e1420da5 Robert Bragg          2016-11-07  1124  
16d98b31f80775 Robert Bragg          2016-12-07  1125  	/**
16d98b31f80775 Robert Bragg          2016-12-07  1126  	 * @ops: The callbacks providing the implementation of this specific
16d98b31f80775 Robert Bragg          2016-12-07  1127  	 * type of configured stream.
16d98b31f80775 Robert Bragg          2016-12-07  1128  	 */
d79651522e89c4 Robert Bragg          2016-11-07  1129  	const struct i915_perf_stream_ops *ops;
701f8231a2fe17 Lionel Landwerlin     2017-08-03  1130  
701f8231a2fe17 Lionel Landwerlin     2017-08-03  1131  	/**
288323a55c115b Lionel Landwerlin     2019-08-30  1132  	 * @active_config_mutex: Protects access to @oa_config & @oa_config_bos.
288323a55c115b Lionel Landwerlin     2019-08-30  1133  	 */
288323a55c115b Lionel Landwerlin     2019-08-30  1134  	struct mutex config_mutex;
288323a55c115b Lionel Landwerlin     2019-08-30  1135  
288323a55c115b Lionel Landwerlin     2019-08-30  1136  	/**
701f8231a2fe17 Lionel Landwerlin     2017-08-03  1137  	 * @oa_config: The OA configuration used by the stream.
701f8231a2fe17 Lionel Landwerlin     2017-08-03  1138  	 */
701f8231a2fe17 Lionel Landwerlin     2017-08-03  1139  	struct i915_oa_config *oa_config;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1140  
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1141  	/**
288323a55c115b Lionel Landwerlin     2019-08-30  1142  	 * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily
288323a55c115b Lionel Landwerlin     2019-08-30  1143  	 * each time @oa_config changes.
288323a55c115b Lionel Landwerlin     2019-08-30  1144  	 */
288323a55c115b Lionel Landwerlin     2019-08-30  1145  	struct list_head oa_config_bos;
288323a55c115b Lionel Landwerlin     2019-08-30  1146  
288323a55c115b Lionel Landwerlin     2019-08-30  1147  	/**
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06 @1148  	 * The OA context specific information.
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1149  	 */
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1150  	struct intel_context *pinned_ctx;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1151  	u32 specific_ctx_id;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1152  	u32 specific_ctx_id_mask;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1153  
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1154  	struct hrtimer poll_check_timer;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1155  	wait_queue_head_t poll_wq;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1156  	bool pollin;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1157  
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1158  	bool periodic;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1159  	int period_exponent;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1160  
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1161  	/**
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1162  	 * State of the OA buffer.
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1163  	 */
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1164  	struct {
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1165  		struct i915_vma *vma;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1166  		u8 *vaddr;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1167  		u32 last_ctx_id;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1168  		int format;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1169  		int format_size;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1170  		int size_exponent;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1171  
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1172  		/**
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1173  		 * Locks reads and writes to all head/tail state
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1174  		 *
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1175  		 * Consider: the head and tail pointer state needs to be read
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1176  		 * consistently from a hrtimer callback (atomic context) and
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1177  		 * read() fop (user context) with tail pointer updates happening
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1178  		 * in atomic context and head updates in user context and the
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1179  		 * (unlikely) possibility of read() errors needing to reset all
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1180  		 * head/tail state.
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1181  		 *
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1182  		 * Note: Contention/performance aren't currently a significant
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1183  		 * concern here considering the relatively low frequency of
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1184  		 * hrtimer callbacks (5ms period) and that reads typically only
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1185  		 * happen in response to a hrtimer event and likely complete
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1186  		 * before the next callback.
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1187  		 *
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1188  		 * Note: This lock is not held *while* reading and copying data
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1189  		 * to userspace so the value of head observed in htrimer
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1190  		 * callbacks won't represent any partial consumption of data.
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1191  		 */
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1192  		spinlock_t ptr_lock;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1193  
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1194  		/**
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1195  		 * One 'aging' tail pointer and one 'aged' tail pointer ready to
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1196  		 * used for reading.
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1197  		 *
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1198  		 * Initial values of 0xffffffff are invalid and imply that an
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1199  		 * update is required (and should be ignored by an attempted
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1200  		 * read)
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1201  		 */
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1202  		struct {
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1203  			u32 offset;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1204  		} tails[2];
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1205  
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1206  		/**
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1207  		 * Index for the aged tail ready to read() data up to.
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1208  		 */
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1209  		unsigned int aged_tail_idx;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1210  
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1211  		/**
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1212  		 * A monotonic timestamp for when the current aging tail pointer
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1213  		 * was read; used to determine when it is old enough to trust.
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1214  		 */
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1215  		u64 aging_timestamp;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1216  
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1217  		/**
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1218  		 * Although we can always read back the head pointer register,
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1219  		 * we prefer to avoid trusting the HW state, just to avoid any
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1220  		 * risk that some hardware condition could * somehow bump the
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1221  		 * head pointer unpredictably and cause us to forward the wrong
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1222  		 * OA buffer data to userspace.
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1223  		 */
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1224  		u32 head;
a37f08a882b01a Umesh Nerlige Ramappa 2019-08-06  1225  	} oa_buffer;
d79651522e89c4 Robert Bragg          2016-11-07 @1226  };
d79651522e89c4 Robert Bragg          2016-11-07  1227  

:::::: The code at line 1226 was first introduced by commit
:::::: d79651522e89c4ffa8992b48dfe449f0c583f809 drm/i915: Enable i915 perf stream for Haswell OA unit

:::::: TO: Robert Bragg <robert@sixbynine.org>
:::::: CC: Daniel Vetter <daniel.vetter@ffwll.ch>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 7282 bytes --]

[-- Attachment #3: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream
  2019-08-30 14:47 ` [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream Lionel Landwerlin
  2019-08-30 15:48   ` Chris Wilson
  2019-08-30 21:28   ` kbuild test robot
@ 2019-08-31  4:04   ` kbuild test robot
  2 siblings, 0 replies; 23+ messages in thread
From: kbuild test robot @ 2019-08-31  4:04 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx, kbuild-all

[-- Attachment #1: Type: text/plain, Size: 8420 bytes --]

Hi Lionel,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[cannot apply to v5.3-rc6 next-20190830]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Lionel-Landwerlin/drm-i915-Vulkan-performance-query-support/20190831-033234
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-randconfig-h002-201934 (attached as .config)
compiler: gcc-7 (Debian 7.4.0-11) 7.4.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   drivers/gpu/drm/i915/i915_perf.c: In function 'i915_oa_stream_init':
>> drivers/gpu/drm/i915/i915_perf.c:2695:3: error: ignoring return value of 'i915_active_request_retire', declared with attribute warn_unused_result [-Werror=unused-result]
      i915_active_request_retire(&stream->active_config_rq,
      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            &stream->config_mutex);
            ~~~~~~~~~~~~~~~~~~~~~~
   cc1: all warnings being treated as errors

vim +/i915_active_request_retire +2695 drivers/gpu/drm/i915/i915_perf.c

  2553	
  2554	/**
  2555	 * i915_oa_stream_init - validate combined props for OA stream and init
  2556	 * @stream: An i915 perf stream
  2557	 * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
  2558	 * @props: The property state that configures stream (individually validated)
  2559	 *
  2560	 * While read_properties_unlocked() validates properties in isolation it
  2561	 * doesn't ensure that the combination necessarily makes sense.
  2562	 *
  2563	 * At this point it has been determined that userspace wants a stream of
  2564	 * OA metrics, but still we need to further validate the combined
  2565	 * properties are OK.
  2566	 *
  2567	 * If the configuration makes sense then we can allocate memory for
  2568	 * a circular OA buffer and apply the requested metric set configuration.
  2569	 *
  2570	 * Returns: zero on success or a negative error code.
  2571	 */
  2572	static int i915_oa_stream_init(struct i915_perf_stream *stream,
  2573				       struct drm_i915_perf_open_param *param,
  2574				       struct perf_open_properties *props)
  2575	{
  2576		struct drm_i915_private *dev_priv = stream->dev_priv;
  2577		int format_size;
  2578		int ret;
  2579	
  2580		/* If the sysfs metrics/ directory wasn't registered for some
  2581		 * reason then don't let userspace try their luck with config
  2582		 * IDs
  2583		 */
  2584		if (!dev_priv->perf.metrics_kobj) {
  2585			DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
  2586			return -EINVAL;
  2587		}
  2588	
  2589		if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
  2590			DRM_DEBUG("Only OA report sampling supported\n");
  2591			return -EINVAL;
  2592		}
  2593	
  2594		if (!dev_priv->perf.ops.enable_metric_set) {
  2595			DRM_DEBUG("OA unit not supported\n");
  2596			return -ENODEV;
  2597		}
  2598	
  2599		/* To avoid the complexity of having to accurately filter
  2600		 * counter reports and marshal to the appropriate client
  2601		 * we currently only allow exclusive access
  2602		 */
  2603		if (dev_priv->perf.exclusive_stream) {
  2604			DRM_DEBUG("OA unit already in use\n");
  2605			return -EBUSY;
  2606		}
  2607	
  2608		if (!props->oa_format) {
  2609			DRM_DEBUG("OA report format not specified\n");
  2610			return -EINVAL;
  2611		}
  2612	
  2613		mutex_init(&stream->config_mutex);
  2614	
  2615		stream->sample_size = sizeof(struct drm_i915_perf_record_header);
  2616	
  2617		format_size = dev_priv->perf.oa_formats[props->oa_format].size;
  2618	
  2619		stream->engine = props->engine;
  2620	
  2621		mutex_init(&stream->config_mutex);
  2622		INIT_ACTIVE_REQUEST(&stream->active_config_rq,
  2623				    &stream->config_mutex);
  2624	
  2625		stream->sample_flags |= SAMPLE_OA_REPORT;
  2626		stream->sample_size += format_size;
  2627	
  2628		stream->oa_buffer.format_size = format_size;
  2629		if (WARN_ON(stream->oa_buffer.format_size == 0))
  2630			return -EINVAL;
  2631	
  2632		stream->oa_buffer.format =
  2633			dev_priv->perf.oa_formats[props->oa_format].format;
  2634	
  2635		stream->periodic = props->oa_periodic;
  2636		if (stream->periodic)
  2637			stream->period_exponent = props->oa_period_exponent;
  2638	
  2639		if (stream->ctx) {
  2640			ret = oa_get_render_ctx_id(stream);
  2641			if (ret) {
  2642				DRM_DEBUG("Invalid context id to filter with\n");
  2643				return ret;
  2644			}
  2645		}
  2646	
  2647		ret = alloc_noa_wait(stream);
  2648		if (ret) {
  2649			DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
  2650			goto err_noa_wait_alloc;
  2651		}
  2652	
  2653		ret = i915_perf_get_oa_config_and_bo(stream, props->metrics_set,
  2654						     &stream->oa_config,
  2655						     &stream->initial_oa_config_bo);
  2656		if (ret) {
  2657			DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
  2658			goto err_config;
  2659		}
  2660	
  2661		/* PRM - observability performance counters:
  2662		 *
  2663		 *   OACONTROL, performance counter enable, note:
  2664		 *
  2665		 *   "When this bit is set, in order to have coherent counts,
  2666		 *   RC6 power state and trunk clock gating must be disabled.
  2667		 *   This can be achieved by programming MMIO registers as
  2668		 *   0xA094=0 and 0xA090[31]=1"
  2669		 *
  2670		 *   In our case we are expecting that taking pm + FORCEWAKE
  2671		 *   references will effectively disable RC6.
  2672		 */
  2673		stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
  2674		intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
  2675	
  2676		ret = alloc_oa_buffer(stream);
  2677		if (ret)
  2678			goto err_oa_buf_alloc;
  2679	
  2680		ret = i915_mutex_lock_interruptible(&dev_priv->drm);
  2681		if (ret)
  2682			goto err_lock;
  2683	
  2684		stream->ops = &i915_oa_stream_ops;
  2685		dev_priv->perf.exclusive_stream = stream;
  2686	
  2687		mutex_lock(&stream->config_mutex);
  2688		ret = dev_priv->perf.ops.enable_metric_set(stream);
  2689		if (ret) {
  2690			DRM_DEBUG("Unable to enable metric set\n");
  2691			/*
  2692			 * Ignore the return value since we already have an error from
  2693			 * the enable vfunc.
  2694			 */
> 2695			i915_active_request_retire(&stream->active_config_rq,
  2696						   &stream->config_mutex);
  2697		} else {
  2698			ret = i915_active_request_retire(&stream->active_config_rq,
  2699							 &stream->config_mutex);
  2700		}
  2701	
  2702		mutex_unlock(&stream->config_mutex);
  2703		mutex_unlock(&dev_priv->drm.struct_mutex);
  2704	
  2705		i915_gem_object_put(stream->initial_oa_config_bo);
  2706		stream->initial_oa_config_bo = NULL;
  2707		if (ret)
  2708			goto err_enable;
  2709	
  2710		DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
  2711	
  2712		hrtimer_init(&stream->poll_check_timer,
  2713			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  2714		stream->poll_check_timer.function = oa_poll_check_timer_cb;
  2715		init_waitqueue_head(&stream->poll_wq);
  2716		spin_lock_init(&stream->oa_buffer.ptr_lock);
  2717	
  2718		return 0;
  2719	
  2720	err_enable:
  2721		mutex_lock(&dev_priv->drm.struct_mutex);
  2722		mutex_lock(&stream->config_mutex);
  2723		dev_priv->perf.ops.disable_metric_set(stream);
  2724		mutex_unlock(&stream->config_mutex);
  2725		dev_priv->perf.exclusive_stream = NULL;
  2726		mutex_unlock(&dev_priv->drm.struct_mutex);
  2727	
  2728	err_lock:
  2729		free_oa_buffer(stream);
  2730	
  2731	err_oa_buf_alloc:
  2732		i915_oa_config_put(stream->oa_config);
  2733	
  2734		intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
  2735		intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
  2736	
  2737		free_oa_configs(stream);
  2738	
  2739		i915_gem_object_put(stream->initial_oa_config_bo);
  2740	
  2741	err_config:
  2742		free_noa_wait(stream);
  2743	
  2744	err_noa_wait_alloc:
  2745		if (stream->ctx)
  2746			oa_put_render_ctx_id(stream);
  2747	
  2748		return ret;
  2749	}
  2750	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 34295 bytes --]

[-- Attachment #3: Type: text/plain, Size: 159 bytes --]

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream
  2019-08-30 15:48   ` Chris Wilson
  2019-08-30 21:08     ` Lionel Landwerlin
@ 2019-09-02 11:17     ` Lionel Landwerlin
  1 sibling, 0 replies; 23+ messages in thread
From: Lionel Landwerlin @ 2019-09-02 11:17 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 30/08/2019 18:48, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-08-30 15:47:23)
>>   err_unpin:
>> -       __i915_vma_unpin(vma);
>> +       mutex_lock(&i915->drm.struct_mutex);
>> +       i915_vma_unpin_and_release(&vma, 0);
>> +       mutex_unlock(&i915->drm.struct_mutex);
> Strangely unpin_and_release() doesn't need the mutex. But I can clean
> that up later.
>
>>   
>>   err_unref:
>>          i915_gem_object_put(bo);
>> @@ -1947,50 +1961,69 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>>          return ret;
>>   }
>>   
>> -static void config_oa_regs(struct drm_i915_private *dev_priv,
>> -                          const struct i915_oa_reg *regs,
>> -                          u32 n_regs)
>> +static int emit_oa_config(struct drm_i915_private *i915,
>> +                         struct i915_perf_stream *stream)
>>   {
>> -       u32 i;
>> +       struct i915_request *rq;
>> +       struct i915_vma *vma;
>> +       u32 *cs;
>> +       int err;
>>   
>> -       for (i = 0; i < n_regs; i++) {
>> -               const struct i915_oa_reg *reg = regs + i;
>> +       lockdep_assert_held(&stream->config_mutex);
>>   
>> -               I915_WRITE(reg->addr, reg->value);
>> +       rq = i915_request_create(stream->engine->kernel_context);
>> +       if (IS_ERR(rq))
>> +               return PTR_ERR(rq);
>> +
>> +       err = i915_active_request_set(&stream->active_config_rq,
>> +                                     rq);
>> +       if (err)
>> +               goto err_add_request;
>> +
>> +       vma = i915_vma_instance(stream->initial_oa_config_bo,
>> +                               &i915->ggtt.vm, NULL);
> Safer with stream->engine->gt->ggtt.vm
>
>> +       if (unlikely(IS_ERR(vma))) {
>> +               err = PTR_ERR(vma);
>> +               goto err_add_request;
>>          }
>> -}
>>   
>> -static void delay_after_mux(void)
>> -{
>> -       /*
>> -        * It apparently takes a fairly long time for a new MUX
>> -        * configuration to be be applied after these register writes.
>> -        * This delay duration was derived empirically based on the
>> -        * render_basic config but hopefully it covers the maximum
>> -        * configuration latency.
>> -        *
>> -        * As a fallback, the checks in _append_oa_reports() to skip
>> -        * invalid OA reports do also seem to work to discard reports
>> -        * generated before this config has completed - albeit not
>> -        * silently.
>> -        *
>> -        * Unfortunately this is essentially a magic number, since we
>> -        * don't currently know of a reliable mechanism for predicting
>> -        * how long the MUX config will take to apply and besides
>> -        * seeing invalid reports we don't know of a reliable way to
>> -        * explicitly check that the MUX config has landed.
>> -        *
>> -        * It's even possible we've miss characterized the underlying
>> -        * problem - it just seems like the simplest explanation why
>> -        * a delay at this location would mitigate any invalid reports.
>> -        */
>> -       usleep_range(15000, 20000);
>> +       err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
>> +       if (err)
>> +               goto err_add_request;
> Don't pin inside a request. do the pining before i915_request_create().
> One day, we may need to allocate a request to do the pin.
>
> Be safe,
>
> i915_vma_lock(vma);
> err = i915_request_await_object(rq, vma->obj, 0);
> (yes, we need a better wrapper here)
> if (err)
>> +       err = i915_vma_move_to_active(vma, rq, 0);
> i915_vma_unlock(vma);
>> +       if (err)
>> +               goto err_vma_unpin;
>> +
>
>> @@ -2658,16 +2684,31 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>          stream->ops = &i915_oa_stream_ops;
>>          dev_priv->perf.exclusive_stream = stream;
>>   
>> +       mutex_lock(&stream->config_mutex);
>>          ret = dev_priv->perf.ops.enable_metric_set(stream);
>>          if (ret) {
>>                  DRM_DEBUG("Unable to enable metric set\n");
>> -               goto err_enable;
>> +               /*
>> +                * Ignore the return value since we already have an error from
>> +                * the enable vfunc.
>> +                */
>> +               i915_active_request_retire(&stream->active_config_rq,
>> +                                          &stream->config_mutex);
>> +       } else {
>> +               ret = i915_active_request_retire(&stream->active_config_rq,
>> +                                                &stream->config_mutex);
> This function doesn't exist anymore. It's basically just waiting for the
> old request. Why do you need it? (Your request flow is otherwise ordered.)


I don't want to enable the OA reports to be written into the OA buffer 
until I know the configuration work has completed.

Otherwise it would write invalid/unconfigured data.


>
>> -       DRM_DEBUG("opening stream oa config uuid=%s\n", stream->oa_config->uuid);
>> -
>> +       mutex_unlock(&stream->config_mutex);
>>          mutex_unlock(&dev_priv->drm.struct_mutex);
>>   
>> +       i915_gem_object_put(stream->initial_oa_config_bo);
>> +       stream->initial_oa_config_bo = NULL;
>> +       if (ret)
>> +               goto err_enable;
> Is it because of this err that may end up freeing the stream? I would
> expect a similar wait-before-free on stream destroy.


Actually it's there, although a bit hidden :


static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
         struct drm_i915_private *dev_priv = stream->dev_priv;
         int err;

         BUG_ON(stream != dev_priv->perf.exclusive_stream);

         mutex_lock(&dev_priv->drm.struct_mutex);
         mutex_lock(&stream->config_mutex);
         dev_priv->perf.ops.disable_metric_set(stream);
=>   err = i915_active_request_retire(&stream->active_config_rq,
&stream->config_mutex);
         mutex_unlock(&stream->config_mutex);
         dev_priv->perf.exclusive_stream = NULL;
         mutex_unlock(&dev_priv->drm.struct_mutex);



>   In which case I
> would have the active request hold a reference on the stream. (And you
> might find it more convenient to use i915_active rather than the raw
> i915_active_request. i915_active is geared to tracking multiple
> timelines, so definitely overkill for you use case, just has more
> utility/mid-layer! built in)
> -Chris
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v12 11/11] drm/i915: add support for perf configuration queries
  2019-08-30 14:47 ` [PATCH v12 11/11] drm/i915: add support for perf configuration queries Lionel Landwerlin
@ 2019-09-02 20:08   ` Dan Carpenter
  0 siblings, 0 replies; 23+ messages in thread
From: Dan Carpenter @ 2019-09-02 20:08 UTC (permalink / raw)
  To: kbuild, Lionel Landwerlin; +Cc: intel-gfx, kbuild-all

Hi Lionel,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[cannot apply to v5.3-rc7 next-20190902]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Lionel-Landwerlin/drm-i915-Vulkan-performance-query-support/20190831-033234
base:   git://anongit.freedesktop.org/drm-intel for-linux-next

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>

New smatch warnings:
drivers/gpu/drm/i915/i915_query.c:405 query_perf_config_list() warn: maybe return -EFAULT instead of the bytes remaining?

Old smatch warnings:
drivers/gpu/drm/i915/i915_query.c:138 query_engine_info() warn: check that 'query.num_engines' doesn't leak information

# https://github.com/0day-ci/linux/commit/1c566ee2d38f4e7ece15ee33fef205b1088e79bb
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout 1c566ee2d38f4e7ece15ee33fef205b1088e79bb
vim +405 drivers/gpu/drm/i915/i915_query.c

1c566ee2d38f4e Lionel Landwerlin 2019-08-30  336  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  337  static int query_perf_config_list(struct drm_i915_private *i915,
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  338  				  struct drm_i915_query_item *query_item)
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  339  {
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  340  	struct drm_i915_query_perf_config __user *user_query_config_ptr =
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  341  		u64_to_user_ptr(query_item->data_ptr);
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  342  	struct i915_oa_config *oa_config;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  343  	u32 flags, total_size;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  344  	u64 i, n_configs, *oa_config_ids;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  345  	int ret, id;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  346  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  347  	if (!i915->perf.initialized)
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  348  		return -ENODEV;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  349  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  350  	/* Count the default test configuration */
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  351  	n_configs = i915->perf.n_metrics + 1;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  352  	total_size = sizeof(struct drm_i915_query_perf_config) +
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  353  		sizeof(u64) * n_configs;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  354  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  355  	if (query_item->length == 0)
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  356  		return total_size;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  357  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  358  	if (query_item->length < total_size) {
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  359  		DRM_DEBUG("Invalid query config list item size=%u expected=%u\n",
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  360  			  query_item->length, total_size);
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  361  		return -EINVAL;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  362  	}
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  363  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  364  	if (!access_ok(user_query_config_ptr, total_size))
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  365  		return -EFAULT;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  366  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  367  	if (__get_user(flags, &user_query_config_ptr->flags))
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  368  		return -EFAULT;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  369  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  370  	if (flags != 0)
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  371  		return -EINVAL;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  372  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  373  	if (__put_user(n_configs, &user_query_config_ptr->config))
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  374  		return -EFAULT;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  375  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  376  	ret = mutex_lock_interruptible(&i915->perf.metrics_lock);
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  377  	if (ret)
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  378  		return ret;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  379  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  380  	/* Count the configs. */
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  381  	n_configs = 1;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  382  	idr_for_each_entry(&i915->perf.metrics_idr, oa_config, id)
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  383  		n_configs++;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  384  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  385  	oa_config_ids =
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  386  		kmalloc_array(n_configs, sizeof(*oa_config_ids), GFP_KERNEL);
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  387  	if (!oa_config_ids) {
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  388  		mutex_unlock(&i915->perf.metrics_lock);
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  389  		return -ENOMEM;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  390  	}
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  391  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  392  	i = 0;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  393  	oa_config_ids[i++] = 1ull;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  394  	idr_for_each_entry(&i915->perf.metrics_idr, oa_config, id)
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  395  		oa_config_ids[i++] = id;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  396  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  397  	mutex_unlock(&i915->perf.metrics_lock);
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  398  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  399  	ret = copy_to_user(u64_to_user_ptr(query_item->data_ptr +
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  400  					   sizeof(struct drm_i915_query_perf_config)),
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  401  			   oa_config_ids,
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  402  			   n_configs * sizeof(*oa_config_ids));
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  403  	kfree(oa_config_ids);
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  404  	if (ret)
1c566ee2d38f4e Lionel Landwerlin 2019-08-30 @405  		return ret;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  406  
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  407  	return total_size;
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  408  }
1c566ee2d38f4e Lionel Landwerlin 2019-08-30  409  

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2019-09-02 20:09 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-30 14:47 [PATCH v12 00/11] drm/i915: Vulkan performance query support Lionel Landwerlin
2019-08-30 14:47 ` [PATCH v12 01/11] drm/i915: introduce a mechanism to extend execbuf2 Lionel Landwerlin
2019-08-30 14:47 ` [PATCH v12 02/11] drm/i915: add syncobj timeline support Lionel Landwerlin
2019-08-30 14:47 ` [PATCH v12 03/11] drm/i915/perf: drop list of streams Lionel Landwerlin
2019-08-30 14:47 ` [PATCH v12 04/11] drm/i915/perf: store the associated engine of a stream Lionel Landwerlin
2019-08-30 20:59   ` kbuild test robot
2019-08-30 14:47 ` [PATCH v12 05/11] drm/i915/perf: introduce a versioning of the i915-perf uapi Lionel Landwerlin
2019-08-30 14:47 ` [PATCH v12 06/11] drm/i915/perf: allow for CS OA configs to be created lazily Lionel Landwerlin
2019-08-30 21:39   ` kbuild test robot
2019-08-30 14:47 ` [PATCH v12 07/11] drm/i915/perf: implement active wait for noa configurations Lionel Landwerlin
2019-08-30 14:47 ` [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream Lionel Landwerlin
2019-08-30 15:48   ` Chris Wilson
2019-08-30 21:08     ` Lionel Landwerlin
2019-09-02 11:17     ` Lionel Landwerlin
2019-08-30 21:28   ` kbuild test robot
2019-08-31  4:04   ` kbuild test robot
2019-08-30 14:47 ` [PATCH v12 09/11] drm/i915: add a new perf configuration execbuf parameter Lionel Landwerlin
2019-08-30 15:32   ` Chris Wilson
2019-08-30 21:03     ` Lionel Landwerlin
2019-08-30 14:47 ` [PATCH v12 10/11] drm/i915/perf: allow holding preemption on filtered ctx Lionel Landwerlin
2019-08-30 14:47 ` [PATCH v12 11/11] drm/i915: add support for perf configuration queries Lionel Landwerlin
2019-09-02 20:08   ` Dan Carpenter
2019-08-30 16:23 ` ✗ Fi.CI.BUILD: failure for drm/i915: Vulkan performance query support (rev13) Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.