All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Flip guc_id allocation partition
@ 2022-01-13  3:38 ` Matthew Brost
  0 siblings, 0 replies; 11+ messages in thread
From: Matthew Brost @ 2022-01-13  3:38 UTC (permalink / raw)
  To: intel-gfx, dri-devel; +Cc: tvrtko.ursulin, piotr.piorkowski, michal.wajdeczko

Move the multi-lrc guc_id from the lower allocation partition (0 to
number of multi-lrc guc_ids) to upper allocation partition (number of
single-lrc to max guc_ids).

This will help when a native driver transitions to a PF after driver
load time. If the perma-pin guc_ids (kernel contexts) are in a low range
it is easy reduce total number of guc_ids as the allocated slrc are in a
valid range the mlrc range moves to an unused range. Assuming no mlrc
are allocated and few slrc are used the native to PF transition is
seamless for the guc_id resource.

v2:
 (Michal / Tvrtko)
  - Add an explaination to commit message of why this patch is needed
 (Michal / Piotr)
  - Replace marcos with functions
 (Michal)
  - Rework logic flow in new_mlrc_guc_id
  - Unconditionally call bitmap_free

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 88 +++++++++++++------
 1 file changed, 63 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 23a40f10d376d..3ae92260f8224 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -138,17 +138,6 @@ guc_create_parallel(struct intel_engine_cs **engines,
 
 #define GUC_REQUEST_SIZE 64 /* bytes */
 
-/*
- * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
- * per the GuC submission interface. A different allocation algorithm is used
- * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
- * partition the guc_id space. We believe the number of multi-lrc contexts in
- * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
- * multi-lrc.
- */
-#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
-	((guc)->submission_state.num_guc_ids / 16)
-
 /*
  * Below is a set of functions which control the GuC scheduling state which
  * require a lock.
@@ -1777,11 +1766,6 @@ int intel_guc_submission_init(struct intel_guc *guc)
 	INIT_WORK(&guc->submission_state.destroyed_worker,
 		  destroyed_worker_func);
 
-	guc->submission_state.guc_ids_bitmap =
-		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
-	if (!guc->submission_state.guc_ids_bitmap)
-		return -ENOMEM;
-
 	spin_lock_init(&guc->timestamp.lock);
 	INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
 	guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
@@ -1864,6 +1848,57 @@ static void guc_submit_request(struct i915_request *rq)
 	spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
+/*
+ * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
+ * per the GuC submission interface. A different allocation algorithm is used
+ * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
+ * partition the guc_id space. We believe the number of multi-lrc contexts in
+ * use should be low and 1/16 should be sufficient.
+ */
+#define MLRC_GUC_ID_RATIO	16
+
+static int number_mlrc_guc_id(struct intel_guc *guc)
+{
+	return guc->submission_state.num_guc_ids / MLRC_GUC_ID_RATIO;
+}
+
+static int number_slrc_guc_id(struct intel_guc *guc)
+{
+	return guc->submission_state.num_guc_ids - number_mlrc_guc_id(guc);
+}
+
+static int mlrc_guc_id_base(struct intel_guc *guc)
+{
+	return number_slrc_guc_id(guc);
+}
+
+static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	int ret;
+
+	GEM_BUG_ON(!intel_context_is_parent(ce));
+	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
+
+	ret =  bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
+				       number_mlrc_guc_id(guc),
+				       order_base_2(ce->parallel.number_children
+						    + 1));
+	if (unlikely(ret < 0))
+		return ret;
+
+	return ret + mlrc_guc_id_base(guc);
+}
+
+static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	GEM_BUG_ON(intel_context_is_parent(ce));
+
+	return ida_simple_get(&guc->submission_state.guc_ids,
+			      0, number_slrc_guc_id(guc),
+			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+			      __GFP_NOWARN);
+}
+
 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 {
 	int ret;
@@ -1871,16 +1906,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 	GEM_BUG_ON(intel_context_is_child(ce));
 
 	if (intel_context_is_parent(ce))
-		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
-					      NUMBER_MULTI_LRC_GUC_ID(guc),
-					      order_base_2(ce->parallel.number_children
-							   + 1));
+		ret = new_mlrc_guc_id(guc, ce);
 	else
-		ret = ida_simple_get(&guc->submission_state.guc_ids,
-				     NUMBER_MULTI_LRC_GUC_ID(guc),
-				     guc->submission_state.num_guc_ids,
-				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
-				     __GFP_NOWARN);
+		ret = new_slrc_guc_id(guc, ce);
+
 	if (unlikely(ret < 0))
 		return ret;
 
@@ -1990,6 +2019,15 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
 
 	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
 
+	/* Outside spin lock so we can sleep on alloc */
+	if (unlikely(intel_context_is_parent(ce) &&
+		     !guc->submission_state.guc_ids_bitmap)) {
+		guc->submission_state.guc_ids_bitmap =
+			bitmap_zalloc(number_mlrc_guc_id(guc), GFP_KERNEL);
+		if (!guc->submission_state.guc_ids_bitmap)
+			return -ENOMEM;
+	}
+
 try_again:
 	spin_lock_irqsave(&guc->submission_state.lock, flags);
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Intel-gfx] [PATCH] drm/i915: Flip guc_id allocation partition
@ 2022-01-13  3:38 ` Matthew Brost
  0 siblings, 0 replies; 11+ messages in thread
From: Matthew Brost @ 2022-01-13  3:38 UTC (permalink / raw)
  To: intel-gfx, dri-devel

Move the multi-lrc guc_id from the lower allocation partition (0 to
number of multi-lrc guc_ids) to upper allocation partition (number of
single-lrc to max guc_ids).

This will help when a native driver transitions to a PF after driver
load time. If the perma-pin guc_ids (kernel contexts) are in a low range
it is easy reduce total number of guc_ids as the allocated slrc are in a
valid range the mlrc range moves to an unused range. Assuming no mlrc
are allocated and few slrc are used the native to PF transition is
seamless for the guc_id resource.

v2:
 (Michal / Tvrtko)
  - Add an explaination to commit message of why this patch is needed
 (Michal / Piotr)
  - Replace marcos with functions
 (Michal)
  - Rework logic flow in new_mlrc_guc_id
  - Unconditionally call bitmap_free

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 88 +++++++++++++------
 1 file changed, 63 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 23a40f10d376d..3ae92260f8224 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -138,17 +138,6 @@ guc_create_parallel(struct intel_engine_cs **engines,
 
 #define GUC_REQUEST_SIZE 64 /* bytes */
 
-/*
- * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
- * per the GuC submission interface. A different allocation algorithm is used
- * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
- * partition the guc_id space. We believe the number of multi-lrc contexts in
- * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
- * multi-lrc.
- */
-#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
-	((guc)->submission_state.num_guc_ids / 16)
-
 /*
  * Below is a set of functions which control the GuC scheduling state which
  * require a lock.
@@ -1777,11 +1766,6 @@ int intel_guc_submission_init(struct intel_guc *guc)
 	INIT_WORK(&guc->submission_state.destroyed_worker,
 		  destroyed_worker_func);
 
-	guc->submission_state.guc_ids_bitmap =
-		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
-	if (!guc->submission_state.guc_ids_bitmap)
-		return -ENOMEM;
-
 	spin_lock_init(&guc->timestamp.lock);
 	INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
 	guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
@@ -1864,6 +1848,57 @@ static void guc_submit_request(struct i915_request *rq)
 	spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
+/*
+ * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
+ * per the GuC submission interface. A different allocation algorithm is used
+ * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
+ * partition the guc_id space. We believe the number of multi-lrc contexts in
+ * use should be low and 1/16 should be sufficient.
+ */
+#define MLRC_GUC_ID_RATIO	16
+
+static int number_mlrc_guc_id(struct intel_guc *guc)
+{
+	return guc->submission_state.num_guc_ids / MLRC_GUC_ID_RATIO;
+}
+
+static int number_slrc_guc_id(struct intel_guc *guc)
+{
+	return guc->submission_state.num_guc_ids - number_mlrc_guc_id(guc);
+}
+
+static int mlrc_guc_id_base(struct intel_guc *guc)
+{
+	return number_slrc_guc_id(guc);
+}
+
+static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	int ret;
+
+	GEM_BUG_ON(!intel_context_is_parent(ce));
+	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
+
+	ret =  bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
+				       number_mlrc_guc_id(guc),
+				       order_base_2(ce->parallel.number_children
+						    + 1));
+	if (unlikely(ret < 0))
+		return ret;
+
+	return ret + mlrc_guc_id_base(guc);
+}
+
+static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	GEM_BUG_ON(intel_context_is_parent(ce));
+
+	return ida_simple_get(&guc->submission_state.guc_ids,
+			      0, number_slrc_guc_id(guc),
+			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+			      __GFP_NOWARN);
+}
+
 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 {
 	int ret;
@@ -1871,16 +1906,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 	GEM_BUG_ON(intel_context_is_child(ce));
 
 	if (intel_context_is_parent(ce))
-		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
-					      NUMBER_MULTI_LRC_GUC_ID(guc),
-					      order_base_2(ce->parallel.number_children
-							   + 1));
+		ret = new_mlrc_guc_id(guc, ce);
 	else
-		ret = ida_simple_get(&guc->submission_state.guc_ids,
-				     NUMBER_MULTI_LRC_GUC_ID(guc),
-				     guc->submission_state.num_guc_ids,
-				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
-				     __GFP_NOWARN);
+		ret = new_slrc_guc_id(guc, ce);
+
 	if (unlikely(ret < 0))
 		return ret;
 
@@ -1990,6 +2019,15 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
 
 	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
 
+	/* Outside spin lock so we can sleep on alloc */
+	if (unlikely(intel_context_is_parent(ce) &&
+		     !guc->submission_state.guc_ids_bitmap)) {
+		guc->submission_state.guc_ids_bitmap =
+			bitmap_zalloc(number_mlrc_guc_id(guc), GFP_KERNEL);
+		if (!guc->submission_state.guc_ids_bitmap)
+			return -ENOMEM;
+	}
+
 try_again:
 	spin_lock_irqsave(&guc->submission_state.lock, flags);
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Flip guc_id allocation partition (rev2)
  2022-01-13  3:38 ` [Intel-gfx] " Matthew Brost
  (?)
@ 2022-01-13  4:38 ` Patchwork
  -1 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2022-01-13  4:38 UTC (permalink / raw)
  To: Matthew Brost; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 7004 bytes --]

== Series Details ==

Series: drm/i915: Flip guc_id allocation partition (rev2)
URL   : https://patchwork.freedesktop.org/series/98751/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_11075 -> Patchwork_21990
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_21990 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_21990, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/index.html

Participating hosts (44 -> 42)
------------------------------

  Missing    (2): fi-bsw-cyan fi-pnv-d510 

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_21990:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@guc_multi_lrc:
    - bat-dg1-6:          [PASS][1] -> [DMESG-FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11075/bat-dg1-6/igt@i915_selftest@live@guc_multi_lrc.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/bat-dg1-6/igt@i915_selftest@live@guc_multi_lrc.html

  
Known issues
------------

  Here are the changes found in Patchwork_21990 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@amdgpu/amd_basic@semaphore:
    - fi-bdw-5557u:       NOTRUN -> [SKIP][3] ([fdo#109271]) +31 similar issues
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/fi-bdw-5557u/igt@amdgpu/amd_basic@semaphore.html

  * igt@gem_flink_basic@bad-flink:
    - fi-skl-6600u:       [PASS][4] -> [FAIL][5] ([i915#4547])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11075/fi-skl-6600u/igt@gem_flink_basic@bad-flink.html
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/fi-skl-6600u/igt@gem_flink_basic@bad-flink.html

  * igt@i915_selftest@live@hangcheck:
    - bat-dg1-5:          NOTRUN -> [DMESG-FAIL][6] ([i915#4494])
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/bat-dg1-5/igt@i915_selftest@live@hangcheck.html
    - fi-hsw-4770:        [PASS][7] -> [INCOMPLETE][8] ([i915#4785])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11075/fi-hsw-4770/igt@i915_selftest@live@hangcheck.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/fi-hsw-4770/igt@i915_selftest@live@hangcheck.html

  * igt@kms_chamelium@dp-crc-fast:
    - fi-bdw-5557u:       NOTRUN -> [SKIP][9] ([fdo#109271] / [fdo#111827]) +8 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/fi-bdw-5557u/igt@kms_chamelium@dp-crc-fast.html

  * igt@kms_frontbuffer_tracking@basic:
    - fi-cml-u2:          [PASS][10] -> [DMESG-WARN][11] ([i915#4269])
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11075/fi-cml-u2/igt@kms_frontbuffer_tracking@basic.html
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/fi-cml-u2/igt@kms_frontbuffer_tracking@basic.html

  * igt@runner@aborted:
    - fi-skl-6600u:       NOTRUN -> [FAIL][12] ([i915#4312])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/fi-skl-6600u/igt@runner@aborted.html
    - fi-hsw-4770:        NOTRUN -> [FAIL][13] ([fdo#109271] / [i915#1436] / [i915#4312])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/fi-hsw-4770/igt@runner@aborted.html
    - bat-dg1-6:          NOTRUN -> [FAIL][14] ([i915#4214] / [i915#4312])
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/bat-dg1-6/igt@runner@aborted.html

  
#### Possible fixes ####

  * igt@core_auth@basic-auth:
    - {bat-jsl-2}:        [DMESG-WARN][15] ([i915#4562]) -> [PASS][16]
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11075/bat-jsl-2/igt@core_auth@basic-auth.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/bat-jsl-2/igt@core_auth@basic-auth.html

  * igt@gem_exec_suspend@basic-s3@smem:
    - fi-bdw-5557u:       [INCOMPLETE][17] ([i915#146]) -> [PASS][18]
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11075/fi-bdw-5557u/igt@gem_exec_suspend@basic-s3@smem.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/fi-bdw-5557u/igt@gem_exec_suspend@basic-s3@smem.html

  * igt@i915_selftest@live@gt_engines:
    - bat-dg1-5:          [INCOMPLETE][19] ([i915#4418]) -> [PASS][20]
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11075/bat-dg1-5/igt@i915_selftest@live@gt_engines.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/bat-dg1-5/igt@i915_selftest@live@gt_engines.html

  * igt@i915_selftest@live@hangcheck:
    - {fi-ehl-2}:         [DMESG-FAIL][21] -> [PASS][22]
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11075/fi-ehl-2/igt@i915_selftest@live@hangcheck.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/fi-ehl-2/igt@i915_selftest@live@hangcheck.html

  * igt@i915_selftest@live@hugepages:
    - {bat-adlp-6}:       [DMESG-WARN][23] ([i915#1888]) -> [PASS][24]
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_11075/bat-adlp-6/igt@i915_selftest@live@hugepages.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/bat-adlp-6/igt@i915_selftest@live@hugepages.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [i915#1436]: https://gitlab.freedesktop.org/drm/intel/issues/1436
  [i915#146]: https://gitlab.freedesktop.org/drm/intel/issues/146
  [i915#1888]: https://gitlab.freedesktop.org/drm/intel/issues/1888
  [i915#4214]: https://gitlab.freedesktop.org/drm/intel/issues/4214
  [i915#4269]: https://gitlab.freedesktop.org/drm/intel/issues/4269
  [i915#4312]: https://gitlab.freedesktop.org/drm/intel/issues/4312
  [i915#4418]: https://gitlab.freedesktop.org/drm/intel/issues/4418
  [i915#4494]: https://gitlab.freedesktop.org/drm/intel/issues/4494
  [i915#4547]: https://gitlab.freedesktop.org/drm/intel/issues/4547
  [i915#4562]: https://gitlab.freedesktop.org/drm/intel/issues/4562
  [i915#4785]: https://gitlab.freedesktop.org/drm/intel/issues/4785


Build changes
-------------

  * Linux: CI_DRM_11075 -> Patchwork_21990

  CI-20190529: 20190529
  CI_DRM_11075: 9b371f79eeb9a3867d2483e4038f22f89a854886 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6326: ec75f64fcbcf4aac58fbf1bf629e8f59b19db4ce @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_21990: 562c51a5dc5cad4239bd80a1f64a01e1092c1830 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

562c51a5dc5c drm/i915: Flip guc_id allocation partition

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21990/index.html

[-- Attachment #2: Type: text/html, Size: 8237 bytes --]

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: Flip guc_id allocation partition
  2022-02-02 22:15 ` Michal Wajdeczko
@ 2022-02-03 17:37   ` Matthew Brost
  0 siblings, 0 replies; 11+ messages in thread
From: Matthew Brost @ 2022-02-03 17:37 UTC (permalink / raw)
  To: Michal Wajdeczko; +Cc: intel-gfx, dri-devel

On Wed, Feb 02, 2022 at 11:15:00PM +0100, Michal Wajdeczko wrote:
> 
> 
> On 13.01.2022 17:27, Matthew Brost wrote:
> > Move the multi-lrc guc_id from the lower allocation partition (0 to
> > number of multi-lrc guc_ids) to upper allocation partition (number of
> > single-lrc to max guc_ids).
> > 
> > This will help when a native driver transitions to a PF after driver
> > load time. If the perma-pin guc_ids (kernel contexts) are in a low range
> > it is easy reduce total number of guc_ids as the allocated slrc are in a
> > valid range the mlrc range moves to an unused range. Assuming no mlrc
> > are allocated and few slrc are used the native to PF transition is
> > seamless for the guc_id resource.
> > 
> > v2:
> >  (Michal / Tvrtko)
> >   - Add an explaination to commit message of why this patch is needed
> >  (Michal / Piotr)
> >   - Replace marcos with functions
> >  (Michal)
> >   - Rework logic flow in new_mlrc_guc_id
> >   - Unconditionally call bitmap_free
> > v3:
> >  (Michal)
> >   - Move allocation of mlrc bitmap back submission init
> >  (CI)
> >   - Resend for CI
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 77 ++++++++++++++-----
> >  1 file changed, 56 insertions(+), 21 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 23a40f10d376d..fce58365b3ff8 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -138,17 +138,6 @@ guc_create_parallel(struct intel_engine_cs **engines,
> >  
> >  #define GUC_REQUEST_SIZE 64 /* bytes */
> >  
> > -/*
> > - * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
> > - * per the GuC submission interface. A different allocation algorithm is used
> > - * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
> > - * partition the guc_id space. We believe the number of multi-lrc contexts in
> > - * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
> > - * multi-lrc.
> > - */
> > -#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
> > -	((guc)->submission_state.num_guc_ids / 16)
> > -
> >  /*
> >   * Below is a set of functions which control the GuC scheduling state which
> >   * require a lock.
> > @@ -1746,6 +1735,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
> >  }
> >  
> >  static void destroyed_worker_func(struct work_struct *w);
> > +static int number_mlrc_guc_id(struct intel_guc *guc);
> >  
> >  /*
> >   * Set up the memory resources to be shared with the GuC (via the GGTT)
> > @@ -1778,7 +1768,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
> >  		  destroyed_worker_func);
> >  
> >  	guc->submission_state.guc_ids_bitmap =
> > -		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> > +		bitmap_zalloc(number_mlrc_guc_id(guc), GFP_KERNEL);
> 
> to fully benefit from the id partition flip we likely will have to
> allocate bitmap 'just-in-time' when first mlrc id is needed
> 

No. At worst over allocate memory and don't use it. At the current
ratio, number_mlrc_guc_id is 4k translates into a 1 page allocation.

> so something like you had in early rev but abandon to avoid alloc inside
> spinlock - but I'm wondering why we can't alloc bitmap for mlrc case,
> while we allow allocation for slrc (as ida_simple_get may alloc, no?
>

That is a good question, more on that below.
 
> >  	if (!guc->submission_state.guc_ids_bitmap)
> >  		return -ENOMEM;
> >  
> > @@ -1864,6 +1854,57 @@ static void guc_submit_request(struct i915_request *rq)
> >  	spin_unlock_irqrestore(&sched_engine->lock, flags);
> >  }
> >  
> > +/*
> > + * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
> > + * per the GuC submission interface. A different allocation algorithm is used
> > + * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
> > + * partition the guc_id space. We believe the number of multi-lrc contexts in
> > + * use should be low and 1/16 should be sufficient.
> 
> do we have any other numbers as guideline ?
> 
> while it is easy assumption that 1/16 from 64K contexts may be
> sufficient, what about 1/16 of 1K contexts ? will that work too ?
> 

Nope, just random split I choose. We might need to change this ratio on
a VF or enforce a minimum of mlrc (e.g. 128). Easy enough to tune as
needed.

> also, do we have to make hard split ? what if there will be no users for
> mlrc but more slrc contexts would be beneficial ? or the opposite ?
>

Hard split manages complexity. Could we cook some dynamic sharing
algorith, sure. Would be it be overly complex and unnecessary, almost
certainly. The only thing I can see changing is the ratio on a VF if
needed.

> > + */
> > +#define MLRC_GUC_ID_RATIO	16
> > +
> > +static int number_mlrc_guc_id(struct intel_guc *guc)
> > +{
> > +	return guc->submission_state.num_guc_ids / MLRC_GUC_ID_RATIO;
> > +}
> > +
> > +static int number_slrc_guc_id(struct intel_guc *guc)
> > +{
> > +	return guc->submission_state.num_guc_ids - number_mlrc_guc_id(guc);
> > +}
> > +
> > +static int mlrc_guc_id_base(struct intel_guc *guc)
> > +{
> > +	return number_slrc_guc_id(guc);
> > +}
> > +
> > +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> > +{
> > +	int ret;
> > +
> > +	GEM_BUG_ON(!intel_context_is_parent(ce));
> > +	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
> > +
> > +	ret =  bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> > +				       number_mlrc_guc_id(guc),
> > +				       order_base_2(ce->parallel.number_children
> > +						    + 1));
> > +	if (unlikely(ret < 0))
> > +		return ret;
> > +
> > +	return ret + mlrc_guc_id_base(guc);
> > +}
> > +
> > +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> > +{
> > +	GEM_BUG_ON(intel_context_is_parent(ce));
> > +
> > +	return ida_simple_get(&guc->submission_state.guc_ids,
> > +			      0, number_slrc_guc_id(guc),
> > +			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> > +			      __GFP_NOWARN);
> 
> are you sure that these gfp flags are safe for use under spinlock ?
> 

GFP_KERNEL shouldn't be used under a spin lock but this has never blown
up in CI and we used to have tests that allocated every single guc id
which seems odd. Regardless this probably should be changed to
GFP_ATOMIC.

Matt

> -Michal
> 
> > +}
> > +
> >  static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
> >  {
> >  	int ret;
> > @@ -1871,16 +1912,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
> >  	GEM_BUG_ON(intel_context_is_child(ce));
> >  
> >  	if (intel_context_is_parent(ce))
> > -		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> > -					      NUMBER_MULTI_LRC_GUC_ID(guc),
> > -					      order_base_2(ce->parallel.number_children
> > -							   + 1));
> > +		ret = new_mlrc_guc_id(guc, ce);
> >  	else
> > -		ret = ida_simple_get(&guc->submission_state.guc_ids,
> > -				     NUMBER_MULTI_LRC_GUC_ID(guc),
> > -				     guc->submission_state.num_guc_ids,
> > -				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> > -				     __GFP_NOWARN);
> > +		ret = new_slrc_guc_id(guc, ce);
> > +
> >  	if (unlikely(ret < 0))
> >  		return ret;
> >  

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: Flip guc_id allocation partition
  2022-01-13 16:27 [PATCH] drm/i915: Flip guc_id allocation partition Matthew Brost
@ 2022-02-02 22:15 ` Michal Wajdeczko
  2022-02-03 17:37   ` Matthew Brost
  0 siblings, 1 reply; 11+ messages in thread
From: Michal Wajdeczko @ 2022-02-02 22:15 UTC (permalink / raw)
  To: Matthew Brost, intel-gfx, dri-devel



On 13.01.2022 17:27, Matthew Brost wrote:
> Move the multi-lrc guc_id from the lower allocation partition (0 to
> number of multi-lrc guc_ids) to upper allocation partition (number of
> single-lrc to max guc_ids).
> 
> This will help when a native driver transitions to a PF after driver
> load time. If the perma-pin guc_ids (kernel contexts) are in a low range
> it is easy reduce total number of guc_ids as the allocated slrc are in a
> valid range the mlrc range moves to an unused range. Assuming no mlrc
> are allocated and few slrc are used the native to PF transition is
> seamless for the guc_id resource.
> 
> v2:
>  (Michal / Tvrtko)
>   - Add an explaination to commit message of why this patch is needed
>  (Michal / Piotr)
>   - Replace marcos with functions
>  (Michal)
>   - Rework logic flow in new_mlrc_guc_id
>   - Unconditionally call bitmap_free
> v3:
>  (Michal)
>   - Move allocation of mlrc bitmap back submission init
>  (CI)
>   - Resend for CI
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 77 ++++++++++++++-----
>  1 file changed, 56 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 23a40f10d376d..fce58365b3ff8 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -138,17 +138,6 @@ guc_create_parallel(struct intel_engine_cs **engines,
>  
>  #define GUC_REQUEST_SIZE 64 /* bytes */
>  
> -/*
> - * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
> - * per the GuC submission interface. A different allocation algorithm is used
> - * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
> - * partition the guc_id space. We believe the number of multi-lrc contexts in
> - * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
> - * multi-lrc.
> - */
> -#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
> -	((guc)->submission_state.num_guc_ids / 16)
> -
>  /*
>   * Below is a set of functions which control the GuC scheduling state which
>   * require a lock.
> @@ -1746,6 +1735,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
>  }
>  
>  static void destroyed_worker_func(struct work_struct *w);
> +static int number_mlrc_guc_id(struct intel_guc *guc);
>  
>  /*
>   * Set up the memory resources to be shared with the GuC (via the GGTT)
> @@ -1778,7 +1768,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
>  		  destroyed_worker_func);
>  
>  	guc->submission_state.guc_ids_bitmap =
> -		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> +		bitmap_zalloc(number_mlrc_guc_id(guc), GFP_KERNEL);

to fully benefit from the id partition flip we likely will have to
allocate bitmap 'just-in-time' when first mlrc id is needed

so something like you had in early rev but abandon to avoid alloc inside
spinlock - but I'm wondering why we can't alloc bitmap for mlrc case,
while we allow allocation for slrc (as ida_simple_get may alloc, no?

>  	if (!guc->submission_state.guc_ids_bitmap)
>  		return -ENOMEM;
>  
> @@ -1864,6 +1854,57 @@ static void guc_submit_request(struct i915_request *rq)
>  	spin_unlock_irqrestore(&sched_engine->lock, flags);
>  }
>  
> +/*
> + * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
> + * per the GuC submission interface. A different allocation algorithm is used
> + * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
> + * partition the guc_id space. We believe the number of multi-lrc contexts in
> + * use should be low and 1/16 should be sufficient.

do we have any other numbers as guideline ?

while it is easy assumption that 1/16 from 64K contexts may be
sufficient, what about 1/16 of 1K contexts ? will that work too ?

also, do we have to make hard split ? what if there will be no users for
mlrc but more slrc contexts would be beneficial ? or the opposite ?

> + */
> +#define MLRC_GUC_ID_RATIO	16
> +
> +static int number_mlrc_guc_id(struct intel_guc *guc)
> +{
> +	return guc->submission_state.num_guc_ids / MLRC_GUC_ID_RATIO;
> +}
> +
> +static int number_slrc_guc_id(struct intel_guc *guc)
> +{
> +	return guc->submission_state.num_guc_ids - number_mlrc_guc_id(guc);
> +}
> +
> +static int mlrc_guc_id_base(struct intel_guc *guc)
> +{
> +	return number_slrc_guc_id(guc);
> +}
> +
> +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	int ret;
> +
> +	GEM_BUG_ON(!intel_context_is_parent(ce));
> +	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
> +
> +	ret =  bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> +				       number_mlrc_guc_id(guc),
> +				       order_base_2(ce->parallel.number_children
> +						    + 1));
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	return ret + mlrc_guc_id_base(guc);
> +}
> +
> +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	GEM_BUG_ON(intel_context_is_parent(ce));
> +
> +	return ida_simple_get(&guc->submission_state.guc_ids,
> +			      0, number_slrc_guc_id(guc),
> +			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> +			      __GFP_NOWARN);

are you sure that these gfp flags are safe for use under spinlock ?

-Michal

> +}
> +
>  static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>  {
>  	int ret;
> @@ -1871,16 +1912,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>  	GEM_BUG_ON(intel_context_is_child(ce));
>  
>  	if (intel_context_is_parent(ce))
> -		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> -					      NUMBER_MULTI_LRC_GUC_ID(guc),
> -					      order_base_2(ce->parallel.number_children
> -							   + 1));
> +		ret = new_mlrc_guc_id(guc, ce);
>  	else
> -		ret = ida_simple_get(&guc->submission_state.guc_ids,
> -				     NUMBER_MULTI_LRC_GUC_ID(guc),
> -				     guc->submission_state.num_guc_ids,
> -				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> -				     __GFP_NOWARN);
> +		ret = new_slrc_guc_id(guc, ce);
> +
>  	if (unlikely(ret < 0))
>  		return ret;
>  

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] drm/i915: Flip guc_id allocation partition
@ 2022-01-13 16:27 Matthew Brost
  2022-02-02 22:15 ` Michal Wajdeczko
  0 siblings, 1 reply; 11+ messages in thread
From: Matthew Brost @ 2022-01-13 16:27 UTC (permalink / raw)
  To: intel-gfx, dri-devel; +Cc: michal.wajdeczko

Move the multi-lrc guc_id from the lower allocation partition (0 to
number of multi-lrc guc_ids) to upper allocation partition (number of
single-lrc to max guc_ids).

This will help when a native driver transitions to a PF after driver
load time. If the perma-pin guc_ids (kernel contexts) are in a low range
it is easy reduce total number of guc_ids as the allocated slrc are in a
valid range the mlrc range moves to an unused range. Assuming no mlrc
are allocated and few slrc are used the native to PF transition is
seamless for the guc_id resource.

v2:
 (Michal / Tvrtko)
  - Add an explaination to commit message of why this patch is needed
 (Michal / Piotr)
  - Replace marcos with functions
 (Michal)
  - Rework logic flow in new_mlrc_guc_id
  - Unconditionally call bitmap_free
v3:
 (Michal)
  - Move allocation of mlrc bitmap back submission init
 (CI)
  - Resend for CI

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 77 ++++++++++++++-----
 1 file changed, 56 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 23a40f10d376d..fce58365b3ff8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -138,17 +138,6 @@ guc_create_parallel(struct intel_engine_cs **engines,
 
 #define GUC_REQUEST_SIZE 64 /* bytes */
 
-/*
- * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
- * per the GuC submission interface. A different allocation algorithm is used
- * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
- * partition the guc_id space. We believe the number of multi-lrc contexts in
- * use should be low and 1/16 should be sufficient. Minimum of 32 guc_ids for
- * multi-lrc.
- */
-#define NUMBER_MULTI_LRC_GUC_ID(guc)	\
-	((guc)->submission_state.num_guc_ids / 16)
-
 /*
  * Below is a set of functions which control the GuC scheduling state which
  * require a lock.
@@ -1746,6 +1735,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
 }
 
 static void destroyed_worker_func(struct work_struct *w);
+static int number_mlrc_guc_id(struct intel_guc *guc);
 
 /*
  * Set up the memory resources to be shared with the GuC (via the GGTT)
@@ -1778,7 +1768,7 @@ int intel_guc_submission_init(struct intel_guc *guc)
 		  destroyed_worker_func);
 
 	guc->submission_state.guc_ids_bitmap =
-		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
+		bitmap_zalloc(number_mlrc_guc_id(guc), GFP_KERNEL);
 	if (!guc->submission_state.guc_ids_bitmap)
 		return -ENOMEM;
 
@@ -1864,6 +1854,57 @@ static void guc_submit_request(struct i915_request *rq)
 	spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
+/*
+ * We reserve 1/16 of the guc_ids for multi-lrc as these need to be contiguous
+ * per the GuC submission interface. A different allocation algorithm is used
+ * (bitmap vs. ida) between multi-lrc and single-lrc hence the reason to
+ * partition the guc_id space. We believe the number of multi-lrc contexts in
+ * use should be low and 1/16 should be sufficient.
+ */
+#define MLRC_GUC_ID_RATIO	16
+
+static int number_mlrc_guc_id(struct intel_guc *guc)
+{
+	return guc->submission_state.num_guc_ids / MLRC_GUC_ID_RATIO;
+}
+
+static int number_slrc_guc_id(struct intel_guc *guc)
+{
+	return guc->submission_state.num_guc_ids - number_mlrc_guc_id(guc);
+}
+
+static int mlrc_guc_id_base(struct intel_guc *guc)
+{
+	return number_slrc_guc_id(guc);
+}
+
+static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	int ret;
+
+	GEM_BUG_ON(!intel_context_is_parent(ce));
+	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
+
+	ret =  bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
+				       number_mlrc_guc_id(guc),
+				       order_base_2(ce->parallel.number_children
+						    + 1));
+	if (unlikely(ret < 0))
+		return ret;
+
+	return ret + mlrc_guc_id_base(guc);
+}
+
+static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	GEM_BUG_ON(intel_context_is_parent(ce));
+
+	return ida_simple_get(&guc->submission_state.guc_ids,
+			      0, number_slrc_guc_id(guc),
+			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+			      __GFP_NOWARN);
+}
+
 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 {
 	int ret;
@@ -1871,16 +1912,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 	GEM_BUG_ON(intel_context_is_child(ce));
 
 	if (intel_context_is_parent(ce))
-		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
-					      NUMBER_MULTI_LRC_GUC_ID(guc),
-					      order_base_2(ce->parallel.number_children
-							   + 1));
+		ret = new_mlrc_guc_id(guc, ce);
 	else
-		ret = ida_simple_get(&guc->submission_state.guc_ids,
-				     NUMBER_MULTI_LRC_GUC_ID(guc),
-				     guc->submission_state.num_guc_ids,
-				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
-				     __GFP_NOWARN);
+		ret = new_slrc_guc_id(guc, ce);
+
 	if (unlikely(ret < 0))
 		return ret;
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: Flip guc_id allocation partition
  2022-01-13 14:18     ` Michal Wajdeczko
@ 2022-01-13 16:00       ` Matthew Brost
  0 siblings, 0 replies; 11+ messages in thread
From: Matthew Brost @ 2022-01-13 16:00 UTC (permalink / raw)
  To: Michal Wajdeczko
  Cc: intel-gfx, daniele.ceraolospurio, john.c.harrison, dri-devel

On Thu, Jan 13, 2022 at 03:18:14PM +0100, Michal Wajdeczko wrote:
> 
> 
> On 13.01.2022 00:26, Matthew Brost wrote:
> > On Thu, Jan 13, 2022 at 12:21:17AM +0100, Michal Wajdeczko wrote:
> >> On 11.01.2022 17:30, Matthew Brost wrote:
> 
> ...
> 
> >>> @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq)
> >>>  	spin_unlock_irqrestore(&sched_engine->lock, flags);
> >>>  }
> >>>  
> >>> +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> >>> +{
> >>> +	int ret;
> >>> +
> >>> +	GEM_BUG_ON(!intel_context_is_parent(ce));
> >>> +	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
> >>> +
> >>> +	ret =  bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> >>> +				       NUMBER_MULTI_LRC_GUC_ID(guc),
> >>> +				       order_base_2(ce->parallel.number_children
> >>> +						    + 1));
> >>
> >> btw, is there any requirement (GuC ABI ?) that allocated ids need
> >> to be allocated with power of 2 alignment ? I don't think that we
> >> must optimize that hard and in some cases waste extra ids (as we might
> >> be limited on some configs)
> >>
> > 
> > No pow2 requirement in GuC ABI, bitmaps only work on pow2 alignment and
> > didn't optmize this.
> >
> 
> there is a slower variant of "find" function:
> 
> bitmap_find_next_zero_area - find a contiguous aligned zero area
> 
> that does not have this limitation
> 

Ah, wasn't aware of this. If this becomes an issue (running of multi-lrc
ids) for customers I suppose this is the first thing we can do to try to
address this. For now, I think we leave it as is.

> ..
> 
> 
> >>> @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
> >>>  
> >>>  	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
> >>>  
> >>> +	if (unlikely(intel_context_is_parent(ce) &&
> >>> +		     !guc->submission_state.guc_ids_bitmap)) {
> >>> +		guc->submission_state.guc_ids_bitmap =
> >>> +			bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> >>> +		if (!guc->submission_state.guc_ids_bitmap)
> >>> +			return -ENOMEM;
> >>> +	}
> >>
> >> maybe move this chunk to new_mlrc_guc_id() ?
> >> or we can't due to the spin_lock below ?
> >> but then how do you protect guc_ids_bitmap pointer itself ?
> >>
> > 
> > Can't use GFP_KERNEL inside a spin lock...
> > 
> 
> ok, but what if there will be two or more parallel calls to pin_guc_id()
> with all being first parent context? each will see NULL guc_ids_bitmap..
> or there is another layer of synchronization?
> 

Good catch. Yes, it techincally possible two multi-lrc contexts to try
to allocate at the same time. I guess I should just do this at driver
load time + allocate the maximum number of multi-lrc ids and possibly
waste a bit of memory on a PF or VF.

Matt

> -Michal

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: Flip guc_id allocation partition
  2022-01-12 23:26   ` Matthew Brost
@ 2022-01-13 14:18     ` Michal Wajdeczko
  2022-01-13 16:00       ` Matthew Brost
  0 siblings, 1 reply; 11+ messages in thread
From: Michal Wajdeczko @ 2022-01-13 14:18 UTC (permalink / raw)
  To: Matthew Brost
  Cc: intel-gfx, daniele.ceraolospurio, john.c.harrison, dri-devel



On 13.01.2022 00:26, Matthew Brost wrote:
> On Thu, Jan 13, 2022 at 12:21:17AM +0100, Michal Wajdeczko wrote:
>> On 11.01.2022 17:30, Matthew Brost wrote:

...

>>> @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq)
>>>  	spin_unlock_irqrestore(&sched_engine->lock, flags);
>>>  }
>>>  
>>> +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
>>> +{
>>> +	int ret;
>>> +
>>> +	GEM_BUG_ON(!intel_context_is_parent(ce));
>>> +	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
>>> +
>>> +	ret =  bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
>>> +				       NUMBER_MULTI_LRC_GUC_ID(guc),
>>> +				       order_base_2(ce->parallel.number_children
>>> +						    + 1));
>>
>> btw, is there any requirement (GuC ABI ?) that allocated ids need
>> to be allocated with power of 2 alignment ? I don't think that we
>> must optimize that hard and in some cases waste extra ids (as we might
>> be limited on some configs)
>>
> 
> No pow2 requirement in GuC ABI, bitmaps only work on pow2 alignment and
> didn't optmize this.
>

there is a slower variant of "find" function:

bitmap_find_next_zero_area - find a contiguous aligned zero area

that does not have this limitation

..


>>> @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
>>>  
>>>  	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
>>>  
>>> +	if (unlikely(intel_context_is_parent(ce) &&
>>> +		     !guc->submission_state.guc_ids_bitmap)) {
>>> +		guc->submission_state.guc_ids_bitmap =
>>> +			bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
>>> +		if (!guc->submission_state.guc_ids_bitmap)
>>> +			return -ENOMEM;
>>> +	}
>>
>> maybe move this chunk to new_mlrc_guc_id() ?
>> or we can't due to the spin_lock below ?
>> but then how do you protect guc_ids_bitmap pointer itself ?
>>
> 
> Can't use GFP_KERNEL inside a spin lock...
> 

ok, but what if there will be two or more parallel calls to pin_guc_id()
with all being first parent context? each will see NULL guc_ids_bitmap..
or there is another layer of synchronization?

-Michal

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: Flip guc_id allocation partition
  2022-01-12 23:21 ` Michal Wajdeczko
@ 2022-01-12 23:26   ` Matthew Brost
  2022-01-13 14:18     ` Michal Wajdeczko
  0 siblings, 1 reply; 11+ messages in thread
From: Matthew Brost @ 2022-01-12 23:26 UTC (permalink / raw)
  To: Michal Wajdeczko
  Cc: intel-gfx, daniele.ceraolospurio, john.c.harrison, dri-devel

On Thu, Jan 13, 2022 at 12:21:17AM +0100, Michal Wajdeczko wrote:
> 
> 
> On 11.01.2022 17:30, Matthew Brost wrote:
> > Move the multi-lrc guc_id from the lower allocation partition (0 to
> > number of multi-lrc guc_ids) to upper allocation partition (number of
> > single-lrc to max guc_ids).
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 57 ++++++++++++++-----
> >  1 file changed, 42 insertions(+), 15 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 9989d121127df..1bacc9621cea8 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -147,6 +147,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
> >   */
> >  #define NUMBER_MULTI_LRC_GUC_ID(guc)	\
> >  	((guc)->submission_state.num_guc_ids / 16)
> > +#define NUMBER_SINGLE_LRC_GUC_ID(guc)	\
> > +	((guc)->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc))
> 
> above two will likely look better if converted into inline functions, or
> even better if we explicitly store slrc/mlrc upper/lower id limits under
> guc submission state
> 

Definitely inline functions, or I guess variables work too but that
might be overkill. Let me play around with this and see how it looks.

> >  
> >  /*
> >   * Below is a set of functions which control the GuC scheduling state which
> > @@ -1776,11 +1778,6 @@ int intel_guc_submission_init(struct intel_guc *guc)
> >  	INIT_WORK(&guc->submission_state.destroyed_worker,
> >  		  destroyed_worker_func);
> >  
> > -	guc->submission_state.guc_ids_bitmap =
> > -		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> > -	if (!guc->submission_state.guc_ids_bitmap)
> > -		return -ENOMEM;
> > -
> >  	spin_lock_init(&guc->timestamp.lock);
> >  	INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
> >  	guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
> > @@ -1796,7 +1793,8 @@ void intel_guc_submission_fini(struct intel_guc *guc)
> >  	guc_flush_destroyed_contexts(guc);
> >  	guc_lrc_desc_pool_destroy(guc);
> >  	i915_sched_engine_put(guc->sched_engine);
> > -	bitmap_free(guc->submission_state.guc_ids_bitmap);
> > +	if (guc->submission_state.guc_ids_bitmap)
> > +		bitmap_free(guc->submission_state.guc_ids_bitmap);
> 
> it should be fine to pass NULL to bitmap_free, no?
>

Probably? I'll double check on this.
 
> >  }
> >  
> >  static inline void queue_request(struct i915_sched_engine *sched_engine,
> > @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq)
> >  	spin_unlock_irqrestore(&sched_engine->lock, flags);
> >  }
> >  
> > +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> > +{
> > +	int ret;
> > +
> > +	GEM_BUG_ON(!intel_context_is_parent(ce));
> > +	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
> > +
> > +	ret =  bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> > +				       NUMBER_MULTI_LRC_GUC_ID(guc),
> > +				       order_base_2(ce->parallel.number_children
> > +						    + 1));
> 
> btw, is there any requirement (GuC ABI ?) that allocated ids need
> to be allocated with power of 2 alignment ? I don't think that we
> must optimize that hard and in some cases waste extra ids (as we might
> be limited on some configs)
> 

No pow2 requirement in GuC ABI, bitmaps only work on pow2 alignment and
didn't optmize this.

> > +	if (likely(!(ret < 0)))
> > +		ret += NUMBER_SINGLE_LRC_GUC_ID(guc);
> 
> nit: more readable would be
> 
> 	if (unlikely(ret < 0))
> 		return ret;
> 
> 	return ret + guc->submission_state.mlrc_base;
> 

Sure.

> > +
> > +	return ret;
> > +}
> > +
> > +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> > +{
> > +	GEM_BUG_ON(intel_context_is_parent(ce));
> 
> do we really need ce here ?
>

Just for the GEM_BUG_ON... Can remove if it is a big deal.

> > +
> > +	return ida_simple_get(&guc->submission_state.guc_ids,
> > +			      0, NUMBER_SINGLE_LRC_GUC_ID(guc),
> 
> if we change the logic of NUMBER_SINGLE/MULTI_LRC_GUC_ID macros from
> static split into more dynamic, then we could likely implement lazy
> increase of available slrc/mlrc id limits on demand, within available
> range, without deciding upfront of the hardcoded split 15 : 1
> 
> but this can be done next time ;)
>

Yea I guess. Doubt we need anything beyond a static split tho.
 
> > +			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> > +			      __GFP_NOWARN);
> > +}
> > +
> >  static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
> >  {
> >  	int ret;
> > @@ -1870,16 +1895,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
> >  	GEM_BUG_ON(intel_context_is_child(ce));
> >  
> >  	if (intel_context_is_parent(ce))
> > -		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> > -					      NUMBER_MULTI_LRC_GUC_ID(guc),
> > -					      order_base_2(ce->parallel.number_children
> > -							   + 1));
> > +		ret = new_mlrc_guc_id(guc, ce);
> >  	else
> > -		ret = ida_simple_get(&guc->submission_state.guc_ids,
> > -				     NUMBER_MULTI_LRC_GUC_ID(guc),
> > -				     guc->submission_state.num_guc_ids,
> > -				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> > -				     __GFP_NOWARN);
> > +		ret = new_slrc_guc_id(guc, ce);
> > +
> 
> with above helpers introduced, shouldn't we move code from new_guc_id()
> to assign_guc_id() ?
> 

Why add inline to code to assign_guc_id?

> >  	if (unlikely(ret < 0))
> >  		return ret;
> >  
> > @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
> >  
> >  	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
> >  
> > +	if (unlikely(intel_context_is_parent(ce) &&
> > +		     !guc->submission_state.guc_ids_bitmap)) {
> > +		guc->submission_state.guc_ids_bitmap =
> > +			bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> > +		if (!guc->submission_state.guc_ids_bitmap)
> > +			return -ENOMEM;
> > +	}
> 
> maybe move this chunk to new_mlrc_guc_id() ?
> or we can't due to the spin_lock below ?
> but then how do you protect guc_ids_bitmap pointer itself ?
> 

Can't use GFP_KERNEL inside a spin lock...

Matt

> -Michal
> 
> > +
> >  try_again:
> >  	spin_lock_irqsave(&guc->submission_state.lock, flags);
> >  

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] drm/i915: Flip guc_id allocation partition
  2022-01-11 16:30 Matthew Brost
@ 2022-01-12 23:21 ` Michal Wajdeczko
  2022-01-12 23:26   ` Matthew Brost
  0 siblings, 1 reply; 11+ messages in thread
From: Michal Wajdeczko @ 2022-01-12 23:21 UTC (permalink / raw)
  To: Matthew Brost, intel-gfx, dri-devel
  Cc: daniele.ceraolospurio, john.c.harrison



On 11.01.2022 17:30, Matthew Brost wrote:
> Move the multi-lrc guc_id from the lower allocation partition (0 to
> number of multi-lrc guc_ids) to upper allocation partition (number of
> single-lrc to max guc_ids).
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 57 ++++++++++++++-----
>  1 file changed, 42 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 9989d121127df..1bacc9621cea8 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -147,6 +147,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
>   */
>  #define NUMBER_MULTI_LRC_GUC_ID(guc)	\
>  	((guc)->submission_state.num_guc_ids / 16)
> +#define NUMBER_SINGLE_LRC_GUC_ID(guc)	\
> +	((guc)->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc))

above two will likely look better if converted into inline functions, or
even better if we explicitly store slrc/mlrc upper/lower id limits under
guc submission state

>  
>  /*
>   * Below is a set of functions which control the GuC scheduling state which
> @@ -1776,11 +1778,6 @@ int intel_guc_submission_init(struct intel_guc *guc)
>  	INIT_WORK(&guc->submission_state.destroyed_worker,
>  		  destroyed_worker_func);
>  
> -	guc->submission_state.guc_ids_bitmap =
> -		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> -	if (!guc->submission_state.guc_ids_bitmap)
> -		return -ENOMEM;
> -
>  	spin_lock_init(&guc->timestamp.lock);
>  	INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
>  	guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
> @@ -1796,7 +1793,8 @@ void intel_guc_submission_fini(struct intel_guc *guc)
>  	guc_flush_destroyed_contexts(guc);
>  	guc_lrc_desc_pool_destroy(guc);
>  	i915_sched_engine_put(guc->sched_engine);
> -	bitmap_free(guc->submission_state.guc_ids_bitmap);
> +	if (guc->submission_state.guc_ids_bitmap)
> +		bitmap_free(guc->submission_state.guc_ids_bitmap);

it should be fine to pass NULL to bitmap_free, no?

>  }
>  
>  static inline void queue_request(struct i915_sched_engine *sched_engine,
> @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq)
>  	spin_unlock_irqrestore(&sched_engine->lock, flags);
>  }
>  
> +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	int ret;
> +
> +	GEM_BUG_ON(!intel_context_is_parent(ce));
> +	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
> +
> +	ret =  bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> +				       NUMBER_MULTI_LRC_GUC_ID(guc),
> +				       order_base_2(ce->parallel.number_children
> +						    + 1));

btw, is there any requirement (GuC ABI ?) that allocated ids need
to be allocated with power of 2 alignment ? I don't think that we
must optimize that hard and in some cases waste extra ids (as we might
be limited on some configs)

> +	if (likely(!(ret < 0)))
> +		ret += NUMBER_SINGLE_LRC_GUC_ID(guc);

nit: more readable would be

	if (unlikely(ret < 0))
		return ret;

	return ret + guc->submission_state.mlrc_base;

> +
> +	return ret;
> +}
> +
> +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	GEM_BUG_ON(intel_context_is_parent(ce));

do we really need ce here ?

> +
> +	return ida_simple_get(&guc->submission_state.guc_ids,
> +			      0, NUMBER_SINGLE_LRC_GUC_ID(guc),

if we change the logic of NUMBER_SINGLE/MULTI_LRC_GUC_ID macros from
static split into more dynamic, then we could likely implement lazy
increase of available slrc/mlrc id limits on demand, within available
range, without deciding upfront of the hardcoded split 15 : 1

but this can be done next time ;)

> +			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> +			      __GFP_NOWARN);
> +}
> +
>  static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>  {
>  	int ret;
> @@ -1870,16 +1895,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>  	GEM_BUG_ON(intel_context_is_child(ce));
>  
>  	if (intel_context_is_parent(ce))
> -		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> -					      NUMBER_MULTI_LRC_GUC_ID(guc),
> -					      order_base_2(ce->parallel.number_children
> -							   + 1));
> +		ret = new_mlrc_guc_id(guc, ce);
>  	else
> -		ret = ida_simple_get(&guc->submission_state.guc_ids,
> -				     NUMBER_MULTI_LRC_GUC_ID(guc),
> -				     guc->submission_state.num_guc_ids,
> -				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> -				     __GFP_NOWARN);
> +		ret = new_slrc_guc_id(guc, ce);
> +

with above helpers introduced, shouldn't we move code from new_guc_id()
to assign_guc_id() ?

>  	if (unlikely(ret < 0))
>  		return ret;
>  
> @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
>  
>  	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
>  
> +	if (unlikely(intel_context_is_parent(ce) &&
> +		     !guc->submission_state.guc_ids_bitmap)) {
> +		guc->submission_state.guc_ids_bitmap =
> +			bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> +		if (!guc->submission_state.guc_ids_bitmap)
> +			return -ENOMEM;
> +	}

maybe move this chunk to new_mlrc_guc_id() ?
or we can't due to the spin_lock below ?
but then how do you protect guc_ids_bitmap pointer itself ?

-Michal

> +
>  try_again:
>  	spin_lock_irqsave(&guc->submission_state.lock, flags);
>  

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH] drm/i915: Flip guc_id allocation partition
@ 2022-01-11 16:30 Matthew Brost
  2022-01-12 23:21 ` Michal Wajdeczko
  0 siblings, 1 reply; 11+ messages in thread
From: Matthew Brost @ 2022-01-11 16:30 UTC (permalink / raw)
  To: intel-gfx, dri-devel
  Cc: daniele.ceraolospurio, john.c.harrison, michal.wajdeczko

Move the multi-lrc guc_id from the lower allocation partition (0 to
number of multi-lrc guc_ids) to upper allocation partition (number of
single-lrc to max guc_ids).

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 57 ++++++++++++++-----
 1 file changed, 42 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 9989d121127df..1bacc9621cea8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -147,6 +147,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
  */
 #define NUMBER_MULTI_LRC_GUC_ID(guc)	\
 	((guc)->submission_state.num_guc_ids / 16)
+#define NUMBER_SINGLE_LRC_GUC_ID(guc)	\
+	((guc)->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc))
 
 /*
  * Below is a set of functions which control the GuC scheduling state which
@@ -1776,11 +1778,6 @@ int intel_guc_submission_init(struct intel_guc *guc)
 	INIT_WORK(&guc->submission_state.destroyed_worker,
 		  destroyed_worker_func);
 
-	guc->submission_state.guc_ids_bitmap =
-		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
-	if (!guc->submission_state.guc_ids_bitmap)
-		return -ENOMEM;
-
 	spin_lock_init(&guc->timestamp.lock);
 	INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
 	guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
@@ -1796,7 +1793,8 @@ void intel_guc_submission_fini(struct intel_guc *guc)
 	guc_flush_destroyed_contexts(guc);
 	guc_lrc_desc_pool_destroy(guc);
 	i915_sched_engine_put(guc->sched_engine);
-	bitmap_free(guc->submission_state.guc_ids_bitmap);
+	if (guc->submission_state.guc_ids_bitmap)
+		bitmap_free(guc->submission_state.guc_ids_bitmap);
 }
 
 static inline void queue_request(struct i915_sched_engine *sched_engine,
@@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq)
 	spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
+static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	int ret;
+
+	GEM_BUG_ON(!intel_context_is_parent(ce));
+	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
+
+	ret =  bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
+				       NUMBER_MULTI_LRC_GUC_ID(guc),
+				       order_base_2(ce->parallel.number_children
+						    + 1));
+	if (likely(!(ret < 0)))
+		ret += NUMBER_SINGLE_LRC_GUC_ID(guc);
+
+	return ret;
+}
+
+static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	GEM_BUG_ON(intel_context_is_parent(ce));
+
+	return ida_simple_get(&guc->submission_state.guc_ids,
+			      0, NUMBER_SINGLE_LRC_GUC_ID(guc),
+			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+			      __GFP_NOWARN);
+}
+
 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 {
 	int ret;
@@ -1870,16 +1895,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 	GEM_BUG_ON(intel_context_is_child(ce));
 
 	if (intel_context_is_parent(ce))
-		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
-					      NUMBER_MULTI_LRC_GUC_ID(guc),
-					      order_base_2(ce->parallel.number_children
-							   + 1));
+		ret = new_mlrc_guc_id(guc, ce);
 	else
-		ret = ida_simple_get(&guc->submission_state.guc_ids,
-				     NUMBER_MULTI_LRC_GUC_ID(guc),
-				     guc->submission_state.num_guc_ids,
-				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
-				     __GFP_NOWARN);
+		ret = new_slrc_guc_id(guc, ce);
+
 	if (unlikely(ret < 0))
 		return ret;
 
@@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
 
 	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
 
+	if (unlikely(intel_context_is_parent(ce) &&
+		     !guc->submission_state.guc_ids_bitmap)) {
+		guc->submission_state.guc_ids_bitmap =
+			bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
+		if (!guc->submission_state.guc_ids_bitmap)
+			return -ENOMEM;
+	}
+
 try_again:
 	spin_lock_irqsave(&guc->submission_state.lock, flags);
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-02-03 17:44 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-13  3:38 [PATCH] drm/i915: Flip guc_id allocation partition Matthew Brost
2022-01-13  3:38 ` [Intel-gfx] " Matthew Brost
2022-01-13  4:38 ` [Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Flip guc_id allocation partition (rev2) Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2022-01-13 16:27 [PATCH] drm/i915: Flip guc_id allocation partition Matthew Brost
2022-02-02 22:15 ` Michal Wajdeczko
2022-02-03 17:37   ` Matthew Brost
2022-01-11 16:30 Matthew Brost
2022-01-12 23:21 ` Michal Wajdeczko
2022-01-12 23:26   ` Matthew Brost
2022-01-13 14:18     ` Michal Wajdeczko
2022-01-13 16:00       ` Matthew Brost

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.