* [RFC] drm/i915: store all mmio bases in intel_engines
@ 2018-03-07 19:45 Daniele Ceraolo Spurio
2018-03-07 19:56 ` Chris Wilson
` (3 more replies)
0 siblings, 4 replies; 12+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-03-07 19:45 UTC (permalink / raw)
To: intel-gfx
The mmio bases we're currently storing in the intel_engines array are
only valid for a subset of gens, so we need to ignore them and use
different values in some cases. Instead of doing that, we can have a
table of [starting gen, mmio base] pairs for each engine in
intel_engines and select the correct one based on the gen we're running
on in a consistent way.
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
drivers/gpu/drm/i915/intel_engine_cs.c | 77 +++++++++++++++++++++------------
drivers/gpu/drm/i915/intel_ringbuffer.c | 1 -
2 files changed, 49 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 4ba139c27fba..1dd92cac8d67 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -81,12 +81,16 @@ static const struct engine_class_info intel_engine_classes[] = {
},
};
+#define MAX_MMIO_BASES 3
struct engine_info {
unsigned int hw_id;
unsigned int uabi_id;
u8 class;
u8 instance;
- u32 mmio_base;
+ struct engine_mmio_base {
+ u32 gen : 8;
+ u32 base : 24;
+ } mmio_bases[MAX_MMIO_BASES];
unsigned irq_shift;
};
@@ -96,7 +100,9 @@ static const struct engine_info intel_engines[] = {
.uabi_id = I915_EXEC_RENDER,
.class = RENDER_CLASS,
.instance = 0,
- .mmio_base = RENDER_RING_BASE,
+ .mmio_bases = {
+ { .gen = 1, .base = RENDER_RING_BASE }
+ },
.irq_shift = GEN8_RCS_IRQ_SHIFT,
},
[BCS] = {
@@ -104,7 +110,9 @@ static const struct engine_info intel_engines[] = {
.uabi_id = I915_EXEC_BLT,
.class = COPY_ENGINE_CLASS,
.instance = 0,
- .mmio_base = BLT_RING_BASE,
+ .mmio_bases = {
+ { .gen = 6, .base = BLT_RING_BASE }
+ },
.irq_shift = GEN8_BCS_IRQ_SHIFT,
},
[VCS] = {
@@ -112,7 +120,11 @@ static const struct engine_info intel_engines[] = {
.uabi_id = I915_EXEC_BSD,
.class = VIDEO_DECODE_CLASS,
.instance = 0,
- .mmio_base = GEN6_BSD_RING_BASE,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_BSD_RING_BASE },
+ { .gen = 6, .base = GEN6_BSD_RING_BASE },
+ { .gen = 4, .base = BSD_RING_BASE }
+ },
.irq_shift = GEN8_VCS1_IRQ_SHIFT,
},
[VCS2] = {
@@ -120,7 +132,10 @@ static const struct engine_info intel_engines[] = {
.uabi_id = I915_EXEC_BSD,
.class = VIDEO_DECODE_CLASS,
.instance = 1,
- .mmio_base = GEN8_BSD2_RING_BASE,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_BSD2_RING_BASE },
+ { .gen = 8, .base = GEN8_BSD2_RING_BASE }
+ },
.irq_shift = GEN8_VCS2_IRQ_SHIFT,
},
[VCS3] = {
@@ -128,7 +143,9 @@ static const struct engine_info intel_engines[] = {
.uabi_id = I915_EXEC_BSD,
.class = VIDEO_DECODE_CLASS,
.instance = 2,
- .mmio_base = GEN11_BSD3_RING_BASE,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_BSD3_RING_BASE }
+ },
.irq_shift = 0, /* not used */
},
[VCS4] = {
@@ -136,7 +153,9 @@ static const struct engine_info intel_engines[] = {
.uabi_id = I915_EXEC_BSD,
.class = VIDEO_DECODE_CLASS,
.instance = 3,
- .mmio_base = GEN11_BSD4_RING_BASE,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_BSD4_RING_BASE }
+ },
.irq_shift = 0, /* not used */
},
[VECS] = {
@@ -144,7 +163,10 @@ static const struct engine_info intel_engines[] = {
.uabi_id = I915_EXEC_VEBOX,
.class = VIDEO_ENHANCEMENT_CLASS,
.instance = 0,
- .mmio_base = VEBOX_RING_BASE,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_VEBOX_RING_BASE },
+ { .gen = 7, .base = VEBOX_RING_BASE }
+ },
.irq_shift = GEN8_VECS_IRQ_SHIFT,
},
[VECS2] = {
@@ -152,7 +174,9 @@ static const struct engine_info intel_engines[] = {
.uabi_id = I915_EXEC_VEBOX,
.class = VIDEO_ENHANCEMENT_CLASS,
.instance = 1,
- .mmio_base = GEN11_VEBOX2_RING_BASE,
+ .mmio_bases = {
+ { .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
+ },
.irq_shift = 0, /* not used */
},
};
@@ -223,6 +247,21 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
}
}
+static u32 __engine_mmio_base(struct drm_i915_private *i915,
+ const struct engine_mmio_base* bases)
+{
+ int i;
+
+ for (i = 0; i < MAX_MMIO_BASES; i++)
+ if (INTEL_GEN(i915) >= bases[i].gen)
+ break;
+
+ GEM_BUG_ON(i == MAX_MMIO_BASES);
+ GEM_BUG_ON(!bases[i].base);
+
+ return bases[i].base;
+}
+
static int
intel_engine_setup(struct drm_i915_private *dev_priv,
enum intel_engine_id id)
@@ -257,25 +296,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
class_info->name, info->instance) >=
sizeof(engine->name));
engine->hw_id = engine->guc_id = info->hw_id;
- if (INTEL_GEN(dev_priv) >= 11) {
- switch (engine->id) {
- case VCS:
- engine->mmio_base = GEN11_BSD_RING_BASE;
- break;
- case VCS2:
- engine->mmio_base = GEN11_BSD2_RING_BASE;
- break;
- case VECS:
- engine->mmio_base = GEN11_VEBOX_RING_BASE;
- break;
- default:
- /* take the original value for all other engines */
- engine->mmio_base = info->mmio_base;
- break;
- }
- } else {
- engine->mmio_base = info->mmio_base;
- }
+ engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
engine->irq_shift = info->irq_shift;
engine->class = info->class;
engine->instance = info->instance;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1d599524a759..2e4408477ab5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2079,7 +2079,6 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
engine->emit_flush = gen6_bsd_ring_flush;
engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
} else {
- engine->mmio_base = BSD_RING_BASE;
engine->emit_flush = bsd_ring_flush;
if (IS_GEN5(dev_priv))
engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
--
2.16.2
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [RFC] drm/i915: store all mmio bases in intel_engines
2018-03-07 19:45 [RFC] drm/i915: store all mmio bases in intel_engines Daniele Ceraolo Spurio
@ 2018-03-07 19:56 ` Chris Wilson
2018-03-07 20:36 ` ✓ Fi.CI.BAT: success for " Patchwork
` (2 subsequent siblings)
3 siblings, 0 replies; 12+ messages in thread
From: Chris Wilson @ 2018-03-07 19:56 UTC (permalink / raw)
To: Daniele Ceraolo Spurio, intel-gfx
Quoting Daniele Ceraolo Spurio (2018-03-07 19:45:15)
> The mmio bases we're currently storing in the intel_engines array are
> only valid for a subset of gens, so we need to ignore them and use
> different values in some cases. Instead of doing that, we can have a
> table of [starting gen, mmio base] pairs for each engine in
> intel_engines and select the correct one based on the gen we're running
> on in a consistent way.
>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
> drivers/gpu/drm/i915/intel_engine_cs.c | 77 +++++++++++++++++++++------------
> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 -
> 2 files changed, 49 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 4ba139c27fba..1dd92cac8d67 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -81,12 +81,16 @@ static const struct engine_class_info intel_engine_classes[] = {
> },
> };
>
> +#define MAX_MMIO_BASES 3
> struct engine_info {
> unsigned int hw_id;
> unsigned int uabi_id;
> u8 class;
> u8 instance;
> - u32 mmio_base;
> + struct engine_mmio_base {
> + u32 gen : 8;
> + u32 base : 24;
> + } mmio_bases[MAX_MMIO_BASES];
Needs a note to mention the array must be in reverse gen order.
I would even add a selftest just to verify the arrays.
> unsigned irq_shift;
> };
>
> @@ -96,7 +100,9 @@ static const struct engine_info intel_engines[] = {
> .uabi_id = I915_EXEC_RENDER,
> .class = RENDER_CLASS,
> .instance = 0,
> - .mmio_base = RENDER_RING_BASE,
> + .mmio_bases = {
> + { .gen = 1, .base = RENDER_RING_BASE }
Even gen0 (i740) has the render ring :)
> +static u32 __engine_mmio_base(struct drm_i915_private *i915,
> + const struct engine_mmio_base* bases)
> +{
> + int i;
> +
> + for (i = 0; i < MAX_MMIO_BASES; i++)
> + if (INTEL_GEN(i915) >= bases[i].gen)
> + break;
> +
> + GEM_BUG_ON(i == MAX_MMIO_BASES);
> + GEM_BUG_ON(!bases[i].base);
> +
> + return bases[i].base;
> +}
> +
> static int
> intel_engine_setup(struct drm_i915_private *dev_priv,
> enum intel_engine_id id)
> @@ -257,25 +296,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
> class_info->name, info->instance) >=
> sizeof(engine->name));
> engine->hw_id = engine->guc_id = info->hw_id;
> - if (INTEL_GEN(dev_priv) >= 11) {
> - switch (engine->id) {
> - case VCS:
> - engine->mmio_base = GEN11_BSD_RING_BASE;
> - break;
> - case VCS2:
> - engine->mmio_base = GEN11_BSD2_RING_BASE;
> - break;
> - case VECS:
> - engine->mmio_base = GEN11_VEBOX_RING_BASE;
> - break;
> - default:
> - /* take the original value for all other engines */
> - engine->mmio_base = info->mmio_base;
> - break;
> - }
> - } else {
> - engine->mmio_base = info->mmio_base;
> - }
> + engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
> engine->irq_shift = info->irq_shift;
> engine->class = info->class;
> engine->instance = info->instance;
Very neat.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* ✓ Fi.CI.BAT: success for drm/i915: store all mmio bases in intel_engines
2018-03-07 19:45 [RFC] drm/i915: store all mmio bases in intel_engines Daniele Ceraolo Spurio
2018-03-07 19:56 ` Chris Wilson
@ 2018-03-07 20:36 ` Patchwork
2018-03-07 21:38 ` ✓ Fi.CI.IGT: " Patchwork
2018-03-08 9:31 ` [RFC] " Tvrtko Ursulin
3 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2018-03-07 20:36 UTC (permalink / raw)
To: Daniele Ceraolo Spurio; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: store all mmio bases in intel_engines
URL : https://patchwork.freedesktop.org/series/39556/
State : success
== Summary ==
Series 39556v1 drm/i915: store all mmio bases in intel_engines
https://patchwork.freedesktop.org/api/1.0/series/39556/revisions/1/mbox/
---- Known issues:
Test gem_exec_suspend:
Subgroup basic-s3:
pass -> DMESG-WARN (fi-skl-6700k2) fdo#104108
fdo#104108 https://bugs.freedesktop.org/show_bug.cgi?id=104108
fi-bdw-5557u total:288 pass:267 dwarn:0 dfail:0 fail:0 skip:21 time:428s
fi-bdw-gvtdvm total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:423s
fi-blb-e6850 total:288 pass:223 dwarn:1 dfail:0 fail:0 skip:64 time:371s
fi-bsw-n3050 total:288 pass:242 dwarn:0 dfail:0 fail:0 skip:46 time:502s
fi-bwr-2160 total:288 pass:183 dwarn:0 dfail:0 fail:0 skip:105 time:278s
fi-bxt-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:488s
fi-bxt-j4205 total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:489s
fi-byt-j1900 total:288 pass:253 dwarn:0 dfail:0 fail:0 skip:35 time:480s
fi-byt-n2820 total:288 pass:249 dwarn:0 dfail:0 fail:0 skip:39 time:469s
fi-cfl-8700k total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:406s
fi-cfl-s2 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:583s
fi-cfl-u total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:507s
fi-elk-e7500 total:288 pass:229 dwarn:0 dfail:0 fail:0 skip:59 time:415s
fi-gdg-551 total:288 pass:179 dwarn:0 dfail:0 fail:1 skip:108 time:287s
fi-glk-1 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:517s
fi-hsw-4770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:396s
fi-ilk-650 total:288 pass:228 dwarn:0 dfail:0 fail:0 skip:60 time:407s
fi-ivb-3520m total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:461s
fi-ivb-3770 total:288 pass:255 dwarn:0 dfail:0 fail:0 skip:33 time:419s
fi-kbl-7500u total:288 pass:263 dwarn:1 dfail:0 fail:0 skip:24 time:471s
fi-kbl-7567u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:460s
fi-kbl-r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:509s
fi-pnv-d510 total:288 pass:222 dwarn:1 dfail:0 fail:0 skip:65 time:586s
fi-skl-6260u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:429s
fi-skl-6600u total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:518s
fi-skl-6700hq total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:532s
fi-skl-6700k2 total:288 pass:263 dwarn:1 dfail:0 fail:0 skip:24 time:496s
fi-skl-6770hq total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:487s
fi-skl-guc total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:421s
fi-skl-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:426s
fi-snb-2520m total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:525s
fi-snb-2600 total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:393s
73d5ac42e64ff83e10989dd5fe0cd82070a78d3b drm-tip: 2018y-03m-07d-18h-48m-39s UTC integration manifest
631f6d0764f1 drm/i915: store all mmio bases in intel_engines
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8263/issues.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* ✓ Fi.CI.IGT: success for drm/i915: store all mmio bases in intel_engines
2018-03-07 19:45 [RFC] drm/i915: store all mmio bases in intel_engines Daniele Ceraolo Spurio
2018-03-07 19:56 ` Chris Wilson
2018-03-07 20:36 ` ✓ Fi.CI.BAT: success for " Patchwork
@ 2018-03-07 21:38 ` Patchwork
2018-03-08 9:31 ` [RFC] " Tvrtko Ursulin
3 siblings, 0 replies; 12+ messages in thread
From: Patchwork @ 2018-03-07 21:38 UTC (permalink / raw)
To: Daniele Ceraolo Spurio; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: store all mmio bases in intel_engines
URL : https://patchwork.freedesktop.org/series/39556/
State : success
== Summary ==
---- Known issues:
Test kms_chv_cursor_fail:
Subgroup pipe-b-64x64-bottom-edge:
dmesg-warn -> PASS (shard-snb) fdo#105185 +2
Test kms_flip:
Subgroup 2x-flip-vs-expired-vblank-interruptible:
pass -> FAIL (shard-hsw) fdo#102887
Subgroup plain-flip-fb-recreate:
pass -> FAIL (shard-hsw) fdo#100368 +1
Test kms_frontbuffer_tracking:
Subgroup fbc-suspend:
fail -> PASS (shard-apl) fdo#101623
Test kms_rotation_crc:
Subgroup primary-rotation-180:
fail -> PASS (shard-snb) fdo#103925
Test pm_lpsp:
Subgroup screens-disabled:
fail -> PASS (shard-hsw) fdo#104941
fdo#105185 https://bugs.freedesktop.org/show_bug.cgi?id=105185
fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
fdo#101623 https://bugs.freedesktop.org/show_bug.cgi?id=101623
fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
fdo#104941 https://bugs.freedesktop.org/show_bug.cgi?id=104941
shard-apl total:3381 pass:1780 dwarn:1 dfail:0 fail:7 skip:1591 time:11800s
shard-hsw total:3467 pass:1771 dwarn:1 dfail:0 fail:3 skip:1691 time:11892s
shard-snb total:3467 pass:1363 dwarn:2 dfail:0 fail:1 skip:2101 time:7027s
Blacklisted hosts:
shard-kbl total:3467 pass:1899 dwarn:26 dfail:1 fail:8 skip:1533 time:9186s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8263/shards.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC] drm/i915: store all mmio bases in intel_engines
2018-03-07 19:45 [RFC] drm/i915: store all mmio bases in intel_engines Daniele Ceraolo Spurio
` (2 preceding siblings ...)
2018-03-07 21:38 ` ✓ Fi.CI.IGT: " Patchwork
@ 2018-03-08 9:31 ` Tvrtko Ursulin
2018-03-08 18:46 ` Daniele Ceraolo Spurio
3 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2018-03-08 9:31 UTC (permalink / raw)
To: Daniele Ceraolo Spurio, intel-gfx
On 07/03/2018 19:45, Daniele Ceraolo Spurio wrote:
> The mmio bases we're currently storing in the intel_engines array are
> only valid for a subset of gens, so we need to ignore them and use
> different values in some cases. Instead of doing that, we can have a
> table of [starting gen, mmio base] pairs for each engine in
> intel_engines and select the correct one based on the gen we're running
> on in a consistent way.
>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> ---
> drivers/gpu/drm/i915/intel_engine_cs.c | 77 +++++++++++++++++++++------------
> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 -
> 2 files changed, 49 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 4ba139c27fba..1dd92cac8d67 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -81,12 +81,16 @@ static const struct engine_class_info intel_engine_classes[] = {
> },
> };
>
> +#define MAX_MMIO_BASES 3
> struct engine_info {
> unsigned int hw_id;
> unsigned int uabi_id;
> u8 class;
> u8 instance;
> - u32 mmio_base;
> + struct engine_mmio_base {
> + u32 gen : 8;
> + u32 base : 24;
> + } mmio_bases[MAX_MMIO_BASES];
> unsigned irq_shift;
> };
It's not bad, but I am just wondering if it is too complicated versus
simply duplicating the tables.
Duplicated tables would certainly be much less code and complexity, but
what about the size of pure data?
In this patch sizeof(struct engine_info) grows by MAX_MMIO_BASES *
sizeof(u32), so 12, to the total of 30 if I am not mistaken. Times
NUM_ENGINES equals 240 bytes for intel_engines[] array with this scheme.
Separate tables per gens would be:
sizeof(struct engine_info) is 18 bytes.
For < gen6 we'd need 3 engines * 18 = 54
< gen11 = 5 engines * 18 = 90
>= gen11 = 8 engines * 18 = 144
54 + 90 + 144 = 288 bytes
So a little bit bigger. Hm. Plus we would need to refactor so
intel_engines[] is not indexed by engine->id but is a contiguous array
with engine->id in the elements. Code to look up the compact array should
be simpler than the combined new code from this patch, especially if you add
the test as Chris suggested. So separate engine info arrays would win I
think overall - when considering size of text + size of data + size of
source code.
But on the other hand your solution might be more future proof. So I
don't know. Use the crystal ball to decide? :)
Regards,
Tvrtko
>
> @@ -96,7 +100,9 @@ static const struct engine_info intel_engines[] = {
> .uabi_id = I915_EXEC_RENDER,
> .class = RENDER_CLASS,
> .instance = 0,
> - .mmio_base = RENDER_RING_BASE,
> + .mmio_bases = {
> + { .gen = 1, .base = RENDER_RING_BASE }
> + },
> .irq_shift = GEN8_RCS_IRQ_SHIFT,
> },
> [BCS] = {
> @@ -104,7 +110,9 @@ static const struct engine_info intel_engines[] = {
> .uabi_id = I915_EXEC_BLT,
> .class = COPY_ENGINE_CLASS,
> .instance = 0,
> - .mmio_base = BLT_RING_BASE,
> + .mmio_bases = {
> + { .gen = 6, .base = BLT_RING_BASE }
> + },
> .irq_shift = GEN8_BCS_IRQ_SHIFT,
> },
> [VCS] = {
> @@ -112,7 +120,11 @@ static const struct engine_info intel_engines[] = {
> .uabi_id = I915_EXEC_BSD,
> .class = VIDEO_DECODE_CLASS,
> .instance = 0,
> - .mmio_base = GEN6_BSD_RING_BASE,
> + .mmio_bases = {
> + { .gen = 11, .base = GEN11_BSD_RING_BASE },
> + { .gen = 6, .base = GEN6_BSD_RING_BASE },
> + { .gen = 4, .base = BSD_RING_BASE }
> + },
> .irq_shift = GEN8_VCS1_IRQ_SHIFT,
> },
> [VCS2] = {
> @@ -120,7 +132,10 @@ static const struct engine_info intel_engines[] = {
> .uabi_id = I915_EXEC_BSD,
> .class = VIDEO_DECODE_CLASS,
> .instance = 1,
> - .mmio_base = GEN8_BSD2_RING_BASE,
> + .mmio_bases = {
> + { .gen = 11, .base = GEN11_BSD2_RING_BASE },
> + { .gen = 8, .base = GEN8_BSD2_RING_BASE }
> + },
> .irq_shift = GEN8_VCS2_IRQ_SHIFT,
> },
> [VCS3] = {
> @@ -128,7 +143,9 @@ static const struct engine_info intel_engines[] = {
> .uabi_id = I915_EXEC_BSD,
> .class = VIDEO_DECODE_CLASS,
> .instance = 2,
> - .mmio_base = GEN11_BSD3_RING_BASE,
> + .mmio_bases = {
> + { .gen = 11, .base = GEN11_BSD3_RING_BASE }
> + },
> .irq_shift = 0, /* not used */
> },
> [VCS4] = {
> @@ -136,7 +153,9 @@ static const struct engine_info intel_engines[] = {
> .uabi_id = I915_EXEC_BSD,
> .class = VIDEO_DECODE_CLASS,
> .instance = 3,
> - .mmio_base = GEN11_BSD4_RING_BASE,
> + .mmio_bases = {
> + { .gen = 11, .base = GEN11_BSD4_RING_BASE }
> + },
> .irq_shift = 0, /* not used */
> },
> [VECS] = {
> @@ -144,7 +163,10 @@ static const struct engine_info intel_engines[] = {
> .uabi_id = I915_EXEC_VEBOX,
> .class = VIDEO_ENHANCEMENT_CLASS,
> .instance = 0,
> - .mmio_base = VEBOX_RING_BASE,
> + .mmio_bases = {
> + { .gen = 11, .base = GEN11_VEBOX_RING_BASE },
> + { .gen = 7, .base = VEBOX_RING_BASE }
> + },
> .irq_shift = GEN8_VECS_IRQ_SHIFT,
> },
> [VECS2] = {
> @@ -152,7 +174,9 @@ static const struct engine_info intel_engines[] = {
> .uabi_id = I915_EXEC_VEBOX,
> .class = VIDEO_ENHANCEMENT_CLASS,
> .instance = 1,
> - .mmio_base = GEN11_VEBOX2_RING_BASE,
> + .mmio_bases = {
> + { .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
> + },
> .irq_shift = 0, /* not used */
> },
> };
> @@ -223,6 +247,21 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
> }
> }
>
> +static u32 __engine_mmio_base(struct drm_i915_private *i915,
> + const struct engine_mmio_base* bases)
> +{
> + int i;
> +
> + for (i = 0; i < MAX_MMIO_BASES; i++)
> + if (INTEL_GEN(i915) >= bases[i].gen)
> + break;
> +
> + GEM_BUG_ON(i == MAX_MMIO_BASES);
> + GEM_BUG_ON(!bases[i].base);
> +
> + return bases[i].base;
> +}
> +
> static int
> intel_engine_setup(struct drm_i915_private *dev_priv,
> enum intel_engine_id id)
> @@ -257,25 +296,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
> class_info->name, info->instance) >=
> sizeof(engine->name));
> engine->hw_id = engine->guc_id = info->hw_id;
> - if (INTEL_GEN(dev_priv) >= 11) {
> - switch (engine->id) {
> - case VCS:
> - engine->mmio_base = GEN11_BSD_RING_BASE;
> - break;
> - case VCS2:
> - engine->mmio_base = GEN11_BSD2_RING_BASE;
> - break;
> - case VECS:
> - engine->mmio_base = GEN11_VEBOX_RING_BASE;
> - break;
> - default:
> - /* take the original value for all other engines */
> - engine->mmio_base = info->mmio_base;
> - break;
> - }
> - } else {
> - engine->mmio_base = info->mmio_base;
> - }
> + engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
> engine->irq_shift = info->irq_shift;
> engine->class = info->class;
> engine->instance = info->instance;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 1d599524a759..2e4408477ab5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2079,7 +2079,6 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
> engine->emit_flush = gen6_bsd_ring_flush;
> engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
> } else {
> - engine->mmio_base = BSD_RING_BASE;
> engine->emit_flush = bsd_ring_flush;
> if (IS_GEN5(dev_priv))
> engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC] drm/i915: store all mmio bases in intel_engines
2018-03-08 9:31 ` [RFC] " Tvrtko Ursulin
@ 2018-03-08 18:46 ` Daniele Ceraolo Spurio
2018-03-09 9:53 ` Tvrtko Ursulin
0 siblings, 1 reply; 12+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-03-08 18:46 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
On 08/03/18 01:31, Tvrtko Ursulin wrote:
>
> On 07/03/2018 19:45, Daniele Ceraolo Spurio wrote:
>> The mmio bases we're currently storing in the intel_engines array are
>> only valid for a subset of gens, so we need to ignore them and use
>> different values in some cases. Instead of doing that, we can have a
>> table of [starting gen, mmio base] pairs for each engine in
>> intel_engines and select the correct one based on the gen we're running
>> on in a consistent way.
>>
>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>> ---
>> drivers/gpu/drm/i915/intel_engine_cs.c | 77
>> +++++++++++++++++++++------------
>> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 -
>> 2 files changed, 49 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c
>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>> index 4ba139c27fba..1dd92cac8d67 100644
>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>> @@ -81,12 +81,16 @@ static const struct engine_class_info
>> intel_engine_classes[] = {
>> },
>> };
>> +#define MAX_MMIO_BASES 3
>> struct engine_info {
>> unsigned int hw_id;
>> unsigned int uabi_id;
>> u8 class;
>> u8 instance;
>> - u32 mmio_base;
>> + struct engine_mmio_base {
>> + u32 gen : 8;
>> + u32 base : 24;
>> + } mmio_bases[MAX_MMIO_BASES];
>> unsigned irq_shift;
>> };
>
> It's not bad, but I am just wondering if it is too complicated versus
> simply duplicating the tables.
>
> Duplicated tables would certainly be much less code and complexity, but
> what about the size of pure data?
>
> In this patch sizeof(struct engine_info) grows by MAX_MMIO_BASES *
> sizeof(u32), so 12, to the total of 30 if I am not mistaken. Times
> NUM_ENGINES equals 240 bytes for intel_engines[] array with this scheme.
>
we remove a u32 (the old mmio base) so we only grow 8 per engine, but
the compiler rounds up to a multiple of u32 so 28 per engine, for a
total of 224.
> Separate tables per gens would be:
>
> sizeof(struct engine_info) is 18 bytes.
>
> For < gen6 we'd need 3 engines * 18 = 54
> < gen11 = 5 engines * 18 = 90
> >= gen11 = 8 engines * 18 = 144
>
> 54 + 90 + 144 = 288 bytes
>
> So a little bit bigger. Hm. Plus we would need to refactor so
> intel_engines[] is not indexed by engine->id but is contiguous array
> with engine->id in the elements. Code to lookup the compact array should
> be simpler than combined new code from this patch, especially if you add
> the test as Chris suggested. So separate engine info arrays would win I
> think overall - when considering size of text + size of data + size of
> source code.
>
Not sure. I would exclude the selftest, which is usually not compiled
for released kernels, which leads to:
add/remove: 0/0 grow/shrink: 3/1 up/down: 100/-7 (93)
Function old new delta
intel_engines 160 224 +64
__func__ 13891 13910 +19
intel_engines_init_mmio 1247 1264 +17
intel_init_bsd_ring_buffer 142 135 -7
Total: Before=1479626, After=1479719, chg +0.01%
Total growth is 93, which is less than your estimation for the growth
introduced by replicating the table.
> But on the other hand your solution might be more future proof. So I
> don't know. Use the crystal ball to decide? :)
>
I think we should expect that the total engine count could grow with
future gens. In this case to me adding a new entry to a unified table
seems much cleaner (and uses less data) than adding a completely new
table each time.
Daniele
> Regards,
>
> Tvrtko
>
>
>> @@ -96,7 +100,9 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_RENDER,
>> .class = RENDER_CLASS,
>> .instance = 0,
>> - .mmio_base = RENDER_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 1, .base = RENDER_RING_BASE }
>> + },
>> .irq_shift = GEN8_RCS_IRQ_SHIFT,
>> },
>> [BCS] = {
>> @@ -104,7 +110,9 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_BLT,
>> .class = COPY_ENGINE_CLASS,
>> .instance = 0,
>> - .mmio_base = BLT_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 6, .base = BLT_RING_BASE }
>> + },
>> .irq_shift = GEN8_BCS_IRQ_SHIFT,
>> },
>> [VCS] = {
>> @@ -112,7 +120,11 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_BSD,
>> .class = VIDEO_DECODE_CLASS,
>> .instance = 0,
>> - .mmio_base = GEN6_BSD_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 11, .base = GEN11_BSD_RING_BASE },
>> + { .gen = 6, .base = GEN6_BSD_RING_BASE },
>> + { .gen = 4, .base = BSD_RING_BASE }
>> + },
>> .irq_shift = GEN8_VCS1_IRQ_SHIFT,
>> },
>> [VCS2] = {
>> @@ -120,7 +132,10 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_BSD,
>> .class = VIDEO_DECODE_CLASS,
>> .instance = 1,
>> - .mmio_base = GEN8_BSD2_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 11, .base = GEN11_BSD2_RING_BASE },
>> + { .gen = 8, .base = GEN8_BSD2_RING_BASE }
>> + },
>> .irq_shift = GEN8_VCS2_IRQ_SHIFT,
>> },
>> [VCS3] = {
>> @@ -128,7 +143,9 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_BSD,
>> .class = VIDEO_DECODE_CLASS,
>> .instance = 2,
>> - .mmio_base = GEN11_BSD3_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 11, .base = GEN11_BSD3_RING_BASE }
>> + },
>> .irq_shift = 0, /* not used */
>> },
>> [VCS4] = {
>> @@ -136,7 +153,9 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_BSD,
>> .class = VIDEO_DECODE_CLASS,
>> .instance = 3,
>> - .mmio_base = GEN11_BSD4_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 11, .base = GEN11_BSD4_RING_BASE }
>> + },
>> .irq_shift = 0, /* not used */
>> },
>> [VECS] = {
>> @@ -144,7 +163,10 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_VEBOX,
>> .class = VIDEO_ENHANCEMENT_CLASS,
>> .instance = 0,
>> - .mmio_base = VEBOX_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 11, .base = GEN11_VEBOX_RING_BASE },
>> + { .gen = 7, .base = VEBOX_RING_BASE }
>> + },
>> .irq_shift = GEN8_VECS_IRQ_SHIFT,
>> },
>> [VECS2] = {
>> @@ -152,7 +174,9 @@ static const struct engine_info intel_engines[] = {
>> .uabi_id = I915_EXEC_VEBOX,
>> .class = VIDEO_ENHANCEMENT_CLASS,
>> .instance = 1,
>> - .mmio_base = GEN11_VEBOX2_RING_BASE,
>> + .mmio_bases = {
>> + { .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
>> + },
>> .irq_shift = 0, /* not used */
>> },
>> };
>> @@ -223,6 +247,21 @@ __intel_engine_context_size(struct
>> drm_i915_private *dev_priv, u8 class)
>> }
>> }
>> +static u32 __engine_mmio_base(struct drm_i915_private *i915,
>> + const struct engine_mmio_base* bases)
>> +{
>> + int i;
>> +
>> + for (i = 0; i < MAX_MMIO_BASES; i++)
>> + if (INTEL_GEN(i915) >= bases[i].gen)
>> + break;
>> +
>> + GEM_BUG_ON(i == MAX_MMIO_BASES);
>> + GEM_BUG_ON(!bases[i].base);
>> +
>> + return bases[i].base;
>> +}
>> +
>> static int
>> intel_engine_setup(struct drm_i915_private *dev_priv,
>> enum intel_engine_id id)
>> @@ -257,25 +296,7 @@ intel_engine_setup(struct drm_i915_private
>> *dev_priv,
>> class_info->name, info->instance) >=
>> sizeof(engine->name));
>> engine->hw_id = engine->guc_id = info->hw_id;
>> - if (INTEL_GEN(dev_priv) >= 11) {
>> - switch (engine->id) {
>> - case VCS:
>> - engine->mmio_base = GEN11_BSD_RING_BASE;
>> - break;
>> - case VCS2:
>> - engine->mmio_base = GEN11_BSD2_RING_BASE;
>> - break;
>> - case VECS:
>> - engine->mmio_base = GEN11_VEBOX_RING_BASE;
>> - break;
>> - default:
>> - /* take the original value for all other engines */
>> - engine->mmio_base = info->mmio_base;
>> - break;
>> - }
>> - } else {
>> - engine->mmio_base = info->mmio_base;
>> - }
>> + engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
>> engine->irq_shift = info->irq_shift;
>> engine->class = info->class;
>> engine->instance = info->instance;
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index 1d599524a759..2e4408477ab5 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -2079,7 +2079,6 @@ int intel_init_bsd_ring_buffer(struct
>> intel_engine_cs *engine)
>> engine->emit_flush = gen6_bsd_ring_flush;
>> engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
>> } else {
>> - engine->mmio_base = BSD_RING_BASE;
>> engine->emit_flush = bsd_ring_flush;
>> if (IS_GEN5(dev_priv))
>> engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
>>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC] drm/i915: store all mmio bases in intel_engines
2018-03-08 18:46 ` Daniele Ceraolo Spurio
@ 2018-03-09 9:53 ` Tvrtko Ursulin
2018-03-09 18:47 ` Daniele Ceraolo Spurio
0 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2018-03-09 9:53 UTC (permalink / raw)
To: Daniele Ceraolo Spurio, intel-gfx
On 08/03/2018 18:46, Daniele Ceraolo Spurio wrote:
> On 08/03/18 01:31, Tvrtko Ursulin wrote:
>>
>> On 07/03/2018 19:45, Daniele Ceraolo Spurio wrote:
>>> The mmio bases we're currently storing in the intel_engines array are
>>> only valid for a subset of gens, so we need to ignore them and use
>>> different values in some cases. Instead of doing that, we can have a
>>> table of [starting gen, mmio base] pairs for each engine in
>>> intel_engines and select the correct one based on the gen we're running
>>> on in a consistent way.
>>>
>>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>> ---
>>> drivers/gpu/drm/i915/intel_engine_cs.c | 77
>>> +++++++++++++++++++++------------
>>> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 -
>>> 2 files changed, 49 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c
>>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> index 4ba139c27fba..1dd92cac8d67 100644
>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> @@ -81,12 +81,16 @@ static const struct engine_class_info
>>> intel_engine_classes[] = {
>>> },
>>> };
>>> +#define MAX_MMIO_BASES 3
>>> struct engine_info {
>>> unsigned int hw_id;
>>> unsigned int uabi_id;
>>> u8 class;
>>> u8 instance;
>>> - u32 mmio_base;
>>> + struct engine_mmio_base {
>>> + u32 gen : 8;
>>> + u32 base : 24;
>>> + } mmio_bases[MAX_MMIO_BASES];
>>> unsigned irq_shift;
>>> };
>>
>> It's not bad, but I am just wondering if it is too complicated versus
>> simply duplicating the tables.
>>
>> Duplicated tables would certainly be much less code and complexity,
>> but what about the size of pure data?
>>
>> In this patch sizeof(struct engine_info) grows by MAX_MMIO_BASES *
>> sizeof(u32), so 12, to the total of 30 if I am not mistaken. Times
>> NUM_ENGINES equals 240 bytes for intel_engines[] array with this scheme.
>>
>
> we remove a u32 (the old mmio base) so we only grow 8 per engine, but
> the compiler rounds up to a multiple of u32 so 28 per engine, for a
> total of 224.
>
>> Separate tables per gens would be:
>>
>> sizeof(struct engine_info) is 18 bytes.
>>
>> For < gen6 we'd need 3 engines * 18 = 54
>> < gen11 = 5 engines * 18 = 90
>> >= gen11 = 8 engines * 18 = 144
>>
>> 54 + 90 + 144 = 288 bytes
>>
>> So a little bit bigger. Hm. Plus we would need to refactor so
>> intel_engines[] is not indexed by engine->id but is contiguous array
>> with engine->id in the elements. Code to lookup the compact array
>> should be simpler than combined new code from this patch, especially
>> if you add the test as Chris suggested. So separate engine info arrays
>> would win I think overall - when considering size of text + size of
>> data + size of source code.
>>
>
> Not sure. I would exclude the selftest, which is usually not compiled
> for released kernels, which leads to:
Yes, but we cannot exclude its source since selftests for separate
tables wouldn't be needed.
> add/remove: 0/0 grow/shrink: 3/1 up/down: 100/-7 (93)
> Function old new delta
> intel_engines 160 224 +64
> __func__ 13891 13910 +19
> intel_engines_init_mmio 1247 1264 +17
> intel_init_bsd_ring_buffer 142 135 -7
> Total: Before=1479626, After=1479719, chg +0.01%
>
> Total growth is 93, which is less than your estimation for the growth
> introduced by replicating the table.
>
>> But on the other hand your solution might be more future proof. So I
>> don't know. Use the crystal ball to decide? :)
>>
>
> I think we should expect that the total engine count could grow with
> future gens. In this case to me adding a new entry to a unified table
> seems much cleaner (and uses less data) than adding a completely new
> table each time.
Okay I was off in my estimates. But in general I was coming from the
angle of thinking that every new mmio base difference in this scheme
grows the size for all engines. So if just VCS grows one new base,
impact is multiplied by NUM_ENGINES. But maybe separate tables are also
not a solution. Perhaps pulling out mmio_base arrays to outside struct
engine_info in another set of static tables, so they could be null
terminated would be best of both worlds?
struct engine_mmio_base {
u32 gen : 8;
u32 base : 24;
};
static const struct engine_mmio_base vcs0_mmio_bases[] = {
{ .gen = 11, .base = GEN11_BSD_RING_BASE },
{ .gen = 6, .base = GEN6_BSD_RING_BASE },
{ .gen = 4, .base = BSD_RING_BASE },
{ },
};
And then in intel_engines array, for BSD:
{
...
.mmio_bases = &vcs0_mmio_bases;
...
},
This way we limit data growth only to engines which change their mmio
bases frequently.
Just an idea.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC] drm/i915: store all mmio bases in intel_engines
2018-03-09 9:53 ` Tvrtko Ursulin
@ 2018-03-09 18:47 ` Daniele Ceraolo Spurio
2018-03-09 19:44 ` Tvrtko Ursulin
0 siblings, 1 reply; 12+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-03-09 18:47 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
On 09/03/18 01:53, Tvrtko Ursulin wrote:
>
> On 08/03/2018 18:46, Daniele Ceraolo Spurio wrote:
>> On 08/03/18 01:31, Tvrtko Ursulin wrote:
>>>
>>> On 07/03/2018 19:45, Daniele Ceraolo Spurio wrote:
>>>> The mmio bases we're currently storing in the intel_engines array are
>>>> only valid for a subset of gens, so we need to ignore them and use
>>>> different values in some cases. Instead of doing that, we can have a
>>>> table of [starting gen, mmio base] pairs for each engine in
>>>> intel_engines and select the correct one based on the gen we're running
>>>> on in a consistent way.
>>>>
>>>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>>>> ---
>>>> drivers/gpu/drm/i915/intel_engine_cs.c | 77
>>>> +++++++++++++++++++++------------
>>>> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 -
>>>> 2 files changed, 49 insertions(+), 29 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>> index 4ba139c27fba..1dd92cac8d67 100644
>>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>> @@ -81,12 +81,16 @@ static const struct engine_class_info
>>>> intel_engine_classes[] = {
>>>> },
>>>> };
>>>> +#define MAX_MMIO_BASES 3
>>>> struct engine_info {
>>>> unsigned int hw_id;
>>>> unsigned int uabi_id;
>>>> u8 class;
>>>> u8 instance;
>>>> - u32 mmio_base;
>>>> + struct engine_mmio_base {
>>>> + u32 gen : 8;
>>>> + u32 base : 24;
>>>> + } mmio_bases[MAX_MMIO_BASES];
>>>> unsigned irq_shift;
>>>> };
>>>
>>> It's not bad, but I am just wondering if it is too complicated versus
>>> simply duplicating the tables.
>>>
>>> Duplicated tables would certainly be much less code and complexity,
>>> but what about the size of pure data?
>>>
>>> In this patch sizeof(struct engine_info) grows by MAX_MMIO_BASES *
>>> sizeof(u32), so 12, to the total of 30 if I am not mistaken. Times
>>> NUM_ENGINES equals 240 bytes for intel_engines[] array with this scheme.
>>>
>>
>> we remove a u32 (the old mmio base) so we only grow 8 per engine, but
>> the compiler rounds up to a multiple of u32 so 28 per engine, for a
>> total of 224.
>>
>>> Separate tables per gens would be:
>>>
>>> sizeof(struct engine_info) is 18 bytes.
>>>
>>> For < gen6 we'd need 3 engines * 18 = 54
>>> < gen11 = 5 engines * 18 = 90
>>> >= gen11 = 8 engines * 18 = 144
>>>
>>> 54 + 90 + 144 = 288 bytes
>>>
>>> So a little bit bigger. Hm. Plus we would need to refactor so
>>> intel_engines[] is not indexed by engine->id but is contiguous array
>>> with engine->id in the elements. Code to lookup the compact array
>>> should be simpler than combined new code from this patch, especially
>>> if you add the test as Chris suggested. So separate engine info
>>> arrays would win I think overall - when considering size of text +
>>> size of data + size of source code.
>>>
>>
>> Not sure. I would exclude the selftest, which is usually not compiled
>> for released kernels, which leads to:
>
> Yes, but we cannot exclude its source since selftests for separate
> tables wouldn't be needed.
>
>> add/remove: 0/0 grow/shrink: 3/1 up/down: 100/-7 (93)
>> Function old new delta
>> intel_engines 160 224 +64
>> __func__ 13891 13910 +19
>> intel_engines_init_mmio 1247 1264 +17
>> intel_init_bsd_ring_buffer 142 135 -7
>> Total: Before=1479626, After=1479719, chg +0.01%
>>
>> Total growth is 93, which is less than your estimation for the growth
>> introduced by replicating the table.
>>
>>> But on the other hand your solution might be more future proof. So I
>>> don't know. Use the crystal ball to decide? :)
>>>
>>
>> I think we should expect that the total engine count could grow with
>> future gens. In this case to me adding a new entry to a unified table
>> seems much cleaner (and uses less data) than adding a completely new
>> table each time.
>
> Okay I was off in my estimates. But in general I was coming from the
> angle of thinking that every new mmio base difference in this scheme
> grows the size for all engines. So if just VCS grows one new base,
> impact is multiplied by NUM_ENGINES. But maybe separate tables are also
> not a solution. Perhaps pulling out mmio_base arrays to outside struct
> engine_info in another set of static tables, so they could be null
> terminated would be best of both worlds?
>
> struct engine_mmio_base {
> u32 gen : 8;
> u32 base : 24;
> };
>
> static const struct engine_mmio_base vcs0_mmio_bases[] = {
> { .gen = 11, .base = GEN11_BSD_RING_BASE },
> { .gen = 6, .base = GEN6_BSD_RING_BASE },
> { .gen = 4, .base = BSD_RING_BASE },
> { },
> };
>
> And then in intel_engines array, for BSD:
>
> {
> ...
> .mmio_bases = &vcs0_mmio_bases;
> ...
> },
>
> This way we limit data growth only to engines which change their mmio
> bases frequently.
>
> Just an idea.
>
I gave this a try and the code actually grows:
add/remove: 8/0 grow/shrink: 2/0 up/down: 120/0 (120)
Function old new delta
intel_engines 224 256 +32
vcs0_mmio_bases - 16 +16
vecs1_mmio_bases - 12 +12
vecs0_mmio_bases - 12 +12
vcs1_mmio_bases - 12 +12
vcs3_mmio_bases - 8 +8
vcs2_mmio_bases - 8 +8
rcs_mmio_bases - 8 +8
intel_engines_init_mmio 1264 1272 +8
bcs_mmio_bases - 4 +4
Total: Before=1479719, After=1479839, chg +0.01%
I have no idea what the compiler is doing to grow intel_engines, since
by replacing an array of 3 u32s with a pointer I would expect a shrink
of 4 bytes per-engine. Anyway, even without the compiler weirdness with
this method we would grow the code size of at least 4 bytes per engine
because we replace an array of 3 u32 (12B) with a pointer (8B) and an
array of 2 or more u32 (8B+). I guess we can reconsider if/when one
engine reaches more than 3 mmio bases.
Thanks,
Daniele
> Regards,
>
> Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC] drm/i915: store all mmio bases in intel_engines
2018-03-09 18:47 ` Daniele Ceraolo Spurio
@ 2018-03-09 19:44 ` Tvrtko Ursulin
2018-03-12 11:04 ` Tvrtko Ursulin
0 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2018-03-09 19:44 UTC (permalink / raw)
To: Daniele Ceraolo Spurio, intel-gfx
On 09/03/2018 18:47, Daniele Ceraolo Spurio wrote:
> On 09/03/18 01:53, Tvrtko Ursulin wrote:
>>
>> On 08/03/2018 18:46, Daniele Ceraolo Spurio wrote:
>>> On 08/03/18 01:31, Tvrtko Ursulin wrote:
>>>>
>>>> On 07/03/2018 19:45, Daniele Ceraolo Spurio wrote:
>>>>> The mmio bases we're currently storing in the intel_engines array are
>>>>> only valid for a subset of gens, so we need to ignore them and use
>>>>> different values in some cases. Instead of doing that, we can have a
>>>>> table of [starting gen, mmio base] pairs for each engine in
>>>>> intel_engines and select the correct one based on the gen we're
>>>>> running
>>>>> on in a consistent way.
>>>>>
>>>>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> Signed-off-by: Daniele Ceraolo Spurio
>>>>> <daniele.ceraolospurio@intel.com>
>>>>> ---
>>>>> drivers/gpu/drm/i915/intel_engine_cs.c | 77
>>>>> +++++++++++++++++++++------------
>>>>> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 -
>>>>> 2 files changed, 49 insertions(+), 29 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> index 4ba139c27fba..1dd92cac8d67 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>> @@ -81,12 +81,16 @@ static const struct engine_class_info
>>>>> intel_engine_classes[] = {
>>>>> },
>>>>> };
>>>>> +#define MAX_MMIO_BASES 3
>>>>> struct engine_info {
>>>>> unsigned int hw_id;
>>>>> unsigned int uabi_id;
>>>>> u8 class;
>>>>> u8 instance;
>>>>> - u32 mmio_base;
>>>>> + struct engine_mmio_base {
>>>>> + u32 gen : 8;
>>>>> + u32 base : 24;
>>>>> + } mmio_bases[MAX_MMIO_BASES];
>>>>> unsigned irq_shift;
>>>>> };
>>>>
>>>> It's not bad, but I am just wondering if it is too complicated
>>>> versus simply duplicating the tables.
>>>>
>>>> Duplicated tables would certainly be much less code and complexity,
>>>> but what about the size of pure data?
>>>>
>>>> In this patch sizeof(struct engine_info) grows by MAX_MMIO_BASES *
>>>> sizeof(u32), so 12, to the total of 30 if I am not mistaken. Times
>>>> NUM_ENGINES equals 240 bytes for intel_engines[] array with this
>>>> scheme.
>>>>
>>>
>>> we remove a u32 (the old mmio base) so we only grow 8 per engine, but
>>> the compiler rounds up to a multiple of u32 so 28 per engine, for a
>>> total of 224.
>>>
>>>> Separate tables per gens would be:
>>>>
>>>> sizeof(struct engine_info) is 18 bytes.
>>>>
>>>> For < gen6 we'd need 3 engines * 18 = 54
>>>> < gen11 = 5 engines * 18 = 90
>>>> >= gen11 = 8 engines * 18 = 144
>>>>
>>>> 54 + 90 + 144 = 288 bytes
>>>>
>>>> So a little bit bigger. Hm. Plus we would need to refactor so
>>>> intel_engines[] is not indexed by engine->id but is contiguous array
>>>> with engine->id in the elements. Code to lookup the compact array
>>>> should be simpler than combined new code from this patch, especially
>>>> if you add the test as Chris suggested. So separate engine info
>>>> arrays would win I think overall - when considering size of text +
>>>> size of data + size of source code.
>>>>
>>>
>>> Not sure. I would exclude the selftest, which is usually not compiled
>>> for released kernels, which leads to:
>>
>> Yes, but we cannot exclude its source since selftests for separate
>> tables wouldn't be needed.
>>
>>> add/remove: 0/0 grow/shrink: 3/1 up/down: 100/-7 (93)
>>> Function old new delta
>>> intel_engines 160 224 +64
>>> __func__ 13891 13910 +19
>>> intel_engines_init_mmio 1247 1264 +17
>>> intel_init_bsd_ring_buffer 142 135 -7
>>> Total: Before=1479626, After=1479719, chg +0.01%
>>>
>>> Total growth is 93, which is less than your estimation for the growth
>>> introduced by replicating the table.
>>>
>>>> But on the other hand your solution might be more future proof. So I
>>>> don't know. Use the crystal ball to decide? :)
>>>>
>>>
>>> I think we should expect that the total engine count could grow with
>>> future gens. In this case to me adding a new entry to a unified table
>>> seems much cleaner (and uses less data) than adding a completely new
>>> table each time.
>>
>> Okay I was off in my estimates. But in general I was coming from the
>> angle of thinking that every new mmio base difference in this scheme
>> grows the size for all engines. So if just VCS grows one new base,
>> impact is multiplied by NUM_ENGINES. But maybe separate tables are
>> also not a solution. Perhaps pulling out mmio_base arrays to outside
>> struct engine_info in another set of static tables, so they could be
>> null terminated would be best of both worlds?
>>
>> struct engine_mmio_base {
>> u32 gen : 8;
>> u32 base : 24;
>> };
>>
>> static const struct engine_mmio_base vcs0_mmio_bases[] = {
>> { .gen = 11, .base = GEN11_BSD_RING_BASE },
>> { .gen = 6, .base = GEN6_BSD_RING_BASE },
>> { .gen = 4, .base = BSD_RING_BASE },
>> { },
>> };
>>
>> And then in intel_engines array, for BSD:
>>
>> {
>> ...
>> .mmio_bases = &vcs0_mmio_bases;
>> ...
>> },
>>
>> This way we limit data growth only to engines which change their mmio
>> bases frequently.
>>
>> Just an idea.
>>
>
> I gave this a try and the code actually grows:
>
> add/remove: 8/0 grow/shrink: 2/0 up/down: 120/0 (120)
> Function old new delta
> intel_engines 224 256 +32
> vcs0_mmio_bases - 16 +16
> vecs1_mmio_bases - 12 +12
> vecs0_mmio_bases - 12 +12
> vcs1_mmio_bases - 12 +12
> vcs3_mmio_bases - 8 +8
> vcs2_mmio_bases - 8 +8
> rcs_mmio_bases - 8 +8
> intel_engines_init_mmio 1264 1272 +8
> bcs_mmio_bases - 4 +4
> Total: Before=1479719, After=1479839, chg +0.01%
>
> I have no idea what the compiler is doing to grow intel_engines, since
> by replacing an array of 3 u32s with a pointer I would expect a shrink
> of 4 bytes per-engine. Anyway, even without the compiler weirdness with
It probably has to align the pointer to 8 bytes so that creates a hole.
Moving mmio_base pointer higher up, either to the top or after two
unsigned ints should work. Or packing the struct.
> this method we would grow the code size of at least 4 bytes per engine
> because we replace an array of 3 u32 (12B) with a pointer (8B) and an
> array of 2 or more u32 (8B+). I guess we can reconsider if/when one
> engine reaches more than 3 mmio bases.
Yeah it's fine. I was thinking that since you are almost there it makes
sense to future proof it more, since no one will probably remember it
later. But OK.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC] drm/i915: store all mmio bases in intel_engines
2018-03-09 19:44 ` Tvrtko Ursulin
@ 2018-03-12 11:04 ` Tvrtko Ursulin
2018-03-12 17:48 ` Daniele Ceraolo Spurio
0 siblings, 1 reply; 12+ messages in thread
From: Tvrtko Ursulin @ 2018-03-12 11:04 UTC (permalink / raw)
To: Daniele Ceraolo Spurio, intel-gfx
On 09/03/2018 19:44, Tvrtko Ursulin wrote:
>
> On 09/03/2018 18:47, Daniele Ceraolo Spurio wrote:
>> On 09/03/18 01:53, Tvrtko Ursulin wrote:
>>>
>>> On 08/03/2018 18:46, Daniele Ceraolo Spurio wrote:
>>>> On 08/03/18 01:31, Tvrtko Ursulin wrote:
>>>>>
>>>>> On 07/03/2018 19:45, Daniele Ceraolo Spurio wrote:
>>>>>> The mmio bases we're currently storing in the intel_engines array are
>>>>>> only valid for a subset of gens, so we need to ignore them and use
>>>>>> different values in some cases. Instead of doing that, we can have a
>>>>>> table of [starting gen, mmio base] pairs for each engine in
>>>>>> intel_engines and select the correct one based on the gen we're
>>>>>> running
>>>>>> on in a consistent way.
>>>>>>
>>>>>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>> Signed-off-by: Daniele Ceraolo Spurio
>>>>>> <daniele.ceraolospurio@intel.com>
>>>>>> ---
>>>>>> drivers/gpu/drm/i915/intel_engine_cs.c | 77
>>>>>> +++++++++++++++++++++------------
>>>>>> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 -
>>>>>> 2 files changed, 49 insertions(+), 29 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>>> index 4ba139c27fba..1dd92cac8d67 100644
>>>>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>>> @@ -81,12 +81,16 @@ static const struct engine_class_info
>>>>>> intel_engine_classes[] = {
>>>>>> },
>>>>>> };
>>>>>> +#define MAX_MMIO_BASES 3
>>>>>> struct engine_info {
>>>>>> unsigned int hw_id;
>>>>>> unsigned int uabi_id;
>>>>>> u8 class;
>>>>>> u8 instance;
>>>>>> - u32 mmio_base;
>>>>>> + struct engine_mmio_base {
>>>>>> + u32 gen : 8;
>>>>>> + u32 base : 24;
>>>>>> + } mmio_bases[MAX_MMIO_BASES];
>>>>>> unsigned irq_shift;
>>>>>> };
>>>>>
>>>>> It's not bad, but I am just wondering if it is too complicated
>>>>> versus simply duplicating the tables.
>>>>>
>>>>> Duplicated tables would certainly be much less code and complexity,
>>>>> but what about the size of pure data?
>>>>>
>>>>> In this patch sizeof(struct engine_info) grows by MAX_MMIO_BASES *
>>>>> sizeof(u32), so 12, to the total of 30 if I am not mistaken. Times
>>>>> NUM_ENGINES equals 240 bytes for intel_engines[] array with this
>>>>> scheme.
>>>>>
>>>>
>>>> we remove a u32 (the old mmio base) so we only grow 8 per engine,
>>>> but the compiler rounds up to a multiple of u32 so 28 per engine,
>>>> for a total of 224.
>>>>
>>>>> Separate tables per gens would be:
>>>>>
>>>>> sizeof(struct engine_info) is 18 bytes.
>>>>>
>>>>> For < gen6 we'd need 3 engines * 18 = 54
>>>>> < gen11 = 5 engines * 18 = 90
>>>>> >= gen11 = 8 engines * 18 = 144
>>>>>
>>>>> 54 + 90 + 144 = 288 bytes
>>>>>
>>>>> So a little bit bigger. Hm. Plus we would need to refactor so
>>>>> intel_engines[] is not indexed by engine->id but is contiguous
>>>>> array with engine->id in the elements. Code to lookup the compact
>>>>> array should be simpler than combined new code from this patch,
>>>>> especially if you add the test as Chris suggested. So separate
>>>>> engine info arrays would win I think overall - when considering
>>>>> size of text + size of data + size of source code.
>>>>>
>>>>
>>>> Not sure. I would exclude the selftest, which is usually not
>>>> compiled for released kernels, which leads to:
>>>
>>> Yes, but we cannot exclude its source since selftests for separate
>>> tables wouldn't be needed.
>>>
>>>> add/remove: 0/0 grow/shrink: 3/1 up/down: 100/-7 (93)
>>>> Function old new delta
>>>> intel_engines 160 224 +64
>>>> __func__ 13891 13910 +19
>>>> intel_engines_init_mmio 1247 1264 +17
>>>> intel_init_bsd_ring_buffer 142 135 -7
>>>> Total: Before=1479626, After=1479719, chg +0.01%
>>>>
>>>> Total growth is 93, which is less than your estimation for the
>>>> growth introduced by replicating the table.
>>>>
>>>>> But on the other hand your solution might be more future proof. So
>>>>> I don't know. Use the crystal ball to decide? :)
>>>>>
>>>>
>>>> I think we should expect that the total engine count could grow with
>>>> future gens. In this case to me adding a new entry to a unified
>>>> table seems much cleaner (and uses less data) than adding a
>>>> completely new table each time.
>>>
>>> Okay I was off in my estimates. But in general I was coming from the
>>> angle of thinking that every new mmio base difference in this scheme
>>> grows the size for all engines. So if just VCS grows one new base,
>>> impact is multiplied by NUM_ENGINES. But maybe separate tables are
>>> also not a solution. Perhaps pulling out mmio_base arrays to outside
>>> struct engine_info in another set of static tables, so they could be
>>> null terminated would be best of both worlds?
>>>
>>> struct engine_mmio_base {
>>> u32 gen : 8;
>>> u32 base : 24;
>>> };
>>>
>>> static const struct engine_mmio_base vcs0_mmio_bases[] = {
>>> { .gen = 11, .base = GEN11_BSD_RING_BASE },
>>> { .gen = 6, .base = GEN6_BSD_RING_BASE },
>>> { .gen = 4, .base = BSD_RING_BASE },
>>> { },
>>> };
>>>
>>> And then in intel_engines array, for BSD:
>>>
>>> {
>>> ...
>>> .mmio_bases = &vcs0_mmio_bases;
>>> ...
>>> },
>>>
>>> This way we limit data growth only to engines which change their mmio
>>> bases frequently.
>>>
>>> Just an idea.
>>>
>>
>> I gave this a try and the code actually grows:
>>
>> add/remove: 8/0 grow/shrink: 2/0 up/down: 120/0 (120)
>> Function old new delta
>> intel_engines 224 256 +32
>> vcs0_mmio_bases - 16 +16
>> vecs1_mmio_bases - 12 +12
>> vecs0_mmio_bases - 12 +12
>> vcs1_mmio_bases - 12 +12
>> vcs3_mmio_bases - 8 +8
>> vcs2_mmio_bases - 8 +8
>> rcs_mmio_bases - 8 +8
>> intel_engines_init_mmio 1264 1272 +8
>> bcs_mmio_bases - 4 +4
>> Total: Before=1479719, After=1479839, chg +0.01%
>>
>> I have no idea what the compiler is doing to grow intel_engines, since
>> by replacing an array of 3 u32s with a pointer I would expect a
>> shrink of 4 bytes per-engine. Anyway, even without the compiler
>> weirdness with
>
> It probably has to align the pointer to 8 bytes so that creates a hole.
> Moving mmio_base pointer higher up, either to the top or after two
> unsigned ints should work. Or packing the struct.
>
>> this method we would grow the code size of at least 4 bytes per engine
>> because we replace an array of 3 u32 (12B) with a pointer (8B) and an
>> array of 2 or more u32 (8B+). I guess we can reconsider if/when one
>> engine reaches more than 3 mmio bases.
>
> Yeah it's fine. I was thinking that since you are almost there it makes
> sense to future proof it more, since no one will probably remember it
> later. But OK.
One idea to compact more, in addition to avoiding alignment holes, could
be to store the engine_mmio_base directly in the engine_mmio_base
pointer when it is a single entry, otherwise use a pointer to null
terminated array. Actually we could store two without table indirection
to be most optimal on 64-bit. Something like:
struct engine_mmio_base {
u32 base : 24; /* Must be LSB, */
u32 gen : 8;
};
union engine_mmio {
struct engine_mmio_base mmio_base[2];
struct engine_mmio_base *mmio_base_list;
};
#define MMIO_PTR_LIST BIT(0)
{
... render engine ...
.mmio = { (struct engine_mmio_base){.base = ..., .gen = ...} },
...
},
{
...
.mmio = { .mmio_base_list = &vcs0_mmio_bases | MMIO_PTR_LIST }
...
},
This could be the best of both worlds, with up to two bases built-in,
and engines with more point to an array.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC] drm/i915: store all mmio bases in intel_engines
2018-03-12 11:04 ` Tvrtko Ursulin
@ 2018-03-12 17:48 ` Daniele Ceraolo Spurio
2018-03-12 21:05 ` Daniele Ceraolo Spurio
0 siblings, 1 reply; 12+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-03-12 17:48 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
On 12/03/18 04:04, Tvrtko Ursulin wrote:
>
> On 09/03/2018 19:44, Tvrtko Ursulin wrote:
>>
>> On 09/03/2018 18:47, Daniele Ceraolo Spurio wrote:
>>> On 09/03/18 01:53, Tvrtko Ursulin wrote:
>>>>
>>>> On 08/03/2018 18:46, Daniele Ceraolo Spurio wrote:
>>>>> On 08/03/18 01:31, Tvrtko Ursulin wrote:
>>>>>>
>>>>>> On 07/03/2018 19:45, Daniele Ceraolo Spurio wrote:
>>>>>>> The mmio bases we're currently storing in the intel_engines array
>>>>>>> are
>>>>>>> only valid for a subset of gens, so we need to ignore them and use
>>>>>>> different values in some cases. Instead of doing that, we can have a
>>>>>>> table of [starting gen, mmio base] pairs for each engine in
>>>>>>> intel_engines and select the correct one based on the gen we're
>>>>>>> running
>>>>>>> on in a consistent way.
>>>>>>>
>>>>>>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>>>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>>> Signed-off-by: Daniele Ceraolo Spurio
>>>>>>> <daniele.ceraolospurio@intel.com>
>>>>>>> ---
>>>>>>> drivers/gpu/drm/i915/intel_engine_cs.c | 77
>>>>>>> +++++++++++++++++++++------------
>>>>>>> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 -
>>>>>>> 2 files changed, 49 insertions(+), 29 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>>>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>>>> index 4ba139c27fba..1dd92cac8d67 100644
>>>>>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>>>> @@ -81,12 +81,16 @@ static const struct engine_class_info
>>>>>>> intel_engine_classes[] = {
>>>>>>> },
>>>>>>> };
>>>>>>> +#define MAX_MMIO_BASES 3
>>>>>>> struct engine_info {
>>>>>>> unsigned int hw_id;
>>>>>>> unsigned int uabi_id;
>>>>>>> u8 class;
>>>>>>> u8 instance;
>>>>>>> - u32 mmio_base;
>>>>>>> + struct engine_mmio_base {
>>>>>>> + u32 gen : 8;
>>>>>>> + u32 base : 24;
>>>>>>> + } mmio_bases[MAX_MMIO_BASES];
>>>>>>> unsigned irq_shift;
>>>>>>> };
>>>>>>
>>>>>> It's not bad, but I am just wondering if it is too complicated
>>>>>> versus simply duplicating the tables.
>>>>>>
>>>>>> Duplicated tables would certainly be much less code and
>>>>>> complexity, but what about the size of pure data?
>>>>>>
>>>>>> In this patch sizeof(struct engine_info) grows by MAX_MMIO_BASES *
>>>>>> sizeof(u32), so 12, to the total of 30 if I am not mistaken. Times
>>>>>> NUM_ENGINES equals 240 bytes for intel_engines[] array with this
>>>>>> scheme.
>>>>>>
>>>>>
>>>>> we remove a u32 (the old mmio base) so we only grow 8 per engine,
>>>>> but the compiler rounds up to a multiple of u32 so 28 per engine,
>>>>> for a total of 224.
>>>>>
>>>>>> Separate tables per gens would be:
>>>>>>
>>>>>> sizeof(struct engine_info) is 18 bytes.
>>>>>>
>>>>>> For < gen6 we'd need 3 engines * 18 = 54
>>>>>> < gen11 = 5 engines * 18 = 90
>>>>>> >= gen11 = 8 engines * 18 = 144
>>>>>>
>>>>>> 54 + 90 + 144 = 288 bytes
>>>>>>
>>>>>> So a little bit bigger. Hm. Plus we would need to refactor so
>>>>>> intel_engines[] is not indexed by engine->id but is contiguous
>>>>>> array with engine->id in the elements. Code to lookup the compact
>>>>>> array should be simpler than combined new code from this patch,
>>>>>> especially if you add the test as Chris suggested. So separate
>>>>>> engine info arrays would win I think overall - when considering
>>>>>> size of text + size of data + size of source code.
>>>>>>
>>>>>
>>>>> Not sure. I would exclude the selftest, which is usually not
>>>>> compiled for released kernels, which leads to:
>>>>
>>>> Yes, but we cannot exclude its source since selftests for separate
>>>> tables wouldn't be needed.
>>>>
>>>>> add/remove: 0/0 grow/shrink: 3/1 up/down: 100/-7 (93)
>>>>> Function old new delta
>>>>> intel_engines 160 224 +64
>>>>> __func__ 13891 13910 +19
>>>>> intel_engines_init_mmio 1247 1264 +17
>>>>> intel_init_bsd_ring_buffer 142 135 -7
>>>>> Total: Before=1479626, After=1479719, chg +0.01%
>>>>>
>>>>> Total growth is 93, which is less than your estimation for the
>>>>> growth introduced by replicating the table.
>>>>>
>>>>>> But on the other hand your solution might be more future proof. So
>>>>>> I don't know. Use the crystal ball to decide? :)
>>>>>>
>>>>>
>>>>> I think we should expect that the total engine count could grow
>>>>> with future gens. In this case to me adding a new entry to a
>>>>> unified table seems much cleaner (and uses less data) than adding a
>>>>> completely new table each time.
>>>>
>>>> Okay I was off in my estimates. But in general I was coming from the
>>>> angle of thinking that every new mmio base difference in this scheme
>>>> grows the size for all engines. So if just VCS grows one new base,
>>>> impact is multiplied by NUM_ENGINES. But maybe separate tables are
>>>> also not a solution. Perhaps pulling out mmio_base arrays to outside
>>>> struct engine_info in another set of static tables, so they could be
>>>> null terminated would be best of both worlds?
>>>>
>>>> struct engine_mmio_base {
>>>> u32 gen : 8;
>>>> u32 base : 24;
>>>> };
>>>>
>>>> static const struct engine_mmio_base vcs0_mmio_bases[] = {
>>>> { .gen = 11, .base = GEN11_BSD_RING_BASE },
>>>> { .gen = 6, .base = GEN6_BSD_RING_BASE },
>>>> { .gen = 4, .base = BSD_RING_BASE },
>>>> { },
>>>> };
>>>>
>>>> And then in intel_engines array, for BSD:
>>>>
>>>> {
>>>> ...
>>>> .mmio_bases = &vcs0_mmio_bases;
>>>> ...
>>>> },
>>>>
>>>> This way we limit data growth only to engines which change their
>>>> mmio bases frequently.
>>>>
>>>> Just an idea.
>>>>
>>>
>>> I gave this a try and the code actually grows:
>>>
>>> add/remove: 8/0 grow/shrink: 2/0 up/down: 120/0 (120)
>>> Function old new delta
>>> intel_engines 224 256 +32
>>> vcs0_mmio_bases - 16 +16
>>> vecs1_mmio_bases - 12 +12
>>> vecs0_mmio_bases - 12 +12
>>> vcs1_mmio_bases - 12 +12
>>> vcs3_mmio_bases - 8 +8
>>> vcs2_mmio_bases - 8 +8
>>> rcs_mmio_bases - 8 +8
>>> intel_engines_init_mmio 1264 1272 +8
>>> bcs_mmio_bases - 4 +4
>>> Total: Before=1479719, After=1479839, chg +0.01%
>>>
>>> I have no idea what the compiler is doing to grow intel_engines,
>>> since by replacing an array of 3 u32s with a pointer I would expect
>>> a shrink of 4 bytes per-engine. Anyway, even without the compiler
>>> weirdness with
>>
>> It probably has to align the pointer to 8 bytes so that creates a
>> hole. Moving mmio_base pointer higher up, either to the top or after
>> two unsigned ints should work. Or packing the struct.
>>
>>> this method we would grow the code size of at least 4 bytes per
>>> engine because we replace an array of 3 u32 (12B) with a pointer (8B)
>>> and an array of 2 or more u32 (8B+). I guess we can reconsider
>>> if/when one engine reaches more than 3 mmio bases.
>>
>> Yeah it's fine. I was thinking that since you are almost there it
>> makes sense to future proof it more, since no one will probably
>> remember it later. But OK.
>
> One idea to compact more, in addition to avoiding alignment holes, could
> be to store the engine_mmio_base directly in the engine_mmio_base
> pointer when it is a single entry, otherwise use a pointer to null
> terminated array. Actually we could store two without table indirection
> to be most optimal on 64-bit. Something like:
>
> struct engine_mmio_base {
> u32 base : 24; /* Must be LSB, */
> u32 gen : 8;
> };
>
> union engine_mmio {
> struct engine_mmio_base mmio_base[2];
> struct engine_mmio_base *mmio_base_list;
> };
>
> #define MMIO_PTR_LIST BIT(0)
>
> {
> ... render engine ...
> .mmio = { (struct engine_mmio_base){.base = ..., .gen = ...} },
> ...
> },
> {
> ...
> .mmio = { .mmio_base_list = &vcs0_mmio_bases | MMIO_PTR_LIST }
This is giving a compiler error (even with the appropriate casting) for
value not constant. I've solved by doing:
union engine_mmio {
const struct engine_mmio_base bases[MAX_BUILTIN_MMIO_BASES];
const struct engine_mmio_base *bases_list;
union {
const u64 is_list : 1;
const u64 : 63;
};
};
Code size is almost unchanged but still looks like a good compromise.
Will update the selftest and send patches soon.
Daniele
> ...
> },
>
> This could be the best of both worlds, with up to two bases built-in,
> and engines with more point to an array.
>
>
> Regards,
>
> Tvrtko
>
>
>
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC] drm/i915: store all mmio bases in intel_engines
2018-03-12 17:48 ` Daniele Ceraolo Spurio
@ 2018-03-12 21:05 ` Daniele Ceraolo Spurio
0 siblings, 0 replies; 12+ messages in thread
From: Daniele Ceraolo Spurio @ 2018-03-12 21:05 UTC (permalink / raw)
To: Tvrtko Ursulin, intel-gfx
On 12/03/18 10:48, Daniele Ceraolo Spurio wrote:
>
>
> On 12/03/18 04:04, Tvrtko Ursulin wrote:
>>
>> On 09/03/2018 19:44, Tvrtko Ursulin wrote:
>>>
>>> On 09/03/2018 18:47, Daniele Ceraolo Spurio wrote:
>>>> On 09/03/18 01:53, Tvrtko Ursulin wrote:
>>>>>
>>>>> On 08/03/2018 18:46, Daniele Ceraolo Spurio wrote:
>>>>>> On 08/03/18 01:31, Tvrtko Ursulin wrote:
>>>>>>>
>>>>>>> On 07/03/2018 19:45, Daniele Ceraolo Spurio wrote:
>>>>>>>> The mmio bases we're currently storing in the intel_engines
>>>>>>>> array are
>>>>>>>> only valid for a subset of gens, so we need to ignore them and use
>>>>>>>> different values in some cases. Instead of doing that, we can
>>>>>>>> have a
>>>>>>>> table of [starting gen, mmio base] pairs for each engine in
>>>>>>>> intel_engines and select the correct one based on the gen we're
>>>>>>>> running
>>>>>>>> on in a consistent way.
>>>>>>>>
>>>>>>>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>>>>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>>>>> Signed-off-by: Daniele Ceraolo Spurio
>>>>>>>> <daniele.ceraolospurio@intel.com>
>>>>>>>> ---
>>>>>>>> drivers/gpu/drm/i915/intel_engine_cs.c | 77
>>>>>>>> +++++++++++++++++++++------------
>>>>>>>> drivers/gpu/drm/i915/intel_ringbuffer.c | 1 -
>>>>>>>> 2 files changed, 49 insertions(+), 29 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>>>>> b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>>>>> index 4ba139c27fba..1dd92cac8d67 100644
>>>>>>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>>>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>>>>>> @@ -81,12 +81,16 @@ static const struct engine_class_info
>>>>>>>> intel_engine_classes[] = {
>>>>>>>> },
>>>>>>>> };
>>>>>>>> +#define MAX_MMIO_BASES 3
>>>>>>>> struct engine_info {
>>>>>>>> unsigned int hw_id;
>>>>>>>> unsigned int uabi_id;
>>>>>>>> u8 class;
>>>>>>>> u8 instance;
>>>>>>>> - u32 mmio_base;
>>>>>>>> + struct engine_mmio_base {
>>>>>>>> + u32 gen : 8;
>>>>>>>> + u32 base : 24;
>>>>>>>> + } mmio_bases[MAX_MMIO_BASES];
>>>>>>>> unsigned irq_shift;
>>>>>>>> };
>>>>>>>
>>>>>>> It's not bad, but I am just wondering if it is too complicated
>>>>>>> versus simply duplicating the tables.
>>>>>>>
>>>>>>> Duplicated tables would certainly be much less code and
>>>>>>> complexity, but what about the size of pure data?
>>>>>>>
>>>>>>> In this patch sizeof(struct engine_info) grows by MAX_MMIO_BASES
>>>>>>> * sizeof(u32), so 12, to the total of 30 if I am not mistaken.
>>>>>>> Times NUM_ENGINES equals 240 bytes for intel_engines[] array with
>>>>>>> this scheme.
>>>>>>>
>>>>>>
>>>>>> we remove a u32 (the old mmio base) so we only grow 8 per engine,
>>>>>> but the compiler rounds up to a multiple of u32 so 28 per engine,
>>>>>> for a total of 224.
>>>>>>
>>>>>>> Separate tables per gens would be:
>>>>>>>
>>>>>>> sizeof(struct engine_info) is 18 bytes.
>>>>>>>
>>>>>>> For < gen6 we'd need 3 engines * 18 = 54
>>>>>>> < gen11 = 5 engines * 18 = 90
>>>>>>> >= gen11 = 8 engines * 18 = 144
>>>>>>>
>>>>>>> 54 + 90 + 144 = 288 bytes
>>>>>>>
>>>>>>> So a little bit bigger. Hm. Plus we would need to refactor so
>>>>>>> intel_engines[] is not indexed by engine->id but is contiguous
>>>>>>> array with engine->id in the elements. Code to lookup the compact
>>>>>>> array should be simpler than combined new code from this patch,
>>>>>>> especially if you add the test as Chris suggested. So separate
>>>>>>> engine info arrays would win I think overall - when considering
>>>>>>> size of text + size of data + size of source code.
>>>>>>>
>>>>>>
>>>>>> Not sure. I would exclude the selftest, which is usually not
>>>>>> compiled for released kernels, which leads to:
>>>>>
>>>>> Yes, but we cannot exclude its source since selftests for separate
>>>>> tables wouldn't be needed.
>>>>>
>>>>>> add/remove: 0/0 grow/shrink: 3/1 up/down: 100/-7 (93)
>>>>>> Function old new delta
>>>>>> intel_engines 160 224 +64
>>>>>> __func__ 13891 13910 +19
>>>>>> intel_engines_init_mmio 1247 1264 +17
>>>>>> intel_init_bsd_ring_buffer 142 135 -7
>>>>>> Total: Before=1479626, After=1479719, chg +0.01%
>>>>>>
>>>>>> Total growth is 93, which is less then your estimation for the
>>>>>> growth introduced by replicating the table.
>>>>>>
>>>>>>> But on the other hand your solution might be more future proof.
>>>>>>> So I don't know. Use the crystal ball to decide? :)
>>>>>>>
>>>>>>
>>>>>> I think we should expect that the total engine count could grow
>>>>>> with future gens. In this case to me adding a new entry to a
>>>>>> unified table seems much cleaner (and uses less data) than adding
>>>>>> a completely new table each time.
>>>>>
>>>>> Okay I was off in my estimates. But in general I was coming from
>>>>> the angle of thinking that every new mmio base difference in this
>>>>> scheme grows the size for all engines. So if just VCS grows one new
>>>>> base, impact is multiplied by NUM_ENGINES. But maybe separate
>>>>> tables are also not a solution. Perhaps pulling out mmio_base
>>>>> arrays to outside struct engine_info in another set of static
>>>>> tables, so they could be null terminated would be best of both worlds?
>>>>>
>>>>> struct engine_mmio_base {
>>>>> u32 gen : 8;
>>>>> u32 base : 24;
>>>>> };
>>>>>
>>>>> static const struct engine_mmio_base vcs0_mmio_bases[] = {
>>>>> { .gen = 11, .base = GEN11_BSD_RING_BASE },
>>>>> { .gen = 6, .base = GEN6_BSD_RING_BASE },
>>>>> { .gen = 4, .base = BSD_RING_BASE },
>>>>> { },
>>>>> };
>>>>>
>>>>> And then in intel_engines array, for BSD:
>>>>>
>>>>> {
>>>>> ...
>>>>> .mmio_bases = &vcs0_mmio_bases;
>>>>> ...
>>>>> },
>>>>>
>>>>> This way we limit data growth only to engines which change their
>>>>> mmio bases frequently.
>>>>>
>>>>> Just an idea.
>>>>>
>>>>
>>>> I gave this a try and the code actually grows:
>>>>
>>>> add/remove: 8/0 grow/shrink: 2/0 up/down: 120/0 (120)
>>>> Function old new delta
>>>> intel_engines 224 256 +32
>>>> vcs0_mmio_bases - 16 +16
>>>> vecs1_mmio_bases - 12 +12
>>>> vecs0_mmio_bases - 12 +12
>>>> vcs1_mmio_bases - 12 +12
>>>> vcs3_mmio_bases - 8 +8
>>>> vcs2_mmio_bases - 8 +8
>>>> rcs_mmio_bases - 8 +8
>>>> intel_engines_init_mmio 1264 1272 +8
>>>> bcs_mmio_bases - 4 +4
>>>> Total: Before=1479719, After=1479839, chg +0.01%
>>>>
>>>> I have no idea what the compiler is doing to grow intel_engines,
>>>> since by replacing an array of 3 u32s with a pointer I would expect
>>>> a shrink of 4 bytes per-engine. Anyway, even without the compiler
>>>> weirdness with
>>>
>>> It probably has to align the pointer to 8 bytes so that creates a
>>> hole. Moving mmio_base pointer higher up, either to the top or after
>>> two unsigned ints should work. Or packing the struct.
>>>
>>>> this method we would grow the code size of at least 4 bytes per
>>>> engine because we replace an array of 3 u32 (12B) with a pointer
>>>> (8B) and an array of 2 or more u32 (8B+). I guess we can
>>>> reconsider if/when one engine reaches more than 3 mmio bases.
>>>
>>> Yeah it's fine. I was thinking that since you are almost there it
>>> makes sense to future proof it more, since no one will probably
>>> remember it later. But OK.
>>
>> One idea to compact more, in addition to avoiding alignment holes,
>> could be to store the engine_mmio_base directly in the
>> engine_mmio_base pointer when it is a single entry, otherwise use a
>> pointer to null terminated array. Actually we could store two without
>> table indirection to be most optimal on 64-bit. Something like:
>>
>> struct engine_mmio_base {
>> u32 base : 24; /* Must be LSB, */
>> u32 gen : 8;
>> };
>>
>> union engine_mmio {
>> struct engine_mmio_base mmio_base[2];
>> struct engine_mmio_base *mmio_base_list;
>> };
>>
>> #define MMIO_PTR_LIST BIT(0)
>>
>> {
>> ... render engine ...
>> .mmio = { (struct engine_mmio_base){.base = ..., .gen = ...} },
>> ...
>> },
>> {
>> ...
>> .mmio = { .mmio_base_list = &vcs0_mmio_bases | MMIO_PTR_LIST }
>
> This is giving a compiler error (even with the appropriate casting) for
> value not constant. I've solved by doing:
>
> union engine_mmio {
> const struct engine_mmio_base bases[MAX_BUILTIN_MMIO_BASES];
> const struct engine_mmio_base *bases_list;
> union {
> const u64 is_list : 1;
> const u64 : 63;
> };
> };
>
I retract this statement. The trick with the union compiles fine but
only one of the 2 assignments (between bases_list and is_list) actually
ends up in the structure.
I've experimented with other possible solutions with shifts and/or ORs,
but they all lead to a compiler error for value not constant. At this
point I'll stick with the original implementation as I want to avoid
over-engineering this.
Daniele
> Code size is almost unchanged but still looks like a good compromise.
> Will update the selftest and send patches soon.
>
> Daniele
>
>> ...
>> },
>>
>> This could be the best of both worlds, with up to two bases built-in,
>> and engines with more point to an array.
>>
>>
>> Regards,
>>
>> Tvrtko
>>
>>
>>
>>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2018-03-12 21:05 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-07 19:45 [RFC] drm/i915: store all mmio bases in intel_engines Daniele Ceraolo Spurio
2018-03-07 19:56 ` Chris Wilson
2018-03-07 20:36 ` ✓ Fi.CI.BAT: success for " Patchwork
2018-03-07 21:38 ` ✓ Fi.CI.IGT: " Patchwork
2018-03-08 9:31 ` [RFC] " Tvrtko Ursulin
2018-03-08 18:46 ` Daniele Ceraolo Spurio
2018-03-09 9:53 ` Tvrtko Ursulin
2018-03-09 18:47 ` Daniele Ceraolo Spurio
2018-03-09 19:44 ` Tvrtko Ursulin
2018-03-12 11:04 ` Tvrtko Ursulin
2018-03-12 17:48 ` Daniele Ceraolo Spurio
2018-03-12 21:05 ` Daniele Ceraolo Spurio
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.