All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH i-g-t] benchmarks/gem_wsim: Heap allocate VLA structs
@ 2019-05-24  7:25 Chris Wilson
  2019-05-24  7:45 ` [igt-dev] " Ser, Simon
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Chris Wilson @ 2019-05-24  7:25 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Apparently VLA structs (e.g. struct { int array[count] }) are a gcc
extension that clang refuses to support as handling memory layout is too
difficult for it.

Move the on-stack VLA to the heap.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++---------------
 1 file changed, 95 insertions(+), 51 deletions(-)

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index e2ffb93a9..0a0032bff 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask)
 	return slice_mask;
 }
 
+static size_t sizeof_load_balance(int count)
+{
+	struct i915_context_engines_load_balance *ptr;
+
+	assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
+	return sizeof(*ptr) + sizeof(ptr->engines[count]);
+}
+
+static struct i915_context_engines_load_balance *
+alloc_load_balance(int count)
+{
+	return calloc(1, sizeof_load_balance(count));
+}
+
+static size_t sizeof_param_engines(int count)
+{
+	struct i915_context_param_engines *ptr;
+
+	assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
+	return sizeof(*ptr) + sizeof(ptr->engines[count]);
+}
+
+static struct i915_context_param_engines *
+alloc_param_engines(int count)
+{
+	return calloc(1, sizeof_param_engines(count));
+}
+
+static size_t sizeof_engines_bond(int count)
+{
+	struct i915_context_engines_bond *ptr;
+
+	assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
+	return sizeof(*ptr) + sizeof(ptr->engines[count]);
+}
+
+static struct i915_context_engines_bond *
+alloc_engines_bond(int count)
+{
+	return calloc(1, sizeof_engines_bond(count));
+}
+
 static int
 prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 {
@@ -1676,66 +1718,54 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 		}
 
 		if (ctx->engine_map) {
-			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines,
-							  ctx->engine_map_count + 1);
-			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance,
-								 ctx->engine_map_count);
+			struct i915_context_param_engines *set_engines =
+				alloc_param_engines(ctx->engine_map_count + 1);
+			struct i915_context_engines_load_balance *load_balance =
+				alloc_load_balance(ctx->engine_map_count);
 			struct drm_i915_gem_context_param param = {
 				.ctx_id = ctx_id,
 				.param = I915_CONTEXT_PARAM_ENGINES,
-				.size = sizeof(set_engines),
-				.value = to_user_pointer(&set_engines),
+				.size = sizeof_param_engines(ctx->engine_map_count + 1),
+				.value = to_user_pointer(set_engines),
 			};
+			struct i915_context_engines_bond *last = NULL;
 
 			if (ctx->wants_balance) {
-				set_engines.extensions =
-					to_user_pointer(&load_balance);
+				set_engines->extensions =
+					to_user_pointer(load_balance);
 
-				memset(&load_balance, 0, sizeof(load_balance));
-				load_balance.base.name =
+				load_balance->base.name =
 					I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
-				load_balance.num_siblings =
+				load_balance->num_siblings =
 					ctx->engine_map_count;
 
 				for (j = 0; j < ctx->engine_map_count; j++)
-					load_balance.engines[j] =
+					load_balance->engines[j] =
 						get_engine(ctx->engine_map[j]);
-			} else {
-				set_engines.extensions = 0;
 			}
 
 			/* Reserve slot for virtual engine. */
-			set_engines.engines[0].engine_class =
+			set_engines->engines[0].engine_class =
 				I915_ENGINE_CLASS_INVALID;
-			set_engines.engines[0].engine_instance =
+			set_engines->engines[0].engine_instance =
 				I915_ENGINE_CLASS_INVALID_NONE;
 
 			for (j = 1; j <= ctx->engine_map_count; j++)
-				set_engines.engines[j] =
+				set_engines->engines[j] =
 					get_engine(ctx->engine_map[j - 1]);
 
+			last = NULL;
 			for (j = 0; j < ctx->bond_count; j++) {
 				unsigned long mask = ctx->bonds[j].mask;
-				I915_DEFINE_CONTEXT_ENGINES_BOND(bond,
-								 __builtin_popcount(mask));
-				struct i915_context_engines_bond *p = NULL, *prev;
+				struct i915_context_engines_bond *bond =
+					alloc_engines_bond(__builtin_popcount(mask));
 				unsigned int b, e;
 
-				prev = p;
-				p = alloca(sizeof(bond));
-				assert(p);
-				memset(p, 0, sizeof(bond));
-
-				if (j == 0)
-					load_balance.base.next_extension =
-						to_user_pointer(p);
-				else if (j < (ctx->bond_count - 1))
-					prev->base.next_extension =
-						to_user_pointer(p);
+				bond->base.next_extension = to_user_pointer(last);
+				bond->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
 
-				p->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
-				p->virtual_index = 0;
-				p->master = get_engine(ctx->bonds[j].master);
+				bond->virtual_index = 0;
+				bond->master = get_engine(ctx->bonds[j].master);
 
 				for (b = 0, e = 0; mask; e++, mask >>= 1) {
 					unsigned int idx;
@@ -1743,44 +1773,58 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 					if (!(mask & 1))
 						continue;
 
-					idx = find_engine(&set_engines.engines[1],
+					idx = find_engine(&set_engines->engines[1],
 							  ctx->engine_map_count,
 							  e);
-					p->engines[b++] =
-						set_engines.engines[1 + idx];
+					bond->engines[b++] =
+						set_engines->engines[1 + idx];
 				}
+
+				last = bond;
 			}
+			load_balance->base.next_extension = to_user_pointer(last);
 
 			gem_context_set_param(fd, &param);
+
+			while (last) {
+				struct i915_context_engines_bond *next =
+					from_user_pointer(last->base.next_extension);
+				free(last);
+				last = next;
+			}
+			free(load_balance);
+			free(set_engines);
 		} else if (ctx->wants_balance) {
 			const unsigned int count = num_engines_in_class(VCS);
-			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance,
-								 count);
-			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines,
-							  count + 1);
+			struct i915_context_engines_load_balance *load_balance =
+				alloc_load_balance(count);
+			struct i915_context_param_engines *set_engines =
+				alloc_param_engines(count + 1);
 			struct drm_i915_gem_context_param param = {
 				.ctx_id = ctx_id,
 				.param = I915_CONTEXT_PARAM_ENGINES,
-				.size = sizeof(set_engines),
-				.value = to_user_pointer(&set_engines),
+				.size = sizeof_param_engines(count + 1),
+				.value = to_user_pointer(set_engines),
 			};
 
-			set_engines.extensions = to_user_pointer(&load_balance);
+			set_engines->extensions = to_user_pointer(load_balance);
 
-			set_engines.engines[0].engine_class =
+			set_engines->engines[0].engine_class =
 				I915_ENGINE_CLASS_INVALID;
-			set_engines.engines[0].engine_instance =
+			set_engines->engines[0].engine_instance =
 				I915_ENGINE_CLASS_INVALID_NONE;
-			fill_engines_class(&set_engines.engines[1], VCS);
+			fill_engines_class(&set_engines->engines[1], VCS);
 
-			memset(&load_balance, 0, sizeof(load_balance));
-			load_balance.base.name =
+			load_balance->base.name =
 				I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
-			load_balance.num_siblings = count;
+			load_balance->num_siblings = count;
 
-			fill_engines_class(&load_balance.engines[0], VCS);
+			fill_engines_class(&load_balance->engines[0], VCS);
 
 			gem_context_set_param(fd, &param);
+
+			free(set_engines);
+			free(load_balance);
 		}
 
 		if (wrk->sseu) {
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] benchmarks/gem_wsim: Heap allocate VLA structs
  2019-05-24  7:25 [PATCH i-g-t] benchmarks/gem_wsim: Heap allocate VLA structs Chris Wilson
@ 2019-05-24  7:45 ` Ser, Simon
  2019-05-24  8:20 ` Tvrtko Ursulin
  2019-05-24  8:45 ` [PATCH i-g-t v2] benchmarks/gem_wsim: Manually calculate VLA struct sizes Chris Wilson
  2 siblings, 0 replies; 9+ messages in thread
From: Ser, Simon @ 2019-05-24  7:45 UTC (permalink / raw)
  To: intel-gfx, chris; +Cc: igt-dev

On Fri, 2019-05-24 at 08:25 +0100, Chris Wilson wrote:
> Apparently VLA structs (e.g. struct { int array[count] }) is a gcc
> extension that clang refuses to support as handling memory layout is too
> difficult for it.
> 
> Move the on-stack VLA to the heap.

IMHO using an upper bound would be much simpler.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++---------------
>  1 file changed, 95 insertions(+), 51 deletions(-)
> 
> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> index e2ffb93a9..0a0032bff 100644
> --- a/benchmarks/gem_wsim.c
> +++ b/benchmarks/gem_wsim.c
> @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask)
>  	return slice_mask;
>  }
>  
> +static size_t sizeof_load_balance(int count)
> +{
> +	struct i915_context_engines_load_balance *ptr;
> +
> +	assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
> +	return sizeof(*ptr) + sizeof(ptr->engines[count]);
> +}
> +
> +static struct i915_context_engines_load_balance *
> +alloc_load_balance(int count)
> +{
> +	return calloc(1, sizeof_load_balance(count));
> +}
> +
> +static size_t sizeof_param_engines(int count)
> +{
> +	struct i915_context_param_engines *ptr;
> +
> +	assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
> +	return sizeof(*ptr) + sizeof(ptr->engines[count]);
> +}
> +
> +static struct i915_context_param_engines *
> +alloc_param_engines(int count)
> +{
> +	return calloc(1, sizeof_param_engines(count));
> +}
> +
> +static size_t sizeof_engines_bond(int count)
> +{
> +	struct i915_context_engines_bond *ptr;
> +
> +	assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
> +	return sizeof(*ptr) + sizeof(ptr->engines[count]);
> +}
> +
> +static struct i915_context_engines_bond *
> +alloc_engines_bond(int count)
> +{
> +	return calloc(1, sizeof_engines_bond(count));
> +}
> +
>  static int
>  prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>  {
> @@ -1676,66 +1718,54 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>  		}
>  
>  		if (ctx->engine_map) {
> -			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines,
> -							  ctx->engine_map_count + 1);
> -			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance,
> -								 ctx->engine_map_count);
> +			struct i915_context_param_engines *set_engines =
> +				alloc_param_engines(ctx->engine_map_count + 1);
> +			struct i915_context_engines_load_balance *load_balance =
> +				alloc_load_balance(ctx->engine_map_count);
>  			struct drm_i915_gem_context_param param = {
>  				.ctx_id = ctx_id,
>  				.param = I915_CONTEXT_PARAM_ENGINES,
> -				.size = sizeof(set_engines),
> -				.value = to_user_pointer(&set_engines),
> +				.size = sizeof_param_engines(ctx->engine_map_count + 1),
> +				.value = to_user_pointer(set_engines),
>  			};
> +			struct i915_context_engines_bond *last = NULL;
>  
>  			if (ctx->wants_balance) {
> -				set_engines.extensions =
> -					to_user_pointer(&load_balance);
> +				set_engines->extensions =
> +					to_user_pointer(load_balance);
>  
> -				memset(&load_balance, 0, sizeof(load_balance));
> -				load_balance.base.name =
> +				load_balance->base.name =
>  					I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> -				load_balance.num_siblings =
> +				load_balance->num_siblings =
>  					ctx->engine_map_count;
>  
>  				for (j = 0; j < ctx->engine_map_count; j++)
> -					load_balance.engines[j] =
> +					load_balance->engines[j] =
>  						get_engine(ctx->engine_map[j]);
> -			} else {
> -				set_engines.extensions = 0;
>  			}
>  
>  			/* Reserve slot for virtual engine. */
> -			set_engines.engines[0].engine_class =
> +			set_engines->engines[0].engine_class =
>  				I915_ENGINE_CLASS_INVALID;
> -			set_engines.engines[0].engine_instance =
> +			set_engines->engines[0].engine_instance =
>  				I915_ENGINE_CLASS_INVALID_NONE;
>  
>  			for (j = 1; j <= ctx->engine_map_count; j++)
> -				set_engines.engines[j] =
> +				set_engines->engines[j] =
>  					get_engine(ctx->engine_map[j - 1]);
>  
> +			last = NULL;
>  			for (j = 0; j < ctx->bond_count; j++) {
>  				unsigned long mask = ctx->bonds[j].mask;
> -				I915_DEFINE_CONTEXT_ENGINES_BOND(bond,
> -								 __builtin_popcount(mask));
> -				struct i915_context_engines_bond *p = NULL, *prev;
> +				struct i915_context_engines_bond *bond =
> +					alloc_engines_bond(__builtin_popcount(mask));
>  				unsigned int b, e;
>  
> -				prev = p;
> -				p = alloca(sizeof(bond));
> -				assert(p);
> -				memset(p, 0, sizeof(bond));
> -
> -				if (j == 0)
> -					load_balance.base.next_extension =
> -						to_user_pointer(p);
> -				else if (j < (ctx->bond_count - 1))
> -					prev->base.next_extension =
> -						to_user_pointer(p);
> +				bond->base.next_extension = to_user_pointer(last);
> +				bond->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
>  
> -				p->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
> -				p->virtual_index = 0;
> -				p->master = get_engine(ctx->bonds[j].master);
> +				bond->virtual_index = 0;
> +				bond->master = get_engine(ctx->bonds[j].master);
>  
>  				for (b = 0, e = 0; mask; e++, mask >>= 1) {
>  					unsigned int idx;
> @@ -1743,44 +1773,58 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>  					if (!(mask & 1))
>  						continue;
>  
> -					idx = find_engine(&set_engines.engines[1],
> +					idx = find_engine(&set_engines->engines[1],
>  							  ctx->engine_map_count,
>  							  e);
> -					p->engines[b++] =
> -						set_engines.engines[1 + idx];
> +					bond->engines[b++] =
> +						set_engines->engines[1 + idx];
>  				}
> +
> +				last = bond;
>  			}
> +			load_balance->base.next_extension = to_user_pointer(last);
>  
>  			gem_context_set_param(fd, &param);
> +
> +			while (last) {
> +				struct i915_context_engines_bond *next =
> +					from_user_pointer(last->base.next_extension);
> +				free(last);
> +				last = next;
> +			}
> +			free(load_balance);
> +			free(set_engines);
>  		} else if (ctx->wants_balance) {
>  			const unsigned int count = num_engines_in_class(VCS);
> -			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance,
> -								 count);
> -			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines,
> -							  count + 1);
> +			struct i915_context_engines_load_balance *load_balance =
> +				alloc_load_balance(count);
> +			struct i915_context_param_engines *set_engines =
> +				alloc_param_engines(count + 1);
>  			struct drm_i915_gem_context_param param = {
>  				.ctx_id = ctx_id,
>  				.param = I915_CONTEXT_PARAM_ENGINES,
> -				.size = sizeof(set_engines),
> -				.value = to_user_pointer(&set_engines),
> +				.size = sizeof_param_engines(count + 1),
> +				.value = to_user_pointer(set_engines),
>  			};
>  
> -			set_engines.extensions = to_user_pointer(&load_balance);
> +			set_engines->extensions = to_user_pointer(load_balance);
>  
> -			set_engines.engines[0].engine_class =
> +			set_engines->engines[0].engine_class =
>  				I915_ENGINE_CLASS_INVALID;
> -			set_engines.engines[0].engine_instance =
> +			set_engines->engines[0].engine_instance =
>  				I915_ENGINE_CLASS_INVALID_NONE;
> -			fill_engines_class(&set_engines.engines[1], VCS);
> +			fill_engines_class(&set_engines->engines[1], VCS);
>  
> -			memset(&load_balance, 0, sizeof(load_balance));
> -			load_balance.base.name =
> +			load_balance->base.name =
>  				I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> -			load_balance.num_siblings = count;
> +			load_balance->num_siblings = count;
>  
> -			fill_engines_class(&load_balance.engines[0], VCS);
> +			fill_engines_class(&load_balance->engines[0], VCS);
>  
>  			gem_context_set_param(fd, &param);
> +
> +			free(set_engines);
> +			free(load_balance);
>  		}
>  
>  		if (wrk->sseu) {
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH i-g-t] benchmarks/gem_wsim: Heap allocate VLA structs
  2019-05-24  7:25 [PATCH i-g-t] benchmarks/gem_wsim: Heap allocate VLA structs Chris Wilson
  2019-05-24  7:45 ` [igt-dev] " Ser, Simon
@ 2019-05-24  8:20 ` Tvrtko Ursulin
  2019-05-24  8:27   ` [igt-dev] " Ser, Simon
  2019-05-24  8:33   ` Chris Wilson
  2019-05-24  8:45 ` [PATCH i-g-t v2] benchmarks/gem_wsim: Manually calculate VLA struct sizes Chris Wilson
  2 siblings, 2 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2019-05-24  8:20 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 24/05/2019 08:25, Chris Wilson wrote:
> Apparently VLA structs (e.g. struct { int array[count] }) is a gcc
> extension that clang refuses to support as handling memory layout is too
> difficult for it.
> 
> Move the on-stack VLA to the heap.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++---------------
>   1 file changed, 95 insertions(+), 51 deletions(-)
> 
> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> index e2ffb93a9..0a0032bff 100644
> --- a/benchmarks/gem_wsim.c
> +++ b/benchmarks/gem_wsim.c
> @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask)
>   	return slice_mask;
>   }
>   
> +static size_t sizeof_load_balance(int count)
> +{
> +	struct i915_context_engines_load_balance *ptr;
> +
> +	assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));

This seems wrong - it is bound to trigger.

> +	return sizeof(*ptr) + sizeof(ptr->engines[count]);

So the size of one engine needs to be multiplied by count.

> +}
> +
> +static struct i915_context_engines_load_balance *
> +alloc_load_balance(int count)
> +{
> +	return calloc(1, sizeof_load_balance(count));

How about alloca so cleanup is simpler? Or is alloca also on the 
unpopular list?

Or possibly what Simon suggested: just large temporary stack arrays
would be enough and the easiest diff, with an assert that they fit.

I can do that if you want?

Regards,

Tvrtko

> +}
> +
> +static size_t sizeof_param_engines(int count)
> +{
> +	struct i915_context_param_engines *ptr;
> +
> +	assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
> +	return sizeof(*ptr) + sizeof(ptr->engines[count]);
> +}
> +
> +static struct i915_context_param_engines *
> +alloc_param_engines(int count)
> +{
> +	return calloc(1, sizeof_param_engines(count));
> +}
> +
> +static size_t sizeof_engines_bond(int count)
> +{
> +	struct i915_context_engines_bond *ptr;
> +
> +	assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
> +	return sizeof(*ptr) + sizeof(ptr->engines[count]);
> +}
> +
> +static struct i915_context_engines_bond *
> +alloc_engines_bond(int count)
> +{
> +	return calloc(1, sizeof_engines_bond(count));
> +}
> +
>   static int
>   prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>   {
> @@ -1676,66 +1718,54 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>   		}
>   
>   		if (ctx->engine_map) {
> -			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines,
> -							  ctx->engine_map_count + 1);
> -			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance,
> -								 ctx->engine_map_count);
> +			struct i915_context_param_engines *set_engines =
> +				alloc_param_engines(ctx->engine_map_count + 1);
> +			struct i915_context_engines_load_balance *load_balance =
> +				alloc_load_balance(ctx->engine_map_count);
>   			struct drm_i915_gem_context_param param = {
>   				.ctx_id = ctx_id,
>   				.param = I915_CONTEXT_PARAM_ENGINES,
> -				.size = sizeof(set_engines),
> -				.value = to_user_pointer(&set_engines),
> +				.size = sizeof_param_engines(ctx->engine_map_count + 1),
> +				.value = to_user_pointer(set_engines),
>   			};
> +			struct i915_context_engines_bond *last = NULL;
>   
>   			if (ctx->wants_balance) {
> -				set_engines.extensions =
> -					to_user_pointer(&load_balance);
> +				set_engines->extensions =
> +					to_user_pointer(load_balance);
>   
> -				memset(&load_balance, 0, sizeof(load_balance));
> -				load_balance.base.name =
> +				load_balance->base.name =
>   					I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> -				load_balance.num_siblings =
> +				load_balance->num_siblings =
>   					ctx->engine_map_count;
>   
>   				for (j = 0; j < ctx->engine_map_count; j++)
> -					load_balance.engines[j] =
> +					load_balance->engines[j] =
>   						get_engine(ctx->engine_map[j]);
> -			} else {
> -				set_engines.extensions = 0;
>   			}
>   
>   			/* Reserve slot for virtual engine. */
> -			set_engines.engines[0].engine_class =
> +			set_engines->engines[0].engine_class =
>   				I915_ENGINE_CLASS_INVALID;
> -			set_engines.engines[0].engine_instance =
> +			set_engines->engines[0].engine_instance =
>   				I915_ENGINE_CLASS_INVALID_NONE;
>   
>   			for (j = 1; j <= ctx->engine_map_count; j++)
> -				set_engines.engines[j] =
> +				set_engines->engines[j] =
>   					get_engine(ctx->engine_map[j - 1]);
>   
> +			last = NULL;
>   			for (j = 0; j < ctx->bond_count; j++) {
>   				unsigned long mask = ctx->bonds[j].mask;
> -				I915_DEFINE_CONTEXT_ENGINES_BOND(bond,
> -								 __builtin_popcount(mask));
> -				struct i915_context_engines_bond *p = NULL, *prev;
> +				struct i915_context_engines_bond *bond =
> +					alloc_engines_bond(__builtin_popcount(mask));
>   				unsigned int b, e;
>   
> -				prev = p;
> -				p = alloca(sizeof(bond));
> -				assert(p);
> -				memset(p, 0, sizeof(bond));
> -
> -				if (j == 0)
> -					load_balance.base.next_extension =
> -						to_user_pointer(p);
> -				else if (j < (ctx->bond_count - 1))
> -					prev->base.next_extension =
> -						to_user_pointer(p);
> +				bond->base.next_extension = to_user_pointer(last);
> +				bond->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
>   
> -				p->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
> -				p->virtual_index = 0;
> -				p->master = get_engine(ctx->bonds[j].master);
> +				bond->virtual_index = 0;
> +				bond->master = get_engine(ctx->bonds[j].master);
>   
>   				for (b = 0, e = 0; mask; e++, mask >>= 1) {
>   					unsigned int idx;
> @@ -1743,44 +1773,58 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>   					if (!(mask & 1))
>   						continue;
>   
> -					idx = find_engine(&set_engines.engines[1],
> +					idx = find_engine(&set_engines->engines[1],
>   							  ctx->engine_map_count,
>   							  e);
> -					p->engines[b++] =
> -						set_engines.engines[1 + idx];
> +					bond->engines[b++] =
> +						set_engines->engines[1 + idx];
>   				}
> +
> +				last = bond;
>   			}
> +			load_balance->base.next_extension = to_user_pointer(last);
>   
>   			gem_context_set_param(fd, &param);
> +
> +			while (last) {
> +				struct i915_context_engines_bond *next =
> +					from_user_pointer(last->base.next_extension);
> +				free(last);
> +				last = next;
> +			}
> +			free(load_balance);
> +			free(set_engines);
>   		} else if (ctx->wants_balance) {
>   			const unsigned int count = num_engines_in_class(VCS);
> -			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance,
> -								 count);
> -			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines,
> -							  count + 1);
> +			struct i915_context_engines_load_balance *load_balance =
> +				alloc_load_balance(count);
> +			struct i915_context_param_engines *set_engines =
> +				alloc_param_engines(count + 1);
>   			struct drm_i915_gem_context_param param = {
>   				.ctx_id = ctx_id,
>   				.param = I915_CONTEXT_PARAM_ENGINES,
> -				.size = sizeof(set_engines),
> -				.value = to_user_pointer(&set_engines),
> +				.size = sizeof_param_engines(count + 1),
> +				.value = to_user_pointer(set_engines),
>   			};
>   
> -			set_engines.extensions = to_user_pointer(&load_balance);
> +			set_engines->extensions = to_user_pointer(load_balance);
>   
> -			set_engines.engines[0].engine_class =
> +			set_engines->engines[0].engine_class =
>   				I915_ENGINE_CLASS_INVALID;
> -			set_engines.engines[0].engine_instance =
> +			set_engines->engines[0].engine_instance =
>   				I915_ENGINE_CLASS_INVALID_NONE;
> -			fill_engines_class(&set_engines.engines[1], VCS);
> +			fill_engines_class(&set_engines->engines[1], VCS);
>   
> -			memset(&load_balance, 0, sizeof(load_balance));
> -			load_balance.base.name =
> +			load_balance->base.name =
>   				I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> -			load_balance.num_siblings = count;
> +			load_balance->num_siblings = count;
>   
> -			fill_engines_class(&load_balance.engines[0], VCS);
> +			fill_engines_class(&load_balance->engines[0], VCS);
>   
>   			gem_context_set_param(fd, &param);
> +
> +			free(set_engines);
> +			free(load_balance);
>   		}
>   
>   		if (wrk->sseu) {
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] benchmarks/gem_wsim: Heap allocate VLA structs
  2019-05-24  8:20 ` Tvrtko Ursulin
@ 2019-05-24  8:27   ` Ser, Simon
  2019-05-24  8:33   ` Chris Wilson
  1 sibling, 0 replies; 9+ messages in thread
From: Ser, Simon @ 2019-05-24  8:27 UTC (permalink / raw)
  To: tvrtko.ursulin, intel-gfx, chris; +Cc: igt-dev

On Fri, 2019-05-24 at 09:20 +0100, Tvrtko Ursulin wrote:
> On 24/05/2019 08:25, Chris Wilson wrote:
> > Apparently VLA structs (e.g. struct { int array[count] }) is a gcc
> > extension that clang refuses to support as handling memory layout is too
> > difficult for it.
> > 
> > Move the on-stack VLA to the heap.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++---------------
> >   1 file changed, 95 insertions(+), 51 deletions(-)
> > 
> > diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> > index e2ffb93a9..0a0032bff 100644
> > --- a/benchmarks/gem_wsim.c
> > +++ b/benchmarks/gem_wsim.c
> > @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask)
> >   	return slice_mask;
> >   }
> >   
> > +static size_t sizeof_load_balance(int count)
> > +{
> > +	struct i915_context_engines_load_balance *ptr;
> > +
> > +	assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
> 
> This seems wrong - is bound to trigger.
> 
> > +	return sizeof(*ptr) + sizeof(ptr->engines[count]);
> 
> So size of of engine needs to be multiplied by count.
> 
> > +}
> > +
> > +static struct i915_context_engines_load_balance *
> > +alloc_load_balance(int count)
> > +{
> > +	return calloc(1, sizeof_load_balance(count));
> 
> How about alloca so cleanup is simpler? Or is alloca also on the 
> unpopular list?
> 
> Or possibly what Simon suggested, just a large temporary stack arrays 
> would be enough and easiest diff. Just with an assert that it fits.
> 
> I can do that if you want?

I think Arek already has a patch for this.

> Regards,
> 
> Tvrtko
> 
> > +}
> > +
> > +static size_t sizeof_param_engines(int count)
> > +{
> > +	struct i915_context_param_engines *ptr;
> > +
> > +	assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
> > +	return sizeof(*ptr) + sizeof(ptr->engines[count]);
> > +}
> > +
> > +static struct i915_context_param_engines *
> > +alloc_param_engines(int count)
> > +{
> > +	return calloc(1, sizeof_param_engines(count));
> > +}
> > +
> > +static size_t sizeof_engines_bond(int count)
> > +{
> > +	struct i915_context_engines_bond *ptr;
> > +
> > +	assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
> > +	return sizeof(*ptr) + sizeof(ptr->engines[count]);
> > +}
> > +
> > +static struct i915_context_engines_bond *
> > +alloc_engines_bond(int count)
> > +{
> > +	return calloc(1, sizeof_engines_bond(count));
> > +}
> > +
> >   static int
> >   prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
> >   {
> > @@ -1676,66 +1718,54 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
> >   		}
> >   
> >   		if (ctx->engine_map) {
> > -			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines,
> > -							  ctx->engine_map_count + 1);
> > -			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance,
> > -								 ctx->engine_map_count);
> > +			struct i915_context_param_engines *set_engines =
> > +				alloc_param_engines(ctx->engine_map_count + 1);
> > +			struct i915_context_engines_load_balance *load_balance =
> > +				alloc_load_balance(ctx->engine_map_count);
> >   			struct drm_i915_gem_context_param param = {
> >   				.ctx_id = ctx_id,
> >   				.param = I915_CONTEXT_PARAM_ENGINES,
> > -				.size = sizeof(set_engines),
> > -				.value = to_user_pointer(&set_engines),
> > +				.size = sizeof_param_engines(ctx->engine_map_count + 1),
> > +				.value = to_user_pointer(set_engines),
> >   			};
> > +			struct i915_context_engines_bond *last = NULL;
> >   
> >   			if (ctx->wants_balance) {
> > -				set_engines.extensions =
> > -					to_user_pointer(&load_balance);
> > +				set_engines->extensions =
> > +					to_user_pointer(load_balance);
> >   
> > -				memset(&load_balance, 0, sizeof(load_balance));
> > -				load_balance.base.name =
> > +				load_balance->base.name =
> >   					I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> > -				load_balance.num_siblings =
> > +				load_balance->num_siblings =
> >   					ctx->engine_map_count;
> >   
> >   				for (j = 0; j < ctx->engine_map_count; j++)
> > -					load_balance.engines[j] =
> > +					load_balance->engines[j] =
> >   						get_engine(ctx->engine_map[j]);
> > -			} else {
> > -				set_engines.extensions = 0;
> >   			}
> >   
> >   			/* Reserve slot for virtual engine. */
> > -			set_engines.engines[0].engine_class =
> > +			set_engines->engines[0].engine_class =
> >   				I915_ENGINE_CLASS_INVALID;
> > -			set_engines.engines[0].engine_instance =
> > +			set_engines->engines[0].engine_instance =
> >   				I915_ENGINE_CLASS_INVALID_NONE;
> >   
> >   			for (j = 1; j <= ctx->engine_map_count; j++)
> > -				set_engines.engines[j] =
> > +				set_engines->engines[j] =
> >   					get_engine(ctx->engine_map[j - 1]);
> >   
> > +			last = NULL;
> >   			for (j = 0; j < ctx->bond_count; j++) {
> >   				unsigned long mask = ctx->bonds[j].mask;
> > -				I915_DEFINE_CONTEXT_ENGINES_BOND(bond,
> > -								 __builtin_popcount(mask));
> > -				struct i915_context_engines_bond *p = NULL, *prev;
> > +				struct i915_context_engines_bond *bond =
> > +					alloc_engines_bond(__builtin_popcount(mask));
> >   				unsigned int b, e;
> >   
> > -				prev = p;
> > -				p = alloca(sizeof(bond));
> > -				assert(p);
> > -				memset(p, 0, sizeof(bond));
> > -
> > -				if (j == 0)
> > -					load_balance.base.next_extension =
> > -						to_user_pointer(p);
> > -				else if (j < (ctx->bond_count - 1))
> > -					prev->base.next_extension =
> > -						to_user_pointer(p);
> > +				bond->base.next_extension = to_user_pointer(last);
> > +				bond->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
> >   
> > -				p->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
> > -				p->virtual_index = 0;
> > -				p->master = get_engine(ctx->bonds[j].master);
> > +				bond->virtual_index = 0;
> > +				bond->master = get_engine(ctx->bonds[j].master);
> >   
> >   				for (b = 0, e = 0; mask; e++, mask >>= 1) {
> >   					unsigned int idx;
> > @@ -1743,44 +1773,58 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
> >   					if (!(mask & 1))
> >   						continue;
> >   
> > -					idx = find_engine(&set_engines.engines[1],
> > +					idx = find_engine(&set_engines->engines[1],
> >   							  ctx->engine_map_count,
> >   							  e);
> > -					p->engines[b++] =
> > -						set_engines.engines[1 + idx];
> > +					bond->engines[b++] =
> > +						set_engines->engines[1 + idx];
> >   				}
> > +
> > +				last = bond;
> >   			}
> > +			load_balance->base.next_extension = to_user_pointer(last);
> >   
> >   			gem_context_set_param(fd, &param);
> > +
> > +			while (last) {
> > +				struct i915_context_engines_bond *next =
> > +					from_user_pointer(last->base.next_extension);
> > +				free(last);
> > +				last = next;
> > +			}
> > +			free(load_balance);
> > +			free(set_engines);
> >   		} else if (ctx->wants_balance) {
> >   			const unsigned int count = num_engines_in_class(VCS);
> > -			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance,
> > -								 count);
> > -			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines,
> > -							  count + 1);
> > +			struct i915_context_engines_load_balance *load_balance =
> > +				alloc_load_balance(count);
> > +			struct i915_context_param_engines *set_engines =
> > +				alloc_param_engines(count + 1);
> >   			struct drm_i915_gem_context_param param = {
> >   				.ctx_id = ctx_id,
> >   				.param = I915_CONTEXT_PARAM_ENGINES,
> > -				.size = sizeof(set_engines),
> > -				.value = to_user_pointer(&set_engines),
> > +				.size = sizeof_param_engines(count + 1),
> > +				.value = to_user_pointer(set_engines),
> >   			};
> >   
> > -			set_engines.extensions = to_user_pointer(&load_balance);
> > +			set_engines->extensions = to_user_pointer(load_balance);
> >   
> > -			set_engines.engines[0].engine_class =
> > +			set_engines->engines[0].engine_class =
> >   				I915_ENGINE_CLASS_INVALID;
> > -			set_engines.engines[0].engine_instance =
> > +			set_engines->engines[0].engine_instance =
> >   				I915_ENGINE_CLASS_INVALID_NONE;
> > -			fill_engines_class(&set_engines.engines[1], VCS);
> > +			fill_engines_class(&set_engines->engines[1], VCS);
> >   
> > -			memset(&load_balance, 0, sizeof(load_balance));
> > -			load_balance.base.name =
> > +			load_balance->base.name =
> >   				I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> > -			load_balance.num_siblings = count;
> > +			load_balance->num_siblings = count;
> >   
> > -			fill_engines_class(&load_balance.engines[0], VCS);
> > +			fill_engines_class(&load_balance->engines[0], VCS);
> >   
> >   			gem_context_set_param(fd, &param);
> > +
> > +			free(set_engines);
> > +			free(load_balance);
> >   		}
> >   
> >   		if (wrk->sseu) {
> > 
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH i-g-t] benchmarks/gem_wsim: Heap allocate VLA structs
  2019-05-24  8:20 ` Tvrtko Ursulin
  2019-05-24  8:27   ` [igt-dev] " Ser, Simon
@ 2019-05-24  8:33   ` Chris Wilson
  2019-05-24  8:39     ` [igt-dev] " Ser, Simon
  2019-05-24  8:44     ` Tvrtko Ursulin
  1 sibling, 2 replies; 9+ messages in thread
From: Chris Wilson @ 2019-05-24  8:33 UTC (permalink / raw)
  To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev

Quoting Tvrtko Ursulin (2019-05-24 09:20:47)
> 
> On 24/05/2019 08:25, Chris Wilson wrote:
> > Apparently VLA structs (e.g. struct { int array[count] }) is a gcc
> > extension that clang refuses to support as handling memory layout is too
> > difficult for it.
> > 
> > Move the on-stack VLA to the heap.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++---------------
> >   1 file changed, 95 insertions(+), 51 deletions(-)
> > 
> > diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> > index e2ffb93a9..0a0032bff 100644
> > --- a/benchmarks/gem_wsim.c
> > +++ b/benchmarks/gem_wsim.c
> > @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask)
> >       return slice_mask;
> >   }
> >   
> > +static size_t sizeof_load_balance(int count)
> > +{
> > +     struct i915_context_engines_load_balance *ptr;
> > +
> > +     assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
> 
> This seems wrong - is bound to trigger.

Why does it seem wrong? That's the calculation used previously, and the
ptr->engines[] was meant to be packed in order for
sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]). Anyway,
I threw it in there to check if the calculation was sane.

> > +     return sizeof(*ptr) + sizeof(ptr->engines[count]);
> 
> > So size of engine needs to be multiplied by count.

(Just note this is what the current VLA evaluates to :)

> > +}
> > +
> > +static struct i915_context_engines_load_balance *
> > +alloc_load_balance(int count)
> > +{
> > +     return calloc(1, sizeof_load_balance(count));
> 
> How about alloca so cleanup is simpler? Or is alloca also on the 
> unpopular list?

I don't mind. Would shave a few lines indeed, but we need the memsets
back. #define alloca0()?

> Or possibly what Simon suggested, just a large temporary stack arrays 
> would be enough and easiest diff. Just with an assert that it fits.

I don't think that is as clean for the long term.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] benchmarks/gem_wsim: Heap allocate VLA structs
  2019-05-24  8:33   ` Chris Wilson
@ 2019-05-24  8:39     ` Ser, Simon
  2019-05-24  8:44     ` Tvrtko Ursulin
  1 sibling, 0 replies; 9+ messages in thread
From: Ser, Simon @ 2019-05-24  8:39 UTC (permalink / raw)
  To: tvrtko.ursulin, intel-gfx, chris; +Cc: igt-dev

On Fri, 2019-05-24 at 09:33 +0100, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-24 09:20:47)
> > On 24/05/2019 08:25, Chris Wilson wrote:
> > > Apparently VLA structs (e.g. struct { int array[count] }) is a gcc
> > > extension that clang refuses to support as handling memory layout is too
> > > difficult for it.
> > > 
> > > Move the on-stack VLA to the heap.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > > ---
> > >   benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++---------------
> > >   1 file changed, 95 insertions(+), 51 deletions(-)
> > > 
> > > diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> > > index e2ffb93a9..0a0032bff 100644
> > > --- a/benchmarks/gem_wsim.c
> > > +++ b/benchmarks/gem_wsim.c
> > > @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask)
> > >       return slice_mask;
> > >   }
> > >   
> > > +static size_t sizeof_load_balance(int count)
> > > +{
> > > +     struct i915_context_engines_load_balance *ptr;
> > > +
> > > +     assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
> > 
> > This seems wrong - is bound to trigger.
> 
> Why does it seem wrong? That's the calculation used previously, and the
> ptr->engines[] was meant to be packed in order for
> sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]). Anyway,
> I threw it in there to check if the calculation was sane.
> 
> > > +     return sizeof(*ptr) + sizeof(ptr->engines[count]);
> > 
> > > So size of engine needs to be multiplied by count.
> 
> > (Just note this is what the current VLA evaluates to :)
> 
> > > +}
> > > +
> > > +static struct i915_context_engines_load_balance *
> > > +alloc_load_balance(int count)
> > > +{
> > > +     return calloc(1, sizeof_load_balance(count));
> > 
> > How about alloca so cleanup is simpler? Or is alloca also on the 
> > unpopular list?
> 
> I don't mind. Would shave a few lines indeed, but we need the memsets
> back. #define alloca0()?
> 
> > Or possibly what Simon suggested, just a large temporary stack arrays 
> > would be enough and easiest diff. Just with an assert that it fits.
> 
> I don't think that is as clean for the long term.

I don't understand the motivation here. Can you elaborate?
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH i-g-t] benchmarks/gem_wsim: Heap allocate VLA structs
  2019-05-24  8:33   ` Chris Wilson
  2019-05-24  8:39     ` [igt-dev] " Ser, Simon
@ 2019-05-24  8:44     ` Tvrtko Ursulin
  1 sibling, 0 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2019-05-24  8:44 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 24/05/2019 09:33, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-24 09:20:47)
>>
>> On 24/05/2019 08:25, Chris Wilson wrote:
>>> Apparently VLA structs (e.g. struct { int array[count] }) is a gcc
>>> extension that clang refuses to support as handling memory layout is too
>>> difficult for it.
>>>
>>> Move the on-stack VLA to the heap.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    benchmarks/gem_wsim.c | 146 +++++++++++++++++++++++++++---------------
>>>    1 file changed, 95 insertions(+), 51 deletions(-)
>>>
>>> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
>>> index e2ffb93a9..0a0032bff 100644
>>> --- a/benchmarks/gem_wsim.c
>>> +++ b/benchmarks/gem_wsim.c
>>> @@ -1441,6 +1441,48 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask)
>>>        return slice_mask;
>>>    }
>>>    
>>> +static size_t sizeof_load_balance(int count)
>>> +{
>>> +     struct i915_context_engines_load_balance *ptr;
>>> +
>>> +     assert(sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]));
>>
>> This seems wrong - is bound to trigger.
> 
> Why does it seem wrong? That's the calculation used previously, and the
> ptr->engines[] was meant to be packed in order for
> sizeof(ptr->engines[count]) == count * sizeof(ptr->engines[0]). Anyway,
> I threw it in there to check if the calculation was sane.

Because sizeof(ptr->engines[0]) == sizeof(ptr->engines[N]), since the 
code is not declaring an N-element array, just referencing element N. So 
for more than one engine I expect it explodes. Unless I am way wrong.. I 
guess someone needs to run it.. :)

>>> +     return sizeof(*ptr) + sizeof(ptr->engines[count]);
>>
>> So size of engine needs to be multiplied by count.
> 
> (Just note this is what the current VLA evaluates to :)
> 
>>> +}
>>> +
>>> +static struct i915_context_engines_load_balance *
>>> +alloc_load_balance(int count)
>>> +{
>>> +     return calloc(1, sizeof_load_balance(count));
>>
>> How about alloca so cleanup is simpler? Or is alloca also on the
>> unpopular list?
> 
> I don't mind. Would shave a few lines indeed, but we need the memsets
> back. #define alloca0()?

And a helper macro to generically deal with struct header + engines 
array so it doesn't need to be repeated three times. Yadayada too much 
work.. :) ...

>> Or possibly what Simon suggested, just a large temporary stack arrays
>> would be enough and easiest diff. Just with an assert that it fits.
> 
> I don't think that is as clean for the long term.

... this should be just fine for now so I'd vote for it.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH i-g-t v2] benchmarks/gem_wsim: Manually calculate VLA struct sizes
  2019-05-24  7:25 [PATCH i-g-t] benchmarks/gem_wsim: Heap allocate VLA structs Chris Wilson
  2019-05-24  7:45 ` [igt-dev] " Ser, Simon
  2019-05-24  8:20 ` Tvrtko Ursulin
@ 2019-05-24  8:45 ` Chris Wilson
  2019-05-24  9:35   ` Tvrtko Ursulin
  2 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2019-05-24  8:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

Apparently VLA structs (e.g. struct { int array[count] }) are a gcc
extension that clang refuses to support as handling memory layout is too
difficult for it. So calculate the size by hand!

v2: Use alloca().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 benchmarks/gem_wsim.c | 115 +++++++++++++++++++++++-------------------
 1 file changed, 64 insertions(+), 51 deletions(-)

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index e2ffb93a9..db19925b1 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -1441,6 +1441,29 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask)
 	return slice_mask;
 }
 
+static size_t sizeof_load_balance(int count)
+{
+	struct i915_context_engines_load_balance *ptr;
+
+	return sizeof(*ptr) + count * sizeof(ptr->engines[0]);
+}
+
+static size_t sizeof_param_engines(int count)
+{
+	struct i915_context_param_engines *ptr;
+
+	return sizeof(*ptr) + count * sizeof(ptr->engines[0]);
+}
+
+static size_t sizeof_engines_bond(int count)
+{
+	struct i915_context_engines_bond *ptr;
+
+	return sizeof(*ptr) + count * sizeof(ptr->engines[0]);
+}
+
+#define alloca0(sz) ({ size_t sz__ = (sz); memset(alloca(sz__), 0, sz__); })
+
 static int
 prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 {
@@ -1676,66 +1699,54 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 		}
 
 		if (ctx->engine_map) {
-			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines,
-							  ctx->engine_map_count + 1);
-			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance,
-								 ctx->engine_map_count);
+			struct i915_context_param_engines *set_engines =
+				alloca0(sizeof_param_engines(ctx->engine_map_count + 1));
+			struct i915_context_engines_load_balance *load_balance =
+				alloca0(sizeof_load_balance(ctx->engine_map_count));
 			struct drm_i915_gem_context_param param = {
 				.ctx_id = ctx_id,
 				.param = I915_CONTEXT_PARAM_ENGINES,
-				.size = sizeof(set_engines),
-				.value = to_user_pointer(&set_engines),
+				.size = sizeof_param_engines(ctx->engine_map_count + 1),
+				.value = to_user_pointer(set_engines),
 			};
+			struct i915_context_engines_bond *last = NULL;
 
 			if (ctx->wants_balance) {
-				set_engines.extensions =
-					to_user_pointer(&load_balance);
+				set_engines->extensions =
+					to_user_pointer(load_balance);
 
-				memset(&load_balance, 0, sizeof(load_balance));
-				load_balance.base.name =
+				load_balance->base.name =
 					I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
-				load_balance.num_siblings =
+				load_balance->num_siblings =
 					ctx->engine_map_count;
 
 				for (j = 0; j < ctx->engine_map_count; j++)
-					load_balance.engines[j] =
+					load_balance->engines[j] =
 						get_engine(ctx->engine_map[j]);
-			} else {
-				set_engines.extensions = 0;
 			}
 
 			/* Reserve slot for virtual engine. */
-			set_engines.engines[0].engine_class =
+			set_engines->engines[0].engine_class =
 				I915_ENGINE_CLASS_INVALID;
-			set_engines.engines[0].engine_instance =
+			set_engines->engines[0].engine_instance =
 				I915_ENGINE_CLASS_INVALID_NONE;
 
 			for (j = 1; j <= ctx->engine_map_count; j++)
-				set_engines.engines[j] =
+				set_engines->engines[j] =
 					get_engine(ctx->engine_map[j - 1]);
 
+			last = NULL;
 			for (j = 0; j < ctx->bond_count; j++) {
 				unsigned long mask = ctx->bonds[j].mask;
-				I915_DEFINE_CONTEXT_ENGINES_BOND(bond,
-								 __builtin_popcount(mask));
-				struct i915_context_engines_bond *p = NULL, *prev;
+				struct i915_context_engines_bond *bond =
+					alloca0(sizeof_engines_bond(__builtin_popcount(mask)));
 				unsigned int b, e;
 
-				prev = p;
-				p = alloca(sizeof(bond));
-				assert(p);
-				memset(p, 0, sizeof(bond));
+				bond->base.next_extension = to_user_pointer(last);
+				bond->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
 
-				if (j == 0)
-					load_balance.base.next_extension =
-						to_user_pointer(p);
-				else if (j < (ctx->bond_count - 1))
-					prev->base.next_extension =
-						to_user_pointer(p);
-
-				p->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
-				p->virtual_index = 0;
-				p->master = get_engine(ctx->bonds[j].master);
+				bond->virtual_index = 0;
+				bond->master = get_engine(ctx->bonds[j].master);
 
 				for (b = 0, e = 0; mask; e++, mask >>= 1) {
 					unsigned int idx;
@@ -1743,42 +1754,44 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 					if (!(mask & 1))
 						continue;
 
-					idx = find_engine(&set_engines.engines[1],
+					idx = find_engine(&set_engines->engines[1],
 							  ctx->engine_map_count,
 							  e);
-					p->engines[b++] =
-						set_engines.engines[1 + idx];
+					bond->engines[b++] =
+						set_engines->engines[1 + idx];
 				}
+
+				last = bond;
 			}
+			load_balance->base.next_extension = to_user_pointer(last);
 
 			gem_context_set_param(fd, &param);
 		} else if (ctx->wants_balance) {
 			const unsigned int count = num_engines_in_class(VCS);
-			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance,
-								 count);
-			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines,
-							  count + 1);
+			struct i915_context_engines_load_balance *load_balance =
+				alloca0(sizeof_load_balance(count));
+			struct i915_context_param_engines *set_engines =
+				alloca0(sizeof_param_engines(count + 1));
 			struct drm_i915_gem_context_param param = {
 				.ctx_id = ctx_id,
 				.param = I915_CONTEXT_PARAM_ENGINES,
-				.size = sizeof(set_engines),
-				.value = to_user_pointer(&set_engines),
+				.size = sizeof_param_engines(count + 1),
+				.value = to_user_pointer(set_engines),
 			};
 
-			set_engines.extensions = to_user_pointer(&load_balance);
+			set_engines->extensions = to_user_pointer(load_balance);
 
-			set_engines.engines[0].engine_class =
+			set_engines->engines[0].engine_class =
 				I915_ENGINE_CLASS_INVALID;
-			set_engines.engines[0].engine_instance =
+			set_engines->engines[0].engine_instance =
 				I915_ENGINE_CLASS_INVALID_NONE;
-			fill_engines_class(&set_engines.engines[1], VCS);
+			fill_engines_class(&set_engines->engines[1], VCS);
 
-			memset(&load_balance, 0, sizeof(load_balance));
-			load_balance.base.name =
+			load_balance->base.name =
 				I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
-			load_balance.num_siblings = count;
+			load_balance->num_siblings = count;
 
-			fill_engines_class(&load_balance.engines[0], VCS);
+			fill_engines_class(&load_balance->engines[0], VCS);
 
 			gem_context_set_param(fd, &param);
 		}
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH i-g-t v2] benchmarks/gem_wsim: Manually calculate VLA struct sizes
  2019-05-24  8:45 ` [PATCH i-g-t v2] benchmarks/gem_wsim: Manually calculate VLA struct sizes Chris Wilson
@ 2019-05-24  9:35   ` Tvrtko Ursulin
  0 siblings, 0 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2019-05-24  9:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: igt-dev


On 24/05/2019 09:45, Chris Wilson wrote:
> Apparently VLA structs (e.g. struct { int array[count] }) is a gcc
> extension that clang refuses to support as handling memory layout is too
> difficult for it. So calculate the size by hand!
> 
> v2: Use alloca().
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   benchmarks/gem_wsim.c | 115 +++++++++++++++++++++++-------------------
>   1 file changed, 64 insertions(+), 51 deletions(-)
> 
> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> index e2ffb93a9..db19925b1 100644
> --- a/benchmarks/gem_wsim.c
> +++ b/benchmarks/gem_wsim.c
> @@ -1441,6 +1441,29 @@ set_ctx_sseu(struct ctx *ctx, uint64_t slice_mask)
>   	return slice_mask;
>   }
>   
> +static size_t sizeof_load_balance(int count)
> +{
> +	struct i915_context_engines_load_balance *ptr;
> +
> +	return sizeof(*ptr) + count * sizeof(ptr->engines[0]);
> +}
> +
> +static size_t sizeof_param_engines(int count)
> +{
> +	struct i915_context_param_engines *ptr;
> +
> +	return sizeof(*ptr) + count * sizeof(ptr->engines[0]);
> +}
> +
> +static size_t sizeof_engines_bond(int count)
> +{
> +	struct i915_context_engines_bond *ptr;
> +
> +	return sizeof(*ptr) + count * sizeof(ptr->engines[0]);
> +}
> +
> +#define alloca0(sz) ({ size_t sz__ = (sz); memset(alloca(sz__), 0, sz__); })
> +
>   static int
>   prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>   {
> @@ -1676,66 +1699,54 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>   		}
>   
>   		if (ctx->engine_map) {
> -			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines,
> -							  ctx->engine_map_count + 1);
> -			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance,
> -								 ctx->engine_map_count);
> +			struct i915_context_param_engines *set_engines =
> +				alloca0(sizeof_param_engines(ctx->engine_map_count + 1));
> +			struct i915_context_engines_load_balance *load_balance =
> +				alloca0(sizeof_load_balance(ctx->engine_map_count));
>   			struct drm_i915_gem_context_param param = {
>   				.ctx_id = ctx_id,
>   				.param = I915_CONTEXT_PARAM_ENGINES,
> -				.size = sizeof(set_engines),
> -				.value = to_user_pointer(&set_engines),
> +				.size = sizeof_param_engines(ctx->engine_map_count + 1),
> +				.value = to_user_pointer(set_engines),
>   			};
> +			struct i915_context_engines_bond *last = NULL;
>   
>   			if (ctx->wants_balance) {
> -				set_engines.extensions =
> -					to_user_pointer(&load_balance);
> +				set_engines->extensions =
> +					to_user_pointer(load_balance);
>   
> -				memset(&load_balance, 0, sizeof(load_balance));
> -				load_balance.base.name =
> +				load_balance->base.name =
>   					I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> -				load_balance.num_siblings =
> +				load_balance->num_siblings =
>   					ctx->engine_map_count;
>   
>   				for (j = 0; j < ctx->engine_map_count; j++)
> -					load_balance.engines[j] =
> +					load_balance->engines[j] =
>   						get_engine(ctx->engine_map[j]);
> -			} else {
> -				set_engines.extensions = 0;
>   			}
>   
>   			/* Reserve slot for virtual engine. */
> -			set_engines.engines[0].engine_class =
> +			set_engines->engines[0].engine_class =
>   				I915_ENGINE_CLASS_INVALID;
> -			set_engines.engines[0].engine_instance =
> +			set_engines->engines[0].engine_instance =
>   				I915_ENGINE_CLASS_INVALID_NONE;
>   
>   			for (j = 1; j <= ctx->engine_map_count; j++)
> -				set_engines.engines[j] =
> +				set_engines->engines[j] =
>   					get_engine(ctx->engine_map[j - 1]);
>   
> +			last = NULL;
>   			for (j = 0; j < ctx->bond_count; j++) {
>   				unsigned long mask = ctx->bonds[j].mask;
> -				I915_DEFINE_CONTEXT_ENGINES_BOND(bond,
> -								 __builtin_popcount(mask));
> -				struct i915_context_engines_bond *p = NULL, *prev;
> +				struct i915_context_engines_bond *bond =
> +					alloca0(sizeof_engines_bond(__builtin_popcount(mask)));
>   				unsigned int b, e;
>   
> -				prev = p;
> -				p = alloca(sizeof(bond));
> -				assert(p);
> -				memset(p, 0, sizeof(bond));
> +				bond->base.next_extension = to_user_pointer(last);
> +				bond->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
>   
> -				if (j == 0)
> -					load_balance.base.next_extension =
> -						to_user_pointer(p);
> -				else if (j < (ctx->bond_count - 1))
> -					prev->base.next_extension =
> -						to_user_pointer(p);
> -
> -				p->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
> -				p->virtual_index = 0;
> -				p->master = get_engine(ctx->bonds[j].master);
> +				bond->virtual_index = 0;
> +				bond->master = get_engine(ctx->bonds[j].master);
>   
>   				for (b = 0, e = 0; mask; e++, mask >>= 1) {
>   					unsigned int idx;
> @@ -1743,42 +1754,44 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>   					if (!(mask & 1))
>   						continue;
>   
> -					idx = find_engine(&set_engines.engines[1],
> +					idx = find_engine(&set_engines->engines[1],
>   							  ctx->engine_map_count,
>   							  e);
> -					p->engines[b++] =
> -						set_engines.engines[1 + idx];
> +					bond->engines[b++] =
> +						set_engines->engines[1 + idx];
>   				}
> +
> +				last = bond;
>   			}
> +			load_balance->base.next_extension = to_user_pointer(last);
>   
>   			gem_context_set_param(fd, &param);
>   		} else if (ctx->wants_balance) {
>   			const unsigned int count = num_engines_in_class(VCS);
> -			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance,
> -								 count);
> -			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines,
> -							  count + 1);
> +			struct i915_context_engines_load_balance *load_balance =
> +				alloca0(sizeof_load_balance(count));
> +			struct i915_context_param_engines *set_engines =
> +				alloca0(sizeof_param_engines(count + 1));
>   			struct drm_i915_gem_context_param param = {
>   				.ctx_id = ctx_id,
>   				.param = I915_CONTEXT_PARAM_ENGINES,
> -				.size = sizeof(set_engines),
> -				.value = to_user_pointer(&set_engines),
> +				.size = sizeof_param_engines(count + 1),
> +				.value = to_user_pointer(set_engines),
>   			};
>   
> -			set_engines.extensions = to_user_pointer(&load_balance);
> +			set_engines->extensions = to_user_pointer(load_balance);
>   
> -			set_engines.engines[0].engine_class =
> +			set_engines->engines[0].engine_class =
>   				I915_ENGINE_CLASS_INVALID;
> -			set_engines.engines[0].engine_instance =
> +			set_engines->engines[0].engine_instance =
>   				I915_ENGINE_CLASS_INVALID_NONE;
> -			fill_engines_class(&set_engines.engines[1], VCS);
> +			fill_engines_class(&set_engines->engines[1], VCS);
>   
> -			memset(&load_balance, 0, sizeof(load_balance));
> -			load_balance.base.name =
> +			load_balance->base.name =
>   				I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE;
> -			load_balance.num_siblings = count;
> +			load_balance->num_siblings = count;
>   
> -			fill_engines_class(&load_balance.engines[0], VCS);
> +			fill_engines_class(&load_balance->engines[0], VCS);
>   
>   			gem_context_set_param(fd, &param);
>   		}
> 

Seems correct.

You couldn't resist throwing in the reverse bond list building 
simplification as well. :)

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2019-05-24  9:35 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-24  7:25 [PATCH i-g-t] benchmarks/gem_wsim: Heap allocate VLA structs Chris Wilson
2019-05-24  7:45 ` [igt-dev] " Ser, Simon
2019-05-24  8:20 ` Tvrtko Ursulin
2019-05-24  8:27   ` [igt-dev] " Ser, Simon
2019-05-24  8:33   ` Chris Wilson
2019-05-24  8:39     ` [igt-dev] " Ser, Simon
2019-05-24  8:44     ` Tvrtko Ursulin
2019-05-24  8:45 ` [PATCH i-g-t v2] benchmarks/gem_wsim: Manually calculate VLA struct sizes Chris Wilson
2019-05-24  9:35   ` Tvrtko Ursulin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.