All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/3] drm/amdgpu: Reject submissions with too many IBs.
@ 2023-04-09 18:59 Bas Nieuwenhuizen
  2023-04-09 18:59 ` [PATCH v2 2/3] drm/amdgpu: Simplify amdgpu_hw_ip_info Bas Nieuwenhuizen
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Bas Nieuwenhuizen @ 2023-04-09 18:59 UTC (permalink / raw)
  To: amd-gfx
  Cc: alexander.deucher, Bas Nieuwenhuizen, christian.koenig, maraeo,
	timur.kristof

Instead of failing somewhere in the scheduler after the
ioctl has already succeeded.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2498
Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  | 9 +++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 5 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
 3 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 995ee9ff65c9..8db6618b9049 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -113,6 +113,15 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (!entity)
 		return 0;
 
+	if (entity->rq && entity->rq->sched) {
+		struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched);
+
+		if (num_ibs > ring->max_ibs) {
+			DRM_DEBUG("Rejected a submission with too many IBs");
+			return -EINVAL;
+		}
+	}
+
 	return drm_sched_job_init(&(*job)->base, entity, owner);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index dc474b809604..933cb95a0e30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -324,6 +324,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	ring->max_dw = max_dw;
 	ring->hw_prio = hw_prio;
 
+	if (ring->funcs->emit_ib_size) {
+		ring->max_ibs =
+			(max_dw - ring->funcs->emit_frame_size) / ring->funcs->emit_ib_size;
+	}
+
 	if (!ring->no_scheduler) {
 		hw_ip = ring->funcs->type;
 		num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 3989e755a5b4..7a295d80728b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -245,6 +245,7 @@ struct amdgpu_ring {
 	unsigned		ring_size;
 	unsigned		max_dw;
 	int			count_dw;
+	unsigned		max_ibs;
 	uint64_t		gpu_addr;
 	uint64_t		ptr_mask;
 	uint32_t		buf_mask;
-- 
2.40.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 2/3] drm/amdgpu: Simplify amdgpu_hw_ip_info.
  2023-04-09 18:59 [PATCH v2 1/3] drm/amdgpu: Reject submissions with too many IBs Bas Nieuwenhuizen
@ 2023-04-09 18:59 ` Bas Nieuwenhuizen
  2023-04-11  8:05   ` Christian König
  2023-04-09 18:59 ` [PATCH v2 3/3] drm/amdgpu: Add support for querying the max ibs in a submission. (v2) Bas Nieuwenhuizen
  2023-04-11 12:34 ` [PATCH v2 1/3] drm/amdgpu: Reject submissions with too many IBs Christian König
  2 siblings, 1 reply; 10+ messages in thread
From: Bas Nieuwenhuizen @ 2023-04-09 18:59 UTC (permalink / raw)
  To: amd-gfx
  Cc: alexander.deucher, Bas Nieuwenhuizen, christian.koenig, maraeo,
	timur.kristof

We have a list of all rings, so there is no sense in writing the same
loop N times. Given how often this gets called and how small the ring
list is, the performance of this shouldn't matter.

Note that some of the loops included checks related to harvesting.
Those checks are redundant now, as harvested rings never get initialized
and hence are never added to the adev->rings array.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 61 ++++++-------------------
 1 file changed, 15 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 0efb38539d70..89689b940493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -366,7 +366,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 	uint32_t ib_size_alignment = 0;
 	enum amd_ip_block_type type;
 	unsigned int num_rings = 0;
-	unsigned int i, j;
+	unsigned int i;
 
 	if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
 		return -EINVAL;
@@ -374,83 +374,49 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 	switch (info->query_hw_ip.type) {
 	case AMDGPU_HW_IP_GFX:
 		type = AMD_IP_BLOCK_TYPE_GFX;
-		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
-			if (adev->gfx.gfx_ring[i].sched.ready)
-				++num_rings;
+
 		ib_start_alignment = 32;
 		ib_size_alignment = 32;
 		break;
 	case AMDGPU_HW_IP_COMPUTE:
 		type = AMD_IP_BLOCK_TYPE_GFX;
-		for (i = 0; i < adev->gfx.num_compute_rings; i++)
-			if (adev->gfx.compute_ring[i].sched.ready)
-				++num_rings;
+
 		ib_start_alignment = 32;
 		ib_size_alignment = 32;
 		break;
 	case AMDGPU_HW_IP_DMA:
 		type = AMD_IP_BLOCK_TYPE_SDMA;
-		for (i = 0; i < adev->sdma.num_instances; i++)
-			if (adev->sdma.instance[i].ring.sched.ready)
-				++num_rings;
+
 		ib_start_alignment = 256;
 		ib_size_alignment = 4;
 		break;
 	case AMDGPU_HW_IP_UVD:
 		type = AMD_IP_BLOCK_TYPE_UVD;
-		for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
-			if (adev->uvd.harvest_config & (1 << i))
-				continue;
 
-			if (adev->uvd.inst[i].ring.sched.ready)
-				++num_rings;
-		}
 		ib_start_alignment = 64;
 		ib_size_alignment = 64;
 		break;
 	case AMDGPU_HW_IP_VCE:
 		type = AMD_IP_BLOCK_TYPE_VCE;
-		for (i = 0; i < adev->vce.num_rings; i++)
-			if (adev->vce.ring[i].sched.ready)
-				++num_rings;
+
 		ib_start_alignment = 4;
 		ib_size_alignment = 1;
 		break;
 	case AMDGPU_HW_IP_UVD_ENC:
 		type = AMD_IP_BLOCK_TYPE_UVD;
-		for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
-			if (adev->uvd.harvest_config & (1 << i))
-				continue;
 
-			for (j = 0; j < adev->uvd.num_enc_rings; j++)
-				if (adev->uvd.inst[i].ring_enc[j].sched.ready)
-					++num_rings;
-		}
 		ib_start_alignment = 64;
 		ib_size_alignment = 64;
 		break;
 	case AMDGPU_HW_IP_VCN_DEC:
 		type = AMD_IP_BLOCK_TYPE_VCN;
-		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-			if (adev->vcn.harvest_config & (1 << i))
-				continue;
 
-			if (adev->vcn.inst[i].ring_dec.sched.ready)
-				++num_rings;
-		}
 		ib_start_alignment = 16;
 		ib_size_alignment = 16;
 		break;
 	case AMDGPU_HW_IP_VCN_ENC:
 		type = AMD_IP_BLOCK_TYPE_VCN;
-		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-			if (adev->vcn.harvest_config & (1 << i))
-				continue;
 
-			for (j = 0; j < adev->vcn.num_enc_rings; j++)
-				if (adev->vcn.inst[i].ring_enc[j].sched.ready)
-					++num_rings;
-		}
 		ib_start_alignment = 64;
 		ib_size_alignment = 1;
 		break;
@@ -458,13 +424,6 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 		type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
 			AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
 
-		for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
-			if (adev->jpeg.harvest_config & (1 << i))
-				continue;
-
-			if (adev->jpeg.inst[i].ring_dec.sched.ready)
-				++num_rings;
-		}
 		ib_start_alignment = 16;
 		ib_size_alignment = 16;
 		break;
@@ -472,6 +431,16 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
+	for (i = 0; i < adev->num_rings; ++i) {
+		/* Note that this uses that ring types alias the equivalent
+		 * HW IP exposes to userspace.
+		 */
+		if (adev->rings[i]->funcs->type == info->query_hw_ip.type &&
+		    adev->rings[i]->sched.ready) {
+			++num_rings;
+		}
+	}
+
 	for (i = 0; i < adev->num_ip_blocks; i++)
 		if (adev->ip_blocks[i].version->type == type &&
 		    adev->ip_blocks[i].status.valid)
-- 
2.40.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 3/3] drm/amdgpu: Add support for querying the max ibs in a submission. (v2)
  2023-04-09 18:59 [PATCH v2 1/3] drm/amdgpu: Reject submissions with too many IBs Bas Nieuwenhuizen
  2023-04-09 18:59 ` [PATCH v2 2/3] drm/amdgpu: Simplify amdgpu_hw_ip_info Bas Nieuwenhuizen
@ 2023-04-09 18:59 ` Bas Nieuwenhuizen
  2023-04-11  8:10   ` Christian König
  2023-04-11 12:34 ` [PATCH v2 1/3] drm/amdgpu: Reject submissions with too many IBs Christian König
  2 siblings, 1 reply; 10+ messages in thread
From: Bas Nieuwenhuizen @ 2023-04-09 18:59 UTC (permalink / raw)
  To: amd-gfx
  Cc: maraeo, timur.kristof, Bas Nieuwenhuizen, alexander.deucher,
	David Airlie, christian.koenig

We need to introduce a new version of the info struct because
libdrm_amdgpu omitted any versioning info from amdgpu_query_hw_ip_info,
so the mesa<->libdrm_amdgpu interface can't handle increasing the
struct size.

This info would be used by radv to figure out when we need to
split a submission into multiple submissions. radv currently has
a limit of 192 which seems to work for most gfx submissions, but
is way too high for e.g. compute or sdma.

Because the kernel handling is just fine, we can use v2
everywhere and have the return_size do the versioning for us,
with userspace interpreting 0 as unknown.

Userspace implementation:
https://gitlab.freedesktop.org/bnieuwenhuizen/drm/-/tree/ib-rejection
https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/tree/ib-rejection

v2: Use base member in the new struct.

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2498
Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Cc: David Airlie <airlied@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 31 ++++++++++++++-----------
 include/uapi/drm/amdgpu_drm.h           | 14 +++++++++++
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 89689b940493..5b575e1aef1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -360,7 +360,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
 
 static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 			     struct drm_amdgpu_info *info,
-			     struct drm_amdgpu_info_hw_ip *result)
+			     struct drm_amdgpu_info_hw_ip2 *result)
 {
 	uint32_t ib_start_alignment = 0;
 	uint32_t ib_size_alignment = 0;
@@ -431,6 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
+	result->max_ibs = UINT_MAX;
 	for (i = 0; i < adev->num_rings; ++i) {
 		/* Note that this uses that ring types alias the equivalent
 		 * HW IP exposes to userspace.
@@ -438,6 +439,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 		if (adev->rings[i]->funcs->type == info->query_hw_ip.type &&
 		    adev->rings[i]->sched.ready) {
 			++num_rings;
+			result->max_ibs = min(result->max_ibs,
+					      adev->rings[i]->max_ibs);
 		}
 	}
 
@@ -452,36 +455,36 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 	num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
 			num_rings);
 
-	result->hw_ip_version_major = adev->ip_blocks[i].version->major;
-	result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
+	result->base.hw_ip_version_major = adev->ip_blocks[i].version->major;
+	result->base.hw_ip_version_minor = adev->ip_blocks[i].version->minor;
 
 	if (adev->asic_type >= CHIP_VEGA10) {
 		switch (type) {
 		case AMD_IP_BLOCK_TYPE_GFX:
-			result->ip_discovery_version = adev->ip_versions[GC_HWIP][0];
+			result->base.ip_discovery_version = adev->ip_versions[GC_HWIP][0];
 			break;
 		case AMD_IP_BLOCK_TYPE_SDMA:
-			result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0];
+			result->base.ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0];
 			break;
 		case AMD_IP_BLOCK_TYPE_UVD:
 		case AMD_IP_BLOCK_TYPE_VCN:
 		case AMD_IP_BLOCK_TYPE_JPEG:
-			result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
+			result->base.ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
 			break;
 		case AMD_IP_BLOCK_TYPE_VCE:
-			result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
+			result->base.ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
 			break;
 		default:
-			result->ip_discovery_version = 0;
+			result->base.ip_discovery_version = 0;
 			break;
 		}
 	} else {
-		result->ip_discovery_version = 0;
+		result->base.ip_discovery_version = 0;
 	}
-	result->capabilities_flags = 0;
-	result->available_rings = (1 << num_rings) - 1;
-	result->ib_start_alignment = ib_start_alignment;
-	result->ib_size_alignment = ib_size_alignment;
+	result->base.capabilities_flags = 0;
+	result->base.available_rings = (1 << num_rings) - 1;
+	result->base.ib_start_alignment = ib_start_alignment;
+	result->base.ib_size_alignment = ib_size_alignment;
 	return 0;
 }
 
@@ -536,7 +539,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		}
 		return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
 	case AMDGPU_INFO_HW_IP_INFO: {
-		struct drm_amdgpu_info_hw_ip ip = {};
+		struct drm_amdgpu_info_hw_ip2 ip = {};
 		int ret;
 
 		ret = amdgpu_hw_ip_info(adev, info, &ip);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index b6eb90df5d05..6b9e35b6f747 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -1128,6 +1128,9 @@ struct drm_amdgpu_info_device {
 	__u32 enabled_rb_pipes_mask_hi;
 };
 
+/* The size of this struct cannot be increased for compatibility reasons, use
+ * struct drm_amdgpu_info_hw_ip2 instead.
+ */
 struct drm_amdgpu_info_hw_ip {
 	/** Version of h/w IP */
 	__u32  hw_ip_version_major;
@@ -1144,6 +1147,17 @@ struct drm_amdgpu_info_hw_ip {
 	__u32  ip_discovery_version;
 };
 
+struct drm_amdgpu_info_hw_ip2 {
+	/** Previous version of the struct */
+	struct drm_amdgpu_info_hw_ip  base;
+	/** The maximum number of IBs one can submit in a single submission
+	 * ioctl. (When using gang submit: this is per IP type)
+	 */
+	__u32  max_ibs;
+	/** padding to 64bit for arch differences */
+	__u32  pad;
+};
+
 struct drm_amdgpu_info_num_handles {
 	/** Max handles as supported by firmware for UVD */
 	__u32  uvd_max_handles;
-- 
2.40.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/3] drm/amdgpu: Simplify amdgpu_hw_ip_info.
  2023-04-09 18:59 ` [PATCH v2 2/3] drm/amdgpu: Simplify amdgpu_hw_ip_info Bas Nieuwenhuizen
@ 2023-04-11  8:05   ` Christian König
  0 siblings, 0 replies; 10+ messages in thread
From: Christian König @ 2023-04-11  8:05 UTC (permalink / raw)
  To: Bas Nieuwenhuizen, amd-gfx; +Cc: alexander.deucher, maraeo, timur.kristof

Am 09.04.23 um 20:59 schrieb Bas Nieuwenhuizen:
> We have a list of all rings, so no sense writing the same loop N
> times. With how often this gets called and how small the ring list
> is the performance of this shouldn't matter.
>
> Note that some of the loops included some checking wrt harvesting.
> That is redundant now, as those rings never get initialized and
> hence never added to the adev->rings array.

We intentionally removed that because we wanted to get rid of adev->ring 
in the long term. Please don't bring it back.

Christian.

>
> Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 61 ++++++-------------------
>   1 file changed, 15 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 0efb38539d70..89689b940493 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -366,7 +366,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   	uint32_t ib_size_alignment = 0;
>   	enum amd_ip_block_type type;
>   	unsigned int num_rings = 0;
> -	unsigned int i, j;
> +	unsigned int i;
>   
>   	if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
>   		return -EINVAL;
> @@ -374,83 +374,49 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   	switch (info->query_hw_ip.type) {
>   	case AMDGPU_HW_IP_GFX:
>   		type = AMD_IP_BLOCK_TYPE_GFX;
> -		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> -			if (adev->gfx.gfx_ring[i].sched.ready)
> -				++num_rings;
> +
>   		ib_start_alignment = 32;
>   		ib_size_alignment = 32;
>   		break;
>   	case AMDGPU_HW_IP_COMPUTE:
>   		type = AMD_IP_BLOCK_TYPE_GFX;
> -		for (i = 0; i < adev->gfx.num_compute_rings; i++)
> -			if (adev->gfx.compute_ring[i].sched.ready)
> -				++num_rings;
> +
>   		ib_start_alignment = 32;
>   		ib_size_alignment = 32;
>   		break;
>   	case AMDGPU_HW_IP_DMA:
>   		type = AMD_IP_BLOCK_TYPE_SDMA;
> -		for (i = 0; i < adev->sdma.num_instances; i++)
> -			if (adev->sdma.instance[i].ring.sched.ready)
> -				++num_rings;
> +
>   		ib_start_alignment = 256;
>   		ib_size_alignment = 4;
>   		break;
>   	case AMDGPU_HW_IP_UVD:
>   		type = AMD_IP_BLOCK_TYPE_UVD;
> -		for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
> -			if (adev->uvd.harvest_config & (1 << i))
> -				continue;
>   
> -			if (adev->uvd.inst[i].ring.sched.ready)
> -				++num_rings;
> -		}
>   		ib_start_alignment = 64;
>   		ib_size_alignment = 64;
>   		break;
>   	case AMDGPU_HW_IP_VCE:
>   		type = AMD_IP_BLOCK_TYPE_VCE;
> -		for (i = 0; i < adev->vce.num_rings; i++)
> -			if (adev->vce.ring[i].sched.ready)
> -				++num_rings;
> +
>   		ib_start_alignment = 4;
>   		ib_size_alignment = 1;
>   		break;
>   	case AMDGPU_HW_IP_UVD_ENC:
>   		type = AMD_IP_BLOCK_TYPE_UVD;
> -		for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
> -			if (adev->uvd.harvest_config & (1 << i))
> -				continue;
>   
> -			for (j = 0; j < adev->uvd.num_enc_rings; j++)
> -				if (adev->uvd.inst[i].ring_enc[j].sched.ready)
> -					++num_rings;
> -		}
>   		ib_start_alignment = 64;
>   		ib_size_alignment = 64;
>   		break;
>   	case AMDGPU_HW_IP_VCN_DEC:
>   		type = AMD_IP_BLOCK_TYPE_VCN;
> -		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> -			if (adev->vcn.harvest_config & (1 << i))
> -				continue;
>   
> -			if (adev->vcn.inst[i].ring_dec.sched.ready)
> -				++num_rings;
> -		}
>   		ib_start_alignment = 16;
>   		ib_size_alignment = 16;
>   		break;
>   	case AMDGPU_HW_IP_VCN_ENC:
>   		type = AMD_IP_BLOCK_TYPE_VCN;
> -		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> -			if (adev->vcn.harvest_config & (1 << i))
> -				continue;
>   
> -			for (j = 0; j < adev->vcn.num_enc_rings; j++)
> -				if (adev->vcn.inst[i].ring_enc[j].sched.ready)
> -					++num_rings;
> -		}
>   		ib_start_alignment = 64;
>   		ib_size_alignment = 1;
>   		break;
> @@ -458,13 +424,6 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   		type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ?
>   			AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN;
>   
> -		for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) {
> -			if (adev->jpeg.harvest_config & (1 << i))
> -				continue;
> -
> -			if (adev->jpeg.inst[i].ring_dec.sched.ready)
> -				++num_rings;
> -		}
>   		ib_start_alignment = 16;
>   		ib_size_alignment = 16;
>   		break;
> @@ -472,6 +431,16 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   		return -EINVAL;
>   	}
>   
> +	for (i = 0; i < adev->num_rings; ++i) {
> +		/* Note that this uses that ring types alias the equivalent
> +		 * HW IP exposes to userspace.
> +		 */
> +		if (adev->rings[i]->funcs->type == info->query_hw_ip.type &&
> +		    adev->rings[i]->sched.ready) {
> +			++num_rings;
> +		}
> +	}
> +
>   	for (i = 0; i < adev->num_ip_blocks; i++)
>   		if (adev->ip_blocks[i].version->type == type &&
>   		    adev->ip_blocks[i].status.valid)


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] drm/amdgpu: Add support for querying the max ibs in a submission. (v2)
  2023-04-09 18:59 ` [PATCH v2 3/3] drm/amdgpu: Add support for querying the max ibs in a submission. (v2) Bas Nieuwenhuizen
@ 2023-04-11  8:10   ` Christian König
  2023-04-11  8:22     ` Bas Nieuwenhuizen
  0 siblings, 1 reply; 10+ messages in thread
From: Christian König @ 2023-04-11  8:10 UTC (permalink / raw)
  To: Bas Nieuwenhuizen, amd-gfx
  Cc: alexander.deucher, David Airlie, maraeo, timur.kristof

Am 09.04.23 um 20:59 schrieb Bas Nieuwenhuizen:
> We need to introduce a new version of the info struct because
> libdrm_amdgpu forgot any versioning info in amdgpu_query_hw_ip_info
> so the mesa<->libdrm_amdgpu interface can't handle increasing the
> size.
>
> This info would be used by radv to figure out when we need to
> split a submission into multiple submissions. radv currently has
> a limit of 192 which seems to work for most gfx submissions, but
> is way too high for e.g. compute or sdma.

Why do you need so many IBs in the first place?

You can use sub-IBs since R600 and newer hw even supports a JUMP command 
to chain IBs together IIRC.

For the kernel UAPI you only need separate IBs if you have separate 
properties on them, E.g. preamble or submitting to a different engine.

Everything else just needs additional CPU overhead in the IOCTL.

Regards,
Christian.

>
> Because the kernel handling is just fine we can just use the v2
> everywhere and have the return_size do the versioning for us,
> with userspace interpreting 0 as unknown.
>
> Userspace implementation:
> https://gitlab.freedesktop.org/bnieuwenhuizen/drm/-/tree/ib-rejection
> https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/tree/ib-rejection
>
> v2: Use base member in the new struct.
>
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2498
> Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> Cc: David Airlie <airlied@gmail.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 31 ++++++++++++++-----------
>   include/uapi/drm/amdgpu_drm.h           | 14 +++++++++++
>   2 files changed, 31 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 89689b940493..5b575e1aef1a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -360,7 +360,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
>   
>   static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   			     struct drm_amdgpu_info *info,
> -			     struct drm_amdgpu_info_hw_ip *result)
> +			     struct drm_amdgpu_info_hw_ip2 *result)
>   {
>   	uint32_t ib_start_alignment = 0;
>   	uint32_t ib_size_alignment = 0;
> @@ -431,6 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   		return -EINVAL;
>   	}
>   
> +	result->max_ibs = UINT_MAX;
>   	for (i = 0; i < adev->num_rings; ++i) {
>   		/* Note that this uses that ring types alias the equivalent
>   		 * HW IP exposes to userspace.
> @@ -438,6 +439,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   		if (adev->rings[i]->funcs->type == info->query_hw_ip.type &&
>   		    adev->rings[i]->sched.ready) {
>   			++num_rings;
> +			result->max_ibs = min(result->max_ibs,
> +					      adev->rings[i]->max_ibs);
>   		}
>   	}
>   
> @@ -452,36 +455,36 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   	num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
>   			num_rings);
>   
> -	result->hw_ip_version_major = adev->ip_blocks[i].version->major;
> -	result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
> +	result->base.hw_ip_version_major = adev->ip_blocks[i].version->major;
> +	result->base.hw_ip_version_minor = adev->ip_blocks[i].version->minor;
>   
>   	if (adev->asic_type >= CHIP_VEGA10) {
>   		switch (type) {
>   		case AMD_IP_BLOCK_TYPE_GFX:
> -			result->ip_discovery_version = adev->ip_versions[GC_HWIP][0];
> +			result->base.ip_discovery_version = adev->ip_versions[GC_HWIP][0];
>   			break;
>   		case AMD_IP_BLOCK_TYPE_SDMA:
> -			result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0];
> +			result->base.ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0];
>   			break;
>   		case AMD_IP_BLOCK_TYPE_UVD:
>   		case AMD_IP_BLOCK_TYPE_VCN:
>   		case AMD_IP_BLOCK_TYPE_JPEG:
> -			result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
> +			result->base.ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
>   			break;
>   		case AMD_IP_BLOCK_TYPE_VCE:
> -			result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
> +			result->base.ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
>   			break;
>   		default:
> -			result->ip_discovery_version = 0;
> +			result->base.ip_discovery_version = 0;
>   			break;
>   		}
>   	} else {
> -		result->ip_discovery_version = 0;
> +		result->base.ip_discovery_version = 0;
>   	}
> -	result->capabilities_flags = 0;
> -	result->available_rings = (1 << num_rings) - 1;
> -	result->ib_start_alignment = ib_start_alignment;
> -	result->ib_size_alignment = ib_size_alignment;
> +	result->base.capabilities_flags = 0;
> +	result->base.available_rings = (1 << num_rings) - 1;
> +	result->base.ib_start_alignment = ib_start_alignment;
> +	result->base.ib_size_alignment = ib_size_alignment;
>   	return 0;
>   }
>   
> @@ -536,7 +539,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   		}
>   		return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
>   	case AMDGPU_INFO_HW_IP_INFO: {
> -		struct drm_amdgpu_info_hw_ip ip = {};
> +		struct drm_amdgpu_info_hw_ip2 ip = {};
>   		int ret;
>   
>   		ret = amdgpu_hw_ip_info(adev, info, &ip);
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index b6eb90df5d05..6b9e35b6f747 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -1128,6 +1128,9 @@ struct drm_amdgpu_info_device {
>   	__u32 enabled_rb_pipes_mask_hi;
>   };
>   
> +/* The size of this struct cannot be increased for compatibility reasons, use
> + * struct drm_amdgpu_info_hw_ip2 instead.
> + */
>   struct drm_amdgpu_info_hw_ip {
>   	/** Version of h/w IP */
>   	__u32  hw_ip_version_major;
> @@ -1144,6 +1147,17 @@ struct drm_amdgpu_info_hw_ip {
>   	__u32  ip_discovery_version;
>   };
>   
> +struct drm_amdgpu_info_hw_ip2 {
> +	/** Previous version of the struct */
> +	struct drm_amdgpu_info_hw_ip  base;
> +	/** The maximum number of IBs one can submit in a single submission
> +	 * ioctl. (When using gang submit: this is per IP type)
> +	 */
> +	__u32  max_ibs;
> +	/** padding to 64bit for arch differences */
> +	__u32  pad;
> +};
> +
>   struct drm_amdgpu_info_num_handles {
>   	/** Max handles as supported by firmware for UVD */
>   	__u32  uvd_max_handles;


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] drm/amdgpu: Add support for querying the max ibs in a submission. (v2)
  2023-04-11  8:10   ` Christian König
@ 2023-04-11  8:22     ` Bas Nieuwenhuizen
  2023-04-11  9:06       ` Timur Kristóf
  0 siblings, 1 reply; 10+ messages in thread
From: Bas Nieuwenhuizen @ 2023-04-11  8:22 UTC (permalink / raw)
  To: Christian König
  Cc: alexander.deucher, David Airlie, maraeo, amd-gfx, timur.kristof

On Tue, Apr 11, 2023 at 10:10 AM Christian König
<christian.koenig@amd.com> wrote:
>
> Am 09.04.23 um 20:59 schrieb Bas Nieuwenhuizen:
> > We need to introduce a new version of the info struct because
> > libdrm_amdgpu forgot any versioning info in amdgpu_query_hw_ip_info
> > so the mesa<->libdrm_amdgpu interface can't handle increasing the
> > size.
> >
> > This info would be used by radv to figure out when we need to
> > split a submission into multiple submissions. radv currently has
> > a limit of 192 which seems to work for most gfx submissions, but
> > is way too high for e.g. compute or sdma.
>
> Why do you need so many IBs in the first place?
>
> You can use sub-IBs since R600 and newer hw even supports a JUMP command
> to chain IBs together IIRC.

Couple of reasons:

1) We can't reliably use sub-IBs (both because on GFX6-7 there are
indirect draw commands that cannot be used in sub-IBs and because the
Vulkan API exposes sub-IBs, so we can have the primary IBs be sub-IBs
already)

2) We believed GFX6 may not support chaining. (Then again, it turns
out the GFX7+ packet may just work on GFX6?
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22406)

3) Chaining doesn't work if the IB may be in flight multiple times in
a different sequence.

We try to chain when we can but (2) and (3) are cases where we
fallback to submitting multiple IBs.

>
> For the kernel UAPI you only need separate IBs if you have separate
> properties on them, E.g. preamble or submitting to a different engine.
>
> Everything else just needs additional CPU overhead in the IOCTL.
>
> Regards,
> Christian.
>
> >
> > Because the kernel handling is just fine we can just use the v2
> > everywhere and have the return_size do the versioning for us,
> > with userspace interpreting 0 as unknown.
> >
> > Userspace implementation:
> > https://gitlab.freedesktop.org/bnieuwenhuizen/drm/-/tree/ib-rejection
> > https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/tree/ib-rejection
> >
> > v2: Use base member in the new struct.
> >
> > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2498
> > Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> > Cc: David Airlie <airlied@gmail.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 31 ++++++++++++++-----------
> >   include/uapi/drm/amdgpu_drm.h           | 14 +++++++++++
> >   2 files changed, 31 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > index 89689b940493..5b575e1aef1a 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > @@ -360,7 +360,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
> >
> >   static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >                            struct drm_amdgpu_info *info,
> > -                          struct drm_amdgpu_info_hw_ip *result)
> > +                          struct drm_amdgpu_info_hw_ip2 *result)
> >   {
> >       uint32_t ib_start_alignment = 0;
> >       uint32_t ib_size_alignment = 0;
> > @@ -431,6 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >               return -EINVAL;
> >       }
> >
> > +     result->max_ibs = UINT_MAX;
> >       for (i = 0; i < adev->num_rings; ++i) {
> >               /* Note that this uses that ring types alias the equivalent
> >                * HW IP exposes to userspace.
> > @@ -438,6 +439,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >               if (adev->rings[i]->funcs->type == info->query_hw_ip.type &&
> >                   adev->rings[i]->sched.ready) {
> >                       ++num_rings;
> > +                     result->max_ibs = min(result->max_ibs,
> > +                                           adev->rings[i]->max_ibs);
> >               }
> >       }
> >
> > @@ -452,36 +455,36 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> >       num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
> >                       num_rings);
> >
> > -     result->hw_ip_version_major = adev->ip_blocks[i].version->major;
> > -     result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
> > +     result->base.hw_ip_version_major = adev->ip_blocks[i].version->major;
> > +     result->base.hw_ip_version_minor = adev->ip_blocks[i].version->minor;
> >
> >       if (adev->asic_type >= CHIP_VEGA10) {
> >               switch (type) {
> >               case AMD_IP_BLOCK_TYPE_GFX:
> > -                     result->ip_discovery_version = adev->ip_versions[GC_HWIP][0];
> > +                     result->base.ip_discovery_version = adev->ip_versions[GC_HWIP][0];
> >                       break;
> >               case AMD_IP_BLOCK_TYPE_SDMA:
> > -                     result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0];
> > +                     result->base.ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0];
> >                       break;
> >               case AMD_IP_BLOCK_TYPE_UVD:
> >               case AMD_IP_BLOCK_TYPE_VCN:
> >               case AMD_IP_BLOCK_TYPE_JPEG:
> > -                     result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
> > +                     result->base.ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
> >                       break;
> >               case AMD_IP_BLOCK_TYPE_VCE:
> > -                     result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
> > +                     result->base.ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
> >                       break;
> >               default:
> > -                     result->ip_discovery_version = 0;
> > +                     result->base.ip_discovery_version = 0;
> >                       break;
> >               }
> >       } else {
> > -             result->ip_discovery_version = 0;
> > +             result->base.ip_discovery_version = 0;
> >       }
> > -     result->capabilities_flags = 0;
> > -     result->available_rings = (1 << num_rings) - 1;
> > -     result->ib_start_alignment = ib_start_alignment;
> > -     result->ib_size_alignment = ib_size_alignment;
> > +     result->base.capabilities_flags = 0;
> > +     result->base.available_rings = (1 << num_rings) - 1;
> > +     result->base.ib_start_alignment = ib_start_alignment;
> > +     result->base.ib_size_alignment = ib_size_alignment;
> >       return 0;
> >   }
> >
> > @@ -536,7 +539,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> >               }
> >               return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
> >       case AMDGPU_INFO_HW_IP_INFO: {
> > -             struct drm_amdgpu_info_hw_ip ip = {};
> > +             struct drm_amdgpu_info_hw_ip2 ip = {};
> >               int ret;
> >
> >               ret = amdgpu_hw_ip_info(adev, info, &ip);
> > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > index b6eb90df5d05..6b9e35b6f747 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -1128,6 +1128,9 @@ struct drm_amdgpu_info_device {
> >       __u32 enabled_rb_pipes_mask_hi;
> >   };
> >
> > +/* The size of this struct cannot be increased for compatibility reasons, use
> > + * struct drm_amdgpu_info_hw_ip2 instead.
> > + */
> >   struct drm_amdgpu_info_hw_ip {
> >       /** Version of h/w IP */
> >       __u32  hw_ip_version_major;
> > @@ -1144,6 +1147,17 @@ struct drm_amdgpu_info_hw_ip {
> >       __u32  ip_discovery_version;
> >   };
> >
> > +struct drm_amdgpu_info_hw_ip2 {
> > +     /** Previous version of the struct */
> > +     struct drm_amdgpu_info_hw_ip  base;
> > +     /** The maximum number of IBs one can submit in a single submission
> > +      * ioctl. (When using gang submit: this is per IP type)
> > +      */
> > +     __u32  max_ibs;
> > +     /** padding to 64bit for arch differences */
> > +     __u32  pad;
> > +};
> > +
> >   struct drm_amdgpu_info_num_handles {
> >       /** Max handles as supported by firmware for UVD */
> >       __u32  uvd_max_handles;
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] drm/amdgpu: Add support for querying the max ibs in a submission. (v2)
  2023-04-11  8:22     ` Bas Nieuwenhuizen
@ 2023-04-11  9:06       ` Timur Kristóf
  2023-04-11  9:22         ` Christian König
  0 siblings, 1 reply; 10+ messages in thread
From: Timur Kristóf @ 2023-04-11  9:06 UTC (permalink / raw)
  To: Bas Nieuwenhuizen
  Cc: Deucher, Alexander, Marek Olšák, David Airlie,
	Christian König, amd-gfx list

[-- Attachment #1: Type: text/plain, Size: 9613 bytes --]

Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> ezt írta (időpont: 2023. ápr.
11., Ke 10:25):

> On Tue, Apr 11, 2023 at 10:10 AM Christian König
> <christian.koenig@amd.com> wrote:
> >
> > Am 09.04.23 um 20:59 schrieb Bas Nieuwenhuizen:
> > > We need to introduce a new version of the info struct because
> > > libdrm_amdgpu forgot any versioning info in amdgpu_query_hw_ip_info
> > > so the mesa<->libdrm_amdgpu interface can't handle increasing the
> > > size.
> > >
> > > This info would be used by radv to figure out when we need to
> > > split a submission into multiple submissions. radv currently has
> > > a limit of 192 which seems to work for most gfx submissions, but
> > > is way too high for e.g. compute or sdma.
> >
> > Why do you need so many IBs in the first place?
> >
> > You can use sub-IBs since R600 and newer hw even supports a JUMP command
> > to chain IBs together IIRC.
>
> Couple of reasons:
>
> 1) We can't reliably use sub-IBs (both because on GFX6-7 there are
> indirect draw commands that cannot be used in sub-IBs and because the
> Vulkan API exposes sub-IBs, so we can have the primary IBs be sub-IBs
> already)
>

Furthermore, only the GFX queue supports the "IB2" packet that is used to
implement sub-IBs.

(The same packet hangs the compute queue and is documented as not working
in the PAL source code. Other queues don't seem to have a packet for this
purpose.)

2) We believed GFX6 may not support chaining. (Then again, it turns
> out the GFX7+ packet may just work on GFX6?
> https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22406)
>

I was actually quite surprised when I found this out. Mesa developers seem
to have believed that this is not possible on GFX6. I'd love to get some
more context on this, did it always work or was it added in a FW update?

3) Chaining doesn't work if the IB may be in flight multiple times in
> a different sequence.
>

Additionally, chaining also doesn't work on any queue other than GFX and
compute. As far as we know, SDMA and the various encoder/decoder queues
don't have a packet for chaining.

Christian, please let us know if we are wrong about this.


Best regards,
Timur




> We try to chain when we can but (2) and (3) are cases where we
> fallback to submitting multiple IBs.
>
> >
> > For the kernel UAPI you only need separate IBs if you have separate
> > properties on them, E.g. preamble or submitting to a different engine.
> >
> > Everything else just needs additional CPU overhead in the IOCTL.
> >
> > Regards,
> > Christian.
> >
> > >
> > > Because the kernel handling is just fine we can just use the v2
> > > everywhere and have the return_size do the versioning for us,
> > > with userspace interpreting 0 as unknown.
> > >
> > > Userspace implementation:
> > > https://gitlab.freedesktop.org/bnieuwenhuizen/drm/-/tree/ib-rejection
> > > https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/tree/ib-rejection
> > >
> > > v2: Use base member in the new struct.
> > >
> > > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2498
> > > Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> > > Cc: David Airlie <airlied@gmail.com>
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 31
> ++++++++++++++-----------
> > >   include/uapi/drm/amdgpu_drm.h           | 14 +++++++++++
> > >   2 files changed, 31 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > index 89689b940493..5b575e1aef1a 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > @@ -360,7 +360,7 @@ static int amdgpu_firmware_info(struct
> drm_amdgpu_info_firmware *fw_info,
> > >
> > >   static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
> > >                            struct drm_amdgpu_info *info,
> > > -                          struct drm_amdgpu_info_hw_ip *result)
> > > +                          struct drm_amdgpu_info_hw_ip2 *result)
> > >   {
> > >       uint32_t ib_start_alignment = 0;
> > >       uint32_t ib_size_alignment = 0;
> > > @@ -431,6 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
> *adev,
> > >               return -EINVAL;
> > >       }
> > >
> > > +     result->max_ibs = UINT_MAX;
> > >       for (i = 0; i < adev->num_rings; ++i) {
> > >               /* Note that this uses that ring types alias the
> equivalent
> > >                * HW IP exposes to userspace.
> > > @@ -438,6 +439,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
> *adev,
> > >               if (adev->rings[i]->funcs->type ==
> info->query_hw_ip.type &&
> > >                   adev->rings[i]->sched.ready) {
> > >                       ++num_rings;
> > > +                     result->max_ibs = min(result->max_ibs,
> > > +                                           adev->rings[i]->max_ibs);
> > >               }
> > >       }
> > >
> > > @@ -452,36 +455,36 @@ static int amdgpu_hw_ip_info(struct
> amdgpu_device *adev,
> > >       num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
> > >                       num_rings);
> > >
> > > -     result->hw_ip_version_major = adev->ip_blocks[i].version->major;
> > > -     result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
> > > +     result->base.hw_ip_version_major =
> adev->ip_blocks[i].version->major;
> > > +     result->base.hw_ip_version_minor =
> adev->ip_blocks[i].version->minor;
> > >
> > >       if (adev->asic_type >= CHIP_VEGA10) {
> > >               switch (type) {
> > >               case AMD_IP_BLOCK_TYPE_GFX:
> > > -                     result->ip_discovery_version =
> adev->ip_versions[GC_HWIP][0];
> > > +                     result->base.ip_discovery_version =
> adev->ip_versions[GC_HWIP][0];
> > >                       break;
> > >               case AMD_IP_BLOCK_TYPE_SDMA:
> > > -                     result->ip_discovery_version =
> adev->ip_versions[SDMA0_HWIP][0];
> > > +                     result->base.ip_discovery_version =
> adev->ip_versions[SDMA0_HWIP][0];
> > >                       break;
> > >               case AMD_IP_BLOCK_TYPE_UVD:
> > >               case AMD_IP_BLOCK_TYPE_VCN:
> > >               case AMD_IP_BLOCK_TYPE_JPEG:
> > > -                     result->ip_discovery_version =
> adev->ip_versions[UVD_HWIP][0];
> > > +                     result->base.ip_discovery_version =
> adev->ip_versions[UVD_HWIP][0];
> > >                       break;
> > >               case AMD_IP_BLOCK_TYPE_VCE:
> > > -                     result->ip_discovery_version =
> adev->ip_versions[VCE_HWIP][0];
> > > +                     result->base.ip_discovery_version =
> adev->ip_versions[VCE_HWIP][0];
> > >                       break;
> > >               default:
> > > -                     result->ip_discovery_version = 0;
> > > +                     result->base.ip_discovery_version = 0;
> > >                       break;
> > >               }
> > >       } else {
> > > -             result->ip_discovery_version = 0;
> > > +             result->base.ip_discovery_version = 0;
> > >       }
> > > -     result->capabilities_flags = 0;
> > > -     result->available_rings = (1 << num_rings) - 1;
> > > -     result->ib_start_alignment = ib_start_alignment;
> > > -     result->ib_size_alignment = ib_size_alignment;
> > > +     result->base.capabilities_flags = 0;
> > > +     result->base.available_rings = (1 << num_rings) - 1;
> > > +     result->base.ib_start_alignment = ib_start_alignment;
> > > +     result->base.ib_size_alignment = ib_size_alignment;
> > >       return 0;
> > >   }
> > >
> > > @@ -536,7 +539,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void
> *data, struct drm_file *filp)
> > >               }
> > >               return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT
> : 0;
> > >       case AMDGPU_INFO_HW_IP_INFO: {
> > > -             struct drm_amdgpu_info_hw_ip ip = {};
> > > +             struct drm_amdgpu_info_hw_ip2 ip = {};
> > >               int ret;
> > >
> > >               ret = amdgpu_hw_ip_info(adev, info, &ip);
> > > diff --git a/include/uapi/drm/amdgpu_drm.h
> b/include/uapi/drm/amdgpu_drm.h
> > > index b6eb90df5d05..6b9e35b6f747 100644
> > > --- a/include/uapi/drm/amdgpu_drm.h
> > > +++ b/include/uapi/drm/amdgpu_drm.h
> > > @@ -1128,6 +1128,9 @@ struct drm_amdgpu_info_device {
> > >       __u32 enabled_rb_pipes_mask_hi;
> > >   };
> > >
> > > +/* The size of this struct cannot be increased for compatibility
> reasons, use
> > > + * struct drm_amdgpu_info_hw_ip2 instead.
> > > + */
> > >   struct drm_amdgpu_info_hw_ip {
> > >       /** Version of h/w IP */
> > >       __u32  hw_ip_version_major;
> > > @@ -1144,6 +1147,17 @@ struct drm_amdgpu_info_hw_ip {
> > >       __u32  ip_discovery_version;
> > >   };
> > >
> > > +struct drm_amdgpu_info_hw_ip2 {
> > > +     /** Previous version of the struct */
> > > +     struct drm_amdgpu_info_hw_ip  base;
> > > +     /** The maximum number of IBs one can submit in a single
> submission
> > > +      * ioctl. (When using gang submit: this is per IP type)
> > > +      */
> > > +     __u32  max_ibs;
> > > +     /** padding to 64bit for arch differences */
> > > +     __u32  pad;
> > > +};
> > > +
> > >   struct drm_amdgpu_info_num_handles {
> > >       /** Max handles as supported by firmware for UVD */
> > >       __u32  uvd_max_handles;
> >
>

[-- Attachment #2: Type: text/html, Size: 13635 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] drm/amdgpu: Add support for querying the max ibs in a submission. (v2)
  2023-04-11  9:06       ` Timur Kristóf
@ 2023-04-11  9:22         ` Christian König
  2023-04-11 11:06           ` Timur Kristóf
  0 siblings, 1 reply; 10+ messages in thread
From: Christian König @ 2023-04-11  9:22 UTC (permalink / raw)
  To: Timur Kristóf, Bas Nieuwenhuizen
  Cc: Deucher, Alexander, Pelloux-prayer, Pierre-eric, David Airlie,
	Marek Olšák, amd-gfx list

[-- Attachment #1: Type: text/plain, Size: 11502 bytes --]

Am 11.04.23 um 11:06 schrieb Timur Kristóf:
>
>
> Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> ezt írta (időpont: 2023. 
> ápr. 11., Ke 10:25):
>
>     On Tue, Apr 11, 2023 at 10:10 AM Christian König
>     <christian.koenig@amd.com> wrote:
>     >
>     > Am 09.04.23 um 20:59 schrieb Bas Nieuwenhuizen:
>     > > We need to introduce a new version of the info struct because
>     > > libdrm_amdgpu forgot any versioning info in
>     amdgpu_query_hw_ip_info
>     > > so the mesa<->libdrm_amdgpu interface can't handle increasing the
>     > > size.
>     > >
>     > > This info would be used by radv to figure out when we need to
>     > > split a submission into multiple submissions. radv currently has
>     > > a limit of 192 which seems to work for most gfx submissions, but
>     > > is way too high for e.g. compute or sdma.
>     >
>     > Why do you need so many IBs in the first place?
>     >
>     > You can use sub-IBs since R600 and newer hw even supports a JUMP
>     command
>     > to chain IBs together IIRC.
>
>     Couple of reasons:
>
>     1) We can't reliably use sub-IBs (both because on GFX6-7 there are
>     indirect draw commands that cannot be used in sub-IBs and because the
>     Vulkan API exposes sub-IBs, so we can have the primary IBs be sub-IBs
>     already)
>
>
> Furthermore, only the GFX queue supports the "IB2" packet that is used 
> to implement sub-IBs.
>
> (The same packet hangs the compute queue and is documented as not 
> working in the PAL source code. Other queues don't seem to have a 
> packet for this purpose.)
>
>     2) We believed GFX6 may not support chaining. (Then again, it turns
>     out the GFX7+ packet may just work on GFX6?
>     https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22406)
>
>
> I was actually quite surprised when I found this out. Mesa developers 
> seem to have believed that this is not possible on GFX6. I'd love to 
> get some more context on this, did it always work or was it added in a 
> FW update?
>
>     3) Chaining doesn't work if the IB may be in flight multiple times in
>     a different sequence.
>
>
> Additionally, chaining also doesn't work on any queue other than GFX 
> and compute. As far as we know, SDMA and the various encoder/decoder 
> queues don't have a packet for chaining.
>
> Christian, please let us know if we are wrong about this.

I'm not an expert for this either. Marek and Pierre-eric know more about 
that stuff than me. On the other hand I could ping the firmware people 
as well if our UMD guys can't answer that.

It's just that last time we discussed this internally somebody from the 
PAL team commented that this isn't an issue any more because we don't 
need that many IBs any more.

libdrm defined that you shouldn't use more than 4 IBs in a CS, on the 
other hand we dropped checking that long ago and exposing the number of 
IBs the UMD can use is just good design.

Bas what do you think of adding an AMDGPU_INFO_MAX_IBS query instead of 
extending the drm_amdgpu_info_hw_ip structure?

Background is that the drm_amdgpu_info_hw_ip structure is actually not 
meant to be used for sw parameters (which the maximum number of IBs is) 
and we wouldn't need to dance around issues with query size parameters 
because that function takes the size as parameter.

Regards,
Christian.

>
>
> Best regards,
> Timur
>
>
>
>
>     We try to chain when we can but (2) and (3) are cases where we
>     fallback to submitting multiple IBs.
>
>     >
>     > For the kernel UAPI you only need separate IBs if you have separate
>     > properties on them, E.g. preamble or submitting to a different
>     engine.
>     >
>     > Everything else just needs additional CPU overhead in the IOCTL.
>     >
>     > Regards,
>     > Christian.
>     >
>     > >
>     > > Because the kernel handling is just fine we can just use the v2
>     > > everywhere and have the return_size do the versioning for us,
>     > > with userspace interpreting 0 as unknown.
>     > >
>     > > Userspace implementation:
>     > >
>     https://gitlab.freedesktop.org/bnieuwenhuizen/drm/-/tree/ib-rejection
>     > >
>     https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/tree/ib-rejection
>     > >
>     > > v2: Use base member in the new struct.
>     > >
>     > > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2498
>     > > Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>     > > Cc: David Airlie <airlied@gmail.com>
>     > > ---
>     > >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 31
>     ++++++++++++++-----------
>     > >   include/uapi/drm/amdgpu_drm.h           | 14 +++++++++++
>     > >   2 files changed, 31 insertions(+), 14 deletions(-)
>     > >
>     > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>     > > index 89689b940493..5b575e1aef1a 100644
>     > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>     > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>     > > @@ -360,7 +360,7 @@ static int amdgpu_firmware_info(struct
>     drm_amdgpu_info_firmware *fw_info,
>     > >
>     > >   static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>     > >                            struct drm_amdgpu_info *info,
>     > > -                          struct drm_amdgpu_info_hw_ip *result)
>     > > +                          struct drm_amdgpu_info_hw_ip2 *result)
>     > >   {
>     > >       uint32_t ib_start_alignment = 0;
>     > >       uint32_t ib_size_alignment = 0;
>     > > @@ -431,6 +431,7 @@ static int amdgpu_hw_ip_info(struct
>     amdgpu_device *adev,
>     > >               return -EINVAL;
>     > >       }
>     > >
>     > > +     result->max_ibs = UINT_MAX;
>     > >       for (i = 0; i < adev->num_rings; ++i) {
>     > >               /* Note that this uses that ring types alias the
>     equivalent
>     > >                * HW IP exposes to userspace.
>     > > @@ -438,6 +439,8 @@ static int amdgpu_hw_ip_info(struct
>     amdgpu_device *adev,
>     > >               if (adev->rings[i]->funcs->type ==
>     info->query_hw_ip.type &&
>     > >  adev->rings[i]->sched.ready) {
>     > >                       ++num_rings;
>     > > +                     result->max_ibs = min(result->max_ibs,
>     > > +  adev->rings[i]->max_ibs);
>     > >               }
>     > >       }
>     > >
>     > > @@ -452,36 +455,36 @@ static int amdgpu_hw_ip_info(struct
>     amdgpu_device *adev,
>     > >       num_rings =
>     min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
>     > >                       num_rings);
>     > >
>     > > -     result->hw_ip_version_major =
>     adev->ip_blocks[i].version->major;
>     > > -     result->hw_ip_version_minor =
>     adev->ip_blocks[i].version->minor;
>     > > +     result->base.hw_ip_version_major =
>     adev->ip_blocks[i].version->major;
>     > > +     result->base.hw_ip_version_minor =
>     adev->ip_blocks[i].version->minor;
>     > >
>     > >       if (adev->asic_type >= CHIP_VEGA10) {
>     > >               switch (type) {
>     > >               case AMD_IP_BLOCK_TYPE_GFX:
>     > > -  result->ip_discovery_version = adev->ip_versions[GC_HWIP][0];
>     > > +  result->base.ip_discovery_version =
>     adev->ip_versions[GC_HWIP][0];
>     > >                       break;
>     > >               case AMD_IP_BLOCK_TYPE_SDMA:
>     > > -  result->ip_discovery_version =
>     adev->ip_versions[SDMA0_HWIP][0];
>     > > +  result->base.ip_discovery_version =
>     adev->ip_versions[SDMA0_HWIP][0];
>     > >                       break;
>     > >               case AMD_IP_BLOCK_TYPE_UVD:
>     > >               case AMD_IP_BLOCK_TYPE_VCN:
>     > >               case AMD_IP_BLOCK_TYPE_JPEG:
>     > > -  result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
>     > > +  result->base.ip_discovery_version =
>     adev->ip_versions[UVD_HWIP][0];
>     > >                       break;
>     > >               case AMD_IP_BLOCK_TYPE_VCE:
>     > > -  result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
>     > > +  result->base.ip_discovery_version =
>     adev->ip_versions[VCE_HWIP][0];
>     > >                       break;
>     > >               default:
>     > > -  result->ip_discovery_version = 0;
>     > > +  result->base.ip_discovery_version = 0;
>     > >                       break;
>     > >               }
>     > >       } else {
>     > > -             result->ip_discovery_version = 0;
>     > > +  result->base.ip_discovery_version = 0;
>     > >       }
>     > > -     result->capabilities_flags = 0;
>     > > -     result->available_rings = (1 << num_rings) - 1;
>     > > -     result->ib_start_alignment = ib_start_alignment;
>     > > -     result->ib_size_alignment = ib_size_alignment;
>     > > +     result->base.capabilities_flags = 0;
>     > > +     result->base.available_rings = (1 << num_rings) - 1;
>     > > +     result->base.ib_start_alignment = ib_start_alignment;
>     > > +     result->base.ib_size_alignment = ib_size_alignment;
>     > >       return 0;
>     > >   }
>     > >
>     > > @@ -536,7 +539,7 @@ int amdgpu_info_ioctl(struct drm_device
>     *dev, void *data, struct drm_file *filp)
>     > >               }
>     > >               return copy_to_user(out, &ui32, min(size, 4u)) ?
>     -EFAULT : 0;
>     > >       case AMDGPU_INFO_HW_IP_INFO: {
>     > > -             struct drm_amdgpu_info_hw_ip ip = {};
>     > > +             struct drm_amdgpu_info_hw_ip2 ip = {};
>     > >               int ret;
>     > >
>     > >               ret = amdgpu_hw_ip_info(adev, info, &ip);
>     > > diff --git a/include/uapi/drm/amdgpu_drm.h
>     b/include/uapi/drm/amdgpu_drm.h
>     > > index b6eb90df5d05..6b9e35b6f747 100644
>     > > --- a/include/uapi/drm/amdgpu_drm.h
>     > > +++ b/include/uapi/drm/amdgpu_drm.h
>     > > @@ -1128,6 +1128,9 @@ struct drm_amdgpu_info_device {
>     > >       __u32 enabled_rb_pipes_mask_hi;
>     > >   };
>     > >
>     > > +/* The size of this struct cannot be increased for
>     compatibility reasons, use
>     > > + * struct drm_amdgpu_info_hw_ip2 instead.
>     > > + */
>     > >   struct drm_amdgpu_info_hw_ip {
>     > >       /** Version of h/w IP */
>     > >       __u32  hw_ip_version_major;
>     > > @@ -1144,6 +1147,17 @@ struct drm_amdgpu_info_hw_ip {
>     > >       __u32  ip_discovery_version;
>     > >   };
>     > >
>     > > +struct drm_amdgpu_info_hw_ip2 {
>     > > +     /** Previous version of the struct */
>     > > +     struct drm_amdgpu_info_hw_ip  base;
>     > > +     /** The maximum number of IBs one can submit in a single
>     submission
>     > > +      * ioctl. (When using gang submit: this is per IP type)
>     > > +      */
>     > > +     __u32  max_ibs;
>     > > +     /** padding to 64bit for arch differences */
>     > > +     __u32  pad;
>     > > +};
>     > > +
>     > >   struct drm_amdgpu_info_num_handles {
>     > >       /** Max handles as supported by firmware for UVD */
>     > >       __u32  uvd_max_handles;
>     >
>

[-- Attachment #2: Type: text/html, Size: 22389 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] drm/amdgpu: Add support for querying the max ibs in a submission. (v2)
  2023-04-11  9:22         ` Christian König
@ 2023-04-11 11:06           ` Timur Kristóf
  0 siblings, 0 replies; 10+ messages in thread
From: Timur Kristóf @ 2023-04-11 11:06 UTC (permalink / raw)
  To: Christian König
  Cc: Pelloux-prayer, Pierre-eric, Marek Olšák, amd-gfx list,
	Bas Nieuwenhuizen, Deucher, Alexander, David Airlie

[-- Attachment #1: Type: text/plain, Size: 11841 bytes --]

Christian König <christian.koenig@amd.com> ezt írta (időpont: 2023. ápr.
11., Ke 11:23):

> Am 11.04.23 um 11:06 schrieb Timur Kristóf:
>
>
>
> Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> ezt írta (időpont: 2023. ápr.
> 11., Ke 10:25):
>
>> On Tue, Apr 11, 2023 at 10:10 AM Christian König
>> <christian.koenig@amd.com> wrote:
>> >
>> > Am 09.04.23 um 20:59 schrieb Bas Nieuwenhuizen:
>> > > We need to introduce a new version of the info struct because
>> > > libdrm_amdgpu forgot any versioning info in amdgpu_query_hw_ip_info
>> > > so the mesa<->libdrm_amdgpu interface can't handle increasing the
>> > > size.
>> > >
>> > > This info would be used by radv to figure out when we need to
>> > > split a submission into multiple submissions. radv currently has
>> > > a limit of 192 which seems to work for most gfx submissions, but
>> > > is way too high for e.g. compute or sdma.
>> >
>> > Why do you need so many IBs in the first place?
>> >
>> > You can use sub-IBs since R600 and newer hw even supports a JUMP command
>> > to chain IBs together IIRC.
>>
>> Couple of reasons:
>>
>> 1) We can't reliably use sub-IBs (both because on GFX6-7 there are
>> indirect draw commands that cannot be used in sub-IBs and because the
>> Vulkan API exposes sub-IBs, so we can have the primary IBs be sub-IBs
>> already)
>>
>
> Furthermore, only the GFX queue supports the "IB2" packet that is used to
> implement sub-IBs.
>
> (The same packet hangs the compute queue and is documented as not working
> in the PAL source code. Other queues don't seem to have a packet for this
> purpose.)
>
> 2) We believed GFX6 may not support chaining. (Then again, it turns
>> out the GFX7+ packet may just work on GFX6?
>> https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22406)
>>
>
> I was actually quite surprised when I found this out. Mesa developers seem
> to have believed that this is not possible on GFX6. I'd love to get some
> more context on this, did it always work or was it added in a FW update?
>
> 3) Chaining doesn't work if the IB may be in flight multiple times in
>> a different sequence.
>>
>
> Additionally, chaining also doesn't work on any queue other than GFX and
> compute. As far as we know, SDMA and the various encoder/decoder queues
> don't have a packet for chaining.
>
> Christian, please let us know if we are wrong about this.
>
>
> I'm not an expert for this either. Marek and Pierre-eric know more about
> that stuff than me. On the other hand I could ping the firmware people as
> well if our UMD guys can't answer that.
>

Thanks, I'm looking forward to hearing more about it.


> It's just that last time we discussed this internally somebody from the
> PAL team commented that this isn't an issue any more because we don't need
> that many IBs any more.
>

It is true that the typical game would only submit a few IBs: either 2 (1
preamble + 1 IB that has all the command buffers chained) or 8 for gang
submit (4 preambles, 1 ACE IB, 1 GFX IB, 2 postambles), but we would like
to ensure that the less common use cases that can't use chaining work well
too.

We also have an increased interest in using the other HW queues in RADV
these days: video encode/decode queues for the Vulkan Video API, and of
course SDMA which we are considering for transfer queues in the future.


> libdrm defined that you shouldn't use more than 4 IBs in a CS, on the
> other hand we dropped checking that long ago and exposing the numbers of
> IBs the UMD can use is just good design.
>

Yes, I agree.

Can we remove that old (and confusing) define from libdrm? Or does anyone
still depend on that?


> Bas what do you think of adding an AMDGPU_INFO_MAX_IBS query instead of
> extending the drm_amdgpu_info_hw_ip structure?
>
> Background is that the drm_amdgpu_info_hw_ip structure is actually not
> meant to be used for sw parameters (which the maximum number of IBs is) and
> we wouldn't need to dance around issues with query size parameters because
> that function takes the size as parameter.
>

Sounds good to me but I'll defer to Bas's judgement on this.




> Regards,
> Christian.
>
>
>
> Best regards,
> Timur
>
>
>
>
>> We try to chain when we can but (2) and (3) are cases where we
>> fallback to submitting multiple IBs.
>>
>> >
>> > For the kernel UAPI you only need separate IBs if you have separate
>> > properties on them, E.g. preamble or submitting to a different engine.
>> >
>> > Everything else just needs additional CPU overhead in the IOCTL.
>> >
>> > Regards,
>> > Christian.
>> >
>> > >
>> > > Because the kernel handling is just fine we can just use the v2
>> > > everywhere and have the return_size do the versioning for us,
>> > > with userspace interpreting 0 as unknown.
>> > >
>> > > Userspace implementation:
>> > > https://gitlab.freedesktop.org/bnieuwenhuizen/drm/-/tree/ib-rejection
>> > >
>> https://gitlab.freedesktop.org/bnieuwenhuizen/mesa/-/tree/ib-rejection
>> > >
>> > > v2: Use base member in the new struct.
>> > >
>> > > Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2498
>> > > Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>> > > Cc: David Airlie <airlied@gmail.com>
>> > > ---
>> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 31
>> ++++++++++++++-----------
>> > >   include/uapi/drm/amdgpu_drm.h           | 14 +++++++++++
>> > >   2 files changed, 31 insertions(+), 14 deletions(-)
>> > >
>> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> > > index 89689b940493..5b575e1aef1a 100644
>> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> > > @@ -360,7 +360,7 @@ static int amdgpu_firmware_info(struct
>> drm_amdgpu_info_firmware *fw_info,
>> > >
>> > >   static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>> > >                            struct drm_amdgpu_info *info,
>> > > -                          struct drm_amdgpu_info_hw_ip *result)
>> > > +                          struct drm_amdgpu_info_hw_ip2 *result)
>> > >   {
>> > >       uint32_t ib_start_alignment = 0;
>> > >       uint32_t ib_size_alignment = 0;
>> > > @@ -431,6 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>> *adev,
>> > >               return -EINVAL;
>> > >       }
>> > >
>> > > +     result->max_ibs = UINT_MAX;
>> > >       for (i = 0; i < adev->num_rings; ++i) {
>> > >               /* Note that this uses that ring types alias the
>> equivalent
>> > >                * HW IP exposes to userspace.
>> > > @@ -438,6 +439,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device
>> *adev,
>> > >               if (adev->rings[i]->funcs->type ==
>> info->query_hw_ip.type &&
>> > >                   adev->rings[i]->sched.ready) {
>> > >                       ++num_rings;
>> > > +                     result->max_ibs = min(result->max_ibs,
>> > > +                                           adev->rings[i]->max_ibs);
>> > >               }
>> > >       }
>> > >
>> > > @@ -452,36 +455,36 @@ static int amdgpu_hw_ip_info(struct
>> amdgpu_device *adev,
>> > >       num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type],
>> > >                       num_rings);
>> > >
>> > > -     result->hw_ip_version_major = adev->ip_blocks[i].version->major;
>> > > -     result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
>> > > +     result->base.hw_ip_version_major =
>> adev->ip_blocks[i].version->major;
>> > > +     result->base.hw_ip_version_minor =
>> adev->ip_blocks[i].version->minor;
>> > >
>> > >       if (adev->asic_type >= CHIP_VEGA10) {
>> > >               switch (type) {
>> > >               case AMD_IP_BLOCK_TYPE_GFX:
>> > > -                     result->ip_discovery_version =
>> adev->ip_versions[GC_HWIP][0];
>> > > +                     result->base.ip_discovery_version =
>> adev->ip_versions[GC_HWIP][0];
>> > >                       break;
>> > >               case AMD_IP_BLOCK_TYPE_SDMA:
>> > > -                     result->ip_discovery_version =
>> adev->ip_versions[SDMA0_HWIP][0];
>> > > +                     result->base.ip_discovery_version =
>> adev->ip_versions[SDMA0_HWIP][0];
>> > >                       break;
>> > >               case AMD_IP_BLOCK_TYPE_UVD:
>> > >               case AMD_IP_BLOCK_TYPE_VCN:
>> > >               case AMD_IP_BLOCK_TYPE_JPEG:
>> > > -                     result->ip_discovery_version =
>> adev->ip_versions[UVD_HWIP][0];
>> > > +                     result->base.ip_discovery_version =
>> adev->ip_versions[UVD_HWIP][0];
>> > >                       break;
>> > >               case AMD_IP_BLOCK_TYPE_VCE:
>> > > -                     result->ip_discovery_version =
>> adev->ip_versions[VCE_HWIP][0];
>> > > +                     result->base.ip_discovery_version =
>> adev->ip_versions[VCE_HWIP][0];
>> > >                       break;
>> > >               default:
>> > > -                     result->ip_discovery_version = 0;
>> > > +                     result->base.ip_discovery_version = 0;
>> > >                       break;
>> > >               }
>> > >       } else {
>> > > -             result->ip_discovery_version = 0;
>> > > +             result->base.ip_discovery_version = 0;
>> > >       }
>> > > -     result->capabilities_flags = 0;
>> > > -     result->available_rings = (1 << num_rings) - 1;
>> > > -     result->ib_start_alignment = ib_start_alignment;
>> > > -     result->ib_size_alignment = ib_size_alignment;
>> > > +     result->base.capabilities_flags = 0;
>> > > +     result->base.available_rings = (1 << num_rings) - 1;
>> > > +     result->base.ib_start_alignment = ib_start_alignment;
>> > > +     result->base.ib_size_alignment = ib_size_alignment;
>> > >       return 0;
>> > >   }
>> > >
>> > > @@ -536,7 +539,7 @@ int amdgpu_info_ioctl(struct drm_device *dev,
>> void *data, struct drm_file *filp)
>> > >               }
>> > >               return copy_to_user(out, &ui32, min(size, 4u)) ?
>> -EFAULT : 0;
>> > >       case AMDGPU_INFO_HW_IP_INFO: {
>> > > -             struct drm_amdgpu_info_hw_ip ip = {};
>> > > +             struct drm_amdgpu_info_hw_ip2 ip = {};
>> > >               int ret;
>> > >
>> > >               ret = amdgpu_hw_ip_info(adev, info, &ip);
>> > > diff --git a/include/uapi/drm/amdgpu_drm.h
>> b/include/uapi/drm/amdgpu_drm.h
>> > > index b6eb90df5d05..6b9e35b6f747 100644
>> > > --- a/include/uapi/drm/amdgpu_drm.h
>> > > +++ b/include/uapi/drm/amdgpu_drm.h
>> > > @@ -1128,6 +1128,9 @@ struct drm_amdgpu_info_device {
>> > >       __u32 enabled_rb_pipes_mask_hi;
>> > >   };
>> > >
>> > > +/* The size of this struct cannot be increased for compatibility
>> reasons, use
>> > > + * struct drm_amdgpu_info_hw_ip2 instead.
>> > > + */
>> > >   struct drm_amdgpu_info_hw_ip {
>> > >       /** Version of h/w IP */
>> > >       __u32  hw_ip_version_major;
>> > > @@ -1144,6 +1147,17 @@ struct drm_amdgpu_info_hw_ip {
>> > >       __u32  ip_discovery_version;
>> > >   };
>> > >
>> > > +struct drm_amdgpu_info_hw_ip2 {
>> > > +     /** Previous version of the struct */
>> > > +     struct drm_amdgpu_info_hw_ip  base;
>> > > +     /** The maximum number of IBs one can submit in a single
>> submission
>> > > +      * ioctl. (When using gang submit: this is per IP type)
>> > > +      */
>> > > +     __u32  max_ibs;
>> > > +     /** padding to 64bit for arch differences */
>> > > +     __u32  pad;
>> > > +};
>> > > +
>> > >   struct drm_amdgpu_info_num_handles {
>> > >       /** Max handles as supported by firmware for UVD */
>> > >       __u32  uvd_max_handles;
>> >
>>
>
>

[-- Attachment #2: Type: text/html, Size: 22042 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/3] drm/amdgpu: Reject submissions with too many IBs.
  2023-04-09 18:59 [PATCH v2 1/3] drm/amdgpu: Reject submissions with too many IBs Bas Nieuwenhuizen
  2023-04-09 18:59 ` [PATCH v2 2/3] drm/amdgpu: Simplify amdgpu_hw_ip_info Bas Nieuwenhuizen
  2023-04-09 18:59 ` [PATCH v2 3/3] drm/amdgpu: Add support for querying the max ibs in a submission. (v2) Bas Nieuwenhuizen
@ 2023-04-11 12:34 ` Christian König
  2 siblings, 0 replies; 10+ messages in thread
From: Christian König @ 2023-04-11 12:34 UTC (permalink / raw)
  To: Bas Nieuwenhuizen, amd-gfx
  Cc: alexander.deucher, christian.koenig, maraeo, timur.kristof

Am 09.04.23 um 20:59 schrieb Bas Nieuwenhuizen:
> Instead of failing somewhere in the scheduler after the
> ioctl has already succeeded.
>
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2498
> Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  | 9 +++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 5 +++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
>   3 files changed, 15 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 995ee9ff65c9..8db6618b9049 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -113,6 +113,15 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	if (!entity)
>   		return 0;
>   
> +	if (entity->rq && entity->rq->sched) {

I've just double checked and this stuff here is not necessarily 
initialized yet. We need to move this a bit around.

Probably the best place for the check is in amdgpu_cs_submit() after calling 
drm_sched_job_arm().

Alternatively we could go the other way around. Instead of keeping the 
max_ibs in the ring we keep a max_ibs per ip_type in adev and make sure 
that each ring can handle at least those during initialization.

Then we can check if the num_ibs are valid in amdgpu_cs_p1_ib() when we 
count them.

Thinking more about it, the latter is probably the better variant.

Regards,
Christian.

> +		struct amdgpu_ring *ring = to_amdgpu_ring(entity->rq->sched);
> +
> +		if (num_ibs > ring->max_ibs) {
> +			DRM_DEBUG("Rejected a submission with too many IBs");
> +			return -EINVAL;
> +		}
> +	}
> +
>   	return drm_sched_job_init(&(*job)->base, entity, owner);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index dc474b809604..933cb95a0e30 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -324,6 +324,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>   	ring->max_dw = max_dw;
>   	ring->hw_prio = hw_prio;
>   
> +	if (ring->funcs->emit_ib_size) {
> +		ring->max_ibs =
> +			(max_dw - ring->funcs->emit_frame_size) / ring->funcs->emit_ib_size;
> +	}
> +
>   	if (!ring->no_scheduler) {
>   		hw_ip = ring->funcs->type;
>   		num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 3989e755a5b4..7a295d80728b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -245,6 +245,7 @@ struct amdgpu_ring {
>   	unsigned		ring_size;
>   	unsigned		max_dw;
>   	int			count_dw;
> +	unsigned		max_ibs;
>   	uint64_t		gpu_addr;
>   	uint64_t		ptr_mask;
>   	uint32_t		buf_mask;


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2023-04-11 13:49 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-09 18:59 [PATCH v2 1/3] drm/amdgpu: Reject submissions with too many IBs Bas Nieuwenhuizen
2023-04-09 18:59 ` [PATCH v2 2/3] drm/amdgpu: Simplify amdgpu_hw_ip_info Bas Nieuwenhuizen
2023-04-11  8:05   ` Christian König
2023-04-09 18:59 ` [PATCH v2 3/3] drm/amdgpu: Add support for querying the max ibs in a submission. (v2) Bas Nieuwenhuizen
2023-04-11  8:10   ` Christian König
2023-04-11  8:22     ` Bas Nieuwenhuizen
2023-04-11  9:06       ` Timur Kristóf
2023-04-11  9:22         ` Christian König
2023-04-11 11:06           ` Timur Kristóf
2023-04-11 12:34 ` [PATCH v2 1/3] drm/amdgpu: Reject submissions with too many IBs Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.