* [PATCH] drm/amdgpu: simplify padding calculations (v3)
@ 2019-12-18 23:12 Luben Tuikov
2019-12-18 23:16 ` Luben Tuikov
0 siblings, 1 reply; 4+ messages in thread
From: Luben Tuikov @ 2019-12-18 23:12 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander Deucher, Luben Tuikov, Christian Koenig
Simplify padding calculations.
1. Taking the remainder of a binary value when
the divisor is a power of two, such as,
a % 2^n is identical to, a & (2^n - 1) in base-2
arithmetic, and so expressions like this:
(12 - (lower_32_bits(ring->wptr) & 7)) % 8
are superflous. And can be reduced to a single
remainder-taking operation.
2. Subtracting the remainder modulo 8 out of 12
and then again taking the remainder modulo 8,
yields the same result as simply subtracting
the value out of 4 and then taking the remainder.
This is true because 4 belongs to the congruence
(residue) class {4 + 8*k}, k in Z. (So using,
{..., -12, -4, 4, 12, 20, ...}, would yield
the same final result.
So the above expression, becomes,
(4 - lower_32_bits(ring->wptr)) & 7
3. Now since 8 is part of the conguence class
{0 + 8*k}, k in Z, and from 1) and 2) above, then,
(8 - (ib->length_dw & 0x7)) % 8
becomes, simply,
(-ib->length_dw) & 7.
This patch implements all of the above in this code.
v2: Comment update and spacing.
v3: Add 1, 2, 3, above, in this commit message.
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
---
drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 17 ++++++++++++-----
5 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 1f22a8d0f7f3..4274ccf765de 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
u32 extra_bits = vmid & 0xf;
/* IB packet must end on a 8 DW boundary */
- cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
@@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 606b621145a1..fd7fa6082563 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
- sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index a559573ec8fd..4a8a7f0f3a9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
- sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index bd9ed33bab43..c69df0cb21ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */
- sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -1579,7 +1579,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 119364293cec..3912937f878f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -382,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
- /* IB packet must end on a 8 DW boundary */
- sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
+ /* An IB packet must end on a 8 DW boundary--the next dword
+ * must be on a 8-dword boundary. Our IB packet below is 6
+ * dwords long, thus add x number of NOPs, such that, in
+ * modular arithmetic,
+ * wptr + 6 + x = 8k, k >= 0, which in C is,
+ * (wptr + 6 + x) % 8 = 0.
+ * The expression below, is a solution of x.
+ */
+ sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
@@ -1076,10 +1083,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
}
/**
- * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
- *
+ * sdma_v5_0_ring_pad_ib - pad the IB
* @ib: indirect buffer to fill with padding
*
+ * Pad the IB with NOPs to a boundary multiple of 8.
*/
static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
@@ -1087,7 +1094,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count;
int i;
- pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+ pad_count = (-ib->length_dw) & 0x7;
for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] =
--
2.24.0.390.g083378cc35
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdgpu: simplify padding calculations (v3)
2019-12-18 23:12 [PATCH] drm/amdgpu: simplify padding calculations (v3) Luben Tuikov
@ 2019-12-18 23:16 ` Luben Tuikov
2019-12-19 2:59 ` Deucher, Alexander
0 siblings, 1 reply; 4+ messages in thread
From: Luben Tuikov @ 2019-12-18 23:16 UTC (permalink / raw)
To: amd-gfx; +Cc: Alexander Deucher, Christian Koenig
This patch seems to have been dropped after posting
and reposting. I'm not sure why it keeps being dropped.
In v3, I added an explanation in the commit text
below the original commit text of the single sentence
of "Simplify padding calculations." Identical diffstat
as v2.
Regards,
Luben
On 2019-12-18 6:12 p.m., Luben Tuikov wrote:
> Simplify padding calculations.
>
> 1. Taking the remainder of a binary value when
> the divisor is a power of two, such as,
> a % 2^n is identical to, a & (2^n - 1) in base-2
> arithmetic, and so expressions like this:
>
> (12 - (lower_32_bits(ring->wptr) & 7)) % 8
>
> are superflous. And can be reduced to a single
> remainder-taking operation.
>
> 2. Subtracting the remainder modulo 8 out of 12
> and then again taking the remainder modulo 8,
> yields the same result as simply subtracting
> the value out of 4 and then taking the remainder.
> This is true because 4 belongs to the congruence
> (residue) class {4 + 8*k}, k in Z. (So using,
> {..., -12, -4, 4, 12, 20, ...}, would yield
> the same final result.
>
> So the above expression, becomes,
>
> (4 - lower_32_bits(ring->wptr)) & 7
>
> 3. Now since 8 is part of the conguence class
> {0 + 8*k}, k in Z, and from 1) and 2) above, then,
>
> (8 - (ib->length_dw & 0x7)) % 8
>
> becomes, simply,
>
> (-ib->length_dw) & 7.
>
> This patch implements all of the above in this code.
>
> v2: Comment update and spacing.
> v3: Add 1, 2, 3, above, in this commit message.
>
> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 17 ++++++++++++-----
> 5 files changed, 20 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index 1f22a8d0f7f3..4274ccf765de 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
> u32 extra_bits = vmid & 0xf;
>
> /* IB packet must end on a 8 DW boundary */
> - cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
> amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
> @@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index 606b621145a1..fd7fa6082563 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>
> /* IB packet must end on a 8 DW boundary */
> - sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index a559573ec8fd..4a8a7f0f3a9c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>
> /* IB packet must end on a 8 DW boundary */
> - sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index bd9ed33bab43..c69df0cb21ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>
> /* IB packet must end on a 8 DW boundary */
> - sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1579,7 +1579,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 119364293cec..3912937f878f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -382,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
> uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
>
> - /* IB packet must end on a 8 DW boundary */
> - sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
> + /* An IB packet must end on a 8 DW boundary--the next dword
> + * must be on a 8-dword boundary. Our IB packet below is 6
> + * dwords long, thus add x number of NOPs, such that, in
> + * modular arithmetic,
> + * wptr + 6 + x = 8k, k >= 0, which in C is,
> + * (wptr + 6 + x) % 8 = 0.
> + * The expression below, is a solution of x.
> + */
> + sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>
> amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1076,10 +1083,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
> }
>
> /**
> - * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
> - *
> + * sdma_v5_0_ring_pad_ib - pad the IB
> * @ib: indirect buffer to fill with padding
> *
> + * Pad the IB with NOPs to a boundary multiple of 8.
> */
> static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
> {
> @@ -1087,7 +1094,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> u32 pad_count;
> int i;
>
> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> + pad_count = (-ib->length_dw) & 0x7;
> for (i = 0; i < pad_count; i++)
> if (sdma && sdma->burst_nop && (i == 0))
> ib->ptr[ib->length_dw++] =
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
* RE: [PATCH] drm/amdgpu: simplify padding calculations (v3)
2019-12-18 23:16 ` Luben Tuikov
@ 2019-12-19 2:59 ` Deucher, Alexander
2019-12-19 3:13 ` Luben Tuikov
0 siblings, 1 reply; 4+ messages in thread
From: Deucher, Alexander @ 2019-12-19 2:59 UTC (permalink / raw)
To: Tuikov, Luben, amd-gfx; +Cc: Koenig, Christian
> -----Original Message-----
> From: Tuikov, Luben <Luben.Tuikov@amd.com>
> Sent: Wednesday, December 18, 2019 6:17 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
> <Christian.Koenig@amd.com>
> Subject: Re: [PATCH] drm/amdgpu: simplify padding calculations (v3)
>
> This patch seems to have been dropped after posting and reposting. I'm not
> sure why it keeps being dropped.
Dropped from what? Once it's been reviewed, add the RBs and go ahead and push it to amd-staging-drm-next.
Alex
>
> In v3, I added an explanation in the commit text below the original commit
> text of the single sentence of "Simplify padding calculations." Identical
> diffstat as v2.
>
> Regards,
> Luben
>
> On 2019-12-18 6:12 p.m., Luben Tuikov wrote:
> > Simplify padding calculations.
> >
> > 1. Taking the remainder of a binary value when the divisor is a power
> > of two, such as, a % 2^n is identical to, a & (2^n - 1) in base-2
> > arithmetic, and so expressions like this:
> >
> > (12 - (lower_32_bits(ring->wptr) & 7)) % 8
> >
> > are superflous. And can be reduced to a single remainder-taking
> > operation.
> >
> > 2. Subtracting the remainder modulo 8 out of 12 and then again taking
> > the remainder modulo 8, yields the same result as simply subtracting
> > the value out of 4 and then taking the remainder.
> > This is true because 4 belongs to the congruence
> > (residue) class {4 + 8*k}, k in Z. (So using, {..., -12, -4, 4, 12,
> > 20, ...}, would yield the same final result.
> >
> > So the above expression, becomes,
> >
> > (4 - lower_32_bits(ring->wptr)) & 7
> >
> > 3. Now since 8 is part of the conguence class
> > {0 + 8*k}, k in Z, and from 1) and 2) above, then,
> >
> > (8 - (ib->length_dw & 0x7)) % 8
> >
> > becomes, simply,
> >
> > (-ib->length_dw) & 7.
> >
> > This patch implements all of the above in this code.
> >
> > v2: Comment update and spacing.
> > v3: Add 1, 2, 3, above, in this commit message.
> >
> > Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 4 ++--
> > drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
> > drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 4 ++--
> > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
> > drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 17 ++++++++++++-----
> > 5 files changed, 20 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > index 1f22a8d0f7f3..4274ccf765de 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> > @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct
> amdgpu_ring *ring,
> > u32 extra_bits = vmid & 0xf;
> >
> > /* IB packet must end on a 8 DW boundary */
> > - cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) &
> 7)) % 8);
> > + cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
> >
> > amdgpu_ring_write(ring,
> SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
> > amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must
> be
> > 32 byte aligned */ @@ -811,7 +811,7 @@ static void
> cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
> > u32 pad_count;
> > int i;
> >
> > - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> > + pad_count = (-ib->length_dw) & 7;
> > for (i = 0; i < pad_count; i++)
> > if (sdma && sdma->burst_nop && (i == 0))
> > ib->ptr[ib->length_dw++] =
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > index 606b621145a1..fd7fa6082563 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> > @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct
> amdgpu_ring *ring,
> > unsigned vmid = AMDGPU_JOB_GET_VMID(job);
> >
> > /* IB packet must end on a 8 DW boundary */
> > - sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) &
> 7)) % 8);
> > + sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) &
> > +7);
> >
> > amdgpu_ring_write(ring,
> SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> > SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -750,7 +750,7 @@
> > static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct
> amdgpu_ib *ib
> > u32 pad_count;
> > int i;
> >
> > - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> > + pad_count = (-ib->length_dw) & 7;
> > for (i = 0; i < pad_count; i++)
> > if (sdma && sdma->burst_nop && (i == 0))
> > ib->ptr[ib->length_dw++] =
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > index a559573ec8fd..4a8a7f0f3a9c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> > @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct
> amdgpu_ring *ring,
> > unsigned vmid = AMDGPU_JOB_GET_VMID(job);
> >
> > /* IB packet must end on a 8 DW boundary */
> > - sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) &
> 7)) % 8);
> > + sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) &
> > +7);
> >
> > amdgpu_ring_write(ring,
> SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> > SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1021,7 +1021,7
> > @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct
> amdgpu_ib *ib
> > u32 pad_count;
> > int i;
> >
> > - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> > + pad_count = (-ib->length_dw) & 7;
> > for (i = 0; i < pad_count; i++)
> > if (sdma && sdma->burst_nop && (i == 0))
> > ib->ptr[ib->length_dw++] =
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > index bd9ed33bab43..c69df0cb21ec 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> > @@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct
> amdgpu_ring *ring,
> > unsigned vmid = AMDGPU_JOB_GET_VMID(job);
> >
> > /* IB packet must end on a 8 DW boundary */
> > - sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) &
> 7)) % 8);
> > + sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) &
> > +7);
> >
> > amdgpu_ring_write(ring,
> SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> > SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1579,7 +1579,7
> > @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct
> amdgpu_ib *ib
> > u32 pad_count;
> > int i;
> >
> > - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> > + pad_count = (-ib->length_dw) & 7;
> > for (i = 0; i < pad_count; i++)
> > if (sdma && sdma->burst_nop && (i == 0))
> > ib->ptr[ib->length_dw++] =
> > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > index 119364293cec..3912937f878f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> > @@ -382,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct
> amdgpu_ring *ring,
> > unsigned vmid = AMDGPU_JOB_GET_VMID(job);
> > uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring,
> vmid);
> >
> > - /* IB packet must end on a 8 DW boundary */
> > - sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) &
> 7)) % 8);
> > + /* An IB packet must end on a 8 DW boundary--the next dword
> > + * must be on a 8-dword boundary. Our IB packet below is 6
> > + * dwords long, thus add x number of NOPs, such that, in
> > + * modular arithmetic,
> > + * wptr + 6 + x = 8k, k >= 0, which in C is,
> > + * (wptr + 6 + x) % 8 = 0.
> > + * The expression below, is a solution of x.
> > + */
> > + sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) &
> > +7);
> >
> > amdgpu_ring_write(ring,
> SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
> > SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1076,10 +1083,10
> > @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib, }
> >
> > /**
> > - * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
> > - *
> > + * sdma_v5_0_ring_pad_ib - pad the IB
> > * @ib: indirect buffer to fill with padding
> > *
> > + * Pad the IB with NOPs to a boundary multiple of 8.
> > */
> > static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct
> > amdgpu_ib *ib) { @@ -1087,7 +1094,7 @@ static void
> > sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
> > u32 pad_count;
> > int i;
> >
> > - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
> > + pad_count = (-ib->length_dw) & 0x7;
> > for (i = 0; i < pad_count; i++)
> > if (sdma && sdma->burst_nop && (i == 0))
> > ib->ptr[ib->length_dw++] =
> >
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] drm/amdgpu: simplify padding calculations (v3)
2019-12-19 2:59 ` Deucher, Alexander
@ 2019-12-19 3:13 ` Luben Tuikov
0 siblings, 0 replies; 4+ messages in thread
From: Luben Tuikov @ 2019-12-19 3:13 UTC (permalink / raw)
To: Deucher, Alexander, amd-gfx; +Cc: Koenig, Christian
On 2019-12-18 9:59 p.m., Deucher, Alexander wrote:
>> -----Original Message-----
>> From: Tuikov, Luben <Luben.Tuikov@amd.com>
>> Sent: Wednesday, December 18, 2019 6:17 PM
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; Koenig, Christian
>> <Christian.Koenig@amd.com>
>> Subject: Re: [PATCH] drm/amdgpu: simplify padding calculations (v3)
>>
>> This patch seems to have been dropped after posting and reposting. I'm not
>> sure why it keeps being dropped.
>
> Dropped from what? Once it's been reviewed, add the RBs and go ahead and push it to amd-staging-drm-next.
Oh, thanks!
I seem to forget about this push to refs/for/amd-staging-drm-next.
I'll write it on a post-it note so I can remember.
Okay, so then that's what I need to do.
Regards,
Luben
>
> Alex
>
>>
>> In v3, I added an explanation in the commit text below the original commit
>> text of the single sentence of "Simplify padding calculations." Identical
>> diffstat as v2.
>>
>> Regards,
>> Luben
>>
>> On 2019-12-18 6:12 p.m., Luben Tuikov wrote:
>>> Simplify padding calculations.
>>>
>>> 1. Taking the remainder of a binary value when the divisor is a power
>>> of two, such as, a % 2^n is identical to, a & (2^n - 1) in base-2
>>> arithmetic, and so expressions like this:
>>>
>>> (12 - (lower_32_bits(ring->wptr) & 7)) % 8
>>>
>>> are superflous. And can be reduced to a single remainder-taking
>>> operation.
>>>
>>> 2. Subtracting the remainder modulo 8 out of 12 and then again taking
>>> the remainder modulo 8, yields the same result as simply subtracting
>>> the value out of 4 and then taking the remainder.
>>> This is true because 4 belongs to the congruence
>>> (residue) class {4 + 8*k}, k in Z. (So using, {..., -12, -4, 4, 12,
>>> 20, ...}, would yield the same final result.
>>>
>>> So the above expression, becomes,
>>>
>>> (4 - lower_32_bits(ring->wptr)) & 7
>>>
>>> 3. Now since 8 is part of the conguence class
>>> {0 + 8*k}, k in Z, and from 1) and 2) above, then,
>>>
>>> (8 - (ib->length_dw & 0x7)) % 8
>>>
>>> becomes, simply,
>>>
>>> (-ib->length_dw) & 7.
>>>
>>> This patch implements all of the above in this code.
>>>
>>> v2: Comment update and spacing.
>>> v3: Add 1, 2, 3, above, in this commit message.
>>>
>>> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 4 ++--
>>> drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 4 ++--
>>> drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 4 ++--
>>> drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++--
>>> drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 17 ++++++++++++-----
>>> 5 files changed, 20 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> index 1f22a8d0f7f3..4274ccf765de 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct
>> amdgpu_ring *ring,
>>> u32 extra_bits = vmid & 0xf;
>>>
>>> /* IB packet must end on a 8 DW boundary */
>>> - cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) &
>> 7)) % 8);
>>> + cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
>>>
>>> amdgpu_ring_write(ring,
>> SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
>>> amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must
>> be
>>> 32 byte aligned */ @@ -811,7 +811,7 @@ static void
>> cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
>>> u32 pad_count;
>>> int i;
>>>
>>> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> + pad_count = (-ib->length_dw) & 7;
>>> for (i = 0; i < pad_count; i++)
>>> if (sdma && sdma->burst_nop && (i == 0))
>>> ib->ptr[ib->length_dw++] =
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> index 606b621145a1..fd7fa6082563 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct
>> amdgpu_ring *ring,
>>> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>>
>>> /* IB packet must end on a 8 DW boundary */
>>> - sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) &
>> 7)) % 8);
>>> + sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) &
>>> +7);
>>>
>>> amdgpu_ring_write(ring,
>> SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>> @@ -750,7 +750,7 @@
>>> static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct
>> amdgpu_ib *ib
>>> u32 pad_count;
>>> int i;
>>>
>>> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> + pad_count = (-ib->length_dw) & 7;
>>> for (i = 0; i < pad_count; i++)
>>> if (sdma && sdma->burst_nop && (i == 0))
>>> ib->ptr[ib->length_dw++] =
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> index a559573ec8fd..4a8a7f0f3a9c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct
>> amdgpu_ring *ring,
>>> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>>
>>> /* IB packet must end on a 8 DW boundary */
>>> - sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) &
>> 7)) % 8);
>>> + sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) &
>>> +7);
>>>
>>> amdgpu_ring_write(ring,
>> SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>> @@ -1021,7 +1021,7
>>> @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct
>> amdgpu_ib *ib
>>> u32 pad_count;
>>> int i;
>>>
>>> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> + pad_count = (-ib->length_dw) & 7;
>>> for (i = 0; i < pad_count; i++)
>>> if (sdma && sdma->burst_nop && (i == 0))
>>> ib->ptr[ib->length_dw++] =
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> index bd9ed33bab43..c69df0cb21ec 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> @@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct
>> amdgpu_ring *ring,
>>> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>>
>>> /* IB packet must end on a 8 DW boundary */
>>> - sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) &
>> 7)) % 8);
>>> + sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) &
>>> +7);
>>>
>>> amdgpu_ring_write(ring,
>> SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>> @@ -1579,7 +1579,7
>>> @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct
>> amdgpu_ib *ib
>>> u32 pad_count;
>>> int i;
>>>
>>> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> + pad_count = (-ib->length_dw) & 7;
>>> for (i = 0; i < pad_count; i++)
>>> if (sdma && sdma->burst_nop && (i == 0))
>>> ib->ptr[ib->length_dw++] =
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> index 119364293cec..3912937f878f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> @@ -382,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct
>> amdgpu_ring *ring,
>>> unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>> uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring,
>> vmid);
>>>
>>> - /* IB packet must end on a 8 DW boundary */
>>> - sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) &
>> 7)) % 8);
>>> + /* An IB packet must end on a 8 DW boundary--the next dword
>>> + * must be on a 8-dword boundary. Our IB packet below is 6
>>> + * dwords long, thus add x number of NOPs, such that, in
>>> + * modular arithmetic,
>>> + * wptr + 6 + x = 8k, k >= 0, which in C is,
>>> + * (wptr + 6 + x) % 8 = 0.
>>> + * The expression below, is a solution of x.
>>> + */
>>> + sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) &
>>> +7);
>>>
>>> amdgpu_ring_write(ring,
>> SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>> SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>> @@ -1076,10 +1083,10
>>> @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib, }
>>>
>>> /**
>>> - * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
>>> - *
>>> + * sdma_v5_0_ring_pad_ib - pad the IB
>>> * @ib: indirect buffer to fill with padding
>>> *
>>> + * Pad the IB with NOPs to a boundary multiple of 8.
>>> */
>>> static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct
>>> amdgpu_ib *ib) { @@ -1087,7 +1094,7 @@ static void
>>> sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
>>> u32 pad_count;
>>> int i;
>>>
>>> - pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> + pad_count = (-ib->length_dw) & 0x7;
>>> for (i = 0; i < pad_count; i++)
>>> if (sdma && sdma->burst_nop && (i == 0))
>>> ib->ptr[ib->length_dw++] =
>>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2019-12-19 3:13 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-18 23:12 [PATCH] drm/amdgpu: simplify padding calculations (v3) Luben Tuikov
2019-12-18 23:16 ` Luben Tuikov
2019-12-19 2:59 ` Deucher, Alexander
2019-12-19 3:13 ` Luben Tuikov
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.