All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Koenig, Christian" <Christian.Koenig-5C7GfCeVMHo@public.gmane.org>
To: "Tuikov, Luben" <Luben.Tuikov-5C7GfCeVMHo@public.gmane.org>,
	"amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org"
	<amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
Cc: "Deucher, Alexander" <Alexander.Deucher-5C7GfCeVMHo@public.gmane.org>
Subject: Re: [PATCH] drm/amdgpu: simplify padding calculations
Date: Sat, 26 Oct 2019 12:05:43 +0000	[thread overview]
Message-ID: <db4d6ece-2cec-a465-0639-d75142621dda@amd.com> (raw)
In-Reply-To: <ed713bfe-2032-b56a-3e29-f61f48aeeb3b-5C7GfCeVMHo@public.gmane.org>

Am 25.10.19 um 23:51 schrieb Tuikov, Luben:
> On 2019-10-25 2:54 a.m., Koenig, Christian wrote:
>> Am 25.10.19 um 01:44 schrieb Tuikov, Luben:
>>> Simplify padding calculations.
>>>
>>> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/cik_sdma.c  |  4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c |  4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c |  4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +++++++++-----
>>>    5 files changed, 17 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> index c45304f1047c..1ea9e18d6f08 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
>>>    	u32 extra_bits = vmid & 0xf;
>>>    
>>>    	/* IB packet must end on a 8 DW boundary */
>>> -	cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
>>> +	cik_sdma_ring_insert_nop(ring, (4-lower_32_bits(ring->wptr)) & 7);
>>>    
>>>    	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
>>>    	amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
>>> @@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
>>>    	u32 pad_count;
>>>    	int i;
>>>    
>>> -	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> +	pad_count = (-ib->length_dw) & 7;
>>>    	for (i = 0; i < pad_count; i++)
>>>    		if (sdma && sdma->burst_nop && (i == 0))
>>>    			ib->ptr[ib->length_dw++] =
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> index a10175838013..d340f179401a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
>>>    	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>>    
>>>    	/* IB packet must end on a 8 DW boundary */
>>> -	sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
>>> +	sdma_v2_4_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>>>    
>>>    	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>>    			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>>> @@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
>>>    	u32 pad_count;
>>>    	int i;
>>>    
>>> -	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> +	pad_count = (-ib->length_dw) & 7;
>>>    	for (i = 0; i < pad_count; i++)
>>>    		if (sdma && sdma->burst_nop && (i == 0))
>>>    			ib->ptr[ib->length_dw++] =
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> index 5f4e2c616241..5c3c310188b6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
>>>    	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>>    
>>>    	/* IB packet must end on a 8 DW boundary */
>>> -	sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
>>> +	sdma_v3_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>>>    
>>>    	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>>    			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>>> @@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
>>>    	u32 pad_count;
>>>    	int i;
>>>    
>>> -	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> +	pad_count = (-ib->length_dw) & 7;
>>>    	for (i = 0; i < pad_count; i++)
>>>    		if (sdma && sdma->burst_nop && (i == 0))
>>>    			ib->ptr[ib->length_dw++] =
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> index 45bd538ba97e..7c71c88e38a4 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> @@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
>>>    	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>>    
>>>    	/* IB packet must end on a 8 DW boundary */
>>> -	sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
>>> +	sdma_v4_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>>>    
>>>    	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>>    			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>>> @@ -1580,7 +1580,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
>>>    	u32 pad_count;
>>>    	int i;
>>>    
>>> -	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> +	pad_count = (-ib->length_dw) & 7;
>>>    	for (i = 0; i < pad_count; i++)
>>>    		if (sdma && sdma->burst_nop && (i == 0))
>>>    			ib->ptr[ib->length_dw++] =
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> index 0c41b4fdc58b..67ede9e4df01 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> @@ -382,8 +382,12 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
>>>    	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>>    	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
>>>    
>>> -	/* IB packet must end on a 8 DW boundary */
>>> -	sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
>>> +	/* An IB packet must end on a 8 DW boundary--the next dword must
>>> +	 * be on a 8-dword boundary. Our IB below is 6 dwords long,
>> That should probably read "our IB command" or "our IB packet".
> Done.
>
>>> +	 * thus add x number of NOPs, such that
>>> +	 * wptr + 6 + x = 8k, k >= 0.
>> Maybe write (wrptr + 6 + x) % 8 = 0 to make that more obvious.
> That's less obvious. In my mind I always translate it to
> a congruence expression. It's what I used to derive the algebraic
> simplifications of this patch.
>
> I'll add the highly cryptic "(wrptr + 6 + x) % 8 = 0" to make you happy,
> below the algebraic expression already there.

The problem I see here is that people who are used to GPUs will think 
that k is a float and not immediately get what's meant here.

>>> +	 */
>>> +	sdma_v5_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>> Spaces between operators please.
> Yeah, I wanted to put an emphasis on the AND-op, but I'll add them.

What would be really nice to have is to keep the 10 as number of DW in 
the IB packet around.

Maybe add an "static inline amdgpu_ring_unalign(ring, 10, 8)" helper or 
something like that, the insert_nop is a ring callback anyway IIRC.

Regards,
Christian.

>
> Regards,
> Luben
>
>
>> Apart form that looks good to me,
>> Christian.
>>
>>>    
>>>    	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>>    			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>>> @@ -1086,10 +1090,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
>>>    }
>>>    
>>>    /**
>>> - * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
>>> - *
>>> + * sdma_v5_0_ring_pad_ib - pad the IB
>>>     * @ib: indirect buffer to fill with padding
>>>     *
>>> + * Pad the IB with NOPs to a boundary multiple of 8.
>>>     */
>>>    static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
>>>    {
>>> @@ -1097,7 +1101,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
>>>    	u32 pad_count;
>>>    	int i;
>>>    
>>> -	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> +	pad_count = (-ib->length_dw) & 0x7;
>>>    	for (i = 0; i < pad_count; i++)
>>>    		if (sdma && sdma->burst_nop && (i == 0))
>>>    			ib->ptr[ib->length_dw++] =

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

WARNING: multiple messages have this Message-ID (diff)
From: "Koenig, Christian" <Christian.Koenig@amd.com>
To: "Tuikov, Luben" <Luben.Tuikov@amd.com>,
	"amd-gfx@lists.freedesktop.org" <amd-gfx@lists.freedesktop.org>
Cc: "Deucher, Alexander" <Alexander.Deucher@amd.com>
Subject: Re: [PATCH] drm/amdgpu: simplify padding calculations
Date: Sat, 26 Oct 2019 12:05:43 +0000	[thread overview]
Message-ID: <db4d6ece-2cec-a465-0639-d75142621dda@amd.com> (raw)
Message-ID: <20191026120543._Je6c3OjBg3K18KdwnyZ9RUDrDw9DVTxudyaGvr2yHs@z> (raw)
In-Reply-To: <ed713bfe-2032-b56a-3e29-f61f48aeeb3b@amd.com>

Am 25.10.19 um 23:51 schrieb Tuikov, Luben:
> On 2019-10-25 2:54 a.m., Koenig, Christian wrote:
>> Am 25.10.19 um 01:44 schrieb Tuikov, Luben:
>>> Simplify padding calculations.
>>>
>>> Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/cik_sdma.c  |  4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c |  4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c |  4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  4 ++--
>>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +++++++++-----
>>>    5 files changed, 17 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> index c45304f1047c..1ea9e18d6f08 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
>>> @@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
>>>    	u32 extra_bits = vmid & 0xf;
>>>    
>>>    	/* IB packet must end on a 8 DW boundary */
>>> -	cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8);
>>> +	cik_sdma_ring_insert_nop(ring, (4-lower_32_bits(ring->wptr)) & 7);
>>>    
>>>    	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
>>>    	amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
>>> @@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
>>>    	u32 pad_count;
>>>    	int i;
>>>    
>>> -	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> +	pad_count = (-ib->length_dw) & 7;
>>>    	for (i = 0; i < pad_count; i++)
>>>    		if (sdma && sdma->burst_nop && (i == 0))
>>>    			ib->ptr[ib->length_dw++] =
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> index a10175838013..d340f179401a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
>>> @@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
>>>    	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>>    
>>>    	/* IB packet must end on a 8 DW boundary */
>>> -	sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
>>> +	sdma_v2_4_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>>>    
>>>    	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>>    			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>>> @@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
>>>    	u32 pad_count;
>>>    	int i;
>>>    
>>> -	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> +	pad_count = (-ib->length_dw) & 7;
>>>    	for (i = 0; i < pad_count; i++)
>>>    		if (sdma && sdma->burst_nop && (i == 0))
>>>    			ib->ptr[ib->length_dw++] =
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> index 5f4e2c616241..5c3c310188b6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
>>> @@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
>>>    	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>>    
>>>    	/* IB packet must end on a 8 DW boundary */
>>> -	sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
>>> +	sdma_v3_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>>>    
>>>    	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>>    			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>>> @@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
>>>    	u32 pad_count;
>>>    	int i;
>>>    
>>> -	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> +	pad_count = (-ib->length_dw) & 7;
>>>    	for (i = 0; i < pad_count; i++)
>>>    		if (sdma && sdma->burst_nop && (i == 0))
>>>    			ib->ptr[ib->length_dw++] =
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> index 45bd538ba97e..7c71c88e38a4 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
>>> @@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
>>>    	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>>    
>>>    	/* IB packet must end on a 8 DW boundary */
>>> -	sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
>>> +	sdma_v4_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>>>    
>>>    	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>>    			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>>> @@ -1580,7 +1580,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
>>>    	u32 pad_count;
>>>    	int i;
>>>    
>>> -	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> +	pad_count = (-ib->length_dw) & 7;
>>>    	for (i = 0; i < pad_count; i++)
>>>    		if (sdma && sdma->burst_nop && (i == 0))
>>>    			ib->ptr[ib->length_dw++] =
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> index 0c41b4fdc58b..67ede9e4df01 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>>> @@ -382,8 +382,12 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
>>>    	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>>>    	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
>>>    
>>> -	/* IB packet must end on a 8 DW boundary */
>>> -	sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8);
>>> +	/* An IB packet must end on a 8 DW boundary--the next dword must
>>> +	 * be on a 8-dword boundary. Our IB below is 6 dwords long,
>> That should probably read "our IB command" or "our IB packet".
> Done.
>
>>> +	 * thus add x number of NOPs, such that
>>> +	 * wptr + 6 + x = 8k, k >= 0.
>> Maybe write (wrptr + 6 + x) % 8 = 0 to make that more obvious.
> That's less obvious. In my mind I always translate it to
> a congruence expression. It's what I used to derive the algebraic
> simplifications of this patch.
>
> I'll add the highly cryptic "(wrptr + 6 + x) % 8 = 0" to make you happy,
> below the algebraic expression already there.

The problem I see here is that people who are used to GPUs will think 
that k is a float and not immediately get what's meant here.

>>> +	 */
>>> +	sdma_v5_0_ring_insert_nop(ring, (2-lower_32_bits(ring->wptr)) & 7);
>> Spaces between operators please.
> Yeah, I wanted to put an emphasis on the AND-op, but I'll add them.

What would be really nice to have is to keep the 10 as number of DW in 
the IB packet around.

Maybe add an "static inline amdgpu_ring_unalign(ring, 10, 8)" helper or 
something like that, the insert_nop is a ring callback anyway IIRC.

Regards,
Christian.

>
> Regards,
> Luben
>
>
>> Apart form that looks good to me,
>> Christian.
>>
>>>    
>>>    	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>>>    			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
>>> @@ -1086,10 +1090,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
>>>    }
>>>    
>>>    /**
>>> - * sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw
>>> - *
>>> + * sdma_v5_0_ring_pad_ib - pad the IB
>>>     * @ib: indirect buffer to fill with padding
>>>     *
>>> + * Pad the IB with NOPs to a boundary multiple of 8.
>>>     */
>>>    static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
>>>    {
>>> @@ -1097,7 +1101,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
>>>    	u32 pad_count;
>>>    	int i;
>>>    
>>> -	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
>>> +	pad_count = (-ib->length_dw) & 0x7;
>>>    	for (i = 0; i < pad_count; i++)
>>>    		if (sdma && sdma->burst_nop && (i == 0))
>>>    			ib->ptr[ib->length_dw++] =

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2019-10-26 12:05 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-24 23:44 [PATCH] drm/amdgpu: simplify padding calculations Tuikov, Luben
2019-10-24 23:44 ` Tuikov, Luben
     [not found] ` <20191024234444.4375-1-luben.tuikov-5C7GfCeVMHo@public.gmane.org>
2019-10-25  6:54   ` Koenig, Christian
2019-10-25  6:54     ` Koenig, Christian
     [not found]     ` <45e07677-717a-2c01-369e-1dbd19ce8e74-5C7GfCeVMHo@public.gmane.org>
2019-10-25 21:51       ` Tuikov, Luben
2019-10-25 21:51         ` Tuikov, Luben
     [not found]         ` <ed713bfe-2032-b56a-3e29-f61f48aeeb3b-5C7GfCeVMHo@public.gmane.org>
2019-10-25 22:30           ` Tuikov, Luben
2019-10-25 22:30             ` Tuikov, Luben
2019-10-26 12:05           ` Koenig, Christian [this message]
2019-10-26 12:05             ` Koenig, Christian
     [not found]             ` <db4d6ece-2cec-a465-0639-d75142621dda-5C7GfCeVMHo@public.gmane.org>
2019-10-27 21:46               ` Tuikov, Luben
2019-10-27 21:46                 ` Tuikov, Luben
2019-10-25 22:41       ` [PATCH] drm/amdgpu: simplify padding calculations (v2) Tuikov, Luben
2019-10-25 22:41         ` Tuikov, Luben
     [not found]         ` <20191025224116.12470-1-luben.tuikov-5C7GfCeVMHo@public.gmane.org>
2019-10-28 11:01           ` Koenig, Christian
2019-10-28 11:01             ` Koenig, Christian

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=db4d6ece-2cec-a465-0639-d75142621dda@amd.com \
    --to=christian.koenig-5c7gfcevmho@public.gmane.org \
    --cc=Alexander.Deucher-5C7GfCeVMHo@public.gmane.org \
    --cc=Luben.Tuikov-5C7GfCeVMHo@public.gmane.org \
    --cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.