It really *is* a NOALLOC
feature. In other words there is
no latency improvement on reads
because the cache is always
checked, even with the noalloc
flag set.
The only thing it affects is
that misses not enter the cache
and so don't cause any
additional pressure on evicting
cache lines.
You might want to double check
with the hardware guys, but I'm
something like 95% sure that it
works this way.
Christian.
Am 11.05.22 um 09:22
schrieb Marek Olšák:
Bypass means
that the contents of the
cache are ignored, which
decreases latency at the
cost of no coherency
between bypassed and
normal memory requests.
NOA (noalloc) means that
the cache is checked and
can give you cache hits,
but misses are not cached
and the overall latency is
higher. I don't know what
the hw does, but I hope it
was misnamed and it really
means bypass because there
is no point in doing cache
lookups on every memory
request if the driver
wants to disable caching
to *decrease* latency in
the situations when the
cache isn't helping.
Marek
On 5/11/2022 11:36 AM,
Christian König wrote:
> Mhm, it doesn't
really bypass MALL. It
just doesn't allocate
any MALL
> entries on write.
>
> How about
AMDGPU_VM_PAGE_NO_MALL ?
One more -
AMDGPU_VM_PAGE_LLC_* [
LLC = last level cache,
* = some sort
of attribute which
decides LLC behaviour]
Thanks,
Lijo
>
> Christian.
>
> Am 10.05.22 um
23:21 schrieb Marek
Olšák:
>> A better name
would be:
>>
AMDGPU_VM_PAGE_BYPASS_MALL
>>
>> Marek
>>
>> On Fri, May 6,
2022 at 7:23 AM
Christian König
>> <ckoenig.leichtzumerken@gmail.com>
wrote:
>>
>> Add the
AMDGPU_VM_NOALLOC flag
to let userspace control
MALL
>> allocation.
>>
>> Only
compile tested!
>>
>>
Signed-off-by:
Christian König <christian.koenig@amd.com>
>> ---
>>
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
| 2 ++
>>
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
| 3 +++
>>
drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
| 3 +++
>>
include/uapi/drm/amdgpu_drm.h
| 2 ++
>> 4 files
changed, 10
insertions(+)
>>
>> diff --git
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index
bf97d8f07f57..d8129626581f
100644
>> ---
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -650,6
+650,8 @@ uint64_t
amdgpu_gem_va_map_flags(struct
>>
amdgpu_device *adev,
uint32_t flags)
>>
pte_flag |=
AMDGPU_PTE_WRITEABLE;
>> if
(flags &
AMDGPU_VM_PAGE_PRT)
>>
pte_flag |=
AMDGPU_PTE_PRT;
>> + if
(flags &
AMDGPU_VM_PAGE_NOALLOC)
>> +
pte_flag |=
AMDGPU_PTE_NOALLOC;
>>
>> if
(adev->gmc.gmc_funcs->map_mtype)
>>
pte_flag |=
amdgpu_gmc_map_mtype(adev,
>> diff --git
a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>>
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index
b8c79789e1e4..9077dfccaf3c
100644
>> ---
a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -613,6
+613,9 @@ static void
gmc_v10_0_get_vm_pte(struct
>>
amdgpu_device *adev,
>>
*flags &=
~AMDGPU_PTE_MTYPE_NV10_MASK;
>>
*flags |=
(mapping->flags &
AMDGPU_PTE_MTYPE_NV10_MASK);
>>
>> +
*flags &=
~AMDGPU_PTE_NOALLOC;
>> +
*flags |=
(mapping->flags &
AMDGPU_PTE_NOALLOC);
>> +
>> if
(mapping->flags &
AMDGPU_PTE_PRT) {
>>
*flags |=
AMDGPU_PTE_PRT;
>>
*flags |=
AMDGPU_PTE_SNOOPED;
>> diff --git
a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>>
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>> index
8d733eeac556..32ee56adb602
100644
>> ---
a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>> +++
b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
>> @@ -508,6
+508,9 @@ static void
gmc_v11_0_get_vm_pte(struct
>>
amdgpu_device *adev,
>>
*flags &=
~AMDGPU_PTE_MTYPE_NV10_MASK;
>>
*flags |=
(mapping->flags &
AMDGPU_PTE_MTYPE_NV10_MASK);
>>
>> +
*flags &=
~AMDGPU_PTE_NOALLOC;
>> +
*flags |=
(mapping->flags &
AMDGPU_PTE_NOALLOC);
>> +
>> if
(mapping->flags &
AMDGPU_PTE_PRT) {
>>
*flags |=
AMDGPU_PTE_PRT;
>>
*flags |=
AMDGPU_PTE_SNOOPED;
>> diff --git
a/include/uapi/drm/amdgpu_drm.h
>>
b/include/uapi/drm/amdgpu_drm.h
>> index
57b9d8f0133a..9d71d6330687
100644
>> ---
a/include/uapi/drm/amdgpu_drm.h
>> +++
b/include/uapi/drm/amdgpu_drm.h
>> @@ -533,6
+533,8 @@ struct
drm_amdgpu_gem_op {
>> #define
AMDGPU_VM_MTYPE_UC
(4 << 5)
>> /* Use
Read Write MTYPE instead
of default MTYPE */
>> #define
AMDGPU_VM_MTYPE_RW
(5 << 5)
>> +/* don't
allocate MALL */
>> +#define
AMDGPU_VM_PAGE_NOALLOC
(1 << 9)
>>
>> struct
drm_amdgpu_gem_va {
>> /**
GEM object handle */
>> --
>> 2.25.1
>>
>