* [RESEND PATCH v2 1/5] drm/msm: add MSM_BO_CACHED_COHERENT
In reply to: [RESEND PATCH v2 0/5] drm/msm: support for host-cached BOs
From: Jonathan Marek @ 2020-11-14 15:17 UTC
To: freedreno, hch
Cc: Jordan Crouse, Rob Clark, Sean Paul, David Airlie, Daniel Vetter,
Bjorn Andersson, Shawn Guo, Sharat Masetty,
open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
Add a new cache mode for creating coherent host-cached BOs.
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Reviewed-by: Jordan Crouse <jcrouse@codeaurora.org>
---
drivers/gpu/drm/msm/adreno/adreno_device.c | 1 +
drivers/gpu/drm/msm/msm_drv.h | 1 +
drivers/gpu/drm/msm/msm_gem.c | 8 ++++++++
include/uapi/drm/msm_drm.h | 5 ++---
4 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 58e03b20e1c7..21c9bc954f38 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -410,6 +410,7 @@ static int adreno_bind(struct device *dev, struct device *master, void *data)
config.rev.minor, config.rev.patchid);
priv->is_a2xx = config.rev.core == 2;
+ priv->has_cached_coherent = config.rev.core >= 6;
gpu = info->init(drm);
if (IS_ERR(gpu)) {
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index f33281ac7913..22ebecb28349 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -168,6 +168,7 @@ struct msm_drm_private {
struct msm_file_private *lastctx;
/* gpu is only set on open(), but we need this info earlier */
bool is_a2xx;
+ bool has_cached_coherent;
struct drm_fb_helper *fbdev;
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 04be4cfcccc1..3d8254b5de16 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -420,6 +420,9 @@ static int msm_gem_pin_iova(struct drm_gem_object *obj,
if (msm_obj->flags & MSM_BO_MAP_PRIV)
prot |= IOMMU_PRIV;
+ if (msm_obj->flags & MSM_BO_CACHED_COHERENT)
+ prot |= IOMMU_CACHE;
+
WARN_ON(!mutex_is_locked(&msm_obj->lock));
if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED))
@@ -1004,6 +1007,7 @@ static int msm_gem_new_impl(struct drm_device *dev,
uint32_t size, uint32_t flags,
struct drm_gem_object **obj)
{
+ struct msm_drm_private *priv = dev->dev_private;
struct msm_gem_object *msm_obj;
switch (flags & MSM_BO_CACHE_MASK) {
@@ -1011,6 +1015,10 @@ static int msm_gem_new_impl(struct drm_device *dev,
case MSM_BO_CACHED:
case MSM_BO_WC:
break;
+ case MSM_BO_CACHED_COHERENT:
+ if (priv->has_cached_coherent)
+ break;
+ /* fallthrough */
default:
DRM_DEV_ERROR(dev->dev, "invalid cache flag: %x\n",
(flags & MSM_BO_CACHE_MASK));
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index a6c1f3eb2623..474497e8743a 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -94,12 +94,11 @@ struct drm_msm_param {
#define MSM_BO_CACHED 0x00010000
#define MSM_BO_WC 0x00020000
#define MSM_BO_UNCACHED 0x00040000
+#define MSM_BO_CACHED_COHERENT 0x080000
#define MSM_BO_FLAGS (MSM_BO_SCANOUT | \
MSM_BO_GPU_READONLY | \
- MSM_BO_CACHED | \
- MSM_BO_WC | \
- MSM_BO_UNCACHED)
+ MSM_BO_CACHE_MASK)
struct drm_msm_gem_new {
__u64 size; /* in */
--
2.26.1
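For illustration (not part of the patch): a userspace allocation using the
new flag could look like the sketch below, assuming an already-open DRM
render node fd; error handling and the libdrm ioctl-restart wrapper are
omitted.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/msm_drm.h>

/* Sketch: allocate a host-cached, I/O-coherent BO. Per this patch the
 * kernel rejects the flag with -EINVAL unless has_cached_coherent is
 * set (a6xx and newer). */
static int new_cached_coherent_bo(int fd, uint64_t size, uint32_t *handle)
{
	struct drm_msm_gem_new req = {
		.size = size,
		.flags = MSM_BO_CACHED_COHERENT,
	};

	if (ioctl(fd, DRM_IOCTL_MSM_GEM_NEW, &req))
		return -1;

	*handle = req.handle;
	return 0;
}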
* [RESEND PATCH v2 2/5] dma-direct: add dma_direct_bypass() to force direct ops
From: Jonathan Marek @ 2020-11-14 15:17 UTC
To: freedreno, hch
Cc: Marek Szyprowski, Robin Murphy, open list:DMA MAPPING HELPERS, open list
Add a function to force direct ops and disable swiotlb for a deivce.
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
---
include/linux/dma-direct.h | 9 +++++++++
kernel/dma/direct.c | 23 +++++++++++++++++++++++
2 files changed, 32 insertions(+)
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 18aade195884..41f57e1b7aa5 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -124,4 +124,13 @@ int dma_direct_supported(struct device *dev, u64 mask);
dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
size_t size, enum dma_data_direction dir, unsigned long attrs);
+#if IS_ENABLED(CONFIG_DMA_OPS_BYPASS) && !IS_ENABLED(CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED)
+int dma_direct_bypass(struct device *dev);
+#else
+static inline int dma_direct_bypass(struct device *dev)
+{
+ return -EIO;
+}
+#endif
+
#endif /* _LINUX_DMA_DIRECT_H */
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 06c111544f61..304a5a77cccb 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -548,3 +548,26 @@ int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start,
return 0;
}
EXPORT_SYMBOL_GPL(dma_direct_set_offset);
+
+/**
+ * dma_direct_bypass - always use direct mapping path for device
+ * @dev: device pointer
+ *
+ * Note: this also bypasses swiotlb. Not available for arch with
+ * force_dma_unencrypted(), since this doesn't deal with that.
+ */
+#if IS_ENABLED(CONFIG_DMA_OPS_BYPASS) && !IS_ENABLED(CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED)
+int dma_direct_bypass(struct device *dev)
+{
+ int ret;
+
+ ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+ if (ret)
+ return ret;
+
+ dev->bus_dma_limit = DMA_BIT_MASK(64);
+ dev->dma_ops_bypass = true;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dma_direct_bypass);
+#endif
--
2.26.1
* Re: [RESEND PATCH v2 2/5] dma-direct: add dma_direct_bypass() to force direct ops
From: Christoph Hellwig @ 2020-11-14 16:21 UTC
To: Jonathan Marek
Cc: freedreno, hch, Marek Szyprowski, Robin Murphy,
open list:DMA MAPPING HELPERS, open list
On Sat, Nov 14, 2020 at 10:17:10AM -0500, Jonathan Marek wrote:
> Add a function to force direct ops and disable swiotlb for a deivce.
s/deivce/device/
> +#if IS_ENABLED(CONFIG_DMA_OPS_BYPASS) && !IS_ENABLED(CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED)
overly long line.
> +#if IS_ENABLED(CONFIG_DMA_OPS_BYPASS) && !IS_ENABLED(CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED)
Again.
> +int dma_direct_bypass(struct device *dev)
> +{
> + int ret;
> +
> + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
> + if (ret)
> + return ret;
> +
> + dev->bus_dma_limit = DMA_BIT_MASK(64);
> + dev->dma_ops_bypass = true;
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(dma_direct_bypass);
But more importantly ARCH_HAS_FORCE_DMA_UNENCRYPTED is just a compile
time flag. With this you disable the functionality for all the usual
x86, s390 and powerpc configs, while only a tiny number of systems
actually need it for bounce buffering. But I think you can just
trivially check force_dma_unencrypted instead. We do not need an extra
Kconfig symbol for this trivial helper.
Also the helper is misnamed and misplaced. The semantics have nothing
to do with dma-direct, the fact that it uses the ops bypass is an
implementation detail. It really fits into the iommu code, as it
allows the driver to use the IOMMU API for IOVA management, while using
the DMA API for cache management. So it should be named to reflect
that, and also grow a kerneldoc comment explaining how it will be used.
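A minimal sketch of the direction suggested above (untested, keeping the
dma_direct_bypass() name only for illustration): check
force_dma_unencrypted() at runtime instead of keying off
ARCH_HAS_FORCE_DMA_UNENCRYPTED at build time, while the
CONFIG_DMA_OPS_BYPASS dependency would remain.

int dma_direct_bypass(struct device *dev)
{
	int ret;

	/* devices that must bounce through unencrypted memory cannot
	 * use the direct-ops bypass */
	if (force_dma_unencrypted(dev))
		return -EIO;

	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
	if (ret)
		return ret;

	dev->bus_dma_limit = DMA_BIT_MASK(64);
	dev->dma_ops_bypass = true;
	return 0;
}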
* [RESEND PATCH v2 3/5] drm/msm: call dma_direct_bypass()
From: Jonathan Marek @ 2020-11-14 15:17 UTC
To: freedreno, hch
Cc: Rob Clark, Sean Paul, David Airlie, Daniel Vetter,
open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
Always use direct dma ops and no swiotlb.
Note: arm-smmu-qcom already avoids creating iommu dma ops, but not
everything uses arm-smmu-qcom and this also sets the dma mask.
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
---
drivers/gpu/drm/msm/Kconfig | 1 +
drivers/gpu/drm/msm/msm_drv.c | 8 +++++---
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index e5816b498494..07c50405970a 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -20,6 +20,7 @@ config DRM_MSM
select SND_SOC_HDMI_CODEC if SND_SOC
select SYNC_FILE
select PM_OPP
+ select DMA_OPS_BYPASS
help
DRM/KMS driver for MSM/snapdragon.
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 49685571dc0e..bae48afca82e 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -6,6 +6,7 @@
*/
#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/kthread.h>
#include <linux/uaccess.h>
#include <uapi/linux/sched/types.h>
@@ -1288,10 +1289,11 @@ static int msm_pdev_probe(struct platform_device *pdev)
if (ret)
goto fail;
- /* on all devices that I am aware of, iommu's which can map
- * any address the cpu can see are used:
+ /* always use direct dma ops and no swiotlb
+ * note: arm-smmu-qcom already avoids creating iommu dma ops, but
+ * not everything uses arm-smmu-qcom and this also sets the dma mask
*/
- ret = dma_set_mask_and_coherent(&pdev->dev, ~0);
+ ret = dma_direct_bypass(&pdev->dev);
if (ret)
goto fail;
--
2.26.1
* Re: [RESEND PATCH v2 3/5] drm/msm: call dma_direct_bypass()
From: Christoph Hellwig @ 2020-11-14 16:21 UTC
To: Jonathan Marek
Cc: freedreno, hch, Rob Clark, Sean Paul, David Airlie,
Daniel Vetter, open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
On Sat, Nov 14, 2020 at 10:17:11AM -0500, Jonathan Marek wrote:
> Always use direct dma ops and no swiotlb.
>
> Note: arm-smmu-qcom already avoids creating iommu dma ops, but not
> everything uses arm-smmu-qcom and this also sets the dma mask.
>
> Signed-off-by: Jonathan Marek <jonathan@marek.ca>
> ---
> drivers/gpu/drm/msm/Kconfig | 1 +
> drivers/gpu/drm/msm/msm_drv.c | 8 +++++---
> 2 files changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
> index e5816b498494..07c50405970a 100644
> --- a/drivers/gpu/drm/msm/Kconfig
> +++ b/drivers/gpu/drm/msm/Kconfig
> @@ -20,6 +20,7 @@ config DRM_MSM
> select SND_SOC_HDMI_CODEC if SND_SOC
> select SYNC_FILE
> select PM_OPP
> + select DMA_OPS_BYPASS
> help
> DRM/KMS driver for MSM/snapdragon.
>
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 49685571dc0e..bae48afca82e 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -6,6 +6,7 @@
> */
>
> #include <linux/dma-mapping.h>
> +#include <linux/dma-direct.h>
> #include <linux/kthread.h>
> #include <linux/uaccess.h>
> #include <uapi/linux/sched/types.h>
> @@ -1288,10 +1289,11 @@ static int msm_pdev_probe(struct platform_device *pdev)
> if (ret)
> goto fail;
>
> - /* on all devices that I am aware of, iommu's which can map
> - * any address the cpu can see are used:
> + /* always use direct dma ops and no swiotlb
Again, an implementation detail. Comments should not explain details
obvious from the code (especially layers away) but the intent.
* [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Jonathan Marek @ 2020-11-14 15:17 UTC
To: freedreno, hch
Cc: Rob Clark, Sean Paul, David Airlie, Daniel Vetter,
open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
This makes it possible to use the non-coherent cached MSM_BO_CACHED mode,
which otherwise doesn't provide any method for cleaning/invalidating the
cache to sync with the device.
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
---
drivers/gpu/drm/msm/msm_drv.c | 21 +++++++++++++++++++++
drivers/gpu/drm/msm/msm_drv.h | 2 ++
drivers/gpu/drm/msm/msm_gem.c | 23 +++++++++++++++++++++++
include/uapi/drm/msm_drm.h | 20 ++++++++++++++++++++
4 files changed, 66 insertions(+)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index bae48afca82e..3f17acdf6594 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -959,6 +959,26 @@ static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data,
return msm_submitqueue_remove(file->driver_priv, id);
}
+static int msm_ioctl_gem_sync_cache(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_msm_gem_sync_cache *args = data;
+ struct drm_gem_object *obj;
+
+ if (args->flags & ~MSM_GEM_SYNC_CACHE_FLAGS)
+ return -EINVAL;
+
+ obj = drm_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ msm_gem_sync_cache(obj, args->flags, args->offset, args->end);
+
+ drm_gem_object_put(obj);
+
+ return 0;
+}
+
static const struct drm_ioctl_desc msm_ioctls[] = {
DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new, DRM_RENDER_ALLOW),
@@ -971,6 +991,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW, msm_ioctl_submitqueue_new, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(MSM_GEM_SYNC_CACHE, msm_ioctl_gem_sync_cache, DRM_RENDER_ALLOW),
};
static const struct vm_operations_struct vm_ops = {
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 22ebecb28349..f170f843010e 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -318,6 +318,8 @@ void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu);
void msm_gem_active_put(struct drm_gem_object *obj);
int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout);
int msm_gem_cpu_fini(struct drm_gem_object *obj);
+void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
+ size_t range_start, size_t range_end);
void msm_gem_free_object(struct drm_gem_object *obj);
int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file,
uint32_t size, uint32_t flags, uint32_t *handle, char *name);
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 3d8254b5de16..039738696f9a 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -797,6 +797,29 @@ int msm_gem_cpu_fini(struct drm_gem_object *obj)
return 0;
}
+void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
+ size_t range_start, size_t range_end)
+{
+ struct msm_gem_object *msm_obj = to_msm_bo(obj);
+ struct device *dev = msm_obj->base.dev->dev;
+
+ /* exit early if get_pages() hasn't been called yet */
+ if (!msm_obj->pages)
+ return;
+
+ /* TODO: sync only the specified range */
+
+ if (flags & MSM_GEM_SYNC_FOR_DEVICE) {
+ dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
+ msm_obj->sgt->nents, DMA_TO_DEVICE);
+ }
+
+ if (flags & MSM_GEM_SYNC_FOR_CPU) {
+ dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
+ msm_obj->sgt->nents, DMA_FROM_DEVICE);
+ }
+}
+
#ifdef CONFIG_DEBUG_FS
static void describe_fence(struct dma_fence *fence, const char *type,
struct seq_file *m)
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 474497e8743a..c8288f328528 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -319,6 +319,24 @@ struct drm_msm_submitqueue_query {
__u32 pad;
};
+/*
+ * Host cache maintenance (relevant for MSM_BO_CACHED)
+ * driver may both clean/invalidate (flush) for clean
+ */
+
+#define MSM_GEM_SYNC_FOR_DEVICE 0x1
+#define MSM_GEM_SYNC_FOR_CPU 0x2
+
+#define MSM_GEM_SYNC_CACHE_FLAGS (MSM_GEM_SYNC_FOR_DEVICE | \
+ MSM_GEM_SYNC_FOR_CPU)
+
+struct drm_msm_gem_sync_cache {
+ __u32 handle;
+ __u32 flags;
+ __u64 offset;
+ __u64 end; /* offset + size */
+};
+
#define DRM_MSM_GET_PARAM 0x00
/* placeholder:
#define DRM_MSM_SET_PARAM 0x01
@@ -336,6 +354,7 @@ struct drm_msm_submitqueue_query {
#define DRM_MSM_SUBMITQUEUE_NEW 0x0A
#define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B
#define DRM_MSM_SUBMITQUEUE_QUERY 0x0C
+#define DRM_MSM_GEM_SYNC_CACHE 0x0D
#define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
#define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
@@ -348,6 +367,7 @@ struct drm_msm_submitqueue_query {
#define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
#define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
#define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
+#define DRM_IOCTL_MSM_GEM_SYNC_CACHE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_SYNC_CACHE, struct drm_msm_gem_sync_cache)
#if defined(__cplusplus)
}
--
2.26.1
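For illustration (not part of the patch): userspace that wrote to a
MSM_BO_CACHED buffer through a CPU mapping would clean the cache before
handing the range to the GPU, roughly as in the sketch below; fd, handle,
offset and size are assumed to come from the caller. Note that in this v2
the kernel syncs the whole BO regardless of the range, as the review
below points out.

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/msm_drm.h>

/* Sketch: clean CPU writes in [offset, offset + size) for device access */
static int sync_for_device(int fd, uint32_t handle,
			   uint64_t offset, uint64_t size)
{
	struct drm_msm_gem_sync_cache sync = {
		.handle = handle,
		.flags = MSM_GEM_SYNC_FOR_DEVICE,
		.offset = offset,
		.end = offset + size,
	};

	return ioctl(fd, DRM_IOCTL_MSM_GEM_SYNC_CACHE, &sync);
}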
* Re: [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Christoph Hellwig @ 2020-11-14 16:24 UTC
To: Jonathan Marek
Cc: freedreno, hch, Rob Clark, Sean Paul, David Airlie,
Daniel Vetter, open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
On Sat, Nov 14, 2020 at 10:17:12AM -0500, Jonathan Marek wrote:
> +void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
> + size_t range_start, size_t range_end)
> +{
> + struct msm_gem_object *msm_obj = to_msm_bo(obj);
> + struct device *dev = msm_obj->base.dev->dev;
> +
> + /* exit early if get_pages() hasn't been called yet */
> + if (!msm_obj->pages)
> + return;
> +
> + /* TODO: sync only the specified range */
> +
> + if (flags & MSM_GEM_SYNC_FOR_DEVICE) {
> + dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
> + msm_obj->sgt->nents, DMA_TO_DEVICE);
> + }
> +
> + if (flags & MSM_GEM_SYNC_FOR_CPU) {
> + dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
> + msm_obj->sgt->nents, DMA_FROM_DEVICE);
> + }
Splitting this helper from the only caller is rather strange, especially
with the two unused arguments. And I think the way this is specified
to take a range, but ignoring it is actively dangerous. User space will
rely on it syncing everything sooner or later and then you are stuck.
So just define a sync all primitive for now, and if you really need a
range sync and have actually implemented it add a new ioctl for that.
* Re: [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Rob Clark @ 2020-11-14 18:46 UTC
To: Christoph Hellwig
Cc: Jonathan Marek, freedreno, Sean Paul, David Airlie,
Daniel Vetter, open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
On Sat, Nov 14, 2020 at 8:24 AM Christoph Hellwig <hch@lst.de> wrote:
>
> On Sat, Nov 14, 2020 at 10:17:12AM -0500, Jonathan Marek wrote:
> > +void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
> > + size_t range_start, size_t range_end)
> > +{
> > + struct msm_gem_object *msm_obj = to_msm_bo(obj);
> > + struct device *dev = msm_obj->base.dev->dev;
> > +
> > + /* exit early if get_pages() hasn't been called yet */
> > + if (!msm_obj->pages)
> > + return;
> > +
> > + /* TODO: sync only the specified range */
> > +
> > + if (flags & MSM_GEM_SYNC_FOR_DEVICE) {
> > + dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
> > + msm_obj->sgt->nents, DMA_TO_DEVICE);
> > + }
> > +
> > + if (flags & MSM_GEM_SYNC_FOR_CPU) {
> > + dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
> > + msm_obj->sgt->nents, DMA_FROM_DEVICE);
> > + }
>
> Splitting this helper from the only caller is rather strange, especially
> with the two unused arguments. And I think the way this is specified
> to take a range, but ignoring it is actively dangerous. User space will
> rely on it syncing everything sooner or later and then you are stuck.
> So just define a sync all primitive for now, and if you really need a
> range sync and have actually implemented it add a new ioctl for that.
We do already have a split of ioctl "layer" which enforces valid ioctl
params, etc, and gem (or other) module code which is called by the
ioctl func. So I think it is fine to keep this split here. (Also, I
think at some point there will be a uring type of ioctl alternative
which would re-use the same gem func.)
But I do agree that the range should be respected or added later..
drm_ioctl() dispatch is well prepared for extending ioctls.
And I assume there should be some validation that the range is aligned
to cache-line? Or can we flush a partial cache line?
BR,
-R
* Re: [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Jonathan Marek @ 2020-11-14 18:54 UTC
To: Rob Clark, Christoph Hellwig
Cc: freedreno, Sean Paul, David Airlie, Daniel Vetter,
open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
On 11/14/20 1:46 PM, Rob Clark wrote:
> On Sat, Nov 14, 2020 at 8:24 AM Christoph Hellwig <hch@lst.de> wrote:
>>
>> On Sat, Nov 14, 2020 at 10:17:12AM -0500, Jonathan Marek wrote:
>>> +void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
>>> + size_t range_start, size_t range_end)
>>> +{
>>> + struct msm_gem_object *msm_obj = to_msm_bo(obj);
>>> + struct device *dev = msm_obj->base.dev->dev;
>>> +
>>> + /* exit early if get_pages() hasn't been called yet */
>>> + if (!msm_obj->pages)
>>> + return;
>>> +
>>> + /* TODO: sync only the specified range */
>>> +
>>> + if (flags & MSM_GEM_SYNC_FOR_DEVICE) {
>>> + dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
>>> + msm_obj->sgt->nents, DMA_TO_DEVICE);
>>> + }
>>> +
>>> + if (flags & MSM_GEM_SYNC_FOR_CPU) {
>>> + dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
>>> + msm_obj->sgt->nents, DMA_FROM_DEVICE);
>>> + }
>>
>> Splitting this helper from the only caller is rather strange, especially
>> with the two unused arguments. And I think the way this is specified
>> to take a range, but ignoring it is actively dangerous. User space will
>> rely on it syncing everything sooner or later and then you are stuck.
>> So just define a sync all primitive for now, and if you really need a
>> range sync and have actually implemented it add a new ioctl for that.
>
> We do already have a split of ioctl "layer" which enforces valid ioctl
> params, etc, and gem (or other) module code which is called by the
> ioctl func. So I think it is fine to keep this split here. (Also, I
> think at some point there will be a uring type of ioctl alternative
> which would re-use the same gem func.)
>
> But I do agree that the range should be respected or added later..
> drm_ioctl() dispatch is well prepared for extending ioctls.
>
> And I assume there should be some validation that the range is aligned
> to cache-line? Or can we flush a partial cache line?
>
The range is intended to be "sync at least this range", so that
userspace doesn't have to worry about details like that.
> BR,
> -R
>
* Re: [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Rob Clark @ 2020-11-14 19:39 UTC
To: Jonathan Marek
Cc: Christoph Hellwig, freedreno, Sean Paul, David Airlie,
Daniel Vetter, open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
On Sat, Nov 14, 2020 at 10:58 AM Jonathan Marek <jonathan@marek.ca> wrote:
>
> On 11/14/20 1:46 PM, Rob Clark wrote:
> > On Sat, Nov 14, 2020 at 8:24 AM Christoph Hellwig <hch@lst.de> wrote:
> >>
> >> On Sat, Nov 14, 2020 at 10:17:12AM -0500, Jonathan Marek wrote:
> >>> +void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
> >>> + size_t range_start, size_t range_end)
> >>> +{
> >>> + struct msm_gem_object *msm_obj = to_msm_bo(obj);
> >>> + struct device *dev = msm_obj->base.dev->dev;
> >>> +
> >>> + /* exit early if get_pages() hasn't been called yet */
> >>> + if (!msm_obj->pages)
> >>> + return;
> >>> +
> >>> + /* TODO: sync only the specified range */
> >>> +
> >>> + if (flags & MSM_GEM_SYNC_FOR_DEVICE) {
> >>> + dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
> >>> + msm_obj->sgt->nents, DMA_TO_DEVICE);
> >>> + }
> >>> +
> >>> + if (flags & MSM_GEM_SYNC_FOR_CPU) {
> >>> + dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
> >>> + msm_obj->sgt->nents, DMA_FROM_DEVICE);
> >>> + }
> >>
> >> Splitting this helper from the only caller is rather strange, especially
> >> with the two unused arguments. And I think the way this is specified
> >> to take a range, but ignoring it is actively dangerous. User space will
> >> rely on it syncing everything sooner or later and then you are stuck.
> >> So just define a sync all primitive for now, and if you really need a
> >> range sync and have actually implemented it add a new ioctl for that.
> >
> > We do already have a split of ioctl "layer" which enforces valid ioctl
> > params, etc, and gem (or other) module code which is called by the
> > ioctl func. So I think it is fine to keep this split here. (Also, I
> > think at some point there will be a uring type of ioctl alternative
> > which would re-use the same gem func.)
> >
> > But I do agree that the range should be respected or added later..
> > drm_ioctl() dispatch is well prepared for extending ioctls.
> >
> > And I assume there should be some validation that the range is aligned
> > to cache-line? Or can we flush a partial cache line?
> >
>
> The range is intended to be "sync at least this range", so that
> userspace doesn't have to worry about details like that.
>
I don't think userspace can *not* worry about details like that.
Consider a case where the cpu and gpu are simultaneously accessing
different parts of a buffer (for ex, sub-allocation). There needs to
be cache-line separation between the two.
BR,
-R
* Re: [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Jonathan Marek @ 2020-11-14 20:07 UTC
To: Rob Clark
Cc: Christoph Hellwig, freedreno, Sean Paul, David Airlie,
Daniel Vetter, open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
On 11/14/20 2:39 PM, Rob Clark wrote:
> On Sat, Nov 14, 2020 at 10:58 AM Jonathan Marek <jonathan@marek.ca> wrote:
>>
>> On 11/14/20 1:46 PM, Rob Clark wrote:
>>> On Sat, Nov 14, 2020 at 8:24 AM Christoph Hellwig <hch@lst.de> wrote:
>>>>
>>>> On Sat, Nov 14, 2020 at 10:17:12AM -0500, Jonathan Marek wrote:
>>>>> +void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
>>>>> + size_t range_start, size_t range_end)
>>>>> +{
>>>>> + struct msm_gem_object *msm_obj = to_msm_bo(obj);
>>>>> + struct device *dev = msm_obj->base.dev->dev;
>>>>> +
>>>>> + /* exit early if get_pages() hasn't been called yet */
>>>>> + if (!msm_obj->pages)
>>>>> + return;
>>>>> +
>>>>> + /* TODO: sync only the specified range */
>>>>> +
>>>>> + if (flags & MSM_GEM_SYNC_FOR_DEVICE) {
>>>>> + dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
>>>>> + msm_obj->sgt->nents, DMA_TO_DEVICE);
>>>>> + }
>>>>> +
>>>>> + if (flags & MSM_GEM_SYNC_FOR_CPU) {
>>>>> + dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
>>>>> + msm_obj->sgt->nents, DMA_FROM_DEVICE);
>>>>> + }
>>>>
>>>> Splitting this helper from the only caller is rather strange, especially
>>>> with the two unused arguments. And I think the way this is specified
>>>> to take a range, but ignoring it is actively dangerous. User space will
>>>> rely on it syncing everything sooner or later and then you are stuck.
>>>> So just define a sync all primitive for now, and if you really need a
>>>> range sync and have actually implemented it add a new ioctl for that.
>>>
>>> We do already have a split of ioctl "layer" which enforces valid ioctl
>>> params, etc, and gem (or other) module code which is called by the
>>> ioctl func. So I think it is fine to keep this split here. (Also, I
>>> think at some point there will be a uring type of ioctl alternative
>>> which would re-use the same gem func.)
>>>
>>> But I do agree that the range should be respected or added later..
>>> drm_ioctl() dispatch is well prepared for extending ioctls.
>>>
>>> And I assume there should be some validation that the range is aligned
>>> to cache-line? Or can we flush a partial cache line?
>>>
>>
>> The range is intended to be "sync at least this range", so that
>> userspace doesn't have to worry about details like that.
>>
>
> I don't think userspace can *not* worry about details like that.
> Consider a case where the cpu and gpu are simultaneously accessing
> different parts of a buffer (for ex, sub-allocation). There needs to
> be cache-line separation between the two.
>
Right.. and it also seems like we can't get away with just
flushing/invalidating the whole thing.
qcom's vulkan driver has nonCoherentAtomSize=1, and it looks like
dma_sync_single_for_cpu() does deal in some way with the partial cache
line case, although I'm not sure that means we can have a
nonCoherentAtomSize=1.
> BR,
> -R
>
* Re: [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Rob Clark @ 2020-11-14 20:48 UTC
To: Jonathan Marek
Cc: Christoph Hellwig, freedreno, Sean Paul, David Airlie,
Daniel Vetter, open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
On Sat, Nov 14, 2020 at 12:10 PM Jonathan Marek <jonathan@marek.ca> wrote:
>
> On 11/14/20 2:39 PM, Rob Clark wrote:
> > On Sat, Nov 14, 2020 at 10:58 AM Jonathan Marek <jonathan@marek.ca> wrote:
> >>
> >> On 11/14/20 1:46 PM, Rob Clark wrote:
> >>> On Sat, Nov 14, 2020 at 8:24 AM Christoph Hellwig <hch@lst.de> wrote:
> >>>>
> >>>> On Sat, Nov 14, 2020 at 10:17:12AM -0500, Jonathan Marek wrote:
> >>>>> +void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
> >>>>> + size_t range_start, size_t range_end)
> >>>>> +{
> >>>>> + struct msm_gem_object *msm_obj = to_msm_bo(obj);
> >>>>> + struct device *dev = msm_obj->base.dev->dev;
> >>>>> +
> >>>>> + /* exit early if get_pages() hasn't been called yet */
> >>>>> + if (!msm_obj->pages)
> >>>>> + return;
> >>>>> +
> >>>>> + /* TODO: sync only the specified range */
> >>>>> +
> >>>>> + if (flags & MSM_GEM_SYNC_FOR_DEVICE) {
> >>>>> + dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
> >>>>> + msm_obj->sgt->nents, DMA_TO_DEVICE);
> >>>>> + }
> >>>>> +
> >>>>> + if (flags & MSM_GEM_SYNC_FOR_CPU) {
> >>>>> + dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
> >>>>> + msm_obj->sgt->nents, DMA_FROM_DEVICE);
> >>>>> + }
> >>>>
> >>>> Splitting this helper from the only caller is rather strange, especially
> >>>> with the two unused arguments. And I think the way this is specified
> >>>> to take a range, but ignoring it is actively dangerous. User space will
> >>>> rely on it syncing everything sooner or later and then you are stuck.
> >>>> So just define a sync all primitive for now, and if you really need a
> >>>> range sync and have actually implemented it add a new ioctl for that.
> >>>
> >>> We do already have a split of ioctl "layer" which enforces valid ioctl
> >>> params, etc, and gem (or other) module code which is called by the
> >>> ioctl func. So I think it is fine to keep this split here. (Also, I
> >>> think at some point there will be a uring type of ioctl alternative
> >>> which would re-use the same gem func.)
> >>>
> >>> But I do agree that the range should be respected or added later..
> >>> drm_ioctl() dispatch is well prepared for extending ioctls.
> >>>
> >>> And I assume there should be some validation that the range is aligned
> >>> to cache-line? Or can we flush a partial cache line?
> >>>
> >>
> >> The range is intended to be "sync at least this range", so that
> >> userspace doesn't have to worry about details like that.
> >>
> >
> > I don't think userspace can *not* worry about details like that.
> > Consider a case where the cpu and gpu are simultaneously accessing
> > different parts of a buffer (for ex, sub-allocation). There needs to
> > be cache-line separation between the two.
> >
>
> Right.. and it also seems like we can't get away with just
> flushing/invalidating the whole thing.
>
> qcom's vulkan driver has nonCoherentAtomSize=1, and it looks like
> dma_sync_single_for_cpu() does deal in some way with the partial cache
> line case, although I'm not sure that means we can have a
> nonCoherentAtomSize=1.
>
flush/inv the whole thing could be a useful first step, or at least I
can think of some uses for it. But if it isn't useful for how vk sees
the world, then maybe we should just implement the range properly from
the get-go. (And I *think* requiring the range to be aligned to
cacheline boundaries.. it is always easier from a kernel uabi PoV to
loosen restrictions later than the other way around.)
BR,
-R
* Re: [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Christoph Hellwig @ 2020-11-16 17:33 UTC
To: Jonathan Marek
Cc: Rob Clark, Christoph Hellwig, freedreno, Sean Paul, David Airlie,
Daniel Vetter, open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
On Sat, Nov 14, 2020 at 03:07:20PM -0500, Jonathan Marek wrote:
> qcom's vulkan driver has nonCoherentAtomSize=1, and it looks like
> dma_sync_single_for_cpu() does deal in some way with the partial cache line
> case, although I'm not sure that means we can have a nonCoherentAtomSize=1.
No, it doesn't. You need to ensure ownership is managed at
dma_get_cache_alignment() granularity.
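A sketch of what enforcing that granularity could look like in
msm_ioctl_gem_sync_cache() (hypothetical, not in the posted patch):

	/* reject ranges that are not cache-line granular */
	unsigned int align = dma_get_cache_alignment();

	if (!IS_ALIGNED(args->offset, align) ||
	    !IS_ALIGNED(args->end, align))
		return -EINVAL;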
* Re: [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Rob Clark @ 2020-11-16 17:50 UTC
To: Christoph Hellwig
Cc: Jonathan Marek, freedreno, Sean Paul, David Airlie,
Daniel Vetter, open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
On Mon, Nov 16, 2020 at 9:33 AM Christoph Hellwig <hch@lst.de> wrote:
>
> On Sat, Nov 14, 2020 at 03:07:20PM -0500, Jonathan Marek wrote:
> > qcom's vulkan driver has nonCoherentAtomSize=1, and it looks like
> > dma_sync_single_for_cpu() does deal in some way with the partial cache line
> > case, although I'm not sure that means we can have a nonCoherentAtomSize=1.
>
> No, it doesn't. You need to ensure ownership is managed at
> dma_get_cache_alignment() granularity.
my guess is nonCoherentAtomSize=1 only works in the case of cache
coherent buffers
BR,
-R
* Re: [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Jonathan Marek @ 2020-11-16 17:52 UTC
To: Rob Clark, Christoph Hellwig, Jordan Crouse
Cc: freedreno, Sean Paul, David Airlie, Daniel Vetter,
open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
On 11/16/20 12:50 PM, Rob Clark wrote:
> On Mon, Nov 16, 2020 at 9:33 AM Christoph Hellwig <hch@lst.de> wrote:
>>
>> On Sat, Nov 14, 2020 at 03:07:20PM -0500, Jonathan Marek wrote:
>>> qcom's vulkan driver has nonCoherentAtomSize=1, and it looks like
>>> dma_sync_single_for_cpu() does deal in some way with the partial cache line
>>> case, although I'm not sure that means we can have a nonCoherentAtomSize=1.
>>
>> No, it doesn't. You need to ensure ownership is managed at
>> dma_get_cache_alignment() granularity.
>
> my guess is nonCoherentAtomSize=1 only works in the case of cache
> coherent buffers
>
nonCoherentAtomSize doesn't apply to coherent memory (as the name
implies), I guess qcom's driver is just wrong about having
nonCoherentAtomSize=1.
Jordan just mentioned there is at least one conformance test for this, I
wonder if it just doesn't test it well enough, or just doesn't test the
non-coherent memory type?
* Re: [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Rob Clark @ 2020-11-29 18:51 UTC
To: Jonathan Marek
Cc: Christoph Hellwig, Jordan Crouse, freedreno, Sean Paul,
David Airlie, Daniel Vetter,
open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
On Mon, Nov 16, 2020 at 9:55 AM Jonathan Marek <jonathan@marek.ca> wrote:
>
> On 11/16/20 12:50 PM, Rob Clark wrote:
> > On Mon, Nov 16, 2020 at 9:33 AM Christoph Hellwig <hch@lst.de> wrote:
> >>
> >> On Sat, Nov 14, 2020 at 03:07:20PM -0500, Jonathan Marek wrote:
> >>> qcom's vulkan driver has nonCoherentAtomSize=1, and it looks like
> >>> dma_sync_single_for_cpu() does deal in some way with the partial cache line
> >>> case, although I'm not sure that means we can have a nonCoherentAtomSize=1.
> >>
> >> No, it doesn't. You need to ensure ownership is managed at
> >> dma_get_cache_alignment() granularity.
> >
> > my guess is nonCoherentAtomSize=1 only works in the case of cache
> > coherent buffers
> >
>
> nonCoherentAtomSize doesn't apply to coherent memory (as the name
> implies), I guess qcom's driver is just wrong about having
> nonCoherentAtomSize=1.
>
> Jordan just mentioned there is at least one conformance test for this, I
> wonder if it just doesn't test it well enough, or just doesn't test the
> non-coherent memory type?
I was *assuming* (but could be wrong) that Jordan was referring to an
opencl cts test?
At any rate, it is sounding like you should add a
`MSM_PARAM_CACHE_ALIGNMENT` type of param that returns
dma_get_cache_alignment(), and then properly implement offset/end.
BR,
-R
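A sketch of that suggestion (the param number 0x0a is hypothetical,
chosen only for illustration; the real value would be assigned in
msm_drm.h alongside the existing MSM_PARAM_* defines):

/* hypothetical uapi addition */
#define MSM_PARAM_CACHE_ALIGNMENT 0x0a

/* and in adreno_get_param()'s switch, something like: */
case MSM_PARAM_CACHE_ALIGNMENT:
	*value = dma_get_cache_alignment();
	return 0;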
* Re: [Freedreno] [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Jordan Crouse @ 2020-11-16 17:27 UTC
To: Rob Clark
Cc: Jonathan Marek, David Airlie, freedreno, open list,
open list:DRM DRIVER FOR MSM ADRENO GPU, Daniel Vetter,
open list:DRM DRIVER FOR MSM ADRENO GPU, Sean Paul,
Christoph Hellwig
On Sat, Nov 14, 2020 at 11:39:45AM -0800, Rob Clark wrote:
> On Sat, Nov 14, 2020 at 10:58 AM Jonathan Marek <jonathan@marek.ca> wrote:
> >
> > On 11/14/20 1:46 PM, Rob Clark wrote:
> > > On Sat, Nov 14, 2020 at 8:24 AM Christoph Hellwig <hch@lst.de> wrote:
> > >>
> > >> On Sat, Nov 14, 2020 at 10:17:12AM -0500, Jonathan Marek wrote:
> > >>> +void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
> > >>> + size_t range_start, size_t range_end)
> > >>> +{
> > >>> + struct msm_gem_object *msm_obj = to_msm_bo(obj);
> > >>> + struct device *dev = msm_obj->base.dev->dev;
> > >>> +
> > >>> + /* exit early if get_pages() hasn't been called yet */
> > >>> + if (!msm_obj->pages)
> > >>> + return;
> > >>> +
> > >>> + /* TODO: sync only the specified range */
> > >>> +
> > >>> + if (flags & MSM_GEM_SYNC_FOR_DEVICE) {
> > >>> + dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
> > >>> + msm_obj->sgt->nents, DMA_TO_DEVICE);
> > >>> + }
> > >>> +
> > >>> + if (flags & MSM_GEM_SYNC_FOR_CPU) {
> > >>> + dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
> > >>> + msm_obj->sgt->nents, DMA_FROM_DEVICE);
> > >>> + }
> > >>
> > >> Splitting this helper from the only caller is rather strange, especially
> > >> with the two unused arguments. And I think the way this is specified
> > >> to take a range, but ignoring it is actively dangerous. User space will
> > >> rely on it syncing everything sooner or later and then you are stuck.
> > >> So just define a sync all primitive for now, and if you really need a
> > >> range sync and have actually implemented it add a new ioctl for that.
> > >
> > > We do already have a split of ioctl "layer" which enforces valid ioctl
> > > params, etc, and gem (or other) module code which is called by the
> > > ioctl func. So I think it is fine to keep this split here. (Also, I
> > > think at some point there will be a uring type of ioctl alternative
> > > which would re-use the same gem func.)
> > >
> > > But I do agree that the range should be respected or added later..
> > > drm_ioctl() dispatch is well prepared for extending ioctls.
> > >
> > > And I assume there should be some validation that the range is aligned
> > > to cache-line? Or can we flush a partial cache line?
> > >
> >
> > The range is intended to be "sync at least this range", so that
> > userspace doesn't have to worry about details like that.
> >
>
> I don't think userspace can *not* worry about details like that.
> Consider a case where the cpu and gpu are simultaneously accessing
> different parts of a buffer (for ex, sub-allocation). There needs to
> be cache-line separation between the two.
There is at least one compute conformance test that I can think of that does
exactly this.
Jordan
> BR,
> -R
--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
* Re: [Freedreno] [RESEND PATCH v2 4/5] drm/msm: add DRM_MSM_GEM_SYNC_CACHE for non-coherent cache maintenance
From: Jordan Crouse @ 2020-11-16 17:25 UTC
To: Jonathan Marek
Cc: freedreno, hch, David Airlie,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list,
open list:DRM DRIVER FOR MSM ADRENO GPU, Rob Clark,
Daniel Vetter, Sean Paul
On Sat, Nov 14, 2020 at 10:17:12AM -0500, Jonathan Marek wrote:
> This makes it possible to use the non-coherent cached MSM_BO_CACHED mode,
> which otherwise doesn't provide any method for cleaning/invalidating the
> cache to sync with the device.
>
> Signed-off-by: Jonathan Marek <jonathan@marek.ca>
> ---
> drivers/gpu/drm/msm/msm_drv.c | 21 +++++++++++++++++++++
> drivers/gpu/drm/msm/msm_drv.h | 2 ++
> drivers/gpu/drm/msm/msm_gem.c | 23 +++++++++++++++++++++++
> include/uapi/drm/msm_drm.h | 20 ++++++++++++++++++++
> 4 files changed, 66 insertions(+)
>
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index bae48afca82e..3f17acdf6594 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -959,6 +959,26 @@ static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data,
> return msm_submitqueue_remove(file->driver_priv, id);
> }
>
> +static int msm_ioctl_gem_sync_cache(struct drm_device *dev, void *data,
> + struct drm_file *file)
> +{
> + struct drm_msm_gem_sync_cache *args = data;
> + struct drm_gem_object *obj;
> +
> + if (args->flags & ~MSM_GEM_SYNC_CACHE_FLAGS)
> + return -EINVAL;
> +
> + obj = drm_gem_object_lookup(file, args->handle);
> + if (!obj)
> + return -ENOENT;
> +
> + msm_gem_sync_cache(obj, args->flags, args->offset, args->end);
> +
> + drm_gem_object_put(obj);
> +
> + return 0;
> +}
> +
> static const struct drm_ioctl_desc msm_ioctls[] = {
> DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new, DRM_RENDER_ALLOW),
> @@ -971,6 +991,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
> DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW, msm_ioctl_submitqueue_new, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(MSM_GEM_SYNC_CACHE, msm_ioctl_gem_sync_cache, DRM_RENDER_ALLOW),
> };
>
> static const struct vm_operations_struct vm_ops = {
> diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
> index 22ebecb28349..f170f843010e 100644
> --- a/drivers/gpu/drm/msm/msm_drv.h
> +++ b/drivers/gpu/drm/msm/msm_drv.h
> @@ -318,6 +318,8 @@ void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu);
> void msm_gem_active_put(struct drm_gem_object *obj);
> int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout);
> int msm_gem_cpu_fini(struct drm_gem_object *obj);
> +void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
> + size_t range_start, size_t range_end);
> void msm_gem_free_object(struct drm_gem_object *obj);
> int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file,
> uint32_t size, uint32_t flags, uint32_t *handle, char *name);
> diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
> index 3d8254b5de16..039738696f9a 100644
> --- a/drivers/gpu/drm/msm/msm_gem.c
> +++ b/drivers/gpu/drm/msm/msm_gem.c
> @@ -797,6 +797,29 @@ int msm_gem_cpu_fini(struct drm_gem_object *obj)
> return 0;
> }
>
> +void msm_gem_sync_cache(struct drm_gem_object *obj, uint32_t flags,
> + size_t range_start, size_t range_end)
> +{
> + struct msm_gem_object *msm_obj = to_msm_bo(obj);
> + struct device *dev = msm_obj->base.dev->dev;
> +
On a6xx targets with I/O coherency you can skip this (assuming that the IOMMU
flag has been set for this buffer). I'm not sure if one of the other patches
already does the bypass but I mention it here since this is the point of
no-return.
Jordan
> + /* exit early if get_pages() hasn't been called yet */
> + if (!msm_obj->pages)
> + return;
> +
> + /* TODO: sync only the specified range */
> +
> + if (flags & MSM_GEM_SYNC_FOR_DEVICE) {
> + dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
> + msm_obj->sgt->nents, DMA_TO_DEVICE);
> + }
> +
> + if (flags & MSM_GEM_SYNC_FOR_CPU) {
> + dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
> + msm_obj->sgt->nents, DMA_FROM_DEVICE);
> + }
> +}
> +
> #ifdef CONFIG_DEBUG_FS
> static void describe_fence(struct dma_fence *fence, const char *type,
> struct seq_file *m)
> diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> index 474497e8743a..c8288f328528 100644
> --- a/include/uapi/drm/msm_drm.h
> +++ b/include/uapi/drm/msm_drm.h
> @@ -319,6 +319,24 @@ struct drm_msm_submitqueue_query {
> __u32 pad;
> };
>
> +/*
> + * Host cache maintenance (relevant for MSM_BO_CACHED)
> + * driver may both clean/invalidate (flush) for clean
> + */
> +
> +#define MSM_GEM_SYNC_FOR_DEVICE 0x1
> +#define MSM_GEM_SYNC_FOR_CPU 0x2
> +
> +#define MSM_GEM_SYNC_CACHE_FLAGS (MSM_GEM_SYNC_FOR_DEVICE | \
> + MSM_GEM_SYNC_FOR_CPU)
> +
> +struct drm_msm_gem_sync_cache {
> + __u32 handle;
> + __u32 flags;
> + __u64 offset;
> + __u64 end; /* offset + size */
> +};
> +
> #define DRM_MSM_GET_PARAM 0x00
> /* placeholder:
> #define DRM_MSM_SET_PARAM 0x01
> @@ -336,6 +354,7 @@ struct drm_msm_submitqueue_query {
> #define DRM_MSM_SUBMITQUEUE_NEW 0x0A
> #define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B
> #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C
> +#define DRM_MSM_GEM_SYNC_CACHE 0x0D
>
> #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
> #define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
> @@ -348,6 +367,7 @@ struct drm_msm_submitqueue_query {
> #define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
> #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
> #define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
> +#define DRM_IOCTL_MSM_GEM_SYNC_CACHE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_SYNC_CACHE, struct drm_msm_gem_sync_cache)
>
> #if defined(__cplusplus)
> }
> --
> 2.26.1
>
--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
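A sketch of that early-out at the top of msm_gem_sync_cache() (assuming,
as noted above, that the buffer was mapped with IOMMU_CACHE on an I/O
coherent target):

	/* I/O-coherent buffers need no manual cache maintenance */
	if (msm_obj->flags & MSM_BO_CACHED_COHERENT)
		return;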
* [RESEND PATCH v2 5/5] drm/msm: bump up the uapi version
From: Jonathan Marek @ 2020-11-14 15:17 UTC
To: freedreno, hch
Cc: Rob Clark, Sean Paul, David Airlie, Daniel Vetter,
open list:DRM DRIVER FOR MSM ADRENO GPU,
open list:DRM DRIVER FOR MSM ADRENO GPU, open list
Increase the minor version to indicate the presence of new features.
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
---
drivers/gpu/drm/msm/msm_drv.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 3f17acdf6594..7230d3c0eee5 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -39,9 +39,10 @@
* GEM object's debug name
* - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
* - 1.6.0 - Syncobj support
+ * - 1.7.0 - MSM_BO_CACHED_COHERENT and DRM_IOCTL_MSM_GEM_SYNC_CACHE
*/
#define MSM_VERSION_MAJOR 1
-#define MSM_VERSION_MINOR 6
+#define MSM_VERSION_MINOR 7
#define MSM_VERSION_PATCHLEVEL 0
static const struct drm_mode_config_funcs mode_config_funcs = {
--
2.26.1