All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mikko Perttunen <mperttunen@nvidia.com>
To: thierry.reding@gmail.com, jonathanh@nvidia.com, digetx@gmail.com,
	airlied@linux.ie, daniel@ffwll.ch
Cc: dri-devel@lists.freedesktop.org, linux-tegra@vger.kernel.org,
	Mikko Perttunen <mperttunen@nvidia.com>
Subject: [PATCH v7 13/15] drm/tegra: Implement job submission part of new UAPI
Date: Thu, 10 Jun 2021 14:04:54 +0300	[thread overview]
Message-ID: <20210610110456.3692391-14-mperttunen@nvidia.com> (raw)
In-Reply-To: <20210610110456.3692391-1-mperttunen@nvidia.com>

Implement the job submission IOCTL with a minimum feature set.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
---
v7:
* Allocate gather BO with DMA API to get page-aligned
  memory
* Add error prints to a few places where they were missing
v6:
* Remove sgt bypass path in gather_bo - this would cause
  cache maintenance to be skipped and is unnecessary in
  general.
* Changes related to moving to using syncpoint IDs
* Add syncobj related code
* Print warning on submit failure describing the issue
* Use ARCH_DMA_ADDR_T_64BIT to check if that is indeed
  the case
* Add support for relative syncpoint wait
* Use pm_runtime_resume_and_get
* Only try to resume engines that support runtime PM
* Removed uapi subdirectory
* Don't use "copy_err" variables for copy_from_user
  return value
* Fix setting of blocklinear flag
v5:
* Add 16K size limit to copies from userspace.
* Guard RELOC_BLOCKLINEAR flag handling to only exist in ARM64
  to prevent oversized shift on 32-bit platforms.
v4:
* Remove all features that are not strictly necessary.
* Split into two patches.
v3:
* Remove WRITE_RELOC. Relocations are now patched implicitly
  when patching is needed.
* Directly call PM runtime APIs on devices instead of using
  power_on/power_off callbacks.
* Remove incorrect mutex unlock in tegra_drm_ioctl_channel_open
* Use XA_FLAGS_ALLOC1 instead of XA_FLAGS_ALLOC
* Accommodate for removal of timeout field and inlining of
  syncpt_incrs array.
* Copy entire user arrays at a time instead of going through
  elements one-by-one.
* Implement waiting of DMA reservations.
* Split out gather_bo implementation into a separate file.
* Fix length parameter passed to sg_init_one in gather_bo
* Cosmetic cleanup.
---
 drivers/gpu/drm/tegra/Makefile    |   2 +
 drivers/gpu/drm/tegra/drm.c       |   4 +-
 drivers/gpu/drm/tegra/gather_bo.c |  82 +++++
 drivers/gpu/drm/tegra/gather_bo.h |  24 ++
 drivers/gpu/drm/tegra/submit.c    | 549 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/submit.h    |  17 +
 6 files changed, 677 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tegra/gather_bo.c
 create mode 100644 drivers/gpu/drm/tegra/gather_bo.h
 create mode 100644 drivers/gpu/drm/tegra/submit.c
 create mode 100644 drivers/gpu/drm/tegra/submit.h

diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index 783475ffd943..ab4289d1c991 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -4,6 +4,8 @@ ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG
 tegra-drm-y := \
 	drm.o \
 	uapi.o \
+	submit.o \
+	gather_bo.o \
 	gem.o \
 	fb.o \
 	dp.o \
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 07c332a4fa03..bf5cb553d0ae 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -728,6 +728,8 @@ static const struct drm_ioctl_desc tegra_drm_ioctls[] = {
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_UNMAP, tegra_drm_ioctl_channel_unmap,
 			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_SUBMIT, tegra_drm_ioctl_channel_submit,
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_ALLOCATE, tegra_drm_ioctl_syncpoint_allocate,
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_FREE, tegra_drm_ioctl_syncpoint_free,
@@ -852,7 +854,7 @@ static void tegra_debugfs_init(struct drm_minor *minor)
 
 static const struct drm_driver tegra_drm_driver = {
 	.driver_features = DRIVER_MODESET | DRIVER_GEM |
-			   DRIVER_ATOMIC | DRIVER_RENDER,
+			   DRIVER_ATOMIC | DRIVER_RENDER | DRIVER_SYNCOBJ,
 	.open = tegra_drm_open,
 	.postclose = tegra_drm_postclose,
 	.lastclose = drm_fb_helper_lastclose,
diff --git a/drivers/gpu/drm/tegra/gather_bo.c b/drivers/gpu/drm/tegra/gather_bo.c
new file mode 100644
index 000000000000..a60edcf6737a
--- /dev/null
+++ b/drivers/gpu/drm/tegra/gather_bo.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+#include "gather_bo.h"
+
+static struct host1x_bo *gather_bo_get(struct host1x_bo *host_bo)
+{
+	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
+
+	kref_get(&bo->ref);
+
+	return host_bo;
+}
+
+static void gather_bo_release(struct kref *ref)
+{
+	struct gather_bo *bo = container_of(ref, struct gather_bo, ref);
+
+	dma_free_attrs(bo->drm_dev, bo->gather_data_words * 4, bo->gather_data,
+		       bo->gather_data_dma, 0);
+	kfree(bo);
+}
+
+void gather_bo_put(struct host1x_bo *host_bo)
+{
+	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
+
+	kref_put(&bo->ref, gather_bo_release);
+}
+
+static struct sg_table *
+gather_bo_pin(struct device *dev, struct host1x_bo *host_bo, dma_addr_t *phys)
+{
+	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
+	struct sg_table *sgt;
+	int err;
+
+	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt)
+		return ERR_PTR(-ENOMEM);
+
+	err = dma_get_sgtable(bo->drm_dev, sgt, bo->gather_data,
+			      bo->gather_data_dma, bo->gather_data_words * 4);
+	if (err) {
+		kfree(sgt);
+		return ERR_PTR(err);
+	}
+
+	return sgt;
+}
+
+static void gather_bo_unpin(struct device *dev, struct sg_table *sgt)
+{
+	if (sgt) {
+		sg_free_table(sgt);
+		kfree(sgt);
+	}
+}
+
+static void *gather_bo_mmap(struct host1x_bo *host_bo)
+{
+	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
+
+	return bo->gather_data;
+}
+
+static void gather_bo_munmap(struct host1x_bo *host_bo, void *addr)
+{
+}
+
+const struct host1x_bo_ops gather_bo_ops = {
+	.get = gather_bo_get,
+	.put = gather_bo_put,
+	.pin = gather_bo_pin,
+	.unpin = gather_bo_unpin,
+	.mmap = gather_bo_mmap,
+	.munmap = gather_bo_munmap,
+};
diff --git a/drivers/gpu/drm/tegra/gather_bo.h b/drivers/gpu/drm/tegra/gather_bo.h
new file mode 100644
index 000000000000..e7b6564eafbb
--- /dev/null
+++ b/drivers/gpu/drm/tegra/gather_bo.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#ifndef _TEGRA_DRM_SUBMIT_GATHER_BO_H
+#define _TEGRA_DRM_SUBMIT_GATHER_BO_H
+
+#include <linux/host1x.h>
+#include <linux/kref.h>
+
+struct gather_bo {
+	struct host1x_bo base;
+
+	struct kref ref;
+
+	struct device *drm_dev;
+	u32 *gather_data;
+	dma_addr_t gather_data_dma;
+	size_t gather_data_words;
+};
+
+extern const struct host1x_bo_ops gather_bo_ops;
+void gather_bo_put(struct host1x_bo *host_bo);
+
+#endif
diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
new file mode 100644
index 000000000000..e3200c10ca9e
--- /dev/null
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#include <linux/dma-fence-array.h>
+#include <linux/file.h>
+#include <linux/host1x.h>
+#include <linux/iommu.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/nospec.h>
+#include <linux/pm_runtime.h>
+#include <linux/sync_file.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_syncobj.h>
+
+#include "drm.h"
+#include "gather_bo.h"
+#include "gem.h"
+#include "submit.h"
+#include "uapi.h"
+
+#define SUBMIT_ERR(ctx, fmt, ...) \
+	dev_err_ratelimited(ctx->client->base.dev, \
+		"%s: job submission failed: " fmt "\n", \
+		current->comm __VA_OPT__(,) __VA_ARGS__)
+
+static struct tegra_drm_mapping *
+tegra_drm_mapping_get(struct tegra_drm_context *ctx, u32 id)
+{
+	struct tegra_drm_mapping *mapping;
+
+	xa_lock(&ctx->mappings);
+	mapping = xa_load(&ctx->mappings, id);
+	if (mapping)
+		kref_get(&mapping->ref);
+	xa_unlock(&ctx->mappings);
+
+	return mapping;
+}
+
+static void *alloc_copy_user_array(void __user *from, size_t count, size_t size)
+{
+	size_t copy_len;
+	void *data;
+
+	if (check_mul_overflow(count, size, &copy_len))
+		return ERR_PTR(-EINVAL);
+
+	if (copy_len > 0x4000)
+		return ERR_PTR(-E2BIG);
+
+	data = kvmalloc(copy_len, GFP_KERNEL);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	if (copy_from_user(data, from, copy_len)) {
+		kvfree(data);
+		return ERR_PTR(-EFAULT);
+	}
+
+	return data;
+}
+
+static int submit_copy_gather_data(struct gather_bo **pbo,
+				   struct device *drm_dev,
+				   struct tegra_drm_context *ctx,
+				   struct drm_tegra_channel_submit *args)
+{
+	struct gather_bo *bo;
+	size_t copy_len;
+
+	if (args->gather_data_words == 0) {
+		SUBMIT_ERR(ctx, "gather_data_words cannot be zero");
+		return -EINVAL;
+	}
+
+	if (check_mul_overflow((size_t)args->gather_data_words, (size_t)4, &copy_len)) {
+		SUBMIT_ERR(ctx, "gather_data_words is too large");
+		return -EINVAL;
+	}
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo) {
+		SUBMIT_ERR(ctx, "failed to allocate memory for bo info");
+		return -ENOMEM;
+	}
+
+	kref_init(&bo->ref);
+	host1x_bo_init(&bo->base, &gather_bo_ops);
+
+	bo->drm_dev = drm_dev;
+	bo->gather_data =
+		dma_alloc_attrs(drm_dev, copy_len, &bo->gather_data_dma,
+				GFP_KERNEL | __GFP_NOWARN, 0);
+	if (!bo->gather_data) {
+		SUBMIT_ERR(ctx, "failed to allocate memory for gather data");
+		kfree(bo);
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(bo->gather_data,
+			   u64_to_user_ptr(args->gather_data_ptr), copy_len)) {
+		SUBMIT_ERR(ctx, "failed to copy gather data from userspace");
+		dma_free_attrs(drm_dev, copy_len, bo->gather_data, bo->gather_data_dma, 0);
+		kfree(bo);
+		return -EFAULT;
+	}
+
+	bo->gather_data_words = args->gather_data_words;
+
+	*pbo = bo;
+
+	return 0;
+}
+
+static int submit_write_reloc(struct tegra_drm_context *ctx,
+			      struct gather_bo *bo,
+			      struct drm_tegra_submit_buf *buf,
+			      struct tegra_drm_mapping *mapping)
+{
+	/* TODO check that target_offset is within bounds */
+	dma_addr_t iova = mapping->iova + buf->reloc.target_offset;
+	u32 written_ptr;
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	if (buf->flags & DRM_TEGRA_SUBMIT_BUF_RELOC_BLOCKLINEAR)
+		iova |= BIT_ULL(39);
+#endif
+
+	written_ptr = iova >> buf->reloc.shift;
+
+	if (buf->reloc.gather_offset_words >= bo->gather_data_words) {
+		SUBMIT_ERR(ctx,
+			   "relocation has too large gather offset (%u vs gather length %zu)",
+			   buf->reloc.gather_offset_words, bo->gather_data_words);
+		return -EINVAL;
+	}
+
+	buf->reloc.gather_offset_words = array_index_nospec(
+		buf->reloc.gather_offset_words, bo->gather_data_words);
+
+	bo->gather_data[buf->reloc.gather_offset_words] = written_ptr;
+
+	return 0;
+}
+
+static int submit_process_bufs(struct tegra_drm_context *ctx,
+			       struct gather_bo *bo,
+			       struct drm_tegra_channel_submit *args,
+			       struct tegra_drm_submit_data *job_data)
+{
+	struct tegra_drm_used_mapping *mappings;
+	struct drm_tegra_submit_buf *bufs;
+	int err;
+	u32 i;
+
+	bufs = alloc_copy_user_array(u64_to_user_ptr(args->bufs_ptr),
+				     args->num_bufs, sizeof(*bufs));
+	if (IS_ERR(bufs)) {
+		SUBMIT_ERR(ctx, "failed to copy bufs array from userspace");
+		return PTR_ERR(bufs);
+	}
+
+	mappings = kcalloc(args->num_bufs, sizeof(*mappings), GFP_KERNEL);
+	if (!mappings) {
+		SUBMIT_ERR(ctx, "failed to allocate memory for mapping info");
+		err = -ENOMEM;
+		goto done;
+	}
+
+	for (i = 0; i < args->num_bufs; i++) {
+		struct drm_tegra_submit_buf *buf = &bufs[i];
+		struct tegra_drm_mapping *mapping;
+
+		if (buf->flags & ~DRM_TEGRA_SUBMIT_BUF_RELOC_BLOCKLINEAR) {
+			SUBMIT_ERR(ctx, "invalid flag specified for buf");
+			err = -EINVAL;
+			goto drop_refs;
+		}
+
+		mapping = tegra_drm_mapping_get(ctx, buf->mapping_id);
+		if (!mapping) {
+			SUBMIT_ERR(ctx, "invalid mapping_id for buf '%u'",
+				   buf->mapping_id);
+			err = -EINVAL;
+			goto drop_refs;
+		}
+
+		err = submit_write_reloc(ctx, bo, buf, mapping);
+		if (err) {
+			tegra_drm_mapping_put(mapping);
+			goto drop_refs;
+		}
+
+		mappings[i].mapping = mapping;
+		mappings[i].flags = buf->flags;
+	}
+
+	job_data->used_mappings = mappings;
+	job_data->num_used_mappings = i;
+
+	err = 0;
+
+	goto done;
+
+drop_refs:
+	for (;;) {
+		if (i-- == 0)
+			break;
+
+		tegra_drm_mapping_put(mappings[i].mapping);
+	}
+
+	kfree(mappings);
+	job_data->used_mappings = NULL;
+
+done:
+	kvfree(bufs);
+
+	return err;
+}
+
+static int submit_get_syncpt(struct tegra_drm_context *ctx,
+			     struct host1x_job *job, struct xarray *syncpoints,
+			     struct drm_tegra_channel_submit *args)
+{
+	struct host1x_syncpt *sp;
+
+	if (args->syncpt_incr.flags) {
+		SUBMIT_ERR(ctx, "invalid flag specified for syncpt_incr");
+		return -EINVAL;
+	}
+
+	/* Syncpt ref will be dropped on job release */
+	sp = xa_load(syncpoints, args->syncpt_incr.id);
+	if (!sp) {
+		SUBMIT_ERR(ctx, "syncpoint specified in syncpt_incr was not allocated");
+		return -EINVAL;
+	}
+
+	job->syncpt = host1x_syncpt_get(sp);
+	job->syncpt_incrs = args->syncpt_incr.num_incrs;
+
+	return 0;
+}
+
+static int submit_job_add_gather(struct host1x_job *job,
+				 struct tegra_drm_context *ctx,
+				 struct drm_tegra_submit_cmd_gather_uptr *cmd,
+				 struct gather_bo *bo, u32 *offset,
+				 struct tegra_drm_submit_data *job_data)
+{
+	u32 next_offset;
+
+	if (cmd->reserved[0] || cmd->reserved[1] || cmd->reserved[2]) {
+		SUBMIT_ERR(ctx, "non-zero reserved field in GATHER_UPTR command");
+		return -EINVAL;
+	}
+
+	/* Check for maximum gather size */
+	if (cmd->words > 16383) {
+		SUBMIT_ERR(ctx, "too many words in GATHER_UPTR command");
+		return -EINVAL;
+	}
+
+	if (check_add_overflow(*offset, cmd->words, &next_offset)) {
+		SUBMIT_ERR(ctx, "too many total words in job");
+		return -EINVAL;
+	}
+
+	if (next_offset > bo->gather_data_words) {
+		SUBMIT_ERR(ctx, "GATHER_UPTR command overflows gather data");
+		return -EINVAL;
+	}
+
+	host1x_job_add_gather(job, &bo->base, cmd->words, *offset * 4);
+
+	*offset = next_offset;
+
+	return 0;
+}
+
+static int submit_create_job(struct host1x_job **pjob,
+			     struct tegra_drm_context *ctx,
+			     struct gather_bo *bo,
+			     struct drm_tegra_channel_submit *args,
+			     struct tegra_drm_submit_data *job_data,
+			     struct xarray *syncpoints)
+{
+	struct drm_tegra_submit_cmd *cmds;
+	u32 i, gather_offset = 0, class;
+	struct host1x_job *job;
+	int err;
+
+	/* Set initial class for firewall. */
+	class = ctx->client->base.class;
+
+	cmds = alloc_copy_user_array(u64_to_user_ptr(args->cmds_ptr),
+				     args->num_cmds, sizeof(*cmds));
+	if (IS_ERR(cmds)) {
+		SUBMIT_ERR(ctx, "failed to copy cmds array from userspace");
+		return PTR_ERR(cmds);
+	}
+
+	job = host1x_job_alloc(ctx->channel, args->num_cmds, 0, true);
+	if (!job) {
+		SUBMIT_ERR(ctx, "failed to allocate memory for job");
+		err = -ENOMEM;
+		goto done;
+	}
+
+	err = submit_get_syncpt(ctx, job, syncpoints, args);
+	if (err < 0)
+		goto free_job;
+
+	job->client = &ctx->client->base;
+	job->class = ctx->client->base.class;
+	job->serialize = true;
+
+	for (i = 0; i < args->num_cmds; i++) {
+		struct drm_tegra_submit_cmd *cmd = &cmds[i];
+
+		if (cmd->flags) {
+			SUBMIT_ERR(ctx, "unknown flags given for cmd");
+			err = -EINVAL;
+			goto free_job;
+		}
+
+		if (cmd->type == DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR) {
+			err = submit_job_add_gather(job, ctx,
+						    &cmd->gather_uptr, bo,
+						    &gather_offset, job_data);
+			if (err)
+				goto free_job;
+		} else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT) {
+			if (cmd->wait_syncpt.reserved[0] ||
+			    cmd->wait_syncpt.reserved[1]) {
+				SUBMIT_ERR(ctx, "non-zero reserved value");
+				err = -EINVAL;
+				goto free_job;
+			}
+
+			host1x_job_add_wait(job, cmd->wait_syncpt.id,
+					    cmd->wait_syncpt.threshold,
+					    false,
+					    class);
+		} else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE) {
+			if (cmd->wait_syncpt.reserved[0] ||
+			    cmd->wait_syncpt.reserved[1]) {
+				SUBMIT_ERR(ctx, "non-zero reserved value");
+				err = -EINVAL;
+				goto free_job;
+			}
+
+			if (cmd->wait_syncpt.id != args->syncpt_incr.id) {
+				SUBMIT_ERR(ctx, "syncpoint ID in CMD_WAIT_SYNCPT_RELATIVE is not used by the job");
+				err = -EINVAL;
+				goto free_job;
+			}
+
+			host1x_job_add_wait(job, cmd->wait_syncpt.id,
+					    cmd->wait_syncpt.threshold,
+					    true,
+					    class);
+		} else {
+			SUBMIT_ERR(ctx, "unknown cmd type");
+			err = -EINVAL;
+			goto free_job;
+		}
+	}
+
+	if (gather_offset == 0) {
+		SUBMIT_ERR(ctx, "job must have at least one gather");
+		err = -EINVAL;
+		goto free_job;
+	}
+
+	*pjob = job;
+
+	err = 0;
+	goto done;
+
+free_job:
+	host1x_job_put(job);
+
+done:
+	kvfree(cmds);
+
+	return err;
+}
+
+static void release_job(struct host1x_job *job)
+{
+	struct tegra_drm_client *client =
+		container_of(job->client, struct tegra_drm_client, base);
+	struct tegra_drm_submit_data *job_data = job->user_data;
+	u32 i;
+
+	for (i = 0; i < job_data->num_used_mappings; i++)
+		tegra_drm_mapping_put(job_data->used_mappings[i].mapping);
+
+	kfree(job_data->used_mappings);
+	kfree(job_data);
+
+	if (pm_runtime_enabled(client->base.dev))
+		pm_runtime_put_autosuspend(client->base.dev);
+}
+
+int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data,
+				   struct drm_file *file)
+{
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct drm_tegra_channel_submit *args = data;
+	struct tegra_drm_submit_data *job_data;
+	struct drm_syncobj *syncobj = NULL;
+	struct tegra_drm_context *ctx;
+	struct host1x_job *job;
+	struct gather_bo *bo;
+	u32 i;
+	int err;
+
+	mutex_lock(&fpriv->lock);
+	ctx = xa_load(&fpriv->contexts, args->channel_ctx);
+	if (!ctx) {
+		mutex_unlock(&fpriv->lock);
+		pr_err_ratelimited("%s: %s: invalid channel_ctx '%d'", __func__,
+			current->comm, args->channel_ctx);
+		return -EINVAL;
+	}
+
+	if (args->syncobj_in) {
+		struct dma_fence *fence;
+
+		err = drm_syncobj_find_fence(file, args->syncobj_in, 0, 0, &fence);
+		if (err) {
+			SUBMIT_ERR(ctx, "invalid syncobj_in '%d'", args->syncobj_in);
+			goto unlock;
+		}
+
+		err = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(10000));
+		dma_fence_put(fence);
+		if (err) {
+			SUBMIT_ERR(ctx, "wait for syncobj_in timed out");
+			goto unlock;
+		}
+	}
+
+	if (args->syncobj_out) {
+		syncobj = drm_syncobj_find(file, args->syncobj_out);
+		if (!syncobj) {
+			SUBMIT_ERR(ctx, "invalid syncobj_out '%d'", args->syncobj_out);
+			err = -ENOENT;
+			goto unlock;
+		}
+	}
+
+	/* Allocate gather BO and copy gather words in. */
+	err = submit_copy_gather_data(&bo, drm->dev, ctx, args);
+	if (err)
+		goto unlock;
+
+	job_data = kzalloc(sizeof(*job_data), GFP_KERNEL);
+	if (!job_data) {
+		SUBMIT_ERR(ctx, "failed to allocate memory for job data");
+		err = -ENOMEM;
+		goto put_bo;
+	}
+
+	/* Get data buffer mappings and do relocation patching. */
+	err = submit_process_bufs(ctx, bo, args, job_data);
+	if (err)
+		goto free_job_data;
+
+	/* Allocate host1x_job and add gathers and waits to it. */
+	err = submit_create_job(&job, ctx, bo, args, job_data,
+				&fpriv->syncpoints);
+	if (err)
+		goto free_job_data;
+
+	/* Map gather data for Host1x. */
+	err = host1x_job_pin(job, ctx->client->base.dev);
+	if (err) {
+		SUBMIT_ERR(ctx, "failed to pin job: %d", err);
+		goto put_job;
+	}
+
+	/* Boot engine. */
+	if (pm_runtime_enabled(ctx->client->base.dev)) {
+		err = pm_runtime_resume_and_get(ctx->client->base.dev);
+		if (err < 0) {
+			SUBMIT_ERR(ctx, "could not power up engine: %d", err);
+			goto unpin_job;
+		}
+	}
+
+	job->user_data = job_data;
+	job->release = release_job;
+	job->timeout = 10000;
+
+	/*
+	 * job_data is now part of job reference counting, so don't release
+	 * it from here.
+	 */
+	job_data = NULL;
+
+	/* Submit job to hardware. */
+	err = host1x_job_submit(job);
+	if (err) {
+		SUBMIT_ERR(ctx, "host1x job submission failed: %d", err);
+		goto unpin_job;
+	}
+
+	/* Return postfences to userspace and add fences to DMA reservations. */
+	args->syncpt_incr.fence_value = job->syncpt_end;
+
+	if (syncobj) {
+		struct dma_fence *fence = host1x_fence_create(job->syncpt, job->syncpt_end);
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			SUBMIT_ERR(ctx, "failed to create postfence: %d", err);
+		}
+
+		drm_syncobj_replace_fence(syncobj, fence);
+	}
+
+	goto put_job;
+
+unpin_job:
+	host1x_job_unpin(job);
+put_job:
+	host1x_job_put(job);
+free_job_data:
+	if (job_data && job_data->used_mappings) {
+		for (i = 0; i < job_data->num_used_mappings; i++)
+			tegra_drm_mapping_put(job_data->used_mappings[i].mapping);
+		kfree(job_data->used_mappings);
+	}
+	if (job_data)
+		kfree(job_data);
+put_bo:
+	gather_bo_put(&bo->base);
+unlock:
+	if (syncobj)
+		drm_syncobj_put(syncobj);
+	mutex_unlock(&fpriv->lock);
+	return err;
+}
diff --git a/drivers/gpu/drm/tegra/submit.h b/drivers/gpu/drm/tegra/submit.h
new file mode 100644
index 000000000000..0a165e9e4bda
--- /dev/null
+++ b/drivers/gpu/drm/tegra/submit.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#ifndef _TEGRA_DRM_UAPI_SUBMIT_H
+#define _TEGRA_DRM_UAPI_SUBMIT_H
+
+struct tegra_drm_used_mapping {
+	struct tegra_drm_mapping *mapping;
+	u32 flags;
+};
+
+struct tegra_drm_submit_data {
+	struct tegra_drm_used_mapping *used_mappings;
+	u32 num_used_mappings;
+};
+
+#endif
-- 
2.30.1


WARNING: multiple messages have this Message-ID (diff)
From: Mikko Perttunen <mperttunen@nvidia.com>
To: thierry.reding@gmail.com, jonathanh@nvidia.com, digetx@gmail.com,
	airlied@linux.ie, daniel@ffwll.ch
Cc: linux-tegra@vger.kernel.org, dri-devel@lists.freedesktop.org,
	Mikko Perttunen <mperttunen@nvidia.com>
Subject: [PATCH v7 13/15] drm/tegra: Implement job submission part of new UAPI
Date: Thu, 10 Jun 2021 14:04:54 +0300	[thread overview]
Message-ID: <20210610110456.3692391-14-mperttunen@nvidia.com> (raw)
In-Reply-To: <20210610110456.3692391-1-mperttunen@nvidia.com>

Implement the job submission IOCTL with a minimum feature set.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
---
v7:
* Allocate gather BO with DMA API to get page-aligned
  memory
* Add error prints to a few places where they were missing
v6:
* Remove sgt bypass path in gather_bo - this would cause
  cache maintenance to be skipped and is unnecessary in
  general.
* Changes related to moving to using syncpoint IDs
* Add syncobj related code
* Print warning on submit failure describing the issue
* Use ARCH_DMA_ADDR_T_64BIT to check if that is indeed
  the case
* Add support for relative syncpoint wait
* Use pm_runtime_resume_and_get
* Only try to resume engines that support runtime PM
* Removed uapi subdirectory
* Don't use "copy_err" variables for copy_from_user
  return value
* Fix setting of blocklinear flag
v5:
* Add 16K size limit to copies from userspace.
* Guard RELOC_BLOCKLINEAR flag handling to only exist in ARM64
  to prevent oversized shift on 32-bit platforms.
v4:
* Remove all features that are not strictly necessary.
* Split into two patches.
v3:
* Remove WRITE_RELOC. Relocations are now patched implicitly
  when patching is needed.
* Directly call PM runtime APIs on devices instead of using
  power_on/power_off callbacks.
* Remove incorrect mutex unlock in tegra_drm_ioctl_channel_open
* Use XA_FLAGS_ALLOC1 instead of XA_FLAGS_ALLOC
* Accommodate for removal of timeout field and inlining of
  syncpt_incrs array.
* Copy entire user arrays at a time instead of going through
  elements one-by-one.
* Implement waiting of DMA reservations.
* Split out gather_bo implementation into a separate file.
* Fix length parameter passed to sg_init_one in gather_bo
* Cosmetic cleanup.
---
 drivers/gpu/drm/tegra/Makefile    |   2 +
 drivers/gpu/drm/tegra/drm.c       |   4 +-
 drivers/gpu/drm/tegra/gather_bo.c |  82 +++++
 drivers/gpu/drm/tegra/gather_bo.h |  24 ++
 drivers/gpu/drm/tegra/submit.c    | 549 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/submit.h    |  17 +
 6 files changed, 677 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/tegra/gather_bo.c
 create mode 100644 drivers/gpu/drm/tegra/gather_bo.h
 create mode 100644 drivers/gpu/drm/tegra/submit.c
 create mode 100644 drivers/gpu/drm/tegra/submit.h

diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index 783475ffd943..ab4289d1c991 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -4,6 +4,8 @@ ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG
 tegra-drm-y := \
 	drm.o \
 	uapi.o \
+	submit.o \
+	gather_bo.o \
 	gem.o \
 	fb.o \
 	dp.o \
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 07c332a4fa03..bf5cb553d0ae 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -728,6 +728,8 @@ static const struct drm_ioctl_desc tegra_drm_ioctls[] = {
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_UNMAP, tegra_drm_ioctl_channel_unmap,
 			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_SUBMIT, tegra_drm_ioctl_channel_submit,
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_ALLOCATE, tegra_drm_ioctl_syncpoint_allocate,
 			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_FREE, tegra_drm_ioctl_syncpoint_free,
@@ -852,7 +854,7 @@ static void tegra_debugfs_init(struct drm_minor *minor)
 
 static const struct drm_driver tegra_drm_driver = {
 	.driver_features = DRIVER_MODESET | DRIVER_GEM |
-			   DRIVER_ATOMIC | DRIVER_RENDER,
+			   DRIVER_ATOMIC | DRIVER_RENDER | DRIVER_SYNCOBJ,
 	.open = tegra_drm_open,
 	.postclose = tegra_drm_postclose,
 	.lastclose = drm_fb_helper_lastclose,
diff --git a/drivers/gpu/drm/tegra/gather_bo.c b/drivers/gpu/drm/tegra/gather_bo.c
new file mode 100644
index 000000000000..a60edcf6737a
--- /dev/null
+++ b/drivers/gpu/drm/tegra/gather_bo.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+#include "gather_bo.h"
+
+static struct host1x_bo *gather_bo_get(struct host1x_bo *host_bo)
+{
+	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
+
+	kref_get(&bo->ref);
+
+	return host_bo;
+}
+
+static void gather_bo_release(struct kref *ref)
+{
+	struct gather_bo *bo = container_of(ref, struct gather_bo, ref);
+
+	dma_free_attrs(bo->drm_dev, bo->gather_data_words * 4, bo->gather_data,
+		       bo->gather_data_dma, 0);
+	kfree(bo);
+}
+
+void gather_bo_put(struct host1x_bo *host_bo)
+{
+	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
+
+	kref_put(&bo->ref, gather_bo_release);
+}
+
+static struct sg_table *
+gather_bo_pin(struct device *dev, struct host1x_bo *host_bo, dma_addr_t *phys)
+{
+	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
+	struct sg_table *sgt;
+	int err;
+
+	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt)
+		return ERR_PTR(-ENOMEM);
+
+	err = dma_get_sgtable(bo->drm_dev, sgt, bo->gather_data,
+			      bo->gather_data_dma, bo->gather_data_words * 4);
+	if (err) {
+		kfree(sgt);
+		return ERR_PTR(err);
+	}
+
+	return sgt;
+}
+
+static void gather_bo_unpin(struct device *dev, struct sg_table *sgt)
+{
+	if (sgt) {
+		sg_free_table(sgt);
+		kfree(sgt);
+	}
+}
+
+static void *gather_bo_mmap(struct host1x_bo *host_bo)
+{
+	struct gather_bo *bo = container_of(host_bo, struct gather_bo, base);
+
+	return bo->gather_data;
+}
+
+static void gather_bo_munmap(struct host1x_bo *host_bo, void *addr)
+{
+}
+
+const struct host1x_bo_ops gather_bo_ops = {
+	.get = gather_bo_get,
+	.put = gather_bo_put,
+	.pin = gather_bo_pin,
+	.unpin = gather_bo_unpin,
+	.mmap = gather_bo_mmap,
+	.munmap = gather_bo_munmap,
+};
diff --git a/drivers/gpu/drm/tegra/gather_bo.h b/drivers/gpu/drm/tegra/gather_bo.h
new file mode 100644
index 000000000000..e7b6564eafbb
--- /dev/null
+++ b/drivers/gpu/drm/tegra/gather_bo.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#ifndef _TEGRA_DRM_SUBMIT_GATHER_BO_H
+#define _TEGRA_DRM_SUBMIT_GATHER_BO_H
+
+#include <linux/host1x.h>
+#include <linux/kref.h>
+
+struct gather_bo {
+	struct host1x_bo base;
+
+	struct kref ref;
+
+	struct device *drm_dev;
+	u32 *gather_data;
+	dma_addr_t gather_data_dma;
+	size_t gather_data_words;
+};
+
+extern const struct host1x_bo_ops gather_bo_ops;
+void gather_bo_put(struct host1x_bo *host_bo);
+
+#endif
diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
new file mode 100644
index 000000000000..e3200c10ca9e
--- /dev/null
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#include <linux/dma-fence-array.h>
+#include <linux/file.h>
+#include <linux/host1x.h>
+#include <linux/iommu.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/nospec.h>
+#include <linux/pm_runtime.h>
+#include <linux/sync_file.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_syncobj.h>
+
+#include "drm.h"
+#include "gather_bo.h"
+#include "gem.h"
+#include "submit.h"
+#include "uapi.h"
+
+#define SUBMIT_ERR(ctx, fmt, ...) \
+	dev_err_ratelimited(ctx->client->base.dev, \
+		"%s: job submission failed: " fmt "\n", \
+		current->comm __VA_OPT__(,) __VA_ARGS__)
+
+static struct tegra_drm_mapping *
+tegra_drm_mapping_get(struct tegra_drm_context *ctx, u32 id)
+{
+	struct tegra_drm_mapping *mapping;
+
+	xa_lock(&ctx->mappings);
+	mapping = xa_load(&ctx->mappings, id);
+	if (mapping)
+		kref_get(&mapping->ref);
+	xa_unlock(&ctx->mappings);
+
+	return mapping;
+}
+
+static void *alloc_copy_user_array(void __user *from, size_t count, size_t size)
+{
+	size_t copy_len;
+	void *data;
+
+	if (check_mul_overflow(count, size, &copy_len))
+		return ERR_PTR(-EINVAL);
+
+	if (copy_len > 0x4000)
+		return ERR_PTR(-E2BIG);
+
+	data = kvmalloc(copy_len, GFP_KERNEL);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	if (copy_from_user(data, from, copy_len)) {
+		kvfree(data);
+		return ERR_PTR(-EFAULT);
+	}
+
+	return data;
+}
+
+static int submit_copy_gather_data(struct gather_bo **pbo,
+				   struct device *drm_dev,
+				   struct tegra_drm_context *ctx,
+				   struct drm_tegra_channel_submit *args)
+{
+	struct gather_bo *bo;
+	size_t copy_len;
+
+	if (args->gather_data_words == 0) {
+		SUBMIT_ERR(ctx, "gather_data_words cannot be zero");
+		return -EINVAL;
+	}
+
+	if (check_mul_overflow((size_t)args->gather_data_words, (size_t)4, &copy_len)) {
+		SUBMIT_ERR(ctx, "gather_data_words is too large");
+		return -EINVAL;
+	}
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo) {
+		SUBMIT_ERR(ctx, "failed to allocate memory for bo info");
+		return -ENOMEM;
+	}
+
+	kref_init(&bo->ref);
+	host1x_bo_init(&bo->base, &gather_bo_ops);
+
+	bo->drm_dev = drm_dev;
+	bo->gather_data =
+		dma_alloc_attrs(drm_dev, copy_len, &bo->gather_data_dma,
+				GFP_KERNEL | __GFP_NOWARN, 0);
+	if (!bo->gather_data) {
+		SUBMIT_ERR(ctx, "failed to allocate memory for gather data");
+		kfree(bo);
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(bo->gather_data,
+			   u64_to_user_ptr(args->gather_data_ptr), copy_len)) {
+		SUBMIT_ERR(ctx, "failed to copy gather data from userspace");
+		dma_free_attrs(drm_dev, copy_len, bo->gather_data, bo->gather_data_dma, 0);
+		kfree(bo);
+		return -EFAULT;
+	}
+
+	bo->gather_data_words = args->gather_data_words;
+
+	*pbo = bo;
+
+	return 0;
+}
+
+static int submit_write_reloc(struct tegra_drm_context *ctx,
+			      struct gather_bo *bo,
+			      struct drm_tegra_submit_buf *buf,
+			      struct tegra_drm_mapping *mapping)
+{
+	/* TODO check that target_offset is within bounds */
+	dma_addr_t iova = mapping->iova + buf->reloc.target_offset;
+	u32 written_ptr;
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	if (buf->flags & DRM_TEGRA_SUBMIT_BUF_RELOC_BLOCKLINEAR)
+		iova |= BIT_ULL(39);
+#endif
+
+	written_ptr = iova >> buf->reloc.shift;
+
+	if (buf->reloc.gather_offset_words >= bo->gather_data_words) {
+		SUBMIT_ERR(ctx,
+			   "relocation has too large gather offset (%u vs gather length %zu)",
+			   buf->reloc.gather_offset_words, bo->gather_data_words);
+		return -EINVAL;
+	}
+
+	buf->reloc.gather_offset_words = array_index_nospec(
+		buf->reloc.gather_offset_words, bo->gather_data_words);
+
+	bo->gather_data[buf->reloc.gather_offset_words] = written_ptr;
+
+	return 0;
+}
+
+static int submit_process_bufs(struct tegra_drm_context *ctx,
+			       struct gather_bo *bo,
+			       struct drm_tegra_channel_submit *args,
+			       struct tegra_drm_submit_data *job_data)
+{
+	struct tegra_drm_used_mapping *mappings;
+	struct drm_tegra_submit_buf *bufs;
+	int err;
+	u32 i;
+
+	bufs = alloc_copy_user_array(u64_to_user_ptr(args->bufs_ptr),
+				     args->num_bufs, sizeof(*bufs));
+	if (IS_ERR(bufs)) {
+		SUBMIT_ERR(ctx, "failed to copy bufs array from userspace");
+		return PTR_ERR(bufs);
+	}
+
+	mappings = kcalloc(args->num_bufs, sizeof(*mappings), GFP_KERNEL);
+	if (!mappings) {
+		SUBMIT_ERR(ctx, "failed to allocate memory for mapping info");
+		err = -ENOMEM;
+		goto done;
+	}
+
+	for (i = 0; i < args->num_bufs; i++) {
+		struct drm_tegra_submit_buf *buf = &bufs[i];
+		struct tegra_drm_mapping *mapping;
+
+		if (buf->flags & ~DRM_TEGRA_SUBMIT_BUF_RELOC_BLOCKLINEAR) {
+			SUBMIT_ERR(ctx, "invalid flag specified for buf");
+			err = -EINVAL;
+			goto drop_refs;
+		}
+
+		mapping = tegra_drm_mapping_get(ctx, buf->mapping_id);
+		if (!mapping) {
+			SUBMIT_ERR(ctx, "invalid mapping_id for buf '%u'",
+				   buf->mapping_id);
+			err = -EINVAL;
+			goto drop_refs;
+		}
+
+		err = submit_write_reloc(ctx, bo, buf, mapping);
+		if (err) {
+			tegra_drm_mapping_put(mapping);
+			goto drop_refs;
+		}
+
+		mappings[i].mapping = mapping;
+		mappings[i].flags = buf->flags;
+	}
+
+	job_data->used_mappings = mappings;
+	job_data->num_used_mappings = i;
+
+	err = 0;
+
+	goto done;
+
+drop_refs:
+	for (;;) {
+		if (i-- == 0)
+			break;
+
+		tegra_drm_mapping_put(mappings[i].mapping);
+	}
+
+	kfree(mappings);
+	job_data->used_mappings = NULL;
+
+done:
+	kvfree(bufs);
+
+	return err;
+}
+
+static int submit_get_syncpt(struct tegra_drm_context *ctx,
+			     struct host1x_job *job, struct xarray *syncpoints,
+			     struct drm_tegra_channel_submit *args)
+{
+	struct host1x_syncpt *sp;
+
+	if (args->syncpt_incr.flags) {
+		SUBMIT_ERR(ctx, "invalid flag specified for syncpt_incr");
+		return -EINVAL;
+	}
+
+	/* Syncpt ref will be dropped on job release */
+	sp = xa_load(syncpoints, args->syncpt_incr.id);
+	if (!sp) {
+		SUBMIT_ERR(ctx, "syncpoint specified in syncpt_incr was not allocated");
+		return -EINVAL;
+	}
+
+	job->syncpt = host1x_syncpt_get(sp);
+	job->syncpt_incrs = args->syncpt_incr.num_incrs;
+
+	return 0;
+}
+
+static int submit_job_add_gather(struct host1x_job *job,
+				 struct tegra_drm_context *ctx,
+				 struct drm_tegra_submit_cmd_gather_uptr *cmd,
+				 struct gather_bo *bo, u32 *offset,
+				 struct tegra_drm_submit_data *job_data)
+{
+	u32 next_offset;
+
+	if (cmd->reserved[0] || cmd->reserved[1] || cmd->reserved[2]) {
+		SUBMIT_ERR(ctx, "non-zero reserved field in GATHER_UPTR command");
+		return -EINVAL;
+	}
+
+	/* Check for maximum gather size */
+	if (cmd->words > 16383) {
+		SUBMIT_ERR(ctx, "too many words in GATHER_UPTR command");
+		return -EINVAL;
+	}
+
+	if (check_add_overflow(*offset, cmd->words, &next_offset)) {
+		SUBMIT_ERR(ctx, "too many total words in job");
+		return -EINVAL;
+	}
+
+	if (next_offset > bo->gather_data_words) {
+		SUBMIT_ERR(ctx, "GATHER_UPTR command overflows gather data");
+		return -EINVAL;
+	}
+
+	host1x_job_add_gather(job, &bo->base, cmd->words, *offset * 4);
+
+	*offset = next_offset;
+
+	return 0;
+}
+
+static int submit_create_job(struct host1x_job **pjob,
+			     struct tegra_drm_context *ctx,
+			     struct gather_bo *bo,
+			     struct drm_tegra_channel_submit *args,
+			     struct tegra_drm_submit_data *job_data,
+			     struct xarray *syncpoints)
+{
+	struct drm_tegra_submit_cmd *cmds;
+	u32 i, gather_offset = 0, class;
+	struct host1x_job *job;
+	int err;
+
+	/* Set initial class for firewall. */
+	class = ctx->client->base.class;
+
+	cmds = alloc_copy_user_array(u64_to_user_ptr(args->cmds_ptr),
+				     args->num_cmds, sizeof(*cmds));
+	if (IS_ERR(cmds)) {
+		SUBMIT_ERR(ctx, "failed to copy cmds array from userspace");
+		return PTR_ERR(cmds);
+	}
+
+	job = host1x_job_alloc(ctx->channel, args->num_cmds, 0, true);
+	if (!job) {
+		SUBMIT_ERR(ctx, "failed to allocate memory for job");
+		err = -ENOMEM;
+		goto done;
+	}
+
+	err = submit_get_syncpt(ctx, job, syncpoints, args);
+	if (err < 0)
+		goto free_job;
+
+	job->client = &ctx->client->base;
+	job->class = ctx->client->base.class;
+	job->serialize = true;
+
+	for (i = 0; i < args->num_cmds; i++) {
+		struct drm_tegra_submit_cmd *cmd = &cmds[i];
+
+		if (cmd->flags) {
+			SUBMIT_ERR(ctx, "unknown flags given for cmd");
+			err = -EINVAL;
+			goto free_job;
+		}
+
+		if (cmd->type == DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR) {
+			err = submit_job_add_gather(job, ctx,
+						    &cmd->gather_uptr, bo,
+						    &gather_offset, job_data);
+			if (err)
+				goto free_job;
+		} else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT) {
+			if (cmd->wait_syncpt.reserved[0] ||
+			    cmd->wait_syncpt.reserved[1]) {
+				SUBMIT_ERR(ctx, "non-zero reserved value");
+				err = -EINVAL;
+				goto free_job;
+			}
+
+			host1x_job_add_wait(job, cmd->wait_syncpt.id,
+					    cmd->wait_syncpt.threshold,
+					    false,
+					    class);
+		} else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE) {
+			if (cmd->wait_syncpt.reserved[0] ||
+			    cmd->wait_syncpt.reserved[1]) {
+				SUBMIT_ERR(ctx, "non-zero reserved value");
+				err = -EINVAL;
+				goto free_job;
+			}
+
+			if (cmd->wait_syncpt.id != args->syncpt_incr.id) {
+				SUBMIT_ERR(ctx, "syncpoint ID in CMD_WAIT_SYNCPT_RELATIVE is not used by the job");
+				err = -EINVAL;
+				goto free_job;
+			}
+
+			host1x_job_add_wait(job, cmd->wait_syncpt.id,
+					    cmd->wait_syncpt.threshold,
+					    true,
+					    class);
+		} else {
+			SUBMIT_ERR(ctx, "unknown cmd type");
+			err = -EINVAL;
+			goto free_job;
+		}
+	}
+
+	if (gather_offset == 0) {
+		SUBMIT_ERR(ctx, "job must have at least one gather");
+		err = -EINVAL;
+		goto free_job;
+	}
+
+	*pjob = job;
+
+	err = 0;
+	goto done;
+
+free_job:
+	host1x_job_put(job);
+
+done:
+	kvfree(cmds);
+
+	return err;
+}
+
+static void release_job(struct host1x_job *job)
+{
+	struct tegra_drm_client *client =
+		container_of(job->client, struct tegra_drm_client, base);
+	struct tegra_drm_submit_data *job_data = job->user_data;
+	u32 i;
+
+	for (i = 0; i < job_data->num_used_mappings; i++)
+		tegra_drm_mapping_put(job_data->used_mappings[i].mapping);
+
+	kfree(job_data->used_mappings);
+	kfree(job_data);
+
+	if (pm_runtime_enabled(client->base.dev))
+		pm_runtime_put_autosuspend(client->base.dev);
+}
+
+int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data,
+				   struct drm_file *file)
+{
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct drm_tegra_channel_submit *args = data;
+	struct tegra_drm_submit_data *job_data;
+	struct drm_syncobj *syncobj = NULL;
+	struct tegra_drm_context *ctx;
+	struct host1x_job *job;
+	struct gather_bo *bo;
+	u32 i;
+	int err;
+
+	mutex_lock(&fpriv->lock);
+	ctx = xa_load(&fpriv->contexts, args->channel_ctx);
+	if (!ctx) {
+		mutex_unlock(&fpriv->lock);
+		pr_err_ratelimited("%s: %s: invalid channel_ctx '%d'", __func__,
+			current->comm, args->channel_ctx);
+		return -EINVAL;
+	}
+
+	if (args->syncobj_in) {
+		struct dma_fence *fence;
+
+		err = drm_syncobj_find_fence(file, args->syncobj_in, 0, 0, &fence);
+		if (err) {
+			SUBMIT_ERR(ctx, "invalid syncobj_in '%d'", args->syncobj_in);
+			goto unlock;
+		}
+
+		err = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(10000));
+		dma_fence_put(fence);
+		if (err) {
+			SUBMIT_ERR(ctx, "wait for syncobj_in timed out");
+			goto unlock;
+		}
+	}
+
+	if (args->syncobj_out) {
+		syncobj = drm_syncobj_find(file, args->syncobj_out);
+		if (!syncobj) {
+			SUBMIT_ERR(ctx, "invalid syncobj_out '%d'", args->syncobj_out);
+			err = -ENOENT;
+			goto unlock;
+		}
+	}
+
+	/* Allocate gather BO and copy gather words in. */
+	err = submit_copy_gather_data(&bo, drm->dev, ctx, args);
+	if (err)
+		goto unlock;
+
+	job_data = kzalloc(sizeof(*job_data), GFP_KERNEL);
+	if (!job_data) {
+		SUBMIT_ERR(ctx, "failed to allocate memory for job data");
+		err = -ENOMEM;
+		goto put_bo;
+	}
+
+	/* Get data buffer mappings and do relocation patching. */
+	err = submit_process_bufs(ctx, bo, args, job_data);
+	if (err)
+		goto free_job_data;
+
+	/* Allocate host1x_job and add gathers and waits to it. */
+	err = submit_create_job(&job, ctx, bo, args, job_data,
+				&fpriv->syncpoints);
+	if (err)
+		goto free_job_data;
+
+	/* Map gather data for Host1x. */
+	err = host1x_job_pin(job, ctx->client->base.dev);
+	if (err) {
+		SUBMIT_ERR(ctx, "failed to pin job: %d", err);
+		goto put_job;
+	}
+
+	/* Boot engine. */
+	if (pm_runtime_enabled(ctx->client->base.dev)) {
+		err = pm_runtime_resume_and_get(ctx->client->base.dev);
+		if (err < 0) {
+			SUBMIT_ERR(ctx, "could not power up engine: %d", err);
+			goto unpin_job;
+		}
+	}
+
+	job->user_data = job_data;
+	job->release = release_job;
+	job->timeout = 10000;
+
+	/*
+	 * job_data is now part of job reference counting, so don't release
+	 * it from here.
+	 */
+	job_data = NULL;
+
+	/* Submit job to hardware. */
+	err = host1x_job_submit(job);
+	if (err) {
+		SUBMIT_ERR(ctx, "host1x job submission failed: %d", err);
+		goto unpin_job;
+	}
+
+	/* Return postfences to userspace and add fences to DMA reservations. */
+	args->syncpt_incr.fence_value = job->syncpt_end;
+
+	if (syncobj) {
+		struct dma_fence *fence = host1x_fence_create(job->syncpt, job->syncpt_end);
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			SUBMIT_ERR(ctx, "failed to create postfence: %d", err);
+		}
+
+		drm_syncobj_replace_fence(syncobj, fence);
+	}
+
+	goto put_job;
+
+unpin_job:
+	host1x_job_unpin(job);
+put_job:
+	host1x_job_put(job);
+free_job_data:
+	if (job_data && job_data->used_mappings) {
+		for (i = 0; i < job_data->num_used_mappings; i++)
+			tegra_drm_mapping_put(job_data->used_mappings[i].mapping);
+		kfree(job_data->used_mappings);
+	}
+	if (job_data)
+		kfree(job_data);
+put_bo:
+	gather_bo_put(&bo->base);
+unlock:
+	if (syncobj)
+		drm_syncobj_put(syncobj);
+	mutex_unlock(&fpriv->lock);
+	return err;
+}
diff --git a/drivers/gpu/drm/tegra/submit.h b/drivers/gpu/drm/tegra/submit.h
new file mode 100644
index 000000000000..0a165e9e4bda
--- /dev/null
+++ b/drivers/gpu/drm/tegra/submit.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#ifndef _TEGRA_DRM_UAPI_SUBMIT_H
+#define _TEGRA_DRM_UAPI_SUBMIT_H
+
+struct tegra_drm_used_mapping {
+	struct tegra_drm_mapping *mapping;
+	u32 flags;
+};
+
+struct tegra_drm_submit_data {
+	struct tegra_drm_used_mapping *used_mappings;
+	u32 num_used_mappings;
+};
+
+#endif
-- 
2.30.1


  parent reply	other threads:[~2021-06-10 11:05 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-10 11:04 [PATCH v7 00/15] TegraDRM UAPI Mikko Perttunen
2021-06-10 11:04 ` Mikko Perttunen
2021-06-10 11:04 ` [PATCH v7 01/15] gpu: host1x: Add DMA fence implementation Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-15 21:47   ` Dmitry Osipenko
2021-06-15 21:47     ` Dmitry Osipenko
2021-06-10 11:04 ` [PATCH v7 02/15] gpu: host1x: Add no-recovery mode Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-15 22:06   ` Dmitry Osipenko
2021-06-15 22:06     ` Dmitry Osipenko
2021-06-10 11:04 ` [PATCH v7 03/15] gpu: host1x: Add job release callback Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-10 11:04 ` [PATCH v7 04/15] gpu: host1x: Add support for syncpoint waits in CDMA pushbuffer Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-10 11:04 ` [PATCH v7 05/15] gpu: host1x: Add option to skip firewall for a job Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-22 20:46   ` Michał Mirosław
2021-06-22 20:46     ` Michał Mirosław
2021-06-10 11:04 ` [PATCH v7 06/15] drm/tegra: Extract tegra_gem_lookup Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-10 11:04 ` [PATCH v7 07/15] drm/tegra: Add new UAPI to header Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-10 11:04 ` [PATCH v7 08/15] drm/tegra: Boot VIC during runtime PM resume Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-10 11:04 ` [PATCH v7 09/15] drm/tegra: Allocate per-engine channel in core code Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-10 11:04 ` [PATCH v7 10/15] drm/tegra: Implement new UAPI Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-10 11:04 ` [PATCH v7 11/15] drm/tegra: Implement syncpoint management UAPI Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-10 11:04 ` [PATCH v7 12/15] drm/tegra: Implement syncpoint wait UAPI Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-10 11:04 ` Mikko Perttunen [this message]
2021-06-10 11:04   ` [PATCH v7 13/15] drm/tegra: Implement job submission part of new UAPI Mikko Perttunen
2021-06-15 19:00   ` Jon Hunter
2021-06-15 19:00     ` Jon Hunter
2021-06-15 19:03     ` Mikko Perttunen
2021-06-15 19:03       ` Mikko Perttunen
2021-06-15 22:19   ` Dmitry Osipenko
2021-06-15 22:19     ` Dmitry Osipenko
2021-06-15 22:24   ` Dmitry Osipenko
2021-06-15 22:24     ` Dmitry Osipenko
2021-06-16  9:31   ` Jon Hunter
2021-06-16  9:31     ` Jon Hunter
2021-06-10 11:04 ` [PATCH v7 14/15] drm/tegra: Add job firewall Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-10 11:04 ` [PATCH v7 15/15] drm/tegra: Bump driver version Mikko Perttunen
2021-06-10 11:04   ` Mikko Perttunen
2021-06-15 20:32 ` [PATCH v7 00/15] TegraDRM UAPI Dmitry Osipenko
2021-06-15 20:32   ` Dmitry Osipenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210610110456.3692391-14-mperttunen@nvidia.com \
    --to=mperttunen@nvidia.com \
    --cc=airlied@linux.ie \
    --cc=daniel@ffwll.ch \
    --cc=digetx@gmail.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=jonathanh@nvidia.com \
    --cc=linux-tegra@vger.kernel.org \
    --cc=thierry.reding@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.