* [PATCH 1/2] habanalabs/gaudi: support CS with no completion
2020-12-06 16:28 [PATCH] habanalabs/gaudi: remove duplicated gaudi packets masks Oded Gabbay
@ 2020-12-06 16:28 ` Oded Gabbay
2020-12-06 16:28 ` [PATCH] habanalabs: Init the VM module for kernel context Oded Gabbay
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2020-12-06 16:28 UTC (permalink / raw)
To: linux-kernel; +Cc: SW_Drivers, Ofir Bitton
From: Ofir Bitton <obitton@habana.ai>
As part of the staged submission feature, we need Gaudi to support
command submissions that will never get a completion.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/common/command_submission.c | 1 +
drivers/misc/habanalabs/common/habanalabs.h | 2 ++
drivers/misc/habanalabs/gaudi/gaudi.c | 10 +++++++---
3 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 92c1c516b65f..ac527682cd8c 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -225,6 +225,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
parser.queue_type = job->queue_type;
parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
job->patched_cb = NULL;
+ parser.completion = true;
rc = hdev->asic_funcs->cs_parser(hdev, &parser);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 270d342f7778..81a16d4f8cba 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1221,6 +1221,7 @@ struct hl_cs_job {
* MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
* have streams so the engine can't be busy by another
* stream.
+ * @completion: true if we need completion for this CS.
*/
struct hl_cs_parser {
struct hl_cb *user_cb;
@@ -1235,6 +1236,7 @@ struct hl_cs_parser {
u8 job_id;
u8 is_kernel_allocated_cb;
u8 contains_dma_pkt;
+ u8 completion;
};
/*
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 658d180d3d4d..076a55ebf357 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -5041,7 +5041,8 @@ static int gaudi_validate_cb(struct hl_device *hdev,
* 1. A packet that will act as a completion packet
* 2. A packet that will generate MSI-X interrupt
*/
- parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
+ if (parser->completion)
+ parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
return rc;
}
@@ -5268,8 +5269,11 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
* 1. A packet that will act as a completion packet
* 2. A packet that will generate MSI interrupt
*/
- parser->patched_cb_size = parser->user_cb_size +
- sizeof(struct packet_msg_prot) * 2;
+ if (parser->completion)
+ parser->patched_cb_size = parser->user_cb_size +
+ sizeof(struct packet_msg_prot) * 2;
+ else
+ parser->patched_cb_size = parser->user_cb_size;
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
parser->patched_cb_size, false, false,
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH] habanalabs: Init the VM module for kernel context
2020-12-06 16:28 [PATCH] habanalabs/gaudi: remove duplicated gaudi packets masks Oded Gabbay
2020-12-06 16:28 ` [PATCH 1/2] habanalabs/gaudi: support CS with no completion Oded Gabbay
@ 2020-12-06 16:28 ` Oded Gabbay
2020-12-06 16:28 ` [PATCH] habanalabs: refactor MMU locks code Oded Gabbay
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2020-12-06 16:28 UTC (permalink / raw)
To: linux-kernel; +Cc: SW_Drivers, Ofir Bitton
From: Ofir Bitton <obitton@habana.ai>
In order for reserving VA ranges for kernel memory, we need
to allow the VM module to be initiated with kernel context.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/common/context.c | 16 +++++++++++++---
drivers/misc/habanalabs/common/device.c | 15 ++++++++++-----
drivers/misc/habanalabs/common/memory.c | 3 ++-
drivers/misc/habanalabs/gaudi/gaudi.c | 10 ++++------
4 files changed, 29 insertions(+), 15 deletions(-)
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index f65e6559149b..3d86b83f4ca6 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -56,6 +56,8 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
idle_mask);
} else {
dev_dbg(hdev->dev, "closing kernel context\n");
+ hdev->asic_funcs->ctx_fini(ctx);
+ hl_vm_ctx_fini(ctx);
hl_mmu_ctx_fini(ctx);
}
}
@@ -151,11 +153,18 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
- rc = hl_mmu_ctx_init(ctx);
+ rc = hl_vm_ctx_init(ctx);
if (rc) {
- dev_err(hdev->dev, "Failed to init mmu ctx module\n");
+ dev_err(hdev->dev, "Failed to init mem ctx module\n");
+ rc = -ENOMEM;
goto err_free_cs_pending;
}
+
+ rc = hdev->asic_funcs->ctx_init(ctx);
+ if (rc) {
+ dev_err(hdev->dev, "ctx_init failed\n");
+ goto err_vm_ctx_fini;
+ }
} else {
ctx->asid = hl_asid_alloc(hdev);
if (!ctx->asid) {
@@ -194,7 +203,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
err_vm_ctx_fini:
hl_vm_ctx_fini(ctx);
err_asid_free:
- hl_asid_free(hdev, ctx->asid);
+ if (ctx->asid != HL_KERNEL_ASID_ID)
+ hl_asid_free(hdev, ctx->asid);
err_free_cs_pending:
kfree(ctx->cs_pending);
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 59212706a836..671d1eb41146 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1310,11 +1310,16 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hdev->compute_ctx = NULL;
+ hl_debugfs_add_device(hdev);
+
+ /* debugfs nodes are created in hl_ctx_init so it must be called after
+ * hl_debugfs_add_device.
+ */
rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
if (rc) {
dev_err(hdev->dev, "failed to initialize kernel context\n");
kfree(hdev->kernel_ctx);
- goto mmu_fini;
+ goto remove_device_from_debugfs;
}
rc = hl_cb_pool_init(hdev);
@@ -1323,8 +1328,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto release_ctx;
}
- hl_debugfs_add_device(hdev);
-
/*
* From this point, in case of an error, add char devices and create
* sysfs nodes as part of the error flow, to allow debugging.
@@ -1413,6 +1416,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
if (hl_ctx_put(hdev->kernel_ctx) != 1)
dev_err(hdev->dev,
"kernel ctx is still alive on initialization failure\n");
+remove_device_from_debugfs:
+ hl_debugfs_remove_device(hdev);
mmu_fini:
hl_mmu_fini(hdev);
eq_fini:
@@ -1502,8 +1507,6 @@ void hl_device_fini(struct hl_device *hdev)
device_late_fini(hdev);
- hl_debugfs_remove_device(hdev);
-
/*
* Halt the engines and disable interrupts so we won't get any more
* completions from H/W and we won't have any accesses from the
@@ -1535,6 +1538,8 @@ void hl_device_fini(struct hl_device *hdev)
if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
dev_err(hdev->dev, "kernel ctx is still alive\n");
+ hl_debugfs_remove_device(hdev);
+
hl_vm_fini(hdev);
hl_mmu_fini(hdev);
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index d4a94a1c3eef..c16c829bfbf2 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1923,7 +1923,8 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
* because the user notifies us on allocations. If the user is no more,
* all DRAM is available
*/
- if (!ctx->hdev->asic_prop.dram_supports_virtual_memory)
+ if (ctx->asid != HL_KERNEL_ASID_ID &&
+ !ctx->hdev->asic_prop.dram_supports_virtual_memory)
atomic64_set(&ctx->hdev->dram_used_mem, 0);
}
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 4776dfed9098..658d180d3d4d 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7849,18 +7849,16 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
static int gaudi_ctx_init(struct hl_ctx *ctx)
{
+ if (ctx->asid == HL_KERNEL_ASID_ID)
+ return 0;
+
gaudi_mmu_prepare(ctx->hdev, ctx->asid);
return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
}
static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
- struct hl_device *hdev = ctx->hdev;
-
- /* Gaudi will NEVER support more then a single compute context.
- * Therefore, don't clear anything unless it is the compute context
- */
- if (hdev->compute_ctx != ctx)
+ if (ctx->asid == HL_KERNEL_ASID_ID)
return;
gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH] habanalabs: refactor MMU locks code
2020-12-06 16:28 [PATCH] habanalabs/gaudi: remove duplicated gaudi packets masks Oded Gabbay
2020-12-06 16:28 ` [PATCH 1/2] habanalabs/gaudi: support CS with no completion Oded Gabbay
2020-12-06 16:28 ` [PATCH] habanalabs: Init the VM module for kernel context Oded Gabbay
@ 2020-12-06 16:28 ` Oded Gabbay
2020-12-06 16:28 ` [PATCH] habanalabs: update firmware boot interface Oded Gabbay
2020-12-06 16:28 ` [PATCH 2/2] habanalabs: allow user to pass a staged submission seq Oded Gabbay
4 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2020-12-06 16:28 UTC (permalink / raw)
To: linux-kernel; +Cc: SW_Drivers, Ohad Sharabi
From: Ohad Sharabi <osharabi@habana.ai>
remove mmu_cache_lock as it protects a section which is already
protected by mmu_lock.
in addition, wrap mmu cache invalidate calls in hl_vm_ctx_fini with
mmu_lock.
Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/common/device.c | 2 --
drivers/misc/habanalabs/common/habanalabs.h | 2 --
drivers/misc/habanalabs/common/memory.c | 4 ++++
drivers/misc/habanalabs/gaudi/gaudi.c | 8 --------
drivers/misc/habanalabs/goya/goya.c | 8 --------
5 files changed, 4 insertions(+), 20 deletions(-)
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 5871162a8442..59212706a836 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -373,7 +373,6 @@ static int device_early_init(struct hl_device *hdev)
mutex_init(&hdev->send_cpu_message_lock);
mutex_init(&hdev->debug_lock);
- mutex_init(&hdev->mmu_cache_lock);
INIT_LIST_HEAD(&hdev->cs_mirror_list);
spin_lock_init(&hdev->cs_mirror_lock);
INIT_LIST_HEAD(&hdev->fpriv_list);
@@ -414,7 +413,6 @@ static void device_early_fini(struct hl_device *hdev)
{
int i;
- mutex_destroy(&hdev->mmu_cache_lock);
mutex_destroy(&hdev->debug_lock);
mutex_destroy(&hdev->send_cpu_message_lock);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 70b778a0d60e..270d342f7778 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1762,7 +1762,6 @@ struct hl_mmu_funcs {
* @asic_funcs: ASIC specific functions.
* @asic_specific: ASIC specific information to use only from ASIC files.
* @vm: virtual memory manager for MMU.
- * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context.
* @hwmon_dev: H/W monitor device.
* @pm_mng_profile: current power management profile.
* @hl_chip_info: ASIC's sensors information.
@@ -1877,7 +1876,6 @@ struct hl_device {
const struct hl_asic_funcs *asic_funcs;
void *asic_specific;
struct hl_vm vm;
- struct mutex mmu_cache_lock;
struct device *hwmon_dev;
enum hl_pm_mng_profile pm_mng_profile;
struct hwmon_chip_info *hl_chip_info;
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index cbe9da4e0211..d4a94a1c3eef 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1889,10 +1889,14 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
unmap_device_va(ctx, hnode->vaddr, true);
}
+ mutex_lock(&ctx->mmu_lock);
+
/* invalidate the cache once after the unmapping loop */
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
+ mutex_unlock(&ctx->mmu_lock);
+
spin_lock(&vm->idr_lock);
idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
if (phys_pg_list->asid == ctx->asid) {
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 65895ba075fe..4776dfed9098 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7311,8 +7311,6 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
else
timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
- mutex_lock(&hdev->mmu_cache_lock);
-
/* L0 & L1 invalidation */
WREG32(mmSTLB_INV_PS, 3);
WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
@@ -7328,8 +7326,6 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
WREG32(mmSTLB_INV_SET, 0);
- mutex_unlock(&hdev->mmu_cache_lock);
-
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
@@ -7352,8 +7348,6 @@ static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
hdev->hard_reset_pending)
return 0;
- mutex_lock(&hdev->mmu_cache_lock);
-
if (hdev->pldm)
timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
else
@@ -7381,8 +7375,6 @@ static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
1000,
timeout_usec);
- mutex_unlock(&hdev->mmu_cache_lock);
-
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index b8b4aa636b7c..6ae34905d34f 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5072,8 +5072,6 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
else
timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
- mutex_lock(&hdev->mmu_cache_lock);
-
/* L0 & L1 invalidation */
WREG32(mmSTLB_INV_ALL_START, 1);
@@ -5085,8 +5083,6 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
1000,
timeout_usec);
- mutex_unlock(&hdev->mmu_cache_lock);
-
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
@@ -5116,8 +5112,6 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
else
timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
- mutex_lock(&hdev->mmu_cache_lock);
-
/*
* TODO: currently invalidate entire L0 & L1 as in regular hard
* invalidation. Need to apply invalidation of specific cache lines with
@@ -5140,8 +5134,6 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
1000,
timeout_usec);
- mutex_unlock(&hdev->mmu_cache_lock);
-
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH] habanalabs: update firmware boot interface
2020-12-06 16:28 [PATCH] habanalabs/gaudi: remove duplicated gaudi packets masks Oded Gabbay
` (2 preceding siblings ...)
2020-12-06 16:28 ` [PATCH] habanalabs: refactor MMU locks code Oded Gabbay
@ 2020-12-06 16:28 ` Oded Gabbay
2020-12-06 16:28 ` [PATCH 2/2] habanalabs: allow user to pass a staged submission seq Oded Gabbay
4 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2020-12-06 16:28 UTC (permalink / raw)
To: linux-kernel; +Cc: SW_Drivers
Update to latest firmware hl_boot_if.h file.
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
drivers/misc/habanalabs/include/common/hl_boot_if.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 755c4800f002..261cb3898980 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -158,6 +158,10 @@
* statuses.
* Initialized in: preboot
*
+ * CPU_BOOT_DEV_STS0_SP_SRAM_EN SP SRAM is initialized and available
+ * for use.
+ * Initialized in: preboot
+ *
*/
#define CPU_BOOT_DEV_STS0_SECURITY_EN (1 << 0)
#define CPU_BOOT_DEV_STS0_DEBUG_EN (1 << 1)
@@ -171,6 +175,7 @@
#define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << 9)
#define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << 10)
#define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << 11)
+#define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << 12)
#define CPU_BOOT_DEV_STS0_ENABLED (1 << 31)
enum cpu_boot_status {
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/2] habanalabs: allow user to pass a staged submission seq
2020-12-06 16:28 [PATCH] habanalabs/gaudi: remove duplicated gaudi packets masks Oded Gabbay
` (3 preceding siblings ...)
2020-12-06 16:28 ` [PATCH] habanalabs: update firmware boot interface Oded Gabbay
@ 2020-12-06 16:28 ` Oded Gabbay
4 siblings, 0 replies; 6+ messages in thread
From: Oded Gabbay @ 2020-12-06 16:28 UTC (permalink / raw)
To: linux-kernel; +Cc: SW_Drivers, Ofir Bitton
From: Ofir Bitton <obitton@habana.ai>
In order to support the staged submission feature, user must be
allowed to use the same CS sequence for all submissions in the
same staged submission.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
.../habanalabs/common/command_submission.c | 16 +++++++----
include/uapi/misc/habanalabs.h | 28 +++++++++++++------
2 files changed, 30 insertions(+), 14 deletions(-)
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index ac527682cd8c..a22c2751eaa1 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -792,7 +792,7 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
}
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
- u32 num_chunks, u64 *cs_seq, bool timestamp)
+ u32 num_chunks, u64 *cs_seq, u32 flags)
{
bool int_queues_only = true;
struct hl_device *hdev = hpriv->hdev;
@@ -819,7 +819,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
goto free_cs_chunk_array;
}
- cs->timestamp = !!timestamp;
+ cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
*cs_seq = cs->sequence;
hl_debugfs_add_cs(cs);
@@ -988,7 +988,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
rc = 0;
} else {
rc = cs_ioctl_default(hpriv, chunks, num_chunks,
- cs_seq, false);
+ cs_seq, 0);
}
mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1308,7 +1308,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
enum hl_cs_type cs_type;
u64 cs_seq = ULONG_MAX;
void __user *chunks;
- u32 num_chunks;
+ u32 num_chunks, flags;
int rc;
rc = hl_cs_sanity_checks(hpriv, args);
@@ -1323,6 +1323,12 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
~HL_CS_FLAGS_FORCE_RESTORE);
chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
num_chunks = args->in.num_chunks_execute;
+ flags = args->in.cs_flags;
+
+ /* In case this is a staged CS, user should supply the CS sequence */
+ if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
+ !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
+ cs_seq = args->in.seq;
switch (cs_type) {
case CS_TYPE_SIGNAL:
@@ -1333,7 +1339,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
break;
default:
rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
- args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP);
+ args->in.cs_flags);
break;
}
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index dc8bcec195cc..8a5e60a6dae0 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -600,11 +600,14 @@ struct hl_cs_chunk {
};
/* SIGNAL and WAIT/COLLECTIVE_WAIT flags are mutually exclusive */
-#define HL_CS_FLAGS_FORCE_RESTORE 0x1
-#define HL_CS_FLAGS_SIGNAL 0x2
-#define HL_CS_FLAGS_WAIT 0x4
-#define HL_CS_FLAGS_COLLECTIVE_WAIT 0x8
-#define HL_CS_FLAGS_TIMESTAMP 0x20
+#define HL_CS_FLAGS_FORCE_RESTORE 0x1
+#define HL_CS_FLAGS_SIGNAL 0x2
+#define HL_CS_FLAGS_WAIT 0x4
+#define HL_CS_FLAGS_COLLECTIVE_WAIT 0x8
+#define HL_CS_FLAGS_TIMESTAMP 0x20
+#define HL_CS_FLAGS_STAGED_SUBMISSION 0x40
+#define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST 0x80
+#define HL_CS_FLAGS_STAGED_SUBMISSION_LAST 0x100
#define HL_CS_STATUS_SUCCESS 0
@@ -618,10 +621,17 @@ struct hl_cs_in {
/* holds address of array of hl_cs_chunk for execution phase */
__u64 chunks_execute;
- /* this holds address of array of hl_cs_chunk for store phase -
- * Currently not in use
- */
- __u64 chunks_store;
+ union {
+ /* this holds address of array of hl_cs_chunk for store phase -
+ * Currently not in use
+ */
+ __u64 chunks_store;
+
+ /* Sequence number of a staged submission CS
+ * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
+ */
+ __u64 seq;
+ };
/* Number of chunks in restore phase array. Maximum number is
* HL_MAX_JOBS_PER_CS
--
2.17.1
^ permalink raw reply related [flat|nested] 6+ messages in thread