LKML Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions
@ 2020-07-30 13:13 Oded Gabbay
  2020-07-30 13:13 ` [PATCH 2/2] habanalabs: add information about PCIe controller Oded Gabbay
                   ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: Oded Gabbay @ 2020-07-30 13:13 UTC (permalink / raw)
  To: linux-kernel, SW_Drivers; +Cc: Ofir Bitton, Greg Kroah-Hartman, Daniel Vetter

From: Ofir Bitton <obitton@habana.ai>

The habanalabs driver uses the dma-fence mechanism for synchronization.
The dma-fence mechanism was designed solely for GPUs, hence we propose
a simpler mechanism based on completions to replace the current
dma-fence objects.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 .../habanalabs/common/command_submission.c    | 93 +++++++++----------
 drivers/misc/habanalabs/common/context.c      | 13 +--
 drivers/misc/habanalabs/common/habanalabs.h   | 32 +++++--
 drivers/misc/habanalabs/common/hw_queue.c     |  2 +-
 4 files changed, 78 insertions(+), 62 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index b9840e368eb5..d19ac641b171 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -38,26 +38,10 @@ void hl_sob_reset_error(struct kref *ref)
 			hw_sob->q_idx, hw_sob->sob_id);
 }
 
-static const char *hl_fence_get_driver_name(struct dma_fence *fence)
-{
-	return "HabanaLabs";
-}
-
-static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
-{
-	struct hl_cs_compl *hl_cs_compl =
-		container_of(fence, struct hl_cs_compl, base_fence);
-
-	return dev_name(hl_cs_compl->hdev->dev);
-}
-
-static bool hl_fence_enable_signaling(struct dma_fence *fence)
-{
-	return true;
-}
-
-static void hl_fence_release(struct dma_fence *fence)
+void hl_fence_release(struct kref *kref)
 {
+	struct hl_fence *fence =
+		container_of(kref, struct hl_fence, refcount);
 	struct hl_cs_compl *hl_cs_cmpl =
 		container_of(fence, struct hl_cs_compl, base_fence);
 	struct hl_device *hdev = hl_cs_cmpl->hdev;
@@ -102,12 +86,24 @@ static void hl_fence_release(struct dma_fence *fence)
 	kfree_rcu(hl_cs_cmpl, base_fence.rcu);
 }
 
-static const struct dma_fence_ops hl_fence_ops = {
-	.get_driver_name = hl_fence_get_driver_name,
-	.get_timeline_name = hl_fence_get_timeline_name,
-	.enable_signaling = hl_fence_enable_signaling,
-	.release = hl_fence_release
-};
+void hl_fence_put(struct hl_fence *fence)
+{
+	if (fence)
+		kref_put(&fence->refcount, hl_fence_release);
+}
+
+void hl_fence_get(struct hl_fence *fence)
+{
+	if (fence)
+		kref_get(&fence->refcount);
+}
+
+void hl_fence_init(struct hl_fence *fence)
+{
+	kref_init(&fence->refcount);
+	fence->error = 0;
+	init_completion(&fence->completion);
+}
 
 static void cs_get(struct hl_cs *cs)
 {
@@ -336,7 +332,7 @@ static void cs_do_release(struct kref *ref)
 		 * In case the wait for signal CS was submitted, the put occurs
 		 * in init_signal_wait_cs() right before hanging on the PQ.
 		 */
-		dma_fence_put(cs->signal_fence);
+		hl_fence_put(cs->signal_fence);
 	}
 
 	/*
@@ -348,19 +344,18 @@ static void cs_do_release(struct kref *ref)
 	hl_ctx_put(cs->ctx);
 
 	/* We need to mark an error for not submitted because in that case
-	 * the dma fence release flow is different. Mainly, we don't need
+	 * the hl fence release flow is different. Mainly, we don't need
 	 * to handle hw_sob for signal/wait
 	 */
 	if (cs->timedout)
-		dma_fence_set_error(cs->fence, -ETIMEDOUT);
+		cs->fence->error = -ETIMEDOUT;
 	else if (cs->aborted)
-		dma_fence_set_error(cs->fence, -EIO);
+		cs->fence->error = -EIO;
 	else if (!cs->submitted)
-		dma_fence_set_error(cs->fence, -EBUSY);
-
-	dma_fence_signal(cs->fence);
-	dma_fence_put(cs->fence);
+		cs->fence->error = -EBUSY;
 
+	complete_all(&cs->fence->completion);
+	hl_fence_put(cs->fence);
 	cs_counters_aggregate(hdev, cs->ctx);
 
 	kfree(cs->jobs_in_queue_cnt);
@@ -401,7 +396,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 			enum hl_cs_type cs_type, struct hl_cs **cs_new)
 {
 	struct hl_cs_compl *cs_cmpl;
-	struct dma_fence *other = NULL;
+	struct hl_fence *other = NULL;
 	struct hl_cs *cs;
 	int rc;
 
@@ -434,7 +429,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	cs_cmpl->cs_seq = ctx->cs_sequence;
 	other = ctx->cs_pending[cs_cmpl->cs_seq &
 				(hdev->asic_prop.max_pending_cs - 1)];
-	if ((other) && (!dma_fence_is_signaled(other))) {
+
+	if (other && !completion_done(&other->completion)) {
 		dev_dbg(hdev->dev,
 			"Rejecting CS because of too many in-flights CS\n");
 		rc = -EAGAIN;
@@ -448,8 +444,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 		goto free_fence;
 	}
 
-	dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
-			ctx->asid, ctx->cs_sequence);
+	/* init hl_fence */
+	hl_fence_init(&cs_cmpl->base_fence);
 
 	cs->sequence = cs_cmpl->cs_seq;
 
@@ -458,9 +454,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 							&cs_cmpl->base_fence;
 	ctx->cs_sequence++;
 
-	dma_fence_get(&cs_cmpl->base_fence);
+	hl_fence_get(&cs_cmpl->base_fence);
 
-	dma_fence_put(other);
+	hl_fence_put(other);
 
 	spin_unlock(&ctx->cs_lock);
 
@@ -773,7 +769,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	struct hl_ctx *ctx = hpriv->ctx;
 	struct hl_cs_chunk *cs_chunk_array, *chunk;
 	struct hw_queue_properties *hw_queue_prop;
-	struct dma_fence *sig_fence = NULL;
+	struct hl_fence *sig_fence = NULL;
 	struct hl_cs_job *job;
 	struct hl_cs *cs;
 	struct hl_cb *cb;
@@ -875,14 +871,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 			dev_err(hdev->dev,
 				"CS seq 0x%llx is not of a signal CS\n",
 				signal_seq);
-			dma_fence_put(sig_fence);
+			hl_fence_put(sig_fence);
 			rc = -EINVAL;
 			goto free_signal_seq_array;
 		}
 
-		if (dma_fence_is_signaled(sig_fence)) {
+		if (completion_done(&sig_fence->completion)) {
 			/* signal CS already finished */
-			dma_fence_put(sig_fence);
+			hl_fence_put(sig_fence);
 			rc = 0;
 			goto free_signal_seq_array;
 		}
@@ -894,7 +890,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	rc = allocate_cs(hdev, ctx, cs_type, &cs);
 	if (rc) {
 		if (cs_type == CS_TYPE_WAIT)
-			dma_fence_put(sig_fence);
+			hl_fence_put(sig_fence);
 		hl_ctx_put(ctx);
 		goto free_signal_seq_array;
 	}
@@ -1154,7 +1150,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 static long _hl_cs_wait_ioctl(struct hl_device *hdev,
 		struct hl_ctx *ctx, u64 timeout_us, u64 seq)
 {
-	struct dma_fence *fence;
+	struct hl_fence *fence;
 	unsigned long timeout;
 	long rc;
 
@@ -1173,12 +1169,15 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
 				"Can't wait on CS %llu because current CS is at seq %llu\n",
 				seq, ctx->cs_sequence);
 	} else if (fence) {
-		rc = dma_fence_wait_timeout(fence, true, timeout);
+		rc = wait_for_completion_interruptible_timeout(
+				&fence->completion, timeout);
+
 		if (fence->error == -ETIMEDOUT)
 			rc = -ETIMEDOUT;
 		else if (fence->error == -EIO)
 			rc = -EIO;
-		dma_fence_put(fence);
+
+		hl_fence_put(fence);
 	} else {
 		dev_dbg(hdev->dev,
 			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index 3e375958e73b..b168a9fce817 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -23,7 +23,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 	 */
 
 	for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
-		dma_fence_put(ctx->cs_pending[i]);
+		hl_fence_put(ctx->cs_pending[i]);
 
 	kfree(ctx->cs_pending);
 
@@ -128,7 +128,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 	atomic_set(&ctx->thread_ctx_switch_token, 1);
 	ctx->thread_ctx_switch_wait_token = 0;
 	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
-				sizeof(struct dma_fence *),
+				sizeof(struct hl_fence *),
 				GFP_KERNEL);
 	if (!ctx->cs_pending)
 		return -ENOMEM;
@@ -184,10 +184,10 @@ int hl_ctx_put(struct hl_ctx *ctx)
 	return kref_put(&ctx->refcount, hl_ctx_do_release);
 }
 
-struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
+struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 {
 	struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
-	struct dma_fence *fence;
+	struct hl_fence *fence;
 
 	spin_lock(&ctx->cs_lock);
 
@@ -201,8 +201,9 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 		return NULL;
 	}
 
-	fence = dma_fence_get(
-			ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
+	fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
+	hl_fence_get(fence);
+
 	spin_unlock(&ctx->cs_lock);
 
 	return fence;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 018d9d67e8e6..30a399337675 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -15,7 +15,6 @@
 #include <linux/cdev.h>
 #include <linux/iopoll.h>
 #include <linux/irqreturn.h>
-#include <linux/dma-fence.h>
 #include <linux/dma-direction.h>
 #include <linux/scatterlist.h>
 #include <linux/hashtable.h>
@@ -342,9 +341,24 @@ struct asic_fixed_properties {
 	u8				completion_queues_count;
 };
 
+/**
+ * struct hl_fence - software synchronization primitive
+ * @completion: fence is implemented using completion
+ * @refcount: refcount for this fence
+ * @rcu: used for releasing fence with kfree_rcu
+ * @error: mark this fence with error
+ *
+ */
+struct hl_fence {
+	struct completion	completion;
+	struct kref		refcount;
+	struct rcu_head		rcu;
+	int			error;
+};
+
 /**
  * struct hl_cs_compl - command submission completion object.
- * @base_fence: kernel fence object.
+ * @base_fence: hl fence object.
  * @lock: spinlock to protect fence.
  * @hdev: habanalabs device structure.
  * @hw_sob: the H/W SOB used in this signal/wait CS.
@@ -353,7 +367,7 @@ struct asic_fixed_properties {
  * @sob_val: the SOB value that is used in this signal/wait CS.
  */
 struct hl_cs_compl {
-	struct dma_fence	base_fence;
+	struct hl_fence		base_fence;
 	spinlock_t		lock;
 	struct hl_device	*hdev;
 	struct hl_hw_sob	*hw_sob;
@@ -800,7 +814,7 @@ struct hl_va_range {
  * @hdev: pointer to the device structure.
  * @refcount: reference counter for the context. Context is released only when
  *		this hits 0l. It is incremented on CS and CS_WAIT.
- * @cs_pending: array of DMA fence objects representing pending CS.
+ * @cs_pending: array of hl fence objects representing pending CS.
  * @host_va_range: holds available virtual addresses for host mappings.
  * @host_huge_va_range: holds available virtual addresses for host mappings
  *                      with huge pages.
@@ -832,7 +846,7 @@ struct hl_ctx {
 	struct hl_fpriv		*hpriv;
 	struct hl_device	*hdev;
 	struct kref		refcount;
-	struct dma_fence	**cs_pending;
+	struct hl_fence		**cs_pending;
 	struct hl_va_range	*host_va_range;
 	struct hl_va_range	*host_huge_va_range;
 	struct hl_va_range	*dram_va_range;
@@ -919,8 +933,8 @@ struct hl_cs {
 	struct list_head	job_list;
 	spinlock_t		job_lock;
 	struct kref		refcount;
-	struct dma_fence	*fence;
-	struct dma_fence	*signal_fence;
+	struct hl_fence		*fence;
+	struct hl_fence		*signal_fence;
 	struct work_struct	finish_work;
 	struct delayed_work	work_tdr;
 	struct list_head	mirror_node;
@@ -1736,7 +1750,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
 void hl_ctx_do_release(struct kref *ref);
 void hl_ctx_get(struct hl_device *hdev,	struct hl_ctx *ctx);
 int hl_ctx_put(struct hl_ctx *ctx);
-struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
+struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
 void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
 void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
 
@@ -1778,6 +1792,8 @@ void hl_cs_rollback_all(struct hl_device *hdev);
 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 		enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
 void hl_sob_reset_error(struct kref *ref);
+void hl_fence_put(struct hl_fence *fence);
+void hl_fence_get(struct hl_fence *fence);
 
 void goya_set_asic_funcs(struct hl_device *hdev);
 void gaudi_set_asic_funcs(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 287681646071..65b9aa69a83e 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -474,7 +474,7 @@ static void init_signal_wait_cs(struct hl_cs *cs)
 		 * wait CS was submitted.
 		 */
 		mb();
-		dma_fence_put(cs->signal_fence);
+		hl_fence_put(cs->signal_fence);
 		cs->signal_fence = NULL;
 	}
 }
-- 
2.17.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 2/2] habanalabs: add information about PCIe controller
  2020-07-30 13:13 [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions Oded Gabbay
@ 2020-07-30 13:13 ` Oded Gabbay
  2020-07-31  0:17   ` kernel test robot
                     ` (2 more replies)
  2020-07-30 19:33 ` [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions kernel test robot
                   ` (3 subsequent siblings)
  4 siblings, 3 replies; 9+ messages in thread
From: Oded Gabbay @ 2020-07-30 13:13 UTC (permalink / raw)
  To: linux-kernel, SW_Drivers; +Cc: Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

Update the firmware header with a new API for getting PCIe info
such as tx/rx throughput and the replay counter.
These counters are needed by customers for monitoring and maintenance
of multiple devices.
Add new opcodes to the INFO ioctl to retrieve these counters.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/common/firmware_if.c  | 48 +++++++++++++++++++
 drivers/misc/habanalabs/common/habanalabs.h   |  4 ++
 .../misc/habanalabs/common/habanalabs_ioctl.c | 41 ++++++++++++++++
 drivers/misc/habanalabs/gaudi/gaudi.c         |  4 ++
 drivers/misc/habanalabs/goya/goya.c           |  4 ++
 .../misc/habanalabs/include/common/armcp_if.h | 10 ++++
 include/uapi/misc/habanalabs.h                | 27 +++++++++++
 7 files changed, 138 insertions(+)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index f70302cdab1b..2a0f8e0e1131 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -354,6 +354,54 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 	return rc;
 }
 
+int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
+		struct hl_info_pci_counters *counters)
+{
+	struct armcp_packet pkt = {};
+	long result;
+	int rc;
+
+	pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_THROUGHPUT_GET <<
+			ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+	/* Fetch PCI rx counter */
+	pkt.index = cpu_to_le64(armcp_pcie_throughput_rx);
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+					HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
+		return rc;
+	}
+	counters->rx_throughput = result;
+
+	/* Fetch PCI tx counter */
+	pkt.index = cpu_to_le64(armcp_pcie_throughput_tx);
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+					HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
+		return rc;
+	}
+	counters->tx_throughput = result;
+
+	/* Fetch PCI replay counter */
+	pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_REPLAY_CNT_GET <<
+			ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+			HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
+		return rc;
+	}
+	counters->replay_cnt = (u32) result;
+
+	return rc;
+}
+
 static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
 {
 	u32 err_val;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 30a399337675..36b48e8b9b7b 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1483,6 +1483,7 @@ struct hl_device_idle_busy_ts {
  * @soft_reset_cnt: number of soft reset since the driver was loaded.
  * @hard_reset_cnt: number of hard reset since the driver was loaded.
  * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
+ * @clk_throttling_reason: bitmask represents the current clk throttling reasons
  * @id: device minor.
  * @id_control: minor of the control device
  * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
@@ -1586,6 +1587,7 @@ struct hl_device {
 	u32				soft_reset_cnt;
 	u32				hard_reset_cnt;
 	u32				idle_busy_ts_idx;
+	u32				clk_throttling_reason;
 	u16				id;
 	u16				id_control;
 	u16				cpu_pci_msb_addr;
@@ -1840,6 +1842,8 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 int hl_fw_send_heartbeat(struct hl_device *hdev);
 int hl_fw_armcp_info_get(struct hl_device *hdev);
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
+int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
+		struct hl_info_pci_counters *counters);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
 			u32 boot_err0_reg, bool skip_bmc,
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 5af1c03da473..4d838b1a3bbe 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -276,6 +276,41 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
 		min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
 }
 
+static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_info_pci_counters pci_counters = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	rc = hl_fw_armcp_pci_counters_get(hdev, &pci_counters);
+	if (rc)
+		return rc;
+
+	return copy_to_user(out, &pci_counters,
+		min((size_t) max_size, sizeof(pci_counters))) ? -EFAULT : 0;
+}
+
+static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_info_clk_throttle clk_throttle = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	clk_throttle.clk_throttling_reason = hdev->clk_throttling_reason;
+
+	return copy_to_user(out, &clk_throttle,
+		min((size_t) max_size, sizeof(clk_throttle))) ? -EFAULT : 0;
+}
+
 static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
 	struct hl_device *hdev = hpriv->hdev;
@@ -360,6 +395,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_CS_COUNTERS:
 		return cs_counters_info(hpriv, args);
 
+	case HL_INFO_PCI_COUNTERS:
+		return pci_counters_info(hpriv, args);
+
+	case HL_INFO_CLK_THROTTLE_REASON:
+		return clk_throttle_info(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 00a0a7238d81..41d55a5f7f83 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -5620,21 +5620,25 @@ static void gaudi_print_clk_change_info(struct hl_device *hdev,
 {
 	switch (event_type) {
 	case GAUDI_EVENT_FIX_POWER_ENV_S:
+		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
 		dev_info_ratelimited(hdev->dev,
 			"Clock throttling due to power consumption\n");
 		break;
 
 	case GAUDI_EVENT_FIX_POWER_ENV_E:
+		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
 		dev_info_ratelimited(hdev->dev,
 			"Power envelop is safe, back to optimal clock\n");
 		break;
 
 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
+		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
 		dev_info_ratelimited(hdev->dev,
 			"Clock throttling due to overheating\n");
 		break;
 
 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
+		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
 		dev_info_ratelimited(hdev->dev,
 			"Thermal envelop is safe, back to optimal clock\n");
 		break;
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 85030759b2af..c497ae25c331 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4549,18 +4549,22 @@ static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
 {
 	switch (event_type) {
 	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
+		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
 		dev_info_ratelimited(hdev->dev,
 			"Clock throttling due to power consumption\n");
 		break;
 	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
+		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
 		dev_info_ratelimited(hdev->dev,
 			"Power envelop is safe, back to optimal clock\n");
 		break;
 	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
+		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
 		dev_info_ratelimited(hdev->dev,
 			"Clock throttling due to overheating\n");
 		break;
 	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
+		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
 		dev_info_ratelimited(hdev->dev,
 			"Thermal envelop is safe, back to optimal clock\n");
 		break;
diff --git a/drivers/misc/habanalabs/include/common/armcp_if.h b/drivers/misc/habanalabs/include/common/armcp_if.h
index 07f9972db28d..1403c937253c 100644
--- a/drivers/misc/habanalabs/include/common/armcp_if.h
+++ b/drivers/misc/habanalabs/include/common/armcp_if.h
@@ -243,6 +243,8 @@ enum armcp_packet_id {
 	ARMCP_PACKET_TEMPERATURE_SET,		/* sysfs */
 	ARMCP_PACKET_VOLTAGE_SET,		/* sysfs */
 	ARMCP_PACKET_CURRENT_SET,		/* sysfs */
+	ARMCP_PACKET_PCIE_THROUGHPUT_GET,	/* internal */
+	ARMCP_PACKET_PCIE_REPLAY_CNT_GET,	/* internal */
 };
 
 #define ARMCP_PACKET_FENCE_VAL	0xFE8CE7A5
@@ -277,6 +279,9 @@ struct armcp_packet {
 			__u8 pad; /* unused */
 		};
 
+		/* For any general request */
+		__le32 index;
+
 		/* For frequency get/set */
 		__le32 pll_index;
 
@@ -344,6 +349,11 @@ enum armcp_pwm_attributes {
 	armcp_pwm_enable
 };
 
+enum armcp_pcie_throughput_attributes {
+	armcp_pcie_throughput_tx,
+	armcp_pcie_throughput_rx
+};
+
 /* Event Queue Packets */
 
 struct eq_generic_event {
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index d5c4f983b7a8..ee13b919db35 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -264,6 +264,8 @@ enum hl_device_status {
  * HL_INFO_TIME_SYNC     - Retrieve the device's time alongside the host's time
  *                         for synchronization.
  * HL_INFO_CS_COUNTERS   - Retrieve command submission counters
+ * HL_INFO_PCI_COUNTERS  - Retrieve PCI counters
+ * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason
  */
 #define HL_INFO_HW_IP_INFO		0
 #define HL_INFO_HW_EVENTS		1
@@ -276,6 +278,8 @@ enum hl_device_status {
 #define HL_INFO_RESET_COUNT		9
 #define HL_INFO_TIME_SYNC		10
 #define HL_INFO_CS_COUNTERS		11
+#define HL_INFO_PCI_COUNTERS		12
+#define HL_INFO_CLK_THROTTLE_REASON	13
 
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
@@ -340,6 +344,29 @@ struct hl_info_time_sync {
 	__u64 host_time;
 };
 
+/**
+ * struct hl_info_pci_counters - pci counters
+ * @rx_throughput: PCI rx throughput KBps
+ * @tx_throughput: PCI tx throughput KBps
+ * @replay_cnt: PCI replay counter
+ */
+struct hl_info_pci_counters {
+	__u64 rx_throughput;
+	__u64 tx_throughput;
+	__u64 replay_cnt;
+};
+
+#define HL_CLK_THROTTLE_POWER	0x1
+#define HL_CLK_THROTTLE_THERMAL	0x2
+
+/**
+ * struct hl_info_clk_throttle - clock throttling reason
+ * @clk_throttling_reason: each bit represents a clk throttling reason
+ */
+struct hl_info_clk_throttle {
+	__u32 clk_throttling_reason;
+};
+
 /**
  * struct hl_info_cs_counters - command submission counters
  * @out_of_mem_drop_cnt: dropped due to memory allocation issue
-- 
2.17.1


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions
  2020-07-30 13:13 [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions Oded Gabbay
  2020-07-30 13:13 ` [PATCH 2/2] habanalabs: add information about PCIe controller Oded Gabbay
@ 2020-07-30 19:33 ` kernel test robot
  2020-07-30 20:43 ` Daniel Vetter
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: kernel test robot @ 2020-07-30 19:33 UTC (permalink / raw)
  To: Oded Gabbay, linux-kernel, SW_Drivers
  Cc: kbuild-all, Ofir Bitton, Greg Kroah-Hartman, Daniel Vetter


[-- Attachment #1: Type: text/plain, Size: 4285 bytes --]

Hi Oded,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on char-misc/char-misc-testing]
[also build test WARNING on next-20200730]
[cannot apply to linux/master linus/master v5.8-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Oded-Gabbay/habanalabs-Replace-dma-fence-mechanism-with-completions/20200730-211536
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git 22362aa30bad6f03b5bcbbeee3cdc61950d40086
config: riscv-allyesconfig (attached as .config)
compiler: riscv64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=riscv 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/misc/habanalabs/common/command_submission.c:41:6: warning: no previous prototype for 'hl_fence_release' [-Wmissing-prototypes]
      41 | void hl_fence_release(struct kref *kref)
         |      ^~~~~~~~~~~~~~~~
>> drivers/misc/habanalabs/common/command_submission.c:101:6: warning: no previous prototype for 'hl_fence_init' [-Wmissing-prototypes]
     101 | void hl_fence_init(struct hl_fence *fence)
         |      ^~~~~~~~~~~~~

vim +/hl_fence_release +41 drivers/misc/habanalabs/common/command_submission.c

    40	
  > 41	void hl_fence_release(struct kref *kref)
    42	{
    43		struct hl_fence *fence =
    44			container_of(kref, struct hl_fence, refcount);
    45		struct hl_cs_compl *hl_cs_cmpl =
    46			container_of(fence, struct hl_cs_compl, base_fence);
    47		struct hl_device *hdev = hl_cs_cmpl->hdev;
    48	
    49		/* EBUSY means the CS was never submitted and hence we don't have
    50		 * an attached hw_sob object that we should handle here
    51		 */
    52		if (fence->error == -EBUSY)
    53			goto free;
    54	
    55		if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
    56				(hl_cs_cmpl->type == CS_TYPE_WAIT)) {
    57	
    58			dev_dbg(hdev->dev,
    59				"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
    60				hl_cs_cmpl->cs_seq,
    61				hl_cs_cmpl->type,
    62				hl_cs_cmpl->hw_sob->sob_id,
    63				hl_cs_cmpl->sob_val);
    64	
    65			/*
    66			 * A signal CS can get completion while the corresponding wait
    67			 * for signal CS is on its way to the PQ. The wait for signal CS
    68			 * will get stuck if the signal CS incremented the SOB to its
    69			 * max value and there are no pending (submitted) waits on this
    70			 * SOB.
    71			 * We do the following to void this situation:
    72			 * 1. The wait for signal CS must get a ref for the signal CS as
    73			 *    soon as possible in cs_ioctl_signal_wait() and put it
    74			 *    before being submitted to the PQ but after it incremented
    75			 *    the SOB refcnt in init_signal_wait_cs().
    76			 * 2. Signal/Wait for signal CS will decrement the SOB refcnt
    77			 *    here.
    78			 * These two measures guarantee that the wait for signal CS will
    79			 * reset the SOB upon completion rather than the signal CS and
    80			 * hence the above scenario is avoided.
    81			 */
    82			kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
    83		}
    84	
    85	free:
    86		kfree_rcu(hl_cs_cmpl, base_fence.rcu);
    87	}
    88	
    89	void hl_fence_put(struct hl_fence *fence)
    90	{
    91		if (fence)
    92			kref_put(&fence->refcount, hl_fence_release);
    93	}
    94	
    95	void hl_fence_get(struct hl_fence *fence)
    96	{
    97		if (fence)
    98			kref_get(&fence->refcount);
    99	}
   100	
 > 101	void hl_fence_init(struct hl_fence *fence)
   102	{
   103		kref_init(&fence->refcount);
   104		fence->error = 0;
   105		init_completion(&fence->completion);
   106	}
   107	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 65216 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions
  2020-07-30 13:13 [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions Oded Gabbay
  2020-07-30 13:13 ` [PATCH 2/2] habanalabs: add information about PCIe controller Oded Gabbay
  2020-07-30 19:33 ` [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions kernel test robot
@ 2020-07-30 20:43 ` Daniel Vetter
  2020-08-01 17:50 ` kernel test robot
  2020-08-01 17:50 ` [RFC PATCH] habanalabs: hl_fence_release() can be static kernel test robot
  4 siblings, 0 replies; 9+ messages in thread
From: Daniel Vetter @ 2020-07-30 20:43 UTC (permalink / raw)
  To: Oded Gabbay
  Cc: Linux Kernel Mailing List, SW_Drivers, Ofir Bitton, Greg Kroah-Hartman

On Thu, Jul 30, 2020 at 3:13 PM Oded Gabbay <oded.gabbay@gmail.com> wrote:
>
> From: Ofir Bitton <obitton@habana.ai>
>
> habanalabs driver uses dma-fence mechanism for synchronization.
> dma-fence mechanism was designed solely for GPUs, hence we purpose
> a simpler mechanism based on completions to replace current
> dma-fence objects.
>
> Signed-off-by: Ofir Bitton <obitton@habana.ai>
> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

Are you sure you need kfree_rcu for hl_fence? That was a requirement
from dma_fence, because we do _tons_ of rcu protected lookups of those
in gpu drivers (e.g. struct dma_resv). But a quick git grep says
habanalabs driver doesn't use any rcu, so a simple kfree() should be
fine, and allows you to further simplify hl_fence. With that change
this is:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Cheers, Daniel

> ---
>  .../habanalabs/common/command_submission.c    | 93 +++++++++----------
>  drivers/misc/habanalabs/common/context.c      | 13 +--
>  drivers/misc/habanalabs/common/habanalabs.h   | 32 +++++--
>  drivers/misc/habanalabs/common/hw_queue.c     |  2 +-
>  4 files changed, 78 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
> index b9840e368eb5..d19ac641b171 100644
> --- a/drivers/misc/habanalabs/common/command_submission.c
> +++ b/drivers/misc/habanalabs/common/command_submission.c
> @@ -38,26 +38,10 @@ void hl_sob_reset_error(struct kref *ref)
>                         hw_sob->q_idx, hw_sob->sob_id);
>  }
>
> -static const char *hl_fence_get_driver_name(struct dma_fence *fence)
> -{
> -       return "HabanaLabs";
> -}
> -
> -static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
> -{
> -       struct hl_cs_compl *hl_cs_compl =
> -               container_of(fence, struct hl_cs_compl, base_fence);
> -
> -       return dev_name(hl_cs_compl->hdev->dev);
> -}
> -
> -static bool hl_fence_enable_signaling(struct dma_fence *fence)
> -{
> -       return true;
> -}
> -
> -static void hl_fence_release(struct dma_fence *fence)
> +void hl_fence_release(struct kref *kref)
>  {
> +       struct hl_fence *fence =
> +               container_of(kref, struct hl_fence, refcount);
>         struct hl_cs_compl *hl_cs_cmpl =
>                 container_of(fence, struct hl_cs_compl, base_fence);
>         struct hl_device *hdev = hl_cs_cmpl->hdev;
> @@ -102,12 +86,24 @@ static void hl_fence_release(struct dma_fence *fence)
>         kfree_rcu(hl_cs_cmpl, base_fence.rcu);
>  }
>
> -static const struct dma_fence_ops hl_fence_ops = {
> -       .get_driver_name = hl_fence_get_driver_name,
> -       .get_timeline_name = hl_fence_get_timeline_name,
> -       .enable_signaling = hl_fence_enable_signaling,
> -       .release = hl_fence_release
> -};
> +void hl_fence_put(struct hl_fence *fence)
> +{
> +       if (fence)
> +               kref_put(&fence->refcount, hl_fence_release);
> +}
> +
> +void hl_fence_get(struct hl_fence *fence)
> +{
> +       if (fence)
> +               kref_get(&fence->refcount);
> +}
> +
> +void hl_fence_init(struct hl_fence *fence)
> +{
> +       kref_init(&fence->refcount);
> +       fence->error = 0;
> +       init_completion(&fence->completion);
> +}
>
>  static void cs_get(struct hl_cs *cs)
>  {
> @@ -336,7 +332,7 @@ static void cs_do_release(struct kref *ref)
>                  * In case the wait for signal CS was submitted, the put occurs
>                  * in init_signal_wait_cs() right before hanging on the PQ.
>                  */
> -               dma_fence_put(cs->signal_fence);
> +               hl_fence_put(cs->signal_fence);
>         }
>
>         /*
> @@ -348,19 +344,18 @@ static void cs_do_release(struct kref *ref)
>         hl_ctx_put(cs->ctx);
>
>         /* We need to mark an error for not submitted because in that case
> -        * the dma fence release flow is different. Mainly, we don't need
> +        * the hl fence release flow is different. Mainly, we don't need
>          * to handle hw_sob for signal/wait
>          */
>         if (cs->timedout)
> -               dma_fence_set_error(cs->fence, -ETIMEDOUT);
> +               cs->fence->error = -ETIMEDOUT;
>         else if (cs->aborted)
> -               dma_fence_set_error(cs->fence, -EIO);
> +               cs->fence->error = -EIO;
>         else if (!cs->submitted)
> -               dma_fence_set_error(cs->fence, -EBUSY);
> -
> -       dma_fence_signal(cs->fence);
> -       dma_fence_put(cs->fence);
> +               cs->fence->error = -EBUSY;
>
> +       complete_all(&cs->fence->completion);
> +       hl_fence_put(cs->fence);
>         cs_counters_aggregate(hdev, cs->ctx);
>
>         kfree(cs->jobs_in_queue_cnt);
> @@ -401,7 +396,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
>                         enum hl_cs_type cs_type, struct hl_cs **cs_new)
>  {
>         struct hl_cs_compl *cs_cmpl;
> -       struct dma_fence *other = NULL;
> +       struct hl_fence *other = NULL;
>         struct hl_cs *cs;
>         int rc;
>
> @@ -434,7 +429,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
>         cs_cmpl->cs_seq = ctx->cs_sequence;
>         other = ctx->cs_pending[cs_cmpl->cs_seq &
>                                 (hdev->asic_prop.max_pending_cs - 1)];
> -       if ((other) && (!dma_fence_is_signaled(other))) {
> +
> +       if (other && !completion_done(&other->completion)) {
>                 dev_dbg(hdev->dev,
>                         "Rejecting CS because of too many in-flights CS\n");
>                 rc = -EAGAIN;
> @@ -448,8 +444,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
>                 goto free_fence;
>         }
>
> -       dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
> -                       ctx->asid, ctx->cs_sequence);
> +       /* init hl_fence */
> +       hl_fence_init(&cs_cmpl->base_fence);
>
>         cs->sequence = cs_cmpl->cs_seq;
>
> @@ -458,9 +454,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
>                                                         &cs_cmpl->base_fence;
>         ctx->cs_sequence++;
>
> -       dma_fence_get(&cs_cmpl->base_fence);
> +       hl_fence_get(&cs_cmpl->base_fence);
>
> -       dma_fence_put(other);
> +       hl_fence_put(other);
>
>         spin_unlock(&ctx->cs_lock);
>
> @@ -773,7 +769,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
>         struct hl_ctx *ctx = hpriv->ctx;
>         struct hl_cs_chunk *cs_chunk_array, *chunk;
>         struct hw_queue_properties *hw_queue_prop;
> -       struct dma_fence *sig_fence = NULL;
> +       struct hl_fence *sig_fence = NULL;
>         struct hl_cs_job *job;
>         struct hl_cs *cs;
>         struct hl_cb *cb;
> @@ -875,14 +871,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
>                         dev_err(hdev->dev,
>                                 "CS seq 0x%llx is not of a signal CS\n",
>                                 signal_seq);
> -                       dma_fence_put(sig_fence);
> +                       hl_fence_put(sig_fence);
>                         rc = -EINVAL;
>                         goto free_signal_seq_array;
>                 }
>
> -               if (dma_fence_is_signaled(sig_fence)) {
> +               if (completion_done(&sig_fence->completion)) {
>                         /* signal CS already finished */
> -                       dma_fence_put(sig_fence);
> +                       hl_fence_put(sig_fence);
>                         rc = 0;
>                         goto free_signal_seq_array;
>                 }
> @@ -894,7 +890,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
>         rc = allocate_cs(hdev, ctx, cs_type, &cs);
>         if (rc) {
>                 if (cs_type == CS_TYPE_WAIT)
> -                       dma_fence_put(sig_fence);
> +                       hl_fence_put(sig_fence);
>                 hl_ctx_put(ctx);
>                 goto free_signal_seq_array;
>         }
> @@ -1154,7 +1150,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
>  static long _hl_cs_wait_ioctl(struct hl_device *hdev,
>                 struct hl_ctx *ctx, u64 timeout_us, u64 seq)
>  {
> -       struct dma_fence *fence;
> +       struct hl_fence *fence;
>         unsigned long timeout;
>         long rc;
>
> @@ -1173,12 +1169,15 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
>                                 "Can't wait on CS %llu because current CS is at seq %llu\n",
>                                 seq, ctx->cs_sequence);
>         } else if (fence) {
> -               rc = dma_fence_wait_timeout(fence, true, timeout);
> +               rc = wait_for_completion_interruptible_timeout(
> +                               &fence->completion, timeout);
> +
>                 if (fence->error == -ETIMEDOUT)
>                         rc = -ETIMEDOUT;
>                 else if (fence->error == -EIO)
>                         rc = -EIO;
> -               dma_fence_put(fence);
> +
> +               hl_fence_put(fence);
>         } else {
>                 dev_dbg(hdev->dev,
>                         "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
> diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
> index 3e375958e73b..b168a9fce817 100644
> --- a/drivers/misc/habanalabs/common/context.c
> +++ b/drivers/misc/habanalabs/common/context.c
> @@ -23,7 +23,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
>          */
>
>         for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
> -               dma_fence_put(ctx->cs_pending[i]);
> +               hl_fence_put(ctx->cs_pending[i]);
>
>         kfree(ctx->cs_pending);
>
> @@ -128,7 +128,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
>         atomic_set(&ctx->thread_ctx_switch_token, 1);
>         ctx->thread_ctx_switch_wait_token = 0;
>         ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
> -                               sizeof(struct dma_fence *),
> +                               sizeof(struct hl_fence *),
>                                 GFP_KERNEL);
>         if (!ctx->cs_pending)
>                 return -ENOMEM;
> @@ -184,10 +184,10 @@ int hl_ctx_put(struct hl_ctx *ctx)
>         return kref_put(&ctx->refcount, hl_ctx_do_release);
>  }
>
> -struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
> +struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
>  {
>         struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
> -       struct dma_fence *fence;
> +       struct hl_fence *fence;
>
>         spin_lock(&ctx->cs_lock);
>
> @@ -201,8 +201,9 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
>                 return NULL;
>         }
>
> -       fence = dma_fence_get(
> -                       ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
> +       fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
> +       hl_fence_get(fence);
> +
>         spin_unlock(&ctx->cs_lock);
>
>         return fence;
> diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
> index 018d9d67e8e6..30a399337675 100644
> --- a/drivers/misc/habanalabs/common/habanalabs.h
> +++ b/drivers/misc/habanalabs/common/habanalabs.h
> @@ -15,7 +15,6 @@
>  #include <linux/cdev.h>
>  #include <linux/iopoll.h>
>  #include <linux/irqreturn.h>
> -#include <linux/dma-fence.h>
>  #include <linux/dma-direction.h>
>  #include <linux/scatterlist.h>
>  #include <linux/hashtable.h>
> @@ -342,9 +341,24 @@ struct asic_fixed_properties {
>         u8                              completion_queues_count;
>  };
>
> +/**
> + * struct hl_fence - software synchronization primitive
> + * @completion: fence is implemented using completion
> + * @refcount: refcount for this fence
> + * @rcu: used for releasing fence with kfree_rcu
> + * @error: mark this fence with error
> + *
> + */
> +struct hl_fence {
> +       struct completion       completion;
> +       struct kref             refcount;
> +       struct rcu_head         rcu;
> +       int                     error;
> +};
> +
>  /**
>   * struct hl_cs_compl - command submission completion object.
> - * @base_fence: kernel fence object.
> + * @base_fence: hl fence object.
>   * @lock: spinlock to protect fence.
>   * @hdev: habanalabs device structure.
>   * @hw_sob: the H/W SOB used in this signal/wait CS.
> @@ -353,7 +367,7 @@ struct asic_fixed_properties {
>   * @sob_val: the SOB value that is used in this signal/wait CS.
>   */
>  struct hl_cs_compl {
> -       struct dma_fence        base_fence;
> +       struct hl_fence         base_fence;
>         spinlock_t              lock;
>         struct hl_device        *hdev;
>         struct hl_hw_sob        *hw_sob;
> @@ -800,7 +814,7 @@ struct hl_va_range {
>   * @hdev: pointer to the device structure.
>   * @refcount: reference counter for the context. Context is released only when
>   *             this hits 0l. It is incremented on CS and CS_WAIT.
> - * @cs_pending: array of DMA fence objects representing pending CS.
> + * @cs_pending: array of hl fence objects representing pending CS.
>   * @host_va_range: holds available virtual addresses for host mappings.
>   * @host_huge_va_range: holds available virtual addresses for host mappings
>   *                      with huge pages.
> @@ -832,7 +846,7 @@ struct hl_ctx {
>         struct hl_fpriv         *hpriv;
>         struct hl_device        *hdev;
>         struct kref             refcount;
> -       struct dma_fence        **cs_pending;
> +       struct hl_fence         **cs_pending;
>         struct hl_va_range      *host_va_range;
>         struct hl_va_range      *host_huge_va_range;
>         struct hl_va_range      *dram_va_range;
> @@ -919,8 +933,8 @@ struct hl_cs {
>         struct list_head        job_list;
>         spinlock_t              job_lock;
>         struct kref             refcount;
> -       struct dma_fence        *fence;
> -       struct dma_fence        *signal_fence;
> +       struct hl_fence         *fence;
> +       struct hl_fence         *signal_fence;
>         struct work_struct      finish_work;
>         struct delayed_work     work_tdr;
>         struct list_head        mirror_node;
> @@ -1736,7 +1750,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
>  void hl_ctx_do_release(struct kref *ref);
>  void hl_ctx_get(struct hl_device *hdev,        struct hl_ctx *ctx);
>  int hl_ctx_put(struct hl_ctx *ctx);
> -struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
> +struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
>  void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
>  void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
>
> @@ -1778,6 +1792,8 @@ void hl_cs_rollback_all(struct hl_device *hdev);
>  struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
>                 enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
>  void hl_sob_reset_error(struct kref *ref);
> +void hl_fence_put(struct hl_fence *fence);
> +void hl_fence_get(struct hl_fence *fence);
>
>  void goya_set_asic_funcs(struct hl_device *hdev);
>  void gaudi_set_asic_funcs(struct hl_device *hdev);
> diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
> index 287681646071..65b9aa69a83e 100644
> --- a/drivers/misc/habanalabs/common/hw_queue.c
> +++ b/drivers/misc/habanalabs/common/hw_queue.c
> @@ -474,7 +474,7 @@ static void init_signal_wait_cs(struct hl_cs *cs)
>                  * wait CS was submitted.
>                  */
>                 mb();
> -               dma_fence_put(cs->signal_fence);
> +               hl_fence_put(cs->signal_fence);
>                 cs->signal_fence = NULL;
>         }
>  }
> --
> 2.17.1
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] habanalabs: add information about PCIe controller
  2020-07-30 13:13 ` [PATCH 2/2] habanalabs: add information about PCIe controller Oded Gabbay
@ 2020-07-31  0:17   ` kernel test robot
  2020-07-31  9:01   ` kernel test robot
  2020-08-01 19:23   ` kernel test robot
  2 siblings, 0 replies; 9+ messages in thread
From: kernel test robot @ 2020-07-31  0:17 UTC (permalink / raw)
  To: Oded Gabbay, linux-kernel, SW_Drivers; +Cc: kbuild-all, Ofir Bitton


[-- Attachment #1: Type: text/plain, Size: 5852 bytes --]

Hi Oded,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on char-misc/char-misc-testing]
[also build test WARNING on next-20200730]
[cannot apply to linux/master linus/master v5.8-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Oded-Gabbay/habanalabs-Replace-dma-fence-mechanism-with-completions/20200730-211536
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git 22362aa30bad6f03b5bcbbeee3cdc61950d40086
config: xtensa-allyesconfig (attached as .config)
compiler: xtensa-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=xtensa 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   In file included from include/linux/byteorder/big_endian.h:5,
                    from arch/xtensa/include/uapi/asm/byteorder.h:8,
                    from arch/xtensa/include/asm/bitops.h:21,
                    from include/linux/bitops.h:29,
                    from include/linux/kernel.h:12,
                    from include/linux/list.h:9,
                    from include/linux/kobject.h:19,
                    from include/linux/cdev.h:5,
                    from drivers/misc/habanalabs/common/habanalabs.h:15,
                    from drivers/misc/habanalabs/common/firmware_if.c:8:
   drivers/misc/habanalabs/common/firmware_if.c: In function 'hl_fw_armcp_pci_counters_get':
>> include/uapi/linux/byteorder/big_endian.h:31:26: warning: conversion from 'long long unsigned int' to '__le32' {aka 'unsigned int'} changes value from '72057594037927936' to '0' [-Woverflow]
      31 | #define __cpu_to_le64(x) ((__force __le64)__swab64((x)))
         |                          ^
   include/linux/byteorder/generic.h:86:21: note: in expansion of macro '__cpu_to_le64'
      86 | #define cpu_to_le64 __cpu_to_le64
         |                     ^~~~~~~~~~~~~
   drivers/misc/habanalabs/common/firmware_if.c:368:14: note: in expansion of macro 'cpu_to_le64'
     368 |  pkt.index = cpu_to_le64(armcp_pcie_throughput_rx);
         |              ^~~~~~~~~~~

vim +31 include/uapi/linux/byteorder/big_endian.h

5921e6f8809b16 David Howells 2012-10-13  14  
5921e6f8809b16 David Howells 2012-10-13  15  #define __constant_htonl(x) ((__force __be32)(__u32)(x))
5921e6f8809b16 David Howells 2012-10-13  16  #define __constant_ntohl(x) ((__force __u32)(__be32)(x))
5921e6f8809b16 David Howells 2012-10-13  17  #define __constant_htons(x) ((__force __be16)(__u16)(x))
5921e6f8809b16 David Howells 2012-10-13  18  #define __constant_ntohs(x) ((__force __u16)(__be16)(x))
5921e6f8809b16 David Howells 2012-10-13  19  #define __constant_cpu_to_le64(x) ((__force __le64)___constant_swab64((x)))
5921e6f8809b16 David Howells 2012-10-13  20  #define __constant_le64_to_cpu(x) ___constant_swab64((__force __u64)(__le64)(x))
5921e6f8809b16 David Howells 2012-10-13  21  #define __constant_cpu_to_le32(x) ((__force __le32)___constant_swab32((x)))
5921e6f8809b16 David Howells 2012-10-13  22  #define __constant_le32_to_cpu(x) ___constant_swab32((__force __u32)(__le32)(x))
5921e6f8809b16 David Howells 2012-10-13  23  #define __constant_cpu_to_le16(x) ((__force __le16)___constant_swab16((x)))
5921e6f8809b16 David Howells 2012-10-13  24  #define __constant_le16_to_cpu(x) ___constant_swab16((__force __u16)(__le16)(x))
5921e6f8809b16 David Howells 2012-10-13  25  #define __constant_cpu_to_be64(x) ((__force __be64)(__u64)(x))
5921e6f8809b16 David Howells 2012-10-13  26  #define __constant_be64_to_cpu(x) ((__force __u64)(__be64)(x))
5921e6f8809b16 David Howells 2012-10-13  27  #define __constant_cpu_to_be32(x) ((__force __be32)(__u32)(x))
5921e6f8809b16 David Howells 2012-10-13  28  #define __constant_be32_to_cpu(x) ((__force __u32)(__be32)(x))
5921e6f8809b16 David Howells 2012-10-13  29  #define __constant_cpu_to_be16(x) ((__force __be16)(__u16)(x))
5921e6f8809b16 David Howells 2012-10-13  30  #define __constant_be16_to_cpu(x) ((__force __u16)(__be16)(x))
5921e6f8809b16 David Howells 2012-10-13 @31  #define __cpu_to_le64(x) ((__force __le64)__swab64((x)))
5921e6f8809b16 David Howells 2012-10-13  32  #define __le64_to_cpu(x) __swab64((__force __u64)(__le64)(x))
5921e6f8809b16 David Howells 2012-10-13  33  #define __cpu_to_le32(x) ((__force __le32)__swab32((x)))
5921e6f8809b16 David Howells 2012-10-13  34  #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
5921e6f8809b16 David Howells 2012-10-13  35  #define __cpu_to_le16(x) ((__force __le16)__swab16((x)))
5921e6f8809b16 David Howells 2012-10-13  36  #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
5921e6f8809b16 David Howells 2012-10-13  37  #define __cpu_to_be64(x) ((__force __be64)(__u64)(x))
5921e6f8809b16 David Howells 2012-10-13  38  #define __be64_to_cpu(x) ((__force __u64)(__be64)(x))
5921e6f8809b16 David Howells 2012-10-13  39  #define __cpu_to_be32(x) ((__force __be32)(__u32)(x))
5921e6f8809b16 David Howells 2012-10-13  40  #define __be32_to_cpu(x) ((__force __u32)(__be32)(x))
5921e6f8809b16 David Howells 2012-10-13  41  #define __cpu_to_be16(x) ((__force __be16)(__u16)(x))
5921e6f8809b16 David Howells 2012-10-13  42  #define __be16_to_cpu(x) ((__force __u16)(__be16)(x))
5921e6f8809b16 David Howells 2012-10-13  43  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 64496 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] habanalabs: add information about PCIe controller
  2020-07-30 13:13 ` [PATCH 2/2] habanalabs: add information about PCIe controller Oded Gabbay
  2020-07-31  0:17   ` kernel test robot
@ 2020-07-31  9:01   ` kernel test robot
  2020-08-01 19:23   ` kernel test robot
  2 siblings, 0 replies; 9+ messages in thread
From: kernel test robot @ 2020-07-31  9:01 UTC (permalink / raw)
  To: Oded Gabbay, linux-kernel, SW_Drivers
  Cc: kbuild-all, clang-built-linux, Ofir Bitton


[-- Attachment #1: Type: text/plain, Size: 3969 bytes --]

Hi Oded,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on char-misc/char-misc-testing]
[also build test WARNING on next-20200730]
[cannot apply to linux/master linus/master v5.8-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Oded-Gabbay/habanalabs-Replace-dma-fence-mechanism-with-completions/20200730-211536
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git 22362aa30bad6f03b5bcbbeee3cdc61950d40086
config: arm-randconfig-r011-20200731 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project c23ae3f18ee3ff11671f4c62ffc66d150b1bcdc2)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm cross compiling tool for clang build
        # apt-get install binutils-arm-linux-gnueabi
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> drivers/misc/habanalabs/common/firmware_if.c:368:14: warning: implicit conversion from '__le64' (aka 'unsigned long long') to '__le32' (aka 'unsigned int') changes value from 72057594037927936 to 0 [-Wconstant-conversion]
           pkt.index = cpu_to_le64(armcp_pcie_throughput_rx);
                     ~ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/byteorder/generic.h:86:21: note: expanded from macro 'cpu_to_le64'
   #define cpu_to_le64 __cpu_to_le64
                       ^
   include/uapi/linux/byteorder/big_endian.h:31:27: note: expanded from macro '__cpu_to_le64'
   #define __cpu_to_le64(x) ((__force __le64)__swab64((x)))
                             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   1 warning generated.

vim +368 drivers/misc/habanalabs/common/firmware_if.c

   356	
   357	int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
   358			struct hl_info_pci_counters *counters)
   359	{
   360		struct armcp_packet pkt = {};
   361		long result;
   362		int rc;
   363	
   364		pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_THROUGHPUT_GET <<
   365				ARMCP_PKT_CTL_OPCODE_SHIFT);
   366	
   367		/* Fetch PCI rx counter */
 > 368		pkt.index = cpu_to_le64(armcp_pcie_throughput_rx);
   369		rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
   370						HL_ARMCP_INFO_TIMEOUT_USEC, &result);
   371		if (rc) {
   372			dev_err(hdev->dev,
   373				"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
   374			return rc;
   375		}
   376		counters->rx_throughput = result;
   377	
   378		/* Fetch PCI tx counter */
   379		pkt.index = cpu_to_le64(armcp_pcie_throughput_tx);
   380		rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
   381						HL_ARMCP_INFO_TIMEOUT_USEC, &result);
   382		if (rc) {
   383			dev_err(hdev->dev,
   384				"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
   385			return rc;
   386		}
   387		counters->tx_throughput = result;
   388	
   389		/* Fetch PCI replay counter */
   390		pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_REPLAY_CNT_GET <<
   391				ARMCP_PKT_CTL_OPCODE_SHIFT);
   392	
   393		rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
   394				HL_ARMCP_INFO_TIMEOUT_USEC, &result);
   395		if (rc) {
   396			dev_err(hdev->dev,
   397				"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
   398			return rc;
   399		}
   400		counters->replay_cnt = (u32) result;
   401	
   402		return rc;
   403	}
   404	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 36369 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions
  2020-07-30 13:13 [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions Oded Gabbay
                   ` (2 preceding siblings ...)
  2020-07-30 20:43 ` Daniel Vetter
@ 2020-08-01 17:50 ` kernel test robot
  2020-08-01 17:50 ` [RFC PATCH] habanalabs: hl_fence_release() can be static kernel test robot
  4 siblings, 0 replies; 9+ messages in thread
From: kernel test robot @ 2020-08-01 17:50 UTC (permalink / raw)
  To: Oded Gabbay, linux-kernel, SW_Drivers
  Cc: kbuild-all, Ofir Bitton, Greg Kroah-Hartman, Daniel Vetter


[-- Attachment #1: Type: text/plain, Size: 1552 bytes --]

Hi Oded,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on char-misc/char-misc-testing]
[cannot apply to linux/master linus/master v5.8-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Oded-Gabbay/habanalabs-Replace-dma-fence-mechanism-with-completions/20200730-211536
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git 22362aa30bad6f03b5bcbbeee3cdc61950d40086
config: x86_64-randconfig-s022-20200801 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-14) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.2-115-g5fc204f2-dirty
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)

>> drivers/misc/habanalabs/common/command_submission.c:41:6: sparse: sparse: symbol 'hl_fence_release' was not declared. Should it be static?
>> drivers/misc/habanalabs/common/command_submission.c:101:6: sparse: sparse: symbol 'hl_fence_init' was not declared. Should it be static?

Please review and possibly fold the followup patch.

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 35016 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [RFC PATCH] habanalabs: hl_fence_release() can be static
  2020-07-30 13:13 [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions Oded Gabbay
                   ` (3 preceding siblings ...)
  2020-08-01 17:50 ` kernel test robot
@ 2020-08-01 17:50 ` kernel test robot
  4 siblings, 0 replies; 9+ messages in thread
From: kernel test robot @ 2020-08-01 17:50 UTC (permalink / raw)
  To: Oded Gabbay, linux-kernel, SW_Drivers
  Cc: kbuild-all, Ofir Bitton, Greg Kroah-Hartman, Daniel Vetter


Signed-off-by: kernel test robot <lkp@intel.com>
---
 command_submission.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index d19ac641b1719..e482d8d135602 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -38,7 +38,7 @@ void hl_sob_reset_error(struct kref *ref)
 			hw_sob->q_idx, hw_sob->sob_id);
 }
 
-void hl_fence_release(struct kref *kref)
+static void hl_fence_release(struct kref *kref)
 {
 	struct hl_fence *fence =
 		container_of(kref, struct hl_fence, refcount);
@@ -98,7 +98,7 @@ void hl_fence_get(struct hl_fence *fence)
 		kref_get(&fence->refcount);
 }
 
-void hl_fence_init(struct hl_fence *fence)
+static void hl_fence_init(struct hl_fence *fence)
 {
 	kref_init(&fence->refcount);
 	fence->error = 0;

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] habanalabs: add information about PCIe controller
  2020-07-30 13:13 ` [PATCH 2/2] habanalabs: add information about PCIe controller Oded Gabbay
  2020-07-31  0:17   ` kernel test robot
  2020-07-31  9:01   ` kernel test robot
@ 2020-08-01 19:23   ` kernel test robot
  2 siblings, 0 replies; 9+ messages in thread
From: kernel test robot @ 2020-08-01 19:23 UTC (permalink / raw)
  To: Oded Gabbay, linux-kernel, SW_Drivers; +Cc: kbuild-all, Ofir Bitton


[-- Attachment #1: Type: text/plain, Size: 3878 bytes --]

Hi Oded,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on char-misc/char-misc-testing]
[cannot apply to linux/master linus/master v5.8-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Oded-Gabbay/habanalabs-Replace-dma-fence-mechanism-with-completions/20200730-211536
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git 22362aa30bad6f03b5bcbbeee3cdc61950d40086
config: x86_64-randconfig-s022-20200801 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-14) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.2-115-g5fc204f2-dirty
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=x86_64 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


sparse warnings: (new ones prefixed by >>)

>> drivers/misc/habanalabs/common/firmware_if.c:368:19: sparse: sparse: incorrect type in assignment (different base types) @@     expected restricted __le32 [assigned] [usertype] index @@     got restricted __le64 [usertype] @@
>> drivers/misc/habanalabs/common/firmware_if.c:368:19: sparse:     expected restricted __le32 [assigned] [usertype] index
>> drivers/misc/habanalabs/common/firmware_if.c:368:19: sparse:     got restricted __le64 [usertype]
>> drivers/misc/habanalabs/common/firmware_if.c:379:19: sparse: sparse: incorrect type in assignment (different base types) @@     expected restricted __le32 [addressable] [assigned] [usertype] index @@     got restricted __le64 [usertype] @@
>> drivers/misc/habanalabs/common/firmware_if.c:379:19: sparse:     expected restricted __le32 [addressable] [assigned] [usertype] index
   drivers/misc/habanalabs/common/firmware_if.c:379:19: sparse:     got restricted __le64 [usertype]

vim +368 drivers/misc/habanalabs/common/firmware_if.c

   356	
   357	int hl_fw_armcp_pci_counters_get(struct hl_device *hdev,
   358			struct hl_info_pci_counters *counters)
   359	{
   360		struct armcp_packet pkt = {};
   361		long result;
   362		int rc;
   363	
   364		pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_THROUGHPUT_GET <<
   365				ARMCP_PKT_CTL_OPCODE_SHIFT);
   366	
   367		/* Fetch PCI rx counter */
 > 368		pkt.index = cpu_to_le64(armcp_pcie_throughput_rx);
   369		rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
   370						HL_ARMCP_INFO_TIMEOUT_USEC, &result);
   371		if (rc) {
   372			dev_err(hdev->dev,
   373				"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
   374			return rc;
   375		}
   376		counters->rx_throughput = result;
   377	
   378		/* Fetch PCI tx counter */
 > 379		pkt.index = cpu_to_le64(armcp_pcie_throughput_tx);
   380		rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
   381						HL_ARMCP_INFO_TIMEOUT_USEC, &result);
   382		if (rc) {
   383			dev_err(hdev->dev,
   384				"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
   385			return rc;
   386		}
   387		counters->tx_throughput = result;
   388	
   389		/* Fetch PCI replay counter */
   390		pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_REPLAY_CNT_GET <<
   391				ARMCP_PKT_CTL_OPCODE_SHIFT);
   392	
   393		rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
   394				HL_ARMCP_INFO_TIMEOUT_USEC, &result);
   395		if (rc) {
   396			dev_err(hdev->dev,
   397				"Failed to handle ArmCP PCI info pkt, error %d\n", rc);
   398			return rc;
   399		}
   400		counters->replay_cnt = (u32) result;
   401	
   402		return rc;
   403	}
   404	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 35016 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, back to index

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-30 13:13 [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions Oded Gabbay
2020-07-30 13:13 ` [PATCH 2/2] habanalabs: add information about PCIe controller Oded Gabbay
2020-07-31  0:17   ` kernel test robot
2020-07-31  9:01   ` kernel test robot
2020-08-01 19:23   ` kernel test robot
2020-07-30 19:33 ` [PATCH 1/2] habanalabs: Replace dma-fence mechanism with completions kernel test robot
2020-07-30 20:43 ` Daniel Vetter
2020-08-01 17:50 ` kernel test robot
2020-08-01 17:50 ` [RFC PATCH] habanalabs: hl_fence_release() can be static kernel test robot

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git