linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/7] habanalabs: remove rate limiters from GAUDI
@ 2020-06-16  6:13 Oded Gabbay
  2020-06-16  6:13 ` [PATCH 2/7] habanalabs: Use pending CS amount per ASIC Oded Gabbay
                   ` (6 more replies)
  0 siblings, 7 replies; 10+ messages in thread
From: Oded Gabbay @ 2020-06-16  6:13 UTC (permalink / raw)
  To: linux-kernel, SW_Drivers; +Cc: gregkh

We no longer need to initialize the rate limiters in GAUDI A1.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/gaudi/gaudi.c    | 126 +----------------------
 drivers/misc/habanalabs/habanalabs_drv.c |   1 -
 2 files changed, 1 insertion(+), 126 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 61f88e9884ce..4d69727bb53b 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1638,8 +1638,8 @@ static void gaudi_init_hbm_cred(struct hl_device *hdev)
 	uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
 
 	hbm0_wr = 0x33333333;
-	hbm1_wr = 0x33333333;
 	hbm0_rd = 0x77777777;
+	hbm1_wr = 0x55555555;
 	hbm1_rd = 0xDDDDDDDD;
 
 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
@@ -1689,125 +1689,6 @@ static void gaudi_init_hbm_cred(struct hl_device *hdev)
 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
 }
 
-static void gaudi_init_rate_limiter(struct hl_device *hdev)
-{
-	u32 nr, nf, od, sat, rst, timeout;
-	u64 freq;
-
-	nr = RREG32(mmPSOC_HBM_PLL_NR);
-	nf = RREG32(mmPSOC_HBM_PLL_NF);
-	od = RREG32(mmPSOC_HBM_PLL_OD);
-	freq = (50 * (nf + 1)) / ((nr + 1) * (od + 1));
-
-	dev_dbg(hdev->dev, "HBM frequency is %lluMHz\n", freq);
-
-	/* Configuration is for five (5) DDMA channels */
-	if (freq == 800) {
-		sat = 4;
-		rst = 11;
-		timeout = 15;
-	} else if (freq == 900) {
-		sat = 4;
-		rst = 15;
-		timeout = 16;
-	} else if (freq == 950) {
-		sat = 4;
-		rst = 15;
-		timeout = 15;
-	} else {
-		dev_warn(hdev->dev,
-			"unsupported HBM frequency %lluMHz, no rate-limiters\n",
-			freq);
-		return;
-	}
-
-	WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_0, 0x111);
-	WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_1, 0x111);
-	WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_0, 0x111);
-	WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_1, 0x111);
-	WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_0, 0x111);
-	WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_1, 0x111);
-	WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_0, 0x111);
-	WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_1, 0x111);
-
-	if (!hdev->rl_enable) {
-		dev_info(hdev->dev, "Rate limiters disabled\n");
-		return;
-	}
-
-	WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_SAT, sat);
-	WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_SAT, sat);
-	WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_SAT, sat);
-	WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_SAT, sat);
-	WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_SAT, sat);
-	WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_SAT, sat);
-	WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_SAT, sat);
-	WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_SAT, sat);
-
-	WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_RST, rst);
-	WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_RST, rst);
-	WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_RST, rst);
-	WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_RST, rst);
-	WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_RST, rst);
-	WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_RST, rst);
-	WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_RST, rst);
-	WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_RST, rst);
-
-	WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
-
-	WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_EN, 1);
-	WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_EN, 1);
-	WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_EN, 1);
-	WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_EN, 1);
-	WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_EN, 1);
-	WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_EN, 1);
-	WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_EN, 1);
-	WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_EN, 1);
-
-	WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_SAT, sat);
-	WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_SAT, sat);
-	WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_SAT, sat);
-	WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_SAT, sat);
-	WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_SAT, sat);
-	WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_SAT, sat);
-	WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_SAT, sat);
-	WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_SAT, sat);
-
-	WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_RST, rst);
-	WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_RST, rst);
-	WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_RST, rst);
-	WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_RST, rst);
-	WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_RST, rst);
-	WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_RST, rst);
-	WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_RST, rst);
-	WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_RST, rst);
-
-	WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
-	WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
-
-	WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_EN, 1);
-	WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_EN, 1);
-	WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_EN, 1);
-	WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_EN, 1);
-	WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_EN, 1);
-	WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_EN, 1);
-	WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_EN, 1);
-	WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_EN, 1);
-}
-
 static void gaudi_init_golden_registers(struct hl_device *hdev)
 {
 	u32 tpc_offset;
@@ -1817,8 +1698,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
 
 	gaudi_init_hbm_cred(hdev);
 
-	gaudi_init_rate_limiter(hdev);
-
 	gaudi_disable_clock_gating(hdev);
 
 	for (tpc_id = 0, tpc_offset = 0;
@@ -1839,9 +1718,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
-
-	/* WA for H3-2081 */
-	WREG32(mmPCIE_WRAP_MAX_OUTSTAND, 0x10ff);
 }
 
 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c
index 8652c7e5d7f1..f38664b03865 100644
--- a/drivers/misc/habanalabs/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/habanalabs_drv.c
@@ -238,7 +238,6 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
 	hdev->axi_drain = 0;
 	hdev->sram_scrambler_enable = 1;
 	hdev->dram_scrambler_enable = 1;
-	hdev->rl_enable = 1;
 	hdev->bmc_enable = 1;
 	hdev->hard_reset_on_fw_events = 1;
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/7] habanalabs: Use pending CS amount per ASIC
  2020-06-16  6:13 [PATCH 1/7] habanalabs: remove rate limiters from GAUDI Oded Gabbay
@ 2020-06-16  6:13 ` Oded Gabbay
  2020-06-16  6:13 ` [PATCH 3/7] habanalabs: sync stream generic functionality Oded Gabbay
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Oded Gabbay @ 2020-06-16  6:13 UTC (permalink / raw)
  To: linux-kernel, SW_Drivers; +Cc: gregkh, Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

Training schemes requires much more concurrent command submissions than
inference does. In addition, training command submissions can be completed
in a non serialized manner. Hence, we add support in which each ASIC will
be able to configure the amount of concurrent pending command submissions,
rather than use a predefined amount. This change will enhance performance
by allowing the user to add more concurrent work without waiting for the
previous work to be completed.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/command_submission.c |  6 ++++--
 drivers/misc/habanalabs/context.c            | 14 +++++++++++---
 drivers/misc/habanalabs/gaudi/gaudi.c        |  2 ++
 drivers/misc/habanalabs/gaudi/gaudiP.h       |  6 ++++++
 drivers/misc/habanalabs/goya/goya.c          |  2 ++
 drivers/misc/habanalabs/goya/goyaP.h         |  6 ++++++
 drivers/misc/habanalabs/habanalabs.h         |  9 +++++----
 drivers/misc/habanalabs/hw_queue.c           |  2 +-
 8 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index f82974a916c3..e156803f4a99 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -405,7 +405,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	spin_lock(&ctx->cs_lock);
 
 	cs_cmpl->cs_seq = ctx->cs_sequence;
-	other = ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)];
+	other = ctx->cs_pending[cs_cmpl->cs_seq &
+				(hdev->asic_prop.max_pending_cs - 1)];
 	if ((other) && (!dma_fence_is_signaled(other))) {
 		spin_unlock(&ctx->cs_lock);
 		dev_dbg(hdev->dev,
@@ -419,7 +420,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 
 	cs->sequence = cs_cmpl->cs_seq;
 
-	ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)] =
+	ctx->cs_pending[cs_cmpl->cs_seq &
+			(hdev->asic_prop.max_pending_cs - 1)] =
 							&cs_cmpl->base_fence;
 	ctx->cs_sequence++;
 
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
index ec92b3506b1f..1b96fefa4a65 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/context.c
@@ -22,9 +22,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 	 * to this function unless the ref count is 0
 	 */
 
-	for (i = 0 ; i < HL_MAX_PENDING_CS ; i++)
+	for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
 		dma_fence_put(ctx->cs_pending[i]);
 
+	kfree(ctx->cs_pending);
+
 	if (ctx->asid != HL_KERNEL_ASID_ID) {
 		/* The engines are stopped as there is no executing CS, but the
 		 * Coresight might be still working by accessing addresses
@@ -126,6 +128,11 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 	spin_lock_init(&ctx->cs_lock);
 	atomic_set(&ctx->thread_ctx_switch_token, 1);
 	ctx->thread_ctx_switch_wait_token = 0;
+	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
+				sizeof(struct dma_fence *),
+				GFP_KERNEL);
+	if (!ctx->cs_pending)
+		return -ENOMEM;
 
 	if (is_kernel_ctx) {
 		ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
@@ -170,6 +177,7 @@ int hl_ctx_put(struct hl_ctx *ctx)
 
 struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 {
+	struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
 	struct dma_fence *fence;
 
 	spin_lock(&ctx->cs_lock);
@@ -179,13 +187,13 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (seq + HL_MAX_PENDING_CS < ctx->cs_sequence) {
+	if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
 		spin_unlock(&ctx->cs_lock);
 		return NULL;
 	}
 
 	fence = dma_fence_get(
-			ctx->cs_pending[seq & (HL_MAX_PENDING_CS - 1)]);
+			ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
 	spin_unlock(&ctx->cs_lock);
 
 	return fence;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 4d69727bb53b..35e9080f6976 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -429,6 +429,8 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
 	strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
 					CARD_NAME_MAX_LEN);
 
+	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
+
 	return 0;
 }
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index a46530d375fa..76c3f840e05a 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -57,6 +57,12 @@
 
 #define GAUDI_DEFAULT_CARD_NAME		"HL2000"
 
+#define GAUDI_MAX_PENDING_CS		1024
+
+#if !IS_MAX_PENDING_CS_VALID(GAUDI_MAX_PENDING_CS)
+#error "GAUDI_MAX_PENDING_CS must be power of 2 and greater than 1"
+#endif
+
 #define PCI_DMA_NUMBER_OF_CHNLS		3
 #define HBM_DMA_NUMBER_OF_CHNLS		5
 #define DMA_NUMBER_OF_CHNLS		(PCI_DMA_NUMBER_OF_CHNLS + \
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 0d2952bb58df..e872099a3f7a 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -426,6 +426,8 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 
 	strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
 		CARD_NAME_MAX_LEN);
+
+	prop->max_pending_cs = GOYA_MAX_PENDING_CS;
 }
 
 /*
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index d36f8d90c9c9..9d8a1761252d 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -57,6 +57,12 @@
 
 #define GOYA_DEFAULT_CARD_NAME		"HL1000"
 
+#define GOYA_MAX_PENDING_CS		64
+
+#if !IS_MAX_PENDING_CS_VALID(GOYA_MAX_PENDING_CS)
+#error "GOYA_MAX_PENDING_CS must be power of 2 and greater than 1"
+#endif
+
 /* DRAM Memory Map */
 
 #define CPU_FW_IMAGE_SIZE		0x10000000	/* 256MB */
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 1ecdcf8b763a..64d9b2dd3e19 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -42,9 +42,6 @@
 
 #define HL_MAX_QUEUES			128
 
-/* MUST BE POWER OF 2 and larger than 1 */
-#define HL_MAX_PENDING_CS		64
-
 #define HL_IDLE_BUSY_TS_ARR_SIZE	4096
 
 /* Memory */
@@ -61,6 +58,9 @@
 
 #define HL_MAX_SOB_VAL			(1 << 15)
 
+#define IS_POWER_OF_2(n)		(n != 0 && ((n & (n - 1)) == 0))
+#define IS_MAX_PENDING_CS_VALID(n)	(IS_POWER_OF_2(n) && (n > 1))
+
 /**
  * struct pgt_info - MMU hop page info.
  * @node: hash linked-list node for the pgts shadow hash of pgts.
@@ -285,6 +285,7 @@ struct asic_fixed_properties {
 	u32				high_pll;
 	u32				cb_pool_cb_cnt;
 	u32				cb_pool_cb_size;
+	u32				max_pending_cs;
 	u8				tpc_enabled_mask;
 	u8				completion_queues_count;
 };
@@ -782,7 +783,7 @@ struct hl_ctx {
 	struct hl_fpriv		*hpriv;
 	struct hl_device	*hdev;
 	struct kref		refcount;
-	struct dma_fence	*cs_pending[HL_MAX_PENDING_CS];
+	struct dma_fence	**cs_pending;
 	struct hl_va_range	*host_va_range;
 	struct hl_va_range	*host_huge_va_range;
 	struct hl_va_range	*dram_va_range;
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index f4434b39ef1b..29b96d24edc2 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -376,7 +376,7 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
 	 * write address offset in the SM block (QMAN LBW message).
 	 * The write address offset is calculated as "COMP_OFFSET << 2".
 	 */
-	offset = job->cs->sequence & (HL_MAX_PENDING_CS - 1);
+	offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
 	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
 		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/7] habanalabs: sync stream generic functionality
  2020-06-16  6:13 [PATCH 1/7] habanalabs: remove rate limiters from GAUDI Oded Gabbay
  2020-06-16  6:13 ` [PATCH 2/7] habanalabs: Use pending CS amount per ASIC Oded Gabbay
@ 2020-06-16  6:13 ` Oded Gabbay
  2020-06-16  6:13 ` [PATCH 4/7] habanalabs: Use mask instead of shift in sync stream registers Oded Gabbay
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Oded Gabbay @ 2020-06-16  6:13 UTC (permalink / raw)
  To: linux-kernel, SW_Drivers; +Cc: gregkh, Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

Currently sync stream is limited only for external queues. We want to
remove this constraint by adding a new queue property dedicated for sync
stream. In addition we move the initialization and reset methods to the
common code since we can re-use them with slight changes.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/command_submission.c |  6 ++-
 drivers/misc/habanalabs/gaudi/gaudi.c        | 46 ++-----------------
 drivers/misc/habanalabs/gaudi/gaudiP.h       |  2 -
 drivers/misc/habanalabs/goya/goya.c          | 12 -----
 drivers/misc/habanalabs/habanalabs.h         | 19 ++++++--
 drivers/misc/habanalabs/hw_queue.c           | 48 ++++++++++++++++++--
 6 files changed, 67 insertions(+), 66 deletions(-)

diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index e156803f4a99..7b410bff72e0 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -727,6 +727,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	struct hl_cs_job *job;
 	struct hl_cs *cs;
 	struct hl_cb *cb;
+	enum hl_queue_type q_type;
 	u64 *signal_seq_arr = NULL, signal_seq;
 	u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
 	int rc;
@@ -759,9 +760,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 	chunk = &cs_chunk_array[0];
 	q_idx = chunk->queue_index;
 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
+	q_type = hw_queue_prop->type;
 
 	if ((q_idx >= HL_MAX_QUEUES) ||
-			(hw_queue_prop->type != QUEUE_TYPE_EXT)) {
+			(!hw_queue_prop->supports_sync_stream)) {
 		dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
 		rc = -EINVAL;
 		goto free_cs_chunk_array;
@@ -858,7 +860,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
 	*cs_seq = cs->sequence;
 
-	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
+	job = hl_cs_allocate_job(hdev, q_type, true);
 	if (!job) {
 		dev_err(hdev->dev, "Failed to allocate a new job\n");
 		rc = -ENOMEM;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 35e9080f6976..9ce032466243 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -345,10 +345,12 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
 			prop->hw_queues_props[i].driver_only = 0;
 			prop->hw_queues_props[i].requires_kernel_cb = 1;
+			prop->hw_queues_props[i].supports_sync_stream = 1;
 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
 			prop->hw_queues_props[i].driver_only = 1;
 			prop->hw_queues_props[i].requires_kernel_cb = 0;
+			prop->hw_queues_props[i].supports_sync_stream = 0;
 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
 			prop->hw_queues_props[i].driver_only = 0;
@@ -357,6 +359,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
 			prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
 			prop->hw_queues_props[i].driver_only = 0;
 			prop->hw_queues_props[i].requires_kernel_cb = 0;
+			prop->hw_queues_props[i].supports_sync_stream = 0;
 		}
 	}
 
@@ -364,7 +367,8 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
 		prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
 
 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
-
+	prop->sync_stream_first_sob = 0;
+	prop->sync_stream_first_mon = 0;
 	prop->dram_base_address = DRAM_PHYS_BASE;
 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
 	prop->dram_end_address = prop->dram_base_address +
@@ -6263,44 +6267,6 @@ static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
 	return gaudi_cq_assignment[cq_idx];
 }
 
-static void gaudi_ext_queue_init(struct hl_device *hdev, u32 q_idx)
-{
-	struct gaudi_device *gaudi = hdev->asic_specific;
-	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
-	struct hl_hw_sob *hw_sob;
-	int sob, ext_idx = gaudi->ext_queue_idx++;
-
-	/*
-	 * The external queues might not sit sequentially, hence use the
-	 * real external queue index for the SOB/MON base id.
-	 */
-	hw_queue->base_sob_id = ext_idx * HL_RSVD_SOBS;
-	hw_queue->base_mon_id = ext_idx * HL_RSVD_MONS;
-	hw_queue->next_sob_val = 1;
-	hw_queue->curr_sob_offset = 0;
-
-	for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
-		hw_sob = &hw_queue->hw_sob[sob];
-		hw_sob->hdev = hdev;
-		hw_sob->sob_id = hw_queue->base_sob_id + sob;
-		hw_sob->q_idx = q_idx;
-		kref_init(&hw_sob->kref);
-	}
-}
-
-static void gaudi_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
-{
-	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
-
-	/*
-	 * In case we got here due to a stuck CS, the refcnt might be bigger
-	 * than 1 and therefore we reset it.
-	 */
-	kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
-	hw_queue->curr_sob_offset = 0;
-	hw_queue->next_sob_val = 1;
-}
-
 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
 {
 	return sizeof(struct packet_msg_short) +
@@ -6603,8 +6569,6 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.read_device_fw_version = gaudi_read_device_fw_version,
 	.load_firmware_to_device = gaudi_load_firmware_to_device,
 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
-	.ext_queue_init = gaudi_ext_queue_init,
-	.ext_queue_reset = gaudi_ext_queue_reset,
 	.get_signal_cb_size = gaudi_get_signal_cb_size,
 	.get_wait_cb_size = gaudi_get_wait_cb_size,
 	.gen_signal_cb = gaudi_gen_signal_cb,
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 76c3f840e05a..8c654711d543 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -234,7 +234,6 @@ struct gaudi_internal_qman_info {
  *                      engine.
  * @multi_msi_mode: whether we are working in multi MSI single MSI mode.
  *                  Multi MSI is possible only with IOMMU enabled.
- * @ext_queue_idx: helper index for external queues initialization.
  */
 struct gaudi_device {
 	int (*armcp_info_get)(struct hl_device *hdev);
@@ -253,7 +252,6 @@ struct gaudi_device {
 	u32				events_stat_aggregate[GAUDI_EVENT_SIZE];
 	u32				hw_cap_initialized;
 	u8				multi_msi_mode;
-	u8				ext_queue_idx;
 };
 
 void gaudi_init_security(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index e872099a3f7a..547fd766667a 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5160,16 +5160,6 @@ u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
 	return cq_idx;
 }
 
-static void goya_ext_queue_init(struct hl_device *hdev, u32 q_idx)
-{
-
-}
-
-static void goya_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
-{
-
-}
-
 static u32 goya_get_signal_cb_size(struct hl_device *hdev)
 {
 	return 0;
@@ -5283,8 +5273,6 @@ static const struct hl_asic_funcs goya_funcs = {
 	.read_device_fw_version = goya_read_device_fw_version,
 	.load_firmware_to_device = goya_load_firmware_to_device,
 	.load_boot_fit_to_device = goya_load_boot_fit_to_device,
-	.ext_queue_init = goya_ext_queue_init,
-	.ext_queue_reset = goya_ext_queue_reset,
 	.get_signal_cb_size = goya_get_signal_cb_size,
 	.get_wait_cb_size = goya_get_wait_cb_size,
 	.gen_signal_cb = goya_gen_signal_cb,
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 64d9b2dd3e19..8cd4b55d0608 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -50,6 +50,10 @@
 /* MMU */
 #define MMU_HASH_TABLE_BITS		7 /* 1 << 7 buckets */
 
+/*
+ * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
+ * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
+ */
 #define HL_RSVD_SOBS			4
 #define HL_RSVD_MONS			2
 
@@ -141,11 +145,13 @@ struct hl_hw_sob {
  *               false otherwise.
  * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
  *                      queue, false otherwise (a CB address must be provided).
+ * @supports_sync_stream: True if queue supports sync stream
  */
 struct hw_queue_properties {
 	enum hl_queue_type	type;
 	u8			driver_only;
 	u8			requires_kernel_cb;
+	u8			supports_sync_stream;
 };
 
 /**
@@ -245,6 +251,9 @@ struct hl_mmu_properties {
  * @cb_pool_cb_cnt: number of CBs in the CB pool.
  * @cb_pool_cb_size: size of each CB in the CB pool.
  * @tpc_enabled_mask: which TPCs are enabled.
+ * @sync_stream_first_sob: first sync object available for sync stream use
+ * @sync_stream_first_mon: first monitor available for sync stream use
+ * @tpc_enabled_mask: which TPCs are enabled.
  * @completion_queues_count: number of completion queues.
  */
 struct asic_fixed_properties {
@@ -286,6 +295,8 @@ struct asic_fixed_properties {
 	u32				cb_pool_cb_cnt;
 	u32				cb_pool_cb_size;
 	u32				max_pending_cs;
+	u16				sync_stream_first_sob;
+	u16				sync_stream_first_mon;
 	u8				tpc_enabled_mask;
 	u8				completion_queues_count;
 };
@@ -423,6 +434,7 @@ struct hl_cs_job;
  *         exist).
  * @curr_sob_offset: the id offset to the currently used SOB from the
  *                   HL_RSVD_SOBS that are being used by this queue.
+ * @supports_sync_stream: True if queue supports sync stream
  */
 struct hl_hw_queue {
 	struct hl_hw_sob	hw_sob[HL_RSVD_SOBS];
@@ -441,6 +453,7 @@ struct hl_hw_queue {
 	u16			base_mon_id;
 	u8			valid;
 	u8			curr_sob_offset;
+	u8			supports_sync_stream;
 };
 
 /**
@@ -603,8 +616,6 @@ enum hl_pll_frequency {
  *                          contained in registers
  * @load_firmware_to_device: load the firmware to the device's memory
  * @load_boot_fit_to_device: load boot fit to device's memory
- * @ext_queue_init: Initialize the given external queue.
- * @ext_queue_reset: Reset the given external queue.
  * @get_signal_cb_size: Get signal CB size.
  * @get_wait_cb_size: Get wait CB size.
  * @gen_signal_cb: Generate a signal CB.
@@ -707,8 +718,6 @@ struct hl_asic_funcs {
 					enum hl_fw_component fwc);
 	int (*load_firmware_to_device)(struct hl_device *hdev);
 	int (*load_boot_fit_to_device)(struct hl_device *hdev);
-	void (*ext_queue_init)(struct hl_device *hdev, u32 hw_queue_id);
-	void (*ext_queue_reset)(struct hl_device *hdev, u32 hw_queue_id);
 	u32 (*get_signal_cb_size)(struct hl_device *hdev);
 	u32 (*get_wait_cb_size)(struct hl_device *hdev);
 	void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
@@ -1436,6 +1445,7 @@ struct hl_device_idle_busy_ts {
  * @cdev_sysfs_created: were char devices and sysfs nodes created.
  * @stop_on_err: true if engines should stop on error.
  * @supports_sync_stream: is sync stream supported.
+ * @sync_stream_queue_idx: helper index for sync stream queues initialization.
  * @supports_coresight: is CoreSight supported.
  * @supports_soft_reset: is soft reset supported.
  */
@@ -1523,6 +1533,7 @@ struct hl_device {
 	u8				cdev_sysfs_created;
 	u8				stop_on_err;
 	u8				supports_sync_stream;
+	u8				sync_stream_queue_idx;
 	u8				supports_coresight;
 	u8				supports_soft_reset;
 
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index 29b96d24edc2..a8fa8021c617 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -663,9 +663,6 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 	q->ci = 0;
 	q->pi = 0;
 
-	if (!is_cpu_queue)
-		hdev->asic_funcs->ext_queue_init(hdev, q->hw_queue_id);
-
 	return 0;
 
 free_queue:
@@ -732,6 +729,42 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 	return 0;
 }
 
+static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
+{
+	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_hw_sob *hw_sob;
+	int sob, queue_idx = hdev->sync_stream_queue_idx++;
+
+	hw_queue->base_sob_id =
+		prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
+	hw_queue->base_mon_id =
+		prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
+	hw_queue->next_sob_val = 1;
+	hw_queue->curr_sob_offset = 0;
+
+	for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
+		hw_sob = &hw_queue->hw_sob[sob];
+		hw_sob->hdev = hdev;
+		hw_sob->sob_id = hw_queue->base_sob_id + sob;
+		hw_sob->q_idx = q_idx;
+		kref_init(&hw_sob->kref);
+	}
+}
+
+void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
+{
+	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+
+	/*
+	 * In case we got here due to a stuck CS, the refcnt might be bigger
+	 * than 1 and therefore we reset it.
+	 */
+	kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
+	hw_queue->curr_sob_offset = 0;
+	hw_queue->next_sob_val = 1;
+}
+
 /*
  * queue_init - main initialization function for H/W queue object
  *
@@ -774,6 +807,9 @@ static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 		break;
 	}
 
+	if (q->supports_sync_stream)
+		sync_stream_queue_init(hdev, q->hw_queue_id);
+
 	if (rc)
 		return rc;
 
@@ -848,6 +884,8 @@ int hl_hw_queues_create(struct hl_device *hdev)
 			i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) {
 
 		q->queue_type = asic->hw_queues_props[i].type;
+		q->supports_sync_stream =
+				asic->hw_queues_props[i].supports_sync_stream;
 		rc = queue_init(hdev, q, i);
 		if (rc) {
 			dev_err(hdev->dev,
@@ -889,7 +927,7 @@ void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
 			continue;
 		q->pi = q->ci = 0;
 
-		if (q->queue_type == QUEUE_TYPE_EXT)
-			hdev->asic_funcs->ext_queue_reset(hdev, q->hw_queue_id);
+		if (q->supports_sync_stream)
+			sync_stream_queue_reset(hdev, q->hw_queue_id);
 	}
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/7] habanalabs: Use mask instead of shift in sync stream registers
  2020-06-16  6:13 [PATCH 1/7] habanalabs: remove rate limiters from GAUDI Oded Gabbay
  2020-06-16  6:13 ` [PATCH 2/7] habanalabs: Use pending CS amount per ASIC Oded Gabbay
  2020-06-16  6:13 ` [PATCH 3/7] habanalabs: sync stream generic functionality Oded Gabbay
@ 2020-06-16  6:13 ` Oded Gabbay
  2020-06-16  6:13 ` [PATCH 5/7] uapi/habanalabs: fix some comments Oded Gabbay
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Oded Gabbay @ 2020-06-16  6:13 UTC (permalink / raw)
  To: linux-kernel, SW_Drivers; +Cc: gregkh, Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

Use proper bitfield masks instead of shifting values when configuring
packets sent to device.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/gaudi/gaudi.c         | 70 ++++++++++---------
 .../habanalabs/include/gaudi/gaudi_packets.h  |  4 +-
 2 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 9ce032466243..1ac7eb2498ef 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -21,6 +21,7 @@
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/iommu.h>
 #include <linux/seq_file.h>
+#include <linux/bitfield.h>
 
 /*
  * Gaudi security scheme:
@@ -6289,16 +6290,17 @@ static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
 	pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
 	memset(pkt, 0, sizeof(*pkt));
 
-	value = 1 << GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_SHIFT; /* inc by 1 */
-	value |= 1 << GAUDI_PKT_SHORT_VAL_SOB_MOD_SHIFT; /* add mode */
+	/* Inc by 1, Mode ADD */
+	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
+	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
 
-	ctl = (sob_id * 4) << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT; /* SOB id */
-	ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
-	ctl |= 3 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S SOB base */
-	ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
-	ctl |= 1 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
-	ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
-	ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
+	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
 
 	pkt->value = cpu_to_le32(value);
 	pkt->ctl = cpu_to_le32(ctl);
@@ -6311,12 +6313,12 @@ static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
 
 	memset(pkt, 0, pkt_size);
 
-	ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
-	ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
-	ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
-	ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
-	ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
-	ctl |= 0 << GAUDI_PKT_SHORT_CTL_MB_SHIFT; /* only last pkt needs MB */
+	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
 
 	pkt->value = cpu_to_le32(value);
 	pkt->ctl = cpu_to_le32(ctl);
@@ -6332,18 +6334,19 @@ static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
 
 	memset(pkt, 0, pkt_size);
 
-	value = (sob_id / 8) << GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_SHIFT;
-	value |= sob_val << GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_SHIFT;
-	value |= 0 << GAUDI_PKT_SHORT_VAL_MON_MODE_SHIFT; /* GREATER_OR_EQUAL */
-	value |= mask << GAUDI_PKT_SHORT_VAL_MON_MASK_SHIFT;
+	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
+	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
+	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
+			0); /* GREATER OR EQUAL*/
+	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
 
-	ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
-	ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
-	ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
-	ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
-	ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
-	ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
-	ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
+	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
 
 	pkt->value = cpu_to_le32(value);
 	pkt->ctl = cpu_to_le32(ctl);
@@ -6357,15 +6360,14 @@ static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
 
 	memset(pkt, 0, pkt_size);
 
-	cfg = 1 << GAUDI_PKT_FENCE_CFG_DEC_VAL_SHIFT;
-	cfg |= 1 << GAUDI_PKT_FENCE_CFG_TARGET_VAL_SHIFT;
-	cfg |= 2 << GAUDI_PKT_FENCE_CFG_ID_SHIFT;
+	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
+	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
+	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
 
-	ctl = 0 << GAUDI_PKT_FENCE_CTL_PRED_SHIFT;
-	ctl |= PACKET_FENCE << GAUDI_PKT_FENCE_CTL_OPCODE_SHIFT;
-	ctl |= 0 << GAUDI_PKT_FENCE_CTL_EB_SHIFT;
-	ctl |= 1 << GAUDI_PKT_FENCE_CTL_RB_SHIFT;
-	ctl |= 1 << GAUDI_PKT_FENCE_CTL_MB_SHIFT;
+	ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
 
 	pkt->cfg = cpu_to_le32(cfg);
 	pkt->ctl = cpu_to_le32(ctl);
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h
index 9a5800b0086b..02d62bc53f7f 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_packets.h
@@ -85,7 +85,7 @@ struct packet_msg_long {
 };
 
 #define GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_SHIFT	0
-#define GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK	0x0000EFFF
+#define GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK	0x00007FFF
 
 #define GAUDI_PKT_SHORT_VAL_SOB_MOD_SHIFT	31
 #define GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK	0x80000000
@@ -141,7 +141,7 @@ struct packet_msg_prot {
 #define GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK	0x00FF0000
 
 #define GAUDI_PKT_FENCE_CFG_ID_SHIFT		30
-#define GAUDI_PKT_FENCE_CFG_ID_MASK		0xC000000
+#define GAUDI_PKT_FENCE_CFG_ID_MASK		0xC0000000
 
 #define GAUDI_PKT_FENCE_CTL_PRED_SHIFT		0
 #define GAUDI_PKT_FENCE_CTL_PRED_MASK		0x0000001F
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 5/7] uapi/habanalabs: fix some comments
  2020-06-16  6:13 [PATCH 1/7] habanalabs: remove rate limiters from GAUDI Oded Gabbay
                   ` (2 preceding siblings ...)
  2020-06-16  6:13 ` [PATCH 4/7] habanalabs: Use mask instead of shift in sync stream registers Oded Gabbay
@ 2020-06-16  6:13 ` Oded Gabbay
  2020-06-18 10:53   ` Omer Shpigelman
  2020-06-16  6:13 ` [PATCH 6/7] habanalabs: align armcp_packet structure to 8 bytes Oded Gabbay
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 10+ messages in thread
From: Oded Gabbay @ 2020-06-16  6:13 UTC (permalink / raw)
  To: linux-kernel, SW_Drivers; +Cc: gregkh

MAP/UNMAP are done also for device memory.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 include/uapi/misc/habanalabs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index f6267a8d7416..f218d1c62c62 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -530,13 +530,13 @@ union hl_wait_cs_args {
 	struct hl_wait_cs_out out;
 };
 
-/* Opcode to alloc device memory */
+/* Opcode to allocate device memory */
 #define HL_MEM_OP_ALLOC			0
 /* Opcode to free previously allocated device memory */
 #define HL_MEM_OP_FREE			1
-/* Opcode to map host memory */
+/* Opcode to map host and device memory */
 #define HL_MEM_OP_MAP			2
-/* Opcode to unmap previously mapped host memory */
+/* Opcode to unmap previously mapped host and device memory */
 #define HL_MEM_OP_UNMAP			3
 
 /* Memory flags */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 6/7] habanalabs: align armcp_packet structure to 8 bytes
  2020-06-16  6:13 [PATCH 1/7] habanalabs: remove rate limiters from GAUDI Oded Gabbay
                   ` (3 preceding siblings ...)
  2020-06-16  6:13 ` [PATCH 5/7] uapi/habanalabs: fix some comments Oded Gabbay
@ 2020-06-16  6:13 ` Oded Gabbay
  2020-06-18 10:58   ` Omer Shpigelman
  2020-06-16  6:13 ` [PATCH 7/7] habanalabs: calculate trace frequency from PLL Oded Gabbay
  2020-06-18 10:50 ` [PATCH 1/7] habanalabs: remove rate limiters from GAUDI Omer Shpigelman
  6 siblings, 1 reply; 10+ messages in thread
From: Oded Gabbay @ 2020-06-16  6:13 UTC (permalink / raw)
  To: linux-kernel, SW_Drivers; +Cc: gregkh

Once there is a 64-bit field in a structure, GCC compiler for ARM aligns
the structure to 8 bytes. In order to avoid confusion when these
structures are being passed between CPUs from different architectures, we
explicitly align the structure to 8 bytes.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/include/armcp_if.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h
index a34fc39ad87e..dea7c90faafa 100644
--- a/drivers/misc/habanalabs/include/armcp_if.h
+++ b/drivers/misc/habanalabs/include/armcp_if.h
@@ -276,6 +276,8 @@ struct armcp_packet {
 		/* For get Armcp info/EEPROM data */
 		__le32 data_max_size;
 	};
+
+	__le32 reserved;
 };
 
 struct armcp_unmask_irq_arr_packet {
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 7/7] habanalabs: calculate trace frequency from PLL
  2020-06-16  6:13 [PATCH 1/7] habanalabs: remove rate limiters from GAUDI Oded Gabbay
                   ` (4 preceding siblings ...)
  2020-06-16  6:13 ` [PATCH 6/7] habanalabs: align armcp_packet structure to 8 bytes Oded Gabbay
@ 2020-06-16  6:13 ` Oded Gabbay
  2020-06-18 10:50 ` [PATCH 1/7] habanalabs: remove rate limiters from GAUDI Omer Shpigelman
  6 siblings, 0 replies; 10+ messages in thread
From: Oded Gabbay @ 2020-06-16  6:13 UTC (permalink / raw)
  To: linux-kernel, SW_Drivers; +Cc: gregkh, Adam Aharon

From: Adam Aharon <aaharon@habana.ai>

The profiler needs to know the PLL values for correctly showing the
profiling data. Because our firmware can use different PLL configurations,
we need to read the PLL values from the ASIC to pass them to the profiler.

Signed-off-by: Adam Aharon <aaharon@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/gaudi/gaudi.c         |  33 ++++-
 .../misc/habanalabs/gaudi/gaudi_coresight.c   |   6 +-
 drivers/misc/habanalabs/goya/goya.c           |  33 ++++-
 drivers/misc/habanalabs/goya/goya_coresight.c |   6 +-
 drivers/misc/habanalabs/habanalabs.h          |  11 ++
 .../include/gaudi/asic_reg/gaudi_regs.h       |   1 +
 .../gaudi/asic_reg/psoc_cpu_pll_regs.h        | 114 ++++++++++++++++++
 7 files changed, 194 insertions(+), 10 deletions(-)
 create mode 100644 drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_cpu_pll_regs.h

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 1ac7eb2498ef..1bde7e6eeb9b 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -555,11 +555,36 @@ static int gaudi_early_fini(struct hl_device *hdev)
 static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u32 trace_freq = 0;
+	u32 pll_clk = 0;
+	u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
+	u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
+	u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
+	u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
+	u32 od = RREG32(mmPSOC_CPU_PLL_OD);
+
+	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
+		if (div_sel == DIV_SEL_REF_CLK)
+			trace_freq = PLL_REF_CLK;
+		else
+			trace_freq = PLL_REF_CLK / (div_fctr + 1);
+	} else if (div_sel == DIV_SEL_PLL_CLK ||
+					div_sel == DIV_SEL_DIVIDED_PLL) {
+		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
+		if (div_sel == DIV_SEL_PLL_CLK)
+			trace_freq = pll_clk;
+		else
+			trace_freq = pll_clk / (div_fctr + 1);
+	} else {
+		dev_warn(hdev->dev,
+			"Received invalid div select value: %d", div_sel);
+	}
 
-	prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
-	prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
-	prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
-	prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+	prop->psoc_timestamp_frequency = trace_freq;
+	prop->psoc_pci_pll_nr = nr;
+	prop->psoc_pci_pll_nf = nf;
+	prop->psoc_pci_pll_od = od;
+	prop->psoc_pci_pll_div_factor = div_fctr;
 }
 
 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
index bf0e062d7b87..c32322cb1728 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
@@ -392,6 +392,7 @@ static int gaudi_config_stm(struct hl_device *hdev,
 {
 	struct hl_debug_params_stm *input;
 	u64 base_reg;
+	u32 frequency;
 	int rc;
 
 	if (params->reg_idx >= ARRAY_SIZE(debug_stm_regs)) {
@@ -420,7 +421,10 @@ static int gaudi_config_stm(struct hl_device *hdev,
 		WREG32(base_reg + 0xE00, lower_32_bits(input->sp_mask));
 		WREG32(base_reg + 0xEF4, input->id);
 		WREG32(base_reg + 0xDF4, 0x80);
-		WREG32(base_reg + 0xE8C, input->frequency);
+		frequency = hdev->asic_prop.psoc_timestamp_frequency;
+		if (frequency == 0)
+			frequency = input->frequency;
+		WREG32(base_reg + 0xE8C, frequency);
 		WREG32(base_reg + 0xE90, 0x7FF);
 
 		/* SW-2176 - SW WA for HW bug */
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 547fd766667a..6816f6217ee1 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -594,11 +594,36 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
 static void goya_fetch_psoc_frequency(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u32 trace_freq = 0;
+	u32 pll_clk = 0;
+	u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+	u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
+	u32 nr = RREG32(mmPSOC_PCI_PLL_NR);
+	u32 nf = RREG32(mmPSOC_PCI_PLL_NF);
+	u32 od = RREG32(mmPSOC_PCI_PLL_OD);
+
+	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
+		if (div_sel == DIV_SEL_REF_CLK)
+			trace_freq = PLL_REF_CLK;
+		else
+			trace_freq = PLL_REF_CLK / (div_fctr + 1);
+	} else if (div_sel == DIV_SEL_PLL_CLK ||
+					div_sel == DIV_SEL_DIVIDED_PLL) {
+		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
+		if (div_sel == DIV_SEL_PLL_CLK)
+			trace_freq = pll_clk;
+		else
+			trace_freq = pll_clk / (div_fctr + 1);
+	} else {
+		dev_warn(hdev->dev,
+			"Received invalid div select value: %d", div_sel);
+	}
 
-	prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
-	prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
-	prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
-	prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
+	prop->psoc_timestamp_frequency = trace_freq;
+	prop->psoc_pci_pll_nr = nr;
+	prop->psoc_pci_pll_nf = nf;
+	prop->psoc_pci_pll_od = od;
+	prop->psoc_pci_pll_div_factor = div_fctr;
 }
 
 int goya_late_init(struct hl_device *hdev)
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
index 1258724ea510..472246e3f4cd 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -232,6 +232,7 @@ static int goya_config_stm(struct hl_device *hdev,
 {
 	struct hl_debug_params_stm *input;
 	u64 base_reg;
+	u32 frequency;
 	int rc;
 
 	if (params->reg_idx >= ARRAY_SIZE(debug_stm_regs)) {
@@ -264,7 +265,10 @@ static int goya_config_stm(struct hl_device *hdev,
 		WREG32(base_reg + 0xE20, 0xFFFFFFFF);
 		WREG32(base_reg + 0xEF4, input->id);
 		WREG32(base_reg + 0xDF4, 0x80);
-		WREG32(base_reg + 0xE8C, input->frequency);
+		frequency = hdev->asic_prop.psoc_timestamp_frequency;
+		if (frequency == 0)
+			frequency = input->frequency;
+		WREG32(base_reg + 0xE8C, frequency);
 		WREG32(base_reg + 0xE90, 0x7FF);
 		WREG32(base_reg + 0xE80, 0x27 | (input->id << 16));
 	} else {
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 8cd4b55d0608..4e68a41cce77 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -247,6 +247,7 @@ struct hl_mmu_properties {
  * @psoc_pci_pll_nf: PCI PLL NF value.
  * @psoc_pci_pll_od: PCI PLL OD value.
  * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
+ * @psoc_timestamp_frequency: frequency of the psoc timestamp clock.
  * @high_pll: high PLL frequency used by the device.
  * @cb_pool_cb_cnt: number of CBs in the CB pool.
  * @cb_pool_cb_size: size of each CB in the CB pool.
@@ -291,6 +292,7 @@ struct asic_fixed_properties {
 	u32				psoc_pci_pll_nf;
 	u32				psoc_pci_pll_od;
 	u32				psoc_pci_pll_div_factor;
+	u32				psoc_timestamp_frequency;
 	u32				high_pll;
 	u32				cb_pool_cb_cnt;
 	u32				cb_pool_cb_size;
@@ -533,6 +535,15 @@ enum hl_pll_frequency {
 	PLL_LAST
 };
 
+#define PLL_REF_CLK 50
+
+enum div_select_defs {
+	DIV_SEL_REF_CLK = 0,
+	DIV_SEL_PLL_CLK = 1,
+	DIV_SEL_DIVIDED_REF = 2,
+	DIV_SEL_DIVIDED_PLL = 3,
+};
+
 /**
  * struct hl_asic_funcs - ASIC specific functions that are can be called from
  *                        common code.
diff --git a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
index 85e3b5148595..62078077aee5 100644
--- a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
+++ b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
@@ -91,6 +91,7 @@
 
 #include "psoc_pci_pll_regs.h"
 #include "psoc_hbm_pll_regs.h"
+#include "psoc_cpu_pll_regs.h"
 
 #define GAUDI_ECC_MEM_SEL_OFFSET	0xF18
 #define GAUDI_ECC_ADDRESS_OFFSET	0xF1C
diff --git a/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_cpu_pll_regs.h b/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_cpu_pll_regs.h
new file mode 100644
index 000000000000..2585c70f59ef
--- /dev/null
+++ b/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_cpu_pll_regs.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+/************************************
+ ** This is an auto-generated file **
+ **       DO NOT EDIT BELOW        **
+ ************************************/
+
+#ifndef ASIC_REG_PSOC_CPU_PLL_REGS_H_
+#define ASIC_REG_PSOC_CPU_PLL_REGS_H_
+
+/*
+ *****************************************
+ *   PSOC_CPU_PLL (Prototype: PLL)
+ *****************************************
+ */
+
+#define mmPSOC_CPU_PLL_NR                                            0xC70100
+
+#define mmPSOC_CPU_PLL_NF                                            0xC70104
+
+#define mmPSOC_CPU_PLL_OD                                            0xC70108
+
+#define mmPSOC_CPU_PLL_NB                                            0xC7010C
+
+#define mmPSOC_CPU_PLL_CFG                                           0xC70110
+
+#define mmPSOC_CPU_PLL_LOSE_MASK                                     0xC70120
+
+#define mmPSOC_CPU_PLL_LOCK_INTR                                     0xC70128
+
+#define mmPSOC_CPU_PLL_LOCK_BYPASS                                   0xC7012C
+
+#define mmPSOC_CPU_PLL_DATA_CHNG                                     0xC70130
+
+#define mmPSOC_CPU_PLL_RST                                           0xC70134
+
+#define mmPSOC_CPU_PLL_SLIP_WD_CNTR                                  0xC70150
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_0                                  0xC70200
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_1                                  0xC70204
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_2                                  0xC70208
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_3                                  0xC7020C
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_CMD_0                              0xC70220
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_CMD_1                              0xC70224
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_CMD_2                              0xC70228
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_CMD_3                              0xC7022C
+
+#define mmPSOC_CPU_PLL_DIV_SEL_0                                     0xC70280
+
+#define mmPSOC_CPU_PLL_DIV_SEL_1                                     0xC70284
+
+#define mmPSOC_CPU_PLL_DIV_SEL_2                                     0xC70288
+
+#define mmPSOC_CPU_PLL_DIV_SEL_3                                     0xC7028C
+
+#define mmPSOC_CPU_PLL_DIV_EN_0                                      0xC702A0
+
+#define mmPSOC_CPU_PLL_DIV_EN_1                                      0xC702A4
+
+#define mmPSOC_CPU_PLL_DIV_EN_2                                      0xC702A8
+
+#define mmPSOC_CPU_PLL_DIV_EN_3                                      0xC702AC
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_BUSY_0                             0xC702C0
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_BUSY_1                             0xC702C4
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_BUSY_2                             0xC702C8
+
+#define mmPSOC_CPU_PLL_DIV_FACTOR_BUSY_3                             0xC702CC
+
+#define mmPSOC_CPU_PLL_CLK_GATER                                     0xC70300
+
+#define mmPSOC_CPU_PLL_CLK_RLX_0                                     0xC70310
+
+#define mmPSOC_CPU_PLL_CLK_RLX_1                                     0xC70314
+
+#define mmPSOC_CPU_PLL_CLK_RLX_2                                     0xC70318
+
+#define mmPSOC_CPU_PLL_CLK_RLX_3                                     0xC7031C
+
+#define mmPSOC_CPU_PLL_REF_CNTR_PERIOD                               0xC70400
+
+#define mmPSOC_CPU_PLL_REF_LOW_THRESHOLD                             0xC70410
+
+#define mmPSOC_CPU_PLL_REF_HIGH_THRESHOLD                            0xC70420
+
+#define mmPSOC_CPU_PLL_PLL_NOT_STABLE                                0xC70430
+
+#define mmPSOC_CPU_PLL_FREQ_CALC_EN                                  0xC70440
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_CFG                                0xC70500
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_0                                  0xC70510
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_1                                  0xC70514
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_2                                  0xC70518
+
+#define mmPSOC_CPU_PLL_RLX_BITMAP_3                                  0xC7051C
+
+#endif /* ASIC_REG_PSOC_CPU_PLL_REGS_H_ */
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* RE: [PATCH 1/7] habanalabs: remove rate limiters from GAUDI
  2020-06-16  6:13 [PATCH 1/7] habanalabs: remove rate limiters from GAUDI Oded Gabbay
                   ` (5 preceding siblings ...)
  2020-06-16  6:13 ` [PATCH 7/7] habanalabs: calculate trace frequency from PLL Oded Gabbay
@ 2020-06-18 10:50 ` Omer Shpigelman
  6 siblings, 0 replies; 10+ messages in thread
From: Omer Shpigelman @ 2020-06-18 10:50 UTC (permalink / raw)
  To: Oded Gabbay, linux-kernel, SW_Drivers; +Cc: gregkh

On Tue, Jun 16, 2020 at 9:13 AM, Oded Gabbay <oded.gabbay@gmail.com> wrote:
> We no longer need to initialize the rate limiters in GAUDI A1.
> 
> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH 5/7] uapi/habanalabs: fix some comments
  2020-06-16  6:13 ` [PATCH 5/7] uapi/habanalabs: fix some comments Oded Gabbay
@ 2020-06-18 10:53   ` Omer Shpigelman
  0 siblings, 0 replies; 10+ messages in thread
From: Omer Shpigelman @ 2020-06-18 10:53 UTC (permalink / raw)
  To: Oded Gabbay, linux-kernel, SW_Drivers; +Cc: gregkh

On Tue, Jun 16, 2020 at 09:13 AM, Oded Gabbay <oded.gabbay@gmail.com> wrote:
> MAP/UNMAP are done also for device memory.
> 
> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH 6/7] habanalabs: align armcp_packet structure to 8 bytes
  2020-06-16  6:13 ` [PATCH 6/7] habanalabs: align armcp_packet structure to 8 bytes Oded Gabbay
@ 2020-06-18 10:58   ` Omer Shpigelman
  0 siblings, 0 replies; 10+ messages in thread
From: Omer Shpigelman @ 2020-06-18 10:58 UTC (permalink / raw)
  To: Oded Gabbay, linux-kernel, SW_Drivers; +Cc: gregkh

On Tue, Jun 13, 2020 at 09:13 AM, Oded Gabbay <oded.gabbay@gmail.com> wrote:
> Once there is a 64-bit field in a structure, GCC compiler for ARM aligns the
> structure to 8 bytes. In order to avoid confusion when these structures are
> being passed between CPUs from different architectures, we explicitly align
> the structure to 8 bytes.
> 
> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-06-18 10:58 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-16  6:13 [PATCH 1/7] habanalabs: remove rate limiters from GAUDI Oded Gabbay
2020-06-16  6:13 ` [PATCH 2/7] habanalabs: Use pending CS amount per ASIC Oded Gabbay
2020-06-16  6:13 ` [PATCH 3/7] habanalabs: sync stream generic functionality Oded Gabbay
2020-06-16  6:13 ` [PATCH 4/7] habanalabs: Use mask instead of shift in sync stream registers Oded Gabbay
2020-06-16  6:13 ` [PATCH 5/7] uapi/habanalabs: fix some comments Oded Gabbay
2020-06-18 10:53   ` Omer Shpigelman
2020-06-16  6:13 ` [PATCH 6/7] habanalabs: align armcp_packet structure to 8 bytes Oded Gabbay
2020-06-18 10:58   ` Omer Shpigelman
2020-06-16  6:13 ` [PATCH 7/7] habanalabs: calculate trace frequency from PLL Oded Gabbay
2020-06-18 10:50 ` [PATCH 1/7] habanalabs: remove rate limiters from GAUDI Omer Shpigelman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).