linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] habanalabs: fetch pll frequency from firmware
@ 2020-11-18 13:50 Oded Gabbay
  2020-11-18 13:50 ` [PATCH] habanalabs: mmu map wrapper for sizes larger than a page Oded Gabbay
  2020-11-18 13:50 ` [PATCH] habanalabs: print CS type when it is stuck Oded Gabbay
  0 siblings, 2 replies; 4+ messages in thread
From: Oded Gabbay @ 2020-11-18 13:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers, Alon Mizrahi

From: Alon Mizrahi <amizrahi@habana.ai>

Once firmware security is enabled, driver must fetch pll frequencies
through the firmware message interface instead of reading the registers
directly.

Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c  |  24 ++--
 drivers/misc/habanalabs/common/habanalabs.h   |   9 +-
 .../misc/habanalabs/common/habanalabs_ioctl.c |  22 +++
 drivers/misc/habanalabs/gaudi/gaudi.c         | 135 ++++++++++++------
 drivers/misc/habanalabs/gaudi/gaudiP.h        |   8 ++
 drivers/misc/habanalabs/goya/goya.c           |   2 +-
 .../misc/habanalabs/include/common/cpucp_if.h |  40 +++++-
 .../habanalabs/include/common/hl_boot_if.h    |   4 +
 .../include/gaudi/asic_reg/gaudi_regs.h       |  14 +-
 .../gaudi/asic_reg/psoc_hbm_pll_regs.h        | 114 ---------------
 .../gaudi/asic_reg/psoc_pci_pll_regs.h        | 114 ---------------
 include/uapi/misc/habanalabs.h                |   9 ++
 12 files changed, 199 insertions(+), 296 deletions(-)
 delete mode 100644 drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_hbm_pll_regs.h
 delete mode 100644 drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_pci_pll_regs.h

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 8f70d0bbe5e1..c4a8d6ca34bb 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -279,7 +279,8 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
 	return rc;
 }
 
-int hl_fw_cpucp_info_get(struct hl_device *hdev)
+int hl_fw_cpucp_info_get(struct hl_device *hdev,
+			u32 cpu_security_boot_status_reg)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	struct cpucp_packet pkt = {};
@@ -324,6 +325,11 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev)
 		goto out;
 	}
 
+	/* Read FW application security bits again */
+	if (hdev->asic_prop.fw_security_status_valid)
+		hdev->asic_prop.fw_app_security_map =
+				RREG32(cpu_security_boot_status_reg);
+
 out:
 	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
 			sizeof(struct cpucp_info), cpucp_info_cpu_addr);
@@ -446,10 +452,8 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
 	return rc;
 }
 
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev,
-		enum cpucp_pll_type_attributes pll_type,
-		enum cpucp_pll_reg_attributes pll_reg,
-		u32 *pll_info)
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
+		u16 *pll_freq_arr)
 {
 	struct cpucp_packet pkt;
 	u64 result;
@@ -457,17 +461,19 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev,
 
 	memset(&pkt, 0, sizeof(pkt));
 
-	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_REG_GET <<
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
 				CPUCP_PKT_CTL_OPCODE_SHIFT);
-	pkt.pll_type = __cpu_to_le16(pll_type);
-	pkt.pll_reg = __cpu_to_le16(pll_reg);
+	pkt.pll_type = __cpu_to_le16(pll_index);
 
 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
 			HL_CPUCP_INFO_TIMEOUT_USEC, &result);
 	if (rc)
 		dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
 
-	*pll_info = (u32) result;
+	pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
+	pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
+	pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
+	pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);
 
 	return rc;
 }
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index e1db8301ecbd..9c9c8b24c47a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2191,16 +2191,15 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
 void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 					void *vaddr);
 int hl_fw_send_heartbeat(struct hl_device *hdev);
-int hl_fw_cpucp_info_get(struct hl_device *hdev);
+int hl_fw_cpucp_info_get(struct hl_device *hdev,
+			u32 cpu_security_boot_status_reg);
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
 		struct hl_info_pci_counters *counters);
 int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
 			u64 *total_energy);
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev,
-		enum cpucp_pll_type_attributes pll_type,
-		enum cpucp_pll_reg_attributes pll_reg,
-		u32 *pll_info);
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
+		u16 *pll_freq_arr);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
 			u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index ba8217fc9425..32e6af1db4e3 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -403,6 +403,25 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv,
 		min((size_t) max_size, sizeof(total_energy))) ? -EFAULT : 0;
 }
 
+static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_pll_frequency_info freq_info = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	rc = hl_fw_cpucp_pll_info_get(hdev, args->pll_index, freq_info.output);
+	if (rc)
+		return rc;
+
+	return copy_to_user(out, &freq_info,
+		min((size_t) max_size, sizeof(freq_info))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -480,6 +499,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_TOTAL_ENERGY:
 		return total_energy_consumption_info(hpriv, args);
 
+	case HL_INFO_PLL_FREQUENCY:
+		return pll_frequency_info(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index c44a883d9787..4d1d57f9cd74 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -103,6 +103,8 @@
 
 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
 
+#define GAUDI_PLL_MAX 10
+
 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
@@ -149,6 +151,19 @@ static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
 };
 
+static const u32 gaudi_pll_base_addresses[GAUDI_PLL_MAX] = {
+	[CPU_PLL] = mmPSOC_CPU_PLL_NR,
+	[PCI_PLL] = mmPSOC_PCI_PLL_NR,
+	[SRAM_PLL] = mmSRAM_W_PLL_NR,
+	[HBM_PLL] = mmPSOC_HBM_PLL_NR,
+	[NIC_PLL] = mmNIC0_PLL_NR,
+	[DMA_PLL] = mmDMA_W_PLL_NR,
+	[MESH_PLL] = mmMESH_W_PLL_NR,
+	[MME_PLL] = mmPSOC_MME_PLL_NR,
+	[TPC_PLL] = mmPSOC_TPC_PLL_NR,
+	[IF_PLL] = mmIF_W_PLL_NR
+};
+
 static inline bool validate_packet_id(enum packet_id id)
 {
 	switch (id) {
@@ -688,61 +703,93 @@ static int gaudi_early_fini(struct hl_device *hdev)
 }
 
 /**
- * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
+ * gaudi_fetch_pll_frequency - Fetch PLL frequency values
  *
  * @hdev: pointer to hl_device structure
+ * @pll_index: index of the pll to fetch frequency from
+ * @pll_freq: pointer to store the pll frequency in MHz in each of the available
+ *            outputs. if a certain output is not available a 0 will be set
  *
  */
-static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
+static int gaudi_fetch_pll_frequency(struct hl_device *hdev,
+				enum gaudi_pll_index pll_index,
+				u16 *pll_freq_arr)
 {
-	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u32 trace_freq = 0, pll_clk = 0;
-	u32 div_fctr, div_sel, nr, nf, od;
-	int rc;
+	u32 nr = 0, nf = 0, od = 0, pll_clk = 0, div_fctr, div_sel,
+			pll_base_addr = gaudi_pll_base_addresses[pll_index];
+	u16 freq = 0;
+	int i, rc;
+
+	if (hdev->asic_prop.fw_security_status_valid &&
+			(hdev->asic_prop.fw_app_security_map &
+					CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
+		rc = hl_fw_cpucp_pll_info_get(hdev, pll_index, pll_freq_arr);
 
-	if (hdev->asic_prop.fw_security_disabled) {
-		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
-		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
-		nr = RREG32(mmPSOC_CPU_PLL_NR);
-		nf = RREG32(mmPSOC_CPU_PLL_NF);
-		od = RREG32(mmPSOC_CPU_PLL_OD);
-	} else {
-		rc = hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
-				cpucp_pll_div_factor_reg, &div_fctr);
-		rc |= hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
-				cpucp_pll_div_sel_reg, &div_sel);
-		rc |= hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
-				cpucp_pll_nr_reg, &nr);
-		rc |= hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
-				cpucp_pll_nf_reg, &nf);
-		rc |= hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
-				cpucp_pll_od_reg, &od);
 		if (rc)
 			return rc;
-	}
-
-	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
-		if (div_sel == DIV_SEL_REF_CLK)
-			trace_freq = PLL_REF_CLK;
-		else
-			trace_freq = PLL_REF_CLK / (div_fctr + 1);
-	} else if (div_sel == DIV_SEL_PLL_CLK ||
+	} else if (hdev->asic_prop.fw_security_disabled) {
+		/* Backward compatibility */
+		nr = RREG32(pll_base_addr + PLL_NR_OFFSET);
+		nf = RREG32(pll_base_addr + PLL_NF_OFFSET);
+		od = RREG32(pll_base_addr + PLL_OD_OFFSET);
+
+		for (i = 0; i < HL_PLL_NUM_OUTPUTS; i++) {
+			div_fctr = RREG32(pll_base_addr +
+					PLL_DIV_FACTOR_0_OFFSET + i * 4);
+			div_sel = RREG32(pll_base_addr +
+					PLL_DIV_SEL_0_OFFSET + i * 4);
+
+			if (div_sel == DIV_SEL_REF_CLK ||
+				div_sel == DIV_SEL_DIVIDED_REF) {
+				if (div_sel == DIV_SEL_REF_CLK)
+					freq = PLL_REF_CLK;
+				else
+					freq = PLL_REF_CLK / (div_fctr + 1);
+			} else if (div_sel == DIV_SEL_PLL_CLK ||
 					div_sel == DIV_SEL_DIVIDED_PLL) {
-		pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
-		if (div_sel == DIV_SEL_PLL_CLK)
-			trace_freq = pll_clk;
-		else
-			trace_freq = pll_clk / (div_fctr + 1);
+				pll_clk = PLL_REF_CLK * (nf + 1) /
+						((nr + 1) * (od + 1));
+				if (div_sel == DIV_SEL_PLL_CLK)
+					freq = pll_clk;
+				else
+					freq = pll_clk / (div_fctr + 1);
+			} else {
+				dev_warn(hdev->dev,
+					"Received invalid div select value: %d",
+					div_sel);
+			}
+
+			pll_freq_arr[i] = freq;
+		}
 	} else {
-		dev_warn(hdev->dev,
-			"Received invalid div select value: %d", div_sel);
+		dev_err(hdev->dev, "Failed to fetch PLL frequency values\n");
+		return -EIO;
 	}
 
-	prop->psoc_timestamp_frequency = trace_freq;
-	prop->psoc_pci_pll_nr = nr;
-	prop->psoc_pci_pll_nf = nf;
-	prop->psoc_pci_pll_od = od;
-	prop->psoc_pci_pll_div_factor = div_fctr;
+	return 0;
+}
+
+/**
+ * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ */
+static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u16 pll_freq[HL_PLL_NUM_OUTPUTS];
+	int rc;
+
+	rc = gaudi_fetch_pll_frequency(hdev, CPU_PLL, pll_freq);
+	if (rc)
+		return rc;
+
+	prop->psoc_timestamp_frequency = pll_freq[2];
+	prop->psoc_pci_pll_nr = 0;
+	prop->psoc_pci_pll_nf = 0;
+	prop->psoc_pci_pll_od = 0;
+	prop->psoc_pci_pll_div_factor = 0;
 
 	return 0;
 }
@@ -7449,7 +7496,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
 		return 0;
 
-	rc = hl_fw_cpucp_info_get(hdev);
+	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
 	if (rc)
 		return rc;
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h
index 1a5e681c720d..f2d91f4fcffe 100644
--- a/drivers/misc/habanalabs/gaudi/gaudiP.h
+++ b/drivers/misc/habanalabs/gaudi/gaudiP.h
@@ -14,6 +14,7 @@
 #include "../include/gaudi/gaudi_packets.h"
 #include "../include/gaudi/gaudi.h"
 #include "../include/gaudi/gaudi_async_events.h"
+#include "../include/gaudi/gaudi_fw_if.h"
 
 #define NUMBER_OF_EXT_HW_QUEUES		8
 #define NUMBER_OF_CMPLT_QUEUES		NUMBER_OF_EXT_HW_QUEUES
@@ -104,6 +105,13 @@
 #define MME_ACC_OFFSET		(mmMME1_ACC_BASE - mmMME0_ACC_BASE)
 #define SRAM_BANK_OFFSET	(mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE)
 
+#define PLL_NR_OFFSET		0
+#define PLL_NF_OFFSET		(mmPSOC_CPU_PLL_NF - mmPSOC_CPU_PLL_NR)
+#define PLL_OD_OFFSET		(mmPSOC_CPU_PLL_OD - mmPSOC_CPU_PLL_NR)
+#define PLL_DIV_FACTOR_0_OFFSET	(mmPSOC_CPU_PLL_DIV_FACTOR_0 - \
+				mmPSOC_CPU_PLL_NR)
+#define PLL_DIV_SEL_0_OFFSET	(mmPSOC_CPU_PLL_DIV_SEL_0 - mmPSOC_CPU_PLL_NR)
+
 #define NUM_OF_SOB_IN_BLOCK		\
 	(((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \
 	mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 342227b93778..d91f553b8595 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5156,7 +5156,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
 	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
 		return 0;
 
-	rc = hl_fw_cpucp_info_get(hdev);
+	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
 	if (rc)
 		return rc;
 
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 759c068b2b7a..554f82271d5f 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -252,10 +252,26 @@ enum pq_init_status {
  *       The packet's arguments specify the desired sensor and the field to
  *       set.
  *
- * CPUCP_PACKET_PLL_REG_GET
- *       Fetch register of PLL from the required PLL IP.
- *       The packet's arguments specify the PLL IP and the register to get.
- *       Each register is 32-bit value which is returned in result field.
+ * CPUCP_PACKET_PCIE_THROUGHPUT_GET
+ *       Get throughput of PCIe.
+ *       The packet's arguments specify the transaction direction (TX/RX).
+ *       The window measurement is 10[msec], and the return value is in KB/sec.
+ *
+ * CPUCP_PACKET_PCIE_REPLAY_CNT_GET
+ *       Replay count measures number of "replay" events, which is basicly
+ *       number of retries done by PCIe.
+ *
+ * CPUCP_PACKET_TOTAL_ENERGY_GET
+ *       Total Energy is measurement of energy from the time FW Linux
+ *       is loaded. It is calculated by multiplying the average power
+ *       by time (passed from armcp start). The units are in MilliJouls.
+ *
+ * CPUCP_PACKET_PLL_INFO_GET
+ *       Fetch frequencies of PLL from the required PLL IP.
+ *       The packet's arguments specify the device PLL type
+ *       Pll type is the PLL from device pll_index enum.
+ *       The result is composed of 4 outputs, each is 16-bit
+ *       frequency in MHz.
  *
  */
 
@@ -289,7 +305,7 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_PCIE_THROUGHPUT_GET,	/* internal */
 	CPUCP_PACKET_PCIE_REPLAY_CNT_GET,	/* internal */
 	CPUCP_PACKET_TOTAL_ENERGY_GET,		/* internal */
-	CPUCP_PACKET_PLL_REG_GET,		/* internal */
+	CPUCP_PACKET_PLL_INFO_GET,		/* internal */
 };
 
 #define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
@@ -300,6 +316,15 @@ enum cpucp_packet_id {
 #define CPUCP_PKT_CTL_OPCODE_SHIFT	16
 #define CPUCP_PKT_CTL_OPCODE_MASK	0x1FFF0000
 
+#define CPUCP_PKT_RES_PLL_OUT0_SHIFT	0
+#define CPUCP_PKT_RES_PLL_OUT0_MASK	0x000000000000FFFF
+#define CPUCP_PKT_RES_PLL_OUT1_SHIFT	16
+#define CPUCP_PKT_RES_PLL_OUT1_MASK	0x00000000FFFF0000
+#define CPUCP_PKT_RES_PLL_OUT2_SHIFT	32
+#define CPUCP_PKT_RES_PLL_OUT2_MASK	0x0000FFFF00000000
+#define CPUCP_PKT_RES_PLL_OUT3_SHIFT	48
+#define CPUCP_PKT_RES_PLL_OUT3_MASK	0xFFFF000000000000
+
 struct cpucp_packet {
 	union {
 		__le64 value;	/* For SET packets */
@@ -324,8 +349,9 @@ struct cpucp_packet {
 			__u8 pad; /* unused */
 		};
 
-		struct {/* For PLL register fetch */
+		struct {/* For PLL info fetch */
 			__le16 pll_type;
+			/* TODO pll_reg is kept temporary before removal */
 			__le16 pll_reg;
 		};
 
@@ -404,6 +430,7 @@ enum cpucp_pcie_throughput_attributes {
 	cpucp_pcie_throughput_rx
 };
 
+/* TODO temporary kept before removal */
 enum cpucp_pll_reg_attributes {
 	cpucp_pll_nr_reg,
 	cpucp_pll_nf_reg,
@@ -412,6 +439,7 @@ enum cpucp_pll_reg_attributes {
 	cpucp_pll_div_sel_reg
 };
 
+/* TODO temporary kept before removal */
 enum cpucp_pll_type_attributes {
 	cpucp_pll_cpu,
 	cpucp_pll_pci,
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 60916780df35..68ac15c53f37 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -131,6 +131,9 @@
  *					receiving the halt-machine event.
  *					Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_PLL_INFO_EN	FW retrieval of PLL info is enabled.
+ *					Initialized in: linux
+ *
  * CPU_BOOT_DEV_STS0_ENABLED		Device status register enabled.
  *					This is a main indication that the
  *					running FW populates the device status
@@ -150,6 +153,7 @@
 #define CPU_BOOT_DEV_STS0_SRAM_SCR_EN			(1 << 8)
 #define CPU_BOOT_DEV_STS0_DRAM_SCR_EN			(1 << 9)
 #define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN		(1 << 10)
+#define CPU_BOOT_DEV_STS0_PLL_INFO_EN			(1 << 11)
 #define CPU_BOOT_DEV_STS0_ENABLED			(1 << 31)
 
 enum cpu_boot_status {
diff --git a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
index df21a40691e5..5bb54b34a8ae 100644
--- a/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
+++ b/drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
@@ -81,6 +81,7 @@
 #include "sif_rtr_ctrl_6_regs.h"
 #include "sif_rtr_ctrl_7_regs.h"
 #include "psoc_etr_regs.h"
+#include "psoc_cpu_pll_regs.h"
 
 #include "dma0_qm_masks.h"
 #include "mme0_qm_masks.h"
@@ -102,9 +103,6 @@
 
 #include "nic0_qm0_masks.h"
 
-#include "psoc_hbm_pll_regs.h"
-#include "psoc_cpu_pll_regs.h"
-
 #define GAUDI_ECC_MEM_SEL_OFFSET		0xF18
 #define GAUDI_ECC_ADDRESS_OFFSET		0xF1C
 #define GAUDI_ECC_SYNDROME_OFFSET		0xF20
@@ -307,4 +305,14 @@
 #define mmPCIE_AUX_FLR_CTRL                                          0xC07394
 #define mmPCIE_AUX_DBI                                               0xC07490
 
+#define mmPSOC_PCI_PLL_NR                                            0xC72100
+#define mmSRAM_W_PLL_NR                                              0x4C8100
+#define mmPSOC_HBM_PLL_NR                                            0xC74100
+#define mmNIC0_PLL_NR                                                0xCF9100
+#define mmDMA_W_PLL_NR                                               0x487100
+#define mmMESH_W_PLL_NR                                              0x4C7100
+#define mmPSOC_MME_PLL_NR                                            0xC71100
+#define mmPSOC_TPC_PLL_NR                                            0xC73100
+#define mmIF_W_PLL_NR                                                0x488100
+
 #endif /* ASIC_REG_GAUDI_REGS_H_ */
diff --git a/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_hbm_pll_regs.h b/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_hbm_pll_regs.h
deleted file mode 100644
index 687e2255cb19..000000000000
--- a/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_hbm_pll_regs.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2016-2018 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-/************************************
- ** This is an auto-generated file **
- **       DO NOT EDIT BELOW        **
- ************************************/
-
-#ifndef ASIC_REG_PSOC_HBM_PLL_REGS_H_
-#define ASIC_REG_PSOC_HBM_PLL_REGS_H_
-
-/*
- *****************************************
- *   PSOC_HBM_PLL (Prototype: PLL)
- *****************************************
- */
-
-#define mmPSOC_HBM_PLL_NR                                            0xC74100
-
-#define mmPSOC_HBM_PLL_NF                                            0xC74104
-
-#define mmPSOC_HBM_PLL_OD                                            0xC74108
-
-#define mmPSOC_HBM_PLL_NB                                            0xC7410C
-
-#define mmPSOC_HBM_PLL_CFG                                           0xC74110
-
-#define mmPSOC_HBM_PLL_LOSE_MASK                                     0xC74120
-
-#define mmPSOC_HBM_PLL_LOCK_INTR                                     0xC74128
-
-#define mmPSOC_HBM_PLL_LOCK_BYPASS                                   0xC7412C
-
-#define mmPSOC_HBM_PLL_DATA_CHNG                                     0xC74130
-
-#define mmPSOC_HBM_PLL_RST                                           0xC74134
-
-#define mmPSOC_HBM_PLL_SLIP_WD_CNTR                                  0xC74150
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_0                                  0xC74200
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_1                                  0xC74204
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_2                                  0xC74208
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_3                                  0xC7420C
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_CMD_0                              0xC74220
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_CMD_1                              0xC74224
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_CMD_2                              0xC74228
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_CMD_3                              0xC7422C
-
-#define mmPSOC_HBM_PLL_DIV_SEL_0                                     0xC74280
-
-#define mmPSOC_HBM_PLL_DIV_SEL_1                                     0xC74284
-
-#define mmPSOC_HBM_PLL_DIV_SEL_2                                     0xC74288
-
-#define mmPSOC_HBM_PLL_DIV_SEL_3                                     0xC7428C
-
-#define mmPSOC_HBM_PLL_DIV_EN_0                                      0xC742A0
-
-#define mmPSOC_HBM_PLL_DIV_EN_1                                      0xC742A4
-
-#define mmPSOC_HBM_PLL_DIV_EN_2                                      0xC742A8
-
-#define mmPSOC_HBM_PLL_DIV_EN_3                                      0xC742AC
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_BUSY_0                             0xC742C0
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_BUSY_1                             0xC742C4
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_BUSY_2                             0xC742C8
-
-#define mmPSOC_HBM_PLL_DIV_FACTOR_BUSY_3                             0xC742CC
-
-#define mmPSOC_HBM_PLL_CLK_GATER                                     0xC74300
-
-#define mmPSOC_HBM_PLL_CLK_RLX_0                                     0xC74310
-
-#define mmPSOC_HBM_PLL_CLK_RLX_1                                     0xC74314
-
-#define mmPSOC_HBM_PLL_CLK_RLX_2                                     0xC74318
-
-#define mmPSOC_HBM_PLL_CLK_RLX_3                                     0xC7431C
-
-#define mmPSOC_HBM_PLL_REF_CNTR_PERIOD                               0xC74400
-
-#define mmPSOC_HBM_PLL_REF_LOW_THRESHOLD                             0xC74410
-
-#define mmPSOC_HBM_PLL_REF_HIGH_THRESHOLD                            0xC74420
-
-#define mmPSOC_HBM_PLL_PLL_NOT_STABLE                                0xC74430
-
-#define mmPSOC_HBM_PLL_FREQ_CALC_EN                                  0xC74440
-
-#define mmPSOC_HBM_PLL_RLX_BITMAP_CFG                                0xC74500
-
-#define mmPSOC_HBM_PLL_RLX_BITMAP_0                                  0xC74510
-
-#define mmPSOC_HBM_PLL_RLX_BITMAP_1                                  0xC74514
-
-#define mmPSOC_HBM_PLL_RLX_BITMAP_2                                  0xC74518
-
-#define mmPSOC_HBM_PLL_RLX_BITMAP_3                                  0xC7451C
-
-#endif /* ASIC_REG_PSOC_HBM_PLL_REGS_H_ */
diff --git a/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_pci_pll_regs.h b/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_pci_pll_regs.h
deleted file mode 100644
index 3dc9bb4542dd..000000000000
--- a/drivers/misc/habanalabs/include/gaudi/asic_reg/psoc_pci_pll_regs.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2016-2018 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-/************************************
- ** This is an auto-generated file **
- **       DO NOT EDIT BELOW        **
- ************************************/
-
-#ifndef ASIC_REG_PSOC_PCI_PLL_REGS_H_
-#define ASIC_REG_PSOC_PCI_PLL_REGS_H_
-
-/*
- *****************************************
- *   PSOC_PCI_PLL (Prototype: PLL)
- *****************************************
- */
-
-#define mmPSOC_PCI_PLL_NR                                            0xC72100
-
-#define mmPSOC_PCI_PLL_NF                                            0xC72104
-
-#define mmPSOC_PCI_PLL_OD                                            0xC72108
-
-#define mmPSOC_PCI_PLL_NB                                            0xC7210C
-
-#define mmPSOC_PCI_PLL_CFG                                           0xC72110
-
-#define mmPSOC_PCI_PLL_LOSE_MASK                                     0xC72120
-
-#define mmPSOC_PCI_PLL_LOCK_INTR                                     0xC72128
-
-#define mmPSOC_PCI_PLL_LOCK_BYPASS                                   0xC7212C
-
-#define mmPSOC_PCI_PLL_DATA_CHNG                                     0xC72130
-
-#define mmPSOC_PCI_PLL_RST                                           0xC72134
-
-#define mmPSOC_PCI_PLL_SLIP_WD_CNTR                                  0xC72150
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_0                                  0xC72200
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_1                                  0xC72204
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_2                                  0xC72208
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_3                                  0xC7220C
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_CMD_0                              0xC72220
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_CMD_1                              0xC72224
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_CMD_2                              0xC72228
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_CMD_3                              0xC7222C
-
-#define mmPSOC_PCI_PLL_DIV_SEL_0                                     0xC72280
-
-#define mmPSOC_PCI_PLL_DIV_SEL_1                                     0xC72284
-
-#define mmPSOC_PCI_PLL_DIV_SEL_2                                     0xC72288
-
-#define mmPSOC_PCI_PLL_DIV_SEL_3                                     0xC7228C
-
-#define mmPSOC_PCI_PLL_DIV_EN_0                                      0xC722A0
-
-#define mmPSOC_PCI_PLL_DIV_EN_1                                      0xC722A4
-
-#define mmPSOC_PCI_PLL_DIV_EN_2                                      0xC722A8
-
-#define mmPSOC_PCI_PLL_DIV_EN_3                                      0xC722AC
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_BUSY_0                             0xC722C0
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_BUSY_1                             0xC722C4
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_BUSY_2                             0xC722C8
-
-#define mmPSOC_PCI_PLL_DIV_FACTOR_BUSY_3                             0xC722CC
-
-#define mmPSOC_PCI_PLL_CLK_GATER                                     0xC72300
-
-#define mmPSOC_PCI_PLL_CLK_RLX_0                                     0xC72310
-
-#define mmPSOC_PCI_PLL_CLK_RLX_1                                     0xC72314
-
-#define mmPSOC_PCI_PLL_CLK_RLX_2                                     0xC72318
-
-#define mmPSOC_PCI_PLL_CLK_RLX_3                                     0xC7231C
-
-#define mmPSOC_PCI_PLL_REF_CNTR_PERIOD                               0xC72400
-
-#define mmPSOC_PCI_PLL_REF_LOW_THRESHOLD                             0xC72410
-
-#define mmPSOC_PCI_PLL_REF_HIGH_THRESHOLD                            0xC72420
-
-#define mmPSOC_PCI_PLL_PLL_NOT_STABLE                                0xC72430
-
-#define mmPSOC_PCI_PLL_FREQ_CALC_EN                                  0xC72440
-
-#define mmPSOC_PCI_PLL_RLX_BITMAP_CFG                                0xC72500
-
-#define mmPSOC_PCI_PLL_RLX_BITMAP_0                                  0xC72510
-
-#define mmPSOC_PCI_PLL_RLX_BITMAP_1                                  0xC72514
-
-#define mmPSOC_PCI_PLL_RLX_BITMAP_2                                  0xC72518
-
-#define mmPSOC_PCI_PLL_RLX_BITMAP_3                                  0xC7251C
-
-#endif /* ASIC_REG_PSOC_PCI_PLL_REGS_H_ */
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index d9cc782aba21..96eea49f48bc 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -295,6 +295,7 @@ enum hl_device_status {
 #define HL_INFO_CLK_THROTTLE_REASON	13
 #define HL_INFO_SYNC_MANAGER		14
 #define HL_INFO_TOTAL_ENERGY		15
+#define HL_INFO_PLL_FREQUENCY		16
 
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
@@ -396,6 +397,12 @@ struct hl_info_energy {
 	__u64 total_energy_consumption;
 };
 
+#define HL_PLL_NUM_OUTPUTS 4
+
+struct hl_pll_frequency_info {
+	__u16 output[HL_PLL_NUM_OUTPUTS];
+};
+
 /**
  * struct hl_info_sync_manager - sync manager information
  * @first_available_sync_object: first available sob
@@ -465,6 +472,8 @@ struct hl_info_args {
 		 * resolution.
 		 */
 		__u32 period_ms;
+		/* PLL frequency retrieval */
+		__u32 pll_index;
 	};
 
 	__u32 pad;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH] habanalabs: mmu map wrapper for sizes larger than a page
  2020-11-18 13:50 [PATCH] habanalabs: fetch pll frequency from firmware Oded Gabbay
@ 2020-11-18 13:50 ` Oded Gabbay
  2020-11-18 13:50 ` [PATCH] habanalabs: print CS type when it is stuck Oded Gabbay
  1 sibling, 0 replies; 4+ messages in thread
From: Oded Gabbay @ 2020-11-18 13:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers, Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

We introduce a new wrapper which allows us to mmu map any size
to any host va_range available. In addition we remove duplicated
code from various places in driver and using this new wrapper
instead.
This wrapper supports mapping only contiguous physical
memory blocks and will be used for mappings that are done to the
driver ASID.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_buffer.c   |  10 +-
 drivers/misc/habanalabs/common/habanalabs.h   |   7 +-
 drivers/misc/habanalabs/common/memory.c       |   6 +-
 drivers/misc/habanalabs/common/mmu.c          | 112 +++++++++++++++++-
 drivers/misc/habanalabs/gaudi/gaudi.c         |  60 ++--------
 drivers/misc/habanalabs/goya/goya.c           |  26 ++--
 6 files changed, 144 insertions(+), 77 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index 0c482358f350..2856bb3423ee 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -67,9 +67,9 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 	bus_addr = cb->bus_address;
 	offset = 0;
 	list_for_each_entry(va_block, &cb->va_block_list, node) {
-		rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size,
-				list_is_last(&va_block->node,
-						&cb->va_block_list));
+		rc = hl_mmu_map_page(ctx, va_block->start, bus_addr,
+				va_block->size, list_is_last(&va_block->node,
+							&cb->va_block_list));
 		if (rc) {
 			dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
 				va_block->start);
@@ -92,7 +92,7 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 	list_for_each_entry(va_block, &cb->va_block_list, node) {
 		if (offset <= 0)
 			break;
-		hl_mmu_unmap(ctx, va_block->start, va_block->size,
+		hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
 				offset <= va_block->size);
 		offset -= va_block->size;
 	}
@@ -119,7 +119,7 @@ static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
 	mutex_lock(&ctx->mmu_lock);
 
 	list_for_each_entry(va_block, &cb->va_block_list, node)
-		if (hl_mmu_unmap(ctx, va_block->start, va_block->size,
+		if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
 				list_is_last(&va_block->node,
 						&cb->va_block_list)))
 			dev_warn_ratelimited(hdev->dev,
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 43aa8cbd8969..e1db8301ecbd 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2162,10 +2162,13 @@ int hl_mmu_init(struct hl_device *hdev);
 void hl_mmu_fini(struct hl_device *hdev);
 int hl_mmu_ctx_init(struct hl_ctx *ctx);
 void hl_mmu_ctx_fini(struct hl_ctx *ctx);
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		u32 page_size, bool flush_pte);
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 		bool flush_pte);
+int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
+					u64 phys_addr, u32 size);
+int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size);
 void hl_mmu_swap_out(struct hl_ctx *ctx);
 void hl_mmu_swap_in(struct hl_ctx *ctx);
 int hl_mmu_if_set_funcs(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 351c9927151f..744275dd6410 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -843,7 +843,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
 		paddr = phys_pg_pack->pages[i];
 
-		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
+		rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size,
 				(i + 1) == phys_pg_pack->npages);
 		if (rc) {
 			dev_err(hdev->dev,
@@ -862,7 +862,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 err:
 	next_vaddr = vaddr;
 	for (i = 0 ; i < mapped_pg_cnt ; i++) {
-		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+		if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
 					(i + 1) == mapped_pg_cnt))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
@@ -892,7 +892,7 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 	next_vaddr = vaddr;
 
 	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
-		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+		if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
 				       (i + 1) == phys_pg_pack->npages))
 			dev_warn_ratelimited(hdev->dev,
 			"unmap failed for vaddr: 0x%llx\n", next_vaddr);
diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c
index 7279c83cc081..33ae953d3a36 100644
--- a/drivers/misc/habanalabs/common/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu.c
@@ -122,7 +122,7 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
 }
 
 /*
- * hl_mmu_unmap - unmaps a virtual addr
+ * hl_mmu_unmap_page - unmaps a virtual addr
  *
  * @ctx: pointer to the context structure
  * @virt_addr: virt addr to map from
@@ -142,7 +142,7 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
  * For optimization reasons PCI flush may be requested once after unmapping of
  * large area.
  */
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 		bool flush_pte)
 {
 	struct hl_device *hdev = ctx->hdev;
@@ -200,7 +200,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 }
 
 /*
- * hl_mmu_map - maps a virtual addr to physical addr
+ * hl_mmu_map_page - maps a virtual addr to physical addr
  *
  * @ctx: pointer to the context structure
  * @virt_addr: virt addr to map from
@@ -221,8 +221,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
  * For optimization reasons PCI flush may be requested once after mapping of
  * large area.
  */
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
-		bool flush_pte)
+int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+		u32 page_size, bool flush_pte)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -302,6 +302,108 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
 	return rc;
 }
 
+/*
+ * hl_mmu_map_contiguous - implements a wrapper for hl_mmu_map_page
+ *                         for mapping contiguous physical memory
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to map from
+ * @phys_addr: phys addr to map to
+ * @size: size to map
+ *
+ */
+int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
+					u64 phys_addr, u32 size)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u64 curr_va, curr_pa;
+	u32 page_size;
+	bool flush_pte;
+	int rc = 0, off;
+
+	if (hl_mem_area_inside_range(virt_addr, size,
+			prop->dmmu.start_addr, prop->dmmu.end_addr))
+		page_size = prop->dmmu.page_size;
+	else if (hl_mem_area_inside_range(virt_addr, size,
+			prop->pmmu.start_addr, prop->pmmu.end_addr))
+		page_size = prop->pmmu.page_size;
+	else if (hl_mem_area_inside_range(virt_addr, size,
+			prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
+		page_size = prop->pmmu_huge.page_size;
+	else
+		return -EINVAL;
+
+	for (off = 0 ; off < size ; off += page_size) {
+		curr_va = virt_addr + off;
+		curr_pa = phys_addr + off;
+		flush_pte = (off + page_size) >= size;
+		rc = hl_mmu_map_page(ctx, curr_va, curr_pa, page_size,
+								flush_pte);
+		if (rc) {
+			dev_err(hdev->dev,
+				"Map failed for va 0x%llx to pa 0x%llx\n",
+				curr_va, curr_pa);
+			goto unmap;
+		}
+	}
+
+	return rc;
+
+unmap:
+	for (; off >= 0 ; off -= page_size) {
+		curr_va = virt_addr + off;
+		flush_pte = (off - (s32) page_size) < 0;
+		if (hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte))
+			dev_warn_ratelimited(hdev->dev,
+				"failed to unmap va 0x%llx\n", curr_va);
+	}
+
+	return rc;
+}
+
+/*
+ * hl_mmu_unmap_contiguous - implements a wrapper for hl_mmu_unmap_page
+ *                           for unmapping contiguous physical memory
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to unmap
+ * @size: size to unmap
+ *
+ */
+int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u64 curr_va;
+	u32 page_size;
+	bool flush_pte;
+	int rc = 0, off;
+
+	if (hl_mem_area_inside_range(virt_addr, size,
+			prop->dmmu.start_addr, prop->dmmu.end_addr))
+		page_size = prop->dmmu.page_size;
+	else if (hl_mem_area_inside_range(virt_addr, size,
+			prop->pmmu.start_addr, prop->pmmu.end_addr))
+		page_size = prop->pmmu.page_size;
+	else if (hl_mem_area_inside_range(virt_addr, size,
+			prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
+		page_size = prop->pmmu_huge.page_size;
+	else
+		return -EINVAL;
+
+	for (off = 0 ; off < size ; off += page_size) {
+		curr_va = virt_addr + off;
+		flush_pte = (off + page_size) >= size;
+		rc = hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte);
+		if (rc)
+			dev_warn_ratelimited(hdev->dev,
+				"Unmap failed for va 0x%llx\n", curr_va);
+	}
+
+	return rc;
+}
+
 /*
  * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out
  *
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 9f54baeef754..c44a883d9787 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7766,9 +7766,6 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
 		struct hl_ctx *ctx)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
-	bool flush_pte;
-	u64 va, pa;
-	s64 off;
 	int min_alloc_order, rc, collective_cb_size;
 
 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
@@ -7813,48 +7810,23 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
 		goto destroy_internal_cb_pool;
 
 	mutex_lock(&ctx->mmu_lock);
-
-	/* The mapping is done page by page since we can't assure allocated ptr
-	 * is aligned to HOST_SPACE_INTERNAL_CB_SZ
-	 */
-	for (off = 0 ; off < HOST_SPACE_INTERNAL_CB_SZ ; off += PAGE_SIZE_4KB) {
-		va = hdev->internal_cb_va_base + off;
-		pa = hdev->internal_cb_pool_dma_addr + off;
-		flush_pte = (off + PAGE_SIZE_4KB) >= HOST_SPACE_INTERNAL_CB_SZ;
-		rc = hl_mmu_map(ctx, va, pa, PAGE_SIZE_4KB, flush_pte);
-		if (rc) {
-			dev_err(hdev->dev,
-				"Map failed for va 0x%llx to pa 0x%llx\n",
-				va, pa);
-			goto unmap;
-		}
-	}
+	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
+			hdev->internal_cb_pool_dma_addr,
+			HOST_SPACE_INTERNAL_CB_SZ);
 
 	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
-
 	mutex_unlock(&ctx->mmu_lock);
 
-	return 0;
+	if (rc)
+		goto unreserve_internal_cb_pool;
 
-unmap:
-	for (; off >= 0 ; off -= PAGE_SIZE_4KB) {
-		va = hdev->internal_cb_va_base + off;
-		flush_pte = (off - (s32) PAGE_SIZE_4KB) < 0;
-		if (hl_mmu_unmap(ctx, va, PAGE_SIZE_4KB, flush_pte))
-			dev_warn_ratelimited(hdev->dev,
-					"failed to unmap va 0x%llx\n", va);
-	}
+	return 0;
 
+unreserve_internal_cb_pool:
 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
 			HOST_SPACE_INTERNAL_CB_SZ);
-
-	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
-
-	mutex_unlock(&ctx->mmu_lock);
-
 destroy_internal_cb_pool:
 	gen_pool_destroy(hdev->internal_cb_pool);
-
 free_internal_cb_pool:
 	hdev->asic_funcs->asic_dma_free_coherent(hdev,
 			HOST_SPACE_INTERNAL_CB_SZ,
@@ -7868,30 +7840,16 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
 		struct hl_ctx *ctx)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
-	bool flush_pte = false;
-	u64 va, off;
 
 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
 		return;
 
 	mutex_lock(&ctx->mmu_lock);
-
-	for (off = 0 ; off < HOST_SPACE_INTERNAL_CB_SZ ; off += PAGE_SIZE_4KB) {
-		va = hdev->internal_cb_va_base + off;
-
-		if (off + PAGE_SIZE_4KB >= HOST_SPACE_INTERNAL_CB_SZ)
-			flush_pte = true;
-
-		if (hl_mmu_unmap(ctx, va, PAGE_SIZE_4KB, flush_pte))
-			dev_warn_ratelimited(hdev->dev,
-					"failed to unmap va 0x%llx\n", va);
-	}
-
+	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
+			HOST_SPACE_INTERNAL_CB_SZ);
 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
 			HOST_SPACE_INTERNAL_CB_SZ);
-
 	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
-
 	mutex_unlock(&ctx->mmu_lock);
 
 	gen_pool_destroy(hdev->internal_cb_pool);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 55d174d3cac8..342227b93778 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4906,9 +4906,10 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 		return 0;
 
 	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
-		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
-				prop->dram_base_address + off, PAGE_SIZE_2MB,
-				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
+		rc = hl_mmu_map_page(hdev->kernel_ctx,
+			prop->dram_base_address + off,
+			prop->dram_base_address + off, PAGE_SIZE_2MB,
+			(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
 		if (rc) {
 			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
 				prop->dram_base_address + off);
@@ -4917,8 +4918,10 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 	}
 
 	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
-		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
-			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
+		rc = hl_mmu_map_page(hdev->kernel_ctx,
+			VA_CPU_ACCESSIBLE_MEM_ADDR,
+			hdev->cpu_accessible_dma_address,
+			PAGE_SIZE_2MB, true);
 
 		if (rc) {
 			dev_err(hdev->dev,
@@ -4928,7 +4931,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 		}
 	} else {
 		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
-			rc = hl_mmu_map(hdev->kernel_ctx,
+			rc = hl_mmu_map_page(hdev->kernel_ctx,
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
 				hdev->cpu_accessible_dma_address + cpu_off,
 				PAGE_SIZE_4KB, true);
@@ -4955,7 +4958,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 
 unmap_cpu:
 	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
-		if (hl_mmu_unmap(hdev->kernel_ctx,
+		if (hl_mmu_unmap_page(hdev->kernel_ctx,
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
 				PAGE_SIZE_4KB, true))
 			dev_warn_ratelimited(hdev->dev,
@@ -4963,7 +4966,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
 unmap:
 	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
-		if (hl_mmu_unmap(hdev->kernel_ctx,
+		if (hl_mmu_unmap_page(hdev->kernel_ctx,
 				prop->dram_base_address + off, PAGE_SIZE_2MB,
 				true))
 			dev_warn_ratelimited(hdev->dev,
@@ -4989,13 +4992,14 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
 	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
 
 	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
-		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
+		if (hl_mmu_unmap_page(hdev->kernel_ctx,
+				VA_CPU_ACCESSIBLE_MEM_ADDR,
 				PAGE_SIZE_2MB, true))
 			dev_warn(hdev->dev,
 				"Failed to unmap CPU accessible memory\n");
 	} else {
 		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
-			if (hl_mmu_unmap(hdev->kernel_ctx,
+			if (hl_mmu_unmap_page(hdev->kernel_ctx,
 					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
 					PAGE_SIZE_4KB,
 					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
@@ -5005,7 +5009,7 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
 	}
 
 	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
-		if (hl_mmu_unmap(hdev->kernel_ctx,
+		if (hl_mmu_unmap_page(hdev->kernel_ctx,
 				prop->dram_base_address + off, PAGE_SIZE_2MB,
 				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
 			dev_warn_ratelimited(hdev->dev,
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH] habanalabs: print CS type when it is stuck
  2020-11-18 13:50 [PATCH] habanalabs: fetch pll frequency from firmware Oded Gabbay
  2020-11-18 13:50 ` [PATCH] habanalabs: mmu map wrapper for sizes larger than a page Oded Gabbay
@ 2020-11-18 13:50 ` Oded Gabbay
  2020-11-18 19:22   ` kernel test robot
  1 sibling, 1 reply; 4+ messages in thread
From: Oded Gabbay @ 2020-11-18 13:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers

We have several types of command submissions and the user wants to know
which type of command submission has not finished in time when that
event occurs. This is very helpful for debug.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 28 +++++++++++++++++--
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 516cbaacc03e..6ca77abf7f8a 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -418,9 +418,31 @@ static void cs_timedout(struct work_struct *work)
 
 	hdev = cs->ctx->hdev;
 
-	dev_err(hdev->dev,
-		"Command submission %llu has not finished in time!\n",
-		cs->sequence);
+	switch (cs->type) {
+	case CS_TYPE_SIGNAL:
+		dev_err(hdev->dev,
+			"Signal command submission %llu has not finished in time!\n",
+			cs->sequence);
+		break;
+
+	case CS_TYPE_WAIT:
+		dev_err(hdev->dev,
+			"Wait command submission %llu has not finished in time!\n",
+			cs->sequence);
+		break;
+
+	case CS_TYPE_COLLECTIVE_WAIT:
+		dev_err(hdev->dev,
+			"Collective Wait command submission %llu has not finished in time!\n",
+			cs->sequence);
+		break;
+
+	default:
+		dev_err(hdev->dev,
+			"Command submission %llu has not finished in time!\n",
+			cs->sequence);
+		break;
+	}
 
 	cs_put(cs);
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] habanalabs: print CS type when it is stuck
  2020-11-18 13:50 ` [PATCH] habanalabs: print CS type when it is stuck Oded Gabbay
@ 2020-11-18 19:22   ` kernel test robot
  0 siblings, 0 replies; 4+ messages in thread
From: kernel test robot @ 2020-11-18 19:22 UTC (permalink / raw)
  To: Oded Gabbay, linux-kernel; +Cc: kbuild-all, clang-built-linux, SW_Drivers

[-- Attachment #1: Type: text/plain, Size: 3516 bytes --]

Hi Oded,

I love your patch! Yet something to improve:

[auto build test ERROR on char-misc/char-misc-testing]
[also build test ERROR on linux/master linus/master v5.10-rc4 next-20201118]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Oded-Gabbay/habanalabs-print-CS-type-when-it-is-stuck/20201118-215554
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git 93c69b2d17372463ae33b79b3002c22a208945b3
config: x86_64-randconfig-a013-20201118 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project b2613fb2f0f53691dd0211895afbb9413457fca7)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # https://github.com/0day-ci/linux/commit/71112c9df0288d0689aa5c7fbdd396f2b099daa4
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Oded-Gabbay/habanalabs-print-CS-type-when-it-is-stuck/20201118-215554
        git checkout 71112c9df0288d0689aa5c7fbdd396f2b099daa4
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> drivers/misc/habanalabs/common/command_submission.c:400:7: error: use of undeclared identifier 'CS_TYPE_COLLECTIVE_WAIT'
           case CS_TYPE_COLLECTIVE_WAIT:
                ^
   1 error generated.

vim +/CS_TYPE_COLLECTIVE_WAIT +400 drivers/misc/habanalabs/common/command_submission.c

   366	
   367	static void cs_timedout(struct work_struct *work)
   368	{
   369		struct hl_device *hdev;
   370		int rc;
   371		struct hl_cs *cs = container_of(work, struct hl_cs,
   372							 work_tdr.work);
   373		rc = cs_get_unless_zero(cs);
   374		if (!rc)
   375			return;
   376	
   377		if ((!cs->submitted) || (cs->completed)) {
   378			cs_put(cs);
   379			return;
   380		}
   381	
   382		/* Mark the CS is timed out so we won't try to cancel its TDR */
   383		cs->timedout = true;
   384	
   385		hdev = cs->ctx->hdev;
   386	
   387		switch (cs->type) {
   388		case CS_TYPE_SIGNAL:
   389			dev_err(hdev->dev,
   390				"Signal command submission %llu has not finished in time!\n",
   391				cs->sequence);
   392			break;
   393	
   394		case CS_TYPE_WAIT:
   395			dev_err(hdev->dev,
   396				"Wait command submission %llu has not finished in time!\n",
   397				cs->sequence);
   398			break;
   399	
 > 400		case CS_TYPE_COLLECTIVE_WAIT:
   401			dev_err(hdev->dev,
   402				"Collective Wait command submission %llu has not finished in time!\n",
   403				cs->sequence);
   404			break;
   405	
   406		default:
   407			dev_err(hdev->dev,
   408				"Command submission %llu has not finished in time!\n",
   409				cs->sequence);
   410			break;
   411		}
   412	
   413		cs_put(cs);
   414	
   415		if (hdev->reset_on_lockup)
   416			hl_device_reset(hdev, false, false);
   417	}
   418	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 45587 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-11-18 19:23 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-18 13:50 [PATCH] habanalabs: fetch pll frequency from firmware Oded Gabbay
2020-11-18 13:50 ` [PATCH] habanalabs: mmu map wrapper for sizes larger than a page Oded Gabbay
2020-11-18 13:50 ` [PATCH] habanalabs: print CS type when it is stuck Oded Gabbay
2020-11-18 19:22   ` kernel test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).