linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] habanalabs: retrieve DMA mask indication from firmware
@ 2020-03-31 14:55 Oded Gabbay
  2020-03-31 14:56 ` [PATCH 2/2] habanalabs: handle barriers in DMA QMAN streams Oded Gabbay
  2020-03-31 18:29 ` [PATCH 1/2] habanalabs: retrieve DMA mask indication from firmware Tomer Tayar
  0 siblings, 2 replies; 4+ messages in thread
From: Oded Gabbay @ 2020-03-31 14:55 UTC (permalink / raw)
  To: linux-kernel, oshpigelman, ttayar; +Cc: gregkh

Retrieve from the firmware the DMA mask value we need to set according to
the device's PCI controller configuration. This is needed when working on
POWER9 machines, as the device's PCI controller is configured in a
different way in those machines.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/goya/goya.c          | 19 +++++-
 drivers/misc/habanalabs/habanalabs.h         |  9 ++-
 drivers/misc/habanalabs/include/hl_boot_if.h |  1 +
 drivers/misc/habanalabs/pci.c                | 63 +++++++++-----------
 4 files changed, 53 insertions(+), 39 deletions(-)

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index a0a96ca31757..85f29cb7d67b 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -531,7 +531,7 @@ static int goya_early_init(struct hl_device *hdev)
 
 	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
 
-	rc = hl_pci_init(hdev, 48);
+	rc = hl_pci_init(hdev);
 	if (rc)
 		return rc;
 
@@ -5185,6 +5185,20 @@ u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
 	return cq_idx;
 }
 
+static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
+{
+	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
+							HL_POWER9_HOST_MAGIC) {
+		dev_dbg(hdev->dev, "Working in 64-bit DMA mode\n");
+		hdev->power9_64bit_dma_enable = 1;
+		hdev->dma_mask = 64;
+	} else {
+		dev_dbg(hdev->dev, "Working in 48-bit DMA mode\n");
+		hdev->power9_64bit_dma_enable = 0;
+		hdev->dma_mask = 48;
+	}
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -5247,7 +5261,8 @@ static const struct hl_asic_funcs goya_funcs = {
 	.get_clk_rate = goya_get_clk_rate,
 	.get_queue_id_for_cq = goya_get_queue_id_for_cq,
 	.read_device_fw_version = goya_read_device_fw_version,
-	.load_firmware_to_device = goya_load_firmware_to_device
+	.load_firmware_to_device = goya_load_firmware_to_device,
+	.set_dma_mask_from_fw = goya_set_dma_mask_from_fw
 };
 
 /*
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 6c54d0ba0a1d..29b9767387af 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -552,6 +552,8 @@ enum hl_pll_frequency {
  * @read_device_fw_version: read the device's firmware versions that are
  *                          contained in registers
  * @load_firmware_to_device: load the firmware to the device's memory
+ * @set_dma_mask_from_fw: set the DMA mask in the driver according to the
+ *                        firmware configuration
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -642,6 +644,7 @@ struct hl_asic_funcs {
 	void (*read_device_fw_version)(struct hl_device *hdev,
 					enum hl_fw_component fwc);
 	int (*load_firmware_to_device)(struct hl_device *hdev);
+	void (*set_dma_mask_from_fw)(struct hl_device *hdev);
 };
 
 
@@ -1321,6 +1324,8 @@ struct hl_device_idle_busy_ts {
  * @dma_mask: the dma mask that was set for this device
  * @in_debug: is device under debug. This, together with fpriv_list, enforces
  *            that only a single user is configuring the debug infrastructure.
+ * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant
+ *                           only to POWER9 machines.
  * @cdev_sysfs_created: were char devices and sysfs nodes created.
  * @stop_on_err: true if engines should stop on error.
  */
@@ -1402,6 +1407,7 @@ struct hl_device {
 	u8				device_cpu_disabled;
 	u8				dma_mask;
 	u8				in_debug;
+	u8                              power9_64bit_dma_enable;
 	u8				cdev_sysfs_created;
 	u8				stop_on_err;
 
@@ -1632,9 +1638,8 @@ int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
 int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
 			u64 dram_base_address, u64 host_phys_base_address,
 			u64 host_phys_size);
-int hl_pci_init(struct hl_device *hdev, u8 dma_mask);
+int hl_pci_init(struct hl_device *hdev);
 void hl_pci_fini(struct hl_device *hdev);
-int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask);
 
 long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
 void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
diff --git a/drivers/misc/habanalabs/include/hl_boot_if.h b/drivers/misc/habanalabs/include/hl_boot_if.h
index 660550604362..7106315fc92e 100644
--- a/drivers/misc/habanalabs/include/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/hl_boot_if.h
@@ -9,6 +9,7 @@
 #define HL_BOOT_IF_H
 
 #define LKD_HARD_RESET_MAGIC		0xED7BD694
+#define HL_POWER9_HOST_MAGIC		0x1DA30009
 
 /*
  * CPU error bits in BOOT_ERROR registers
diff --git a/drivers/misc/habanalabs/pci.c b/drivers/misc/habanalabs/pci.c
index c98d88c7a5c6..0aef4af9f5ec 100644
--- a/drivers/misc/habanalabs/pci.c
+++ b/drivers/misc/habanalabs/pci.c
@@ -267,6 +267,12 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
 	/* Enable + Bar match + match enable */
 	rc |= hl_pci_iatu_write(hdev, 0x104, 0xC0080000);
 
+	/* Return the DBI window to the default location */
+	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
+	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+
+	hdev->asic_funcs->set_dma_mask_from_fw(hdev);
+
 	/* Point to DRAM */
 	if (!hdev->asic_funcs->set_dram_bar_base)
 		return -EINVAL;
@@ -274,7 +280,6 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
 								U64_MAX)
 		return -EIO;
 
-
 	/* Outbound Region 0 - Point to Host */
 	host_phys_end_addr = host_phys_base_address + host_phys_size - 1;
 	rc |= hl_pci_iatu_write(hdev, 0x008,
@@ -283,7 +288,12 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
 				upper_32_bits(host_phys_base_address));
 	rc |= hl_pci_iatu_write(hdev, 0x010, lower_32_bits(host_phys_end_addr));
 	rc |= hl_pci_iatu_write(hdev, 0x014, 0);
-	rc |= hl_pci_iatu_write(hdev, 0x018, 0);
+
+	if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
+		rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000);
+	else
+		rc |= hl_pci_iatu_write(hdev, 0x018, 0);
+
 	rc |= hl_pci_iatu_write(hdev, 0x020, upper_32_bits(host_phys_end_addr));
 	/* Increase region size */
 	rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000);
@@ -310,41 +320,25 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
  *
  * Return: 0 on success, non-zero for failure.
  */
-int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask)
+int hl_pci_set_dma_mask(struct hl_device *hdev)
 {
 	struct pci_dev *pdev = hdev->pdev;
 	int rc;
 
 	/* set DMA mask */
-	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_mask));
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
 	if (rc) {
-		dev_warn(hdev->dev,
+		dev_err(hdev->dev,
 			"Failed to set pci dma mask to %d bits, error %d\n",
-			dma_mask, rc);
-
-		dma_mask = hdev->dma_mask;
-
-		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_mask));
-		if (rc) {
-			dev_err(hdev->dev,
-				"Failed to set pci dma mask to %d bits, error %d\n",
-				dma_mask, rc);
-			return rc;
-		}
+			hdev->dma_mask, rc);
+		return rc;
 	}
 
-	/*
-	 * We managed to set the dma mask, so update the dma mask field. If
-	 * the set to the coherent mask will fail with that mask, we will
-	 * fail the entire function
-	 */
-	hdev->dma_mask = dma_mask;
-
-	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_mask));
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
 	if (rc) {
 		dev_err(hdev->dev,
 			"Failed to set pci consistent dma mask to %d bits, error %d\n",
-			dma_mask, rc);
+			hdev->dma_mask, rc);
 		return rc;
 	}
 
@@ -354,21 +348,16 @@ int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask)
 /**
  * hl_pci_init() - PCI initialization code.
  * @hdev: Pointer to hl_device structure.
- * @dma_mask: number of bits for the requested dma mask.
  *
  * Set DMA masks, initialize the PCI controller and map the PCI BARs.
  *
  * Return: 0 on success, non-zero for failure.
  */
-int hl_pci_init(struct hl_device *hdev, u8 dma_mask)
+int hl_pci_init(struct hl_device *hdev)
 {
 	struct pci_dev *pdev = hdev->pdev;
 	int rc;
 
-	rc = hl_pci_set_dma_mask(hdev, dma_mask);
-	if (rc)
-		return rc;
-
 	if (hdev->reset_pcilink)
 		hl_pci_reset_link_through_bridge(hdev);
 
@@ -380,18 +369,22 @@ int hl_pci_init(struct hl_device *hdev, u8 dma_mask)
 
 	pci_set_master(pdev);
 
-	rc = hdev->asic_funcs->init_iatu(hdev);
+	rc = hdev->asic_funcs->pci_bars_map(hdev);
 	if (rc) {
-		dev_err(hdev->dev, "Failed to initialize iATU\n");
+		dev_err(hdev->dev, "Failed to initialize PCI BARs\n");
 		goto disable_device;
 	}
 
-	rc = hdev->asic_funcs->pci_bars_map(hdev);
+	rc = hdev->asic_funcs->init_iatu(hdev);
 	if (rc) {
-		dev_err(hdev->dev, "Failed to initialize PCI BARs\n");
+		dev_err(hdev->dev, "Failed to initialize iATU\n");
 		goto disable_device;
 	}
 
+	rc = hl_pci_set_dma_mask(hdev);
+	if (rc)
+		goto disable_device;
+
 	return 0;
 
 disable_device:
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] habanalabs: handle barriers in DMA QMAN streams
  2020-03-31 14:55 [PATCH 1/2] habanalabs: retrieve DMA mask indication from firmware Oded Gabbay
@ 2020-03-31 14:56 ` Oded Gabbay
  2020-03-31 18:29   ` Tomer Tayar
  2020-03-31 18:29 ` [PATCH 1/2] habanalabs: retrieve DMA mask indication from firmware Tomer Tayar
  1 sibling, 1 reply; 4+ messages in thread
From: Oded Gabbay @ 2020-03-31 14:56 UTC (permalink / raw)
  To: linux-kernel, oshpigelman, ttayar; +Cc: gregkh

When we have DMA QMAN with multiple streams, we need to know whether the
command buffer contains at least one DMA packet in order to configure the
barriers correctly when adding the 2xMSG_PROT at the end of the JOB. If
there is no DMA packet, then there is no need to put engine barrier. This
is relevant only for GAUDI as GOYA doesn't have streams so the engine can't
be busy by another stream.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/command_submission.c |  1 +
 drivers/misc/habanalabs/goya/goya.c          |  3 ++-
 drivers/misc/habanalabs/goya/goyaP.h         |  3 ++-
 drivers/misc/habanalabs/habanalabs.h         | 17 ++++++++++++++++-
 drivers/misc/habanalabs/hw_queue.c           |  3 ++-
 5 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 409276b6374d..6680e183d881 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -113,6 +113,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
 		if (!rc) {
 			job->patched_cb = parser.patched_cb;
 			job->job_cb_size = parser.patched_cb_size;
+			job->contains_dma_pkt = parser.contains_dma_pkt;
 
 			spin_lock(&job->patched_cb->lock);
 			job->patched_cb->cs_cnt++;
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 85f29cb7d67b..19c3bdf4c358 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -3903,7 +3903,8 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
 }
 
 void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
-				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec)
+				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
+				bool eb)
 {
 	struct packet_msg_prot *cq_pkt;
 	u32 tmp;
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index a05250e53175..86857cdd36b1 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -216,7 +216,8 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
 void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size);
 
 void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
-				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec);
+				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
+				bool eb);
 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser);
 void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
 				dma_addr_t *dma_handle,	u16 *queue_len);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 29b9767387af..8db955485609 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -598,7 +598,8 @@ struct hl_asic_funcs {
 					struct sg_table *sgt);
 	void (*add_end_of_cb_packets)(struct hl_device *hdev,
 					u64 kernel_address, u32 len,
-					u64 cq_addr, u32 cq_val, u32 msix_num);
+					u64 cq_addr, u32 cq_val, u32 msix_num,
+					bool eb);
 	void (*update_eq_ci)(struct hl_device *hdev, u32 val);
 	int (*context_switch)(struct hl_device *hdev, u32 asid);
 	void (*restore_phase_topology)(struct hl_device *hdev);
@@ -824,6 +825,12 @@ struct hl_cs {
  * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
  *                          handle to a kernel-allocated CB object, false
  *                          otherwise (SRAM/DRAM/host address).
+ * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
+ *                    info is needed later, when adding the 2xMSG_PROT at the
+ *                    end of the JOB, to know which barriers to put in the
+ *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
+ *                    have streams so the engine can't be busy by another
+ *                    stream.
  */
 struct hl_cs_job {
 	struct list_head	cs_node;
@@ -839,6 +846,7 @@ struct hl_cs_job {
 	u32			user_cb_size;
 	u32			job_cb_size;
 	u8			is_kernel_allocated_cb;
+	u8			contains_dma_pkt;
 };
 
 /**
@@ -858,6 +866,12 @@ struct hl_cs_job {
  * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
  *                          handle to a kernel-allocated CB object, false
  *                          otherwise (SRAM/DRAM/host address).
+ * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
+ *                    info is needed later, when adding the 2xMSG_PROT at the
+ *                    end of the JOB, to know which barriers to put in the
+ *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
+ *                    have streams so the engine can't be busy by another
+ *                    stream.
  */
 struct hl_cs_parser {
 	struct hl_cb		*user_cb;
@@ -871,6 +885,7 @@ struct hl_cs_parser {
 	u32			patched_cb_size;
 	u8			job_id;
 	u8			is_kernel_allocated_cb;
+	u8			contains_dma_pkt;
 };
 
 
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index 8248adcc7ef8..a5abc224399d 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -314,7 +314,8 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
 	hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
 						cq_addr,
 						le32_to_cpu(cq_pkt.data),
-						q->msi_vec);
+						q->msi_vec,
+						job->contains_dma_pkt);
 
 	q->shadow_queue[hl_pi_2_offset(q->pi)] = job;
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* RE: [PATCH 1/2] habanalabs: retrieve DMA mask indication from firmware
  2020-03-31 14:55 [PATCH 1/2] habanalabs: retrieve DMA mask indication from firmware Oded Gabbay
  2020-03-31 14:56 ` [PATCH 2/2] habanalabs: handle barriers in DMA QMAN streams Oded Gabbay
@ 2020-03-31 18:29 ` Tomer Tayar
  1 sibling, 0 replies; 4+ messages in thread
From: Tomer Tayar @ 2020-03-31 18:29 UTC (permalink / raw)
  To: Oded Gabbay, linux-kernel, Omer Shpigelman; +Cc: gregkh

On Tue, Mar 31, 2020 at 17:55, Oded Gabbay <oded.gabbay@gmail.com> wrote:
> Retrieve from the firmware the DMA mask value we need to set according to
> the device's PCI controller configuration. This is needed when working on
> POWER9 machines, as the device's PCI controller is configured in a
> different way in those machines.
> 
> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

Reviewed-by: Tomer Tayar <ttayar@habana.ai>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH 2/2] habanalabs: handle barriers in DMA QMAN streams
  2020-03-31 14:56 ` [PATCH 2/2] habanalabs: handle barriers in DMA QMAN streams Oded Gabbay
@ 2020-03-31 18:29   ` Tomer Tayar
  0 siblings, 0 replies; 4+ messages in thread
From: Tomer Tayar @ 2020-03-31 18:29 UTC (permalink / raw)
  To: Oded Gabbay, linux-kernel, Omer Shpigelman; +Cc: gregkh

On Tue, Mar 31, 2020 at 17:56, Oded Gabbay <oded.gabbay@gmail.com> wrote:
> When we have DMA QMAN with multiple streams, we need to know whether the
> command buffer contains at least one DMA packet in order to configure the
> barriers correctly when adding the 2xMSG_PROT at the end of the JOB. If
> there is no DMA packet, then there is no need to put engine barrier. This
> is relevant only for GAUDI as GOYA doesn't have streams so the engine can't
> be busy by another stream.
> 
> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>

Reviewed-by: Tomer Tayar <ttayar@habana.ai>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-03-31 18:30 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-31 14:55 [PATCH 1/2] habanalabs: retrieve DMA mask indication from firmware Oded Gabbay
2020-03-31 14:56 ` [PATCH 2/2] habanalabs: handle barriers in DMA QMAN streams Oded Gabbay
2020-03-31 18:29   ` Tomer Tayar
2020-03-31 18:29 ` [PATCH 1/2] habanalabs: retrieve DMA mask indication from firmware Tomer Tayar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).