All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Oded Gabbay <oded.gabbay@gmail.com>,
	Ben Segal <bpsegal20@gmail.com>, Sasha Levin <sashal@kernel.org>
Subject: [PATCH AUTOSEL 5.2 33/44] habanalabs: fix endianness handling for internal QMAN submission
Date: Tue, 20 Aug 2019 09:40:17 -0400	[thread overview]
Message-ID: <20190820134028.10829-33-sashal@kernel.org> (raw)
In-Reply-To: <20190820134028.10829-1-sashal@kernel.org>

From: Oded Gabbay <oded.gabbay@gmail.com>

[ Upstream commit b9040c99414ba5b85090595a61abc686a5dbb388 ]

The PQs of internal H/W queues (QMANs) can be located in different memory
areas for different ASICs. Therefore, when writing PQEs, we need to use
the correct function according to the location of the PQ. For example, if
the PQ is located in the device's memory (SRAM or DRAM), we need to use
memcpy_toio() so that it works on architectures that have separate
address ranges for IO memory.

This patch makes the code that writes the PQE ASIC-specific so we
can handle this properly per ASIC.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Tested-by: Ben Segal <bpsegal20@gmail.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/misc/habanalabs/goya/goya.c  |  7 ++++---
 drivers/misc/habanalabs/goya/goyaP.h |  2 +-
 drivers/misc/habanalabs/habanalabs.h |  9 +++++++--
 drivers/misc/habanalabs/hw_queue.c   | 14 +++++---------
 4 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 0644fd7742057..9216cc3599178 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -2716,9 +2716,10 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
 				GOYA_ASYNC_EVENT_ID_PI_UPDATE);
 }
 
-void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val)
+void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
 {
-	/* Not needed in Goya */
+	/* The QMANs are on the SRAM so need to copy to IO space */
+	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
 }
 
 static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
@@ -4784,7 +4785,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.resume = goya_resume,
 	.cb_mmap = goya_cb_mmap,
 	.ring_doorbell = goya_ring_doorbell,
-	.flush_pq_write = goya_flush_pq_write,
+	.pqe_write = goya_pqe_write,
 	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
 	.asic_dma_free_coherent = goya_dma_free_coherent,
 	.get_int_queue_base = goya_get_int_queue_base,
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index c83cab0d641e2..e2040fd331ca1 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -170,7 +170,7 @@ int goya_late_init(struct hl_device *hdev);
 void goya_late_fini(struct hl_device *hdev);
 
 void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
-void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val);
+void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd);
 void goya_update_eq_ci(struct hl_device *hdev, u32 val);
 void goya_restore_phase_topology(struct hl_device *hdev);
 int goya_context_switch(struct hl_device *hdev, u32 asid);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index adef7d9d7488a..d56ab65d5b2a4 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -449,7 +449,11 @@ enum hl_pll_frequency {
  * @resume: handles IP specific H/W or SW changes for resume.
  * @cb_mmap: maps a CB.
  * @ring_doorbell: increment PI on a given QMAN.
- * @flush_pq_write: flush PQ entry write if necessary, WARN if flushing failed.
+ * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
+ *             function because the PQs are located in different memory areas
+ *             per ASIC (SRAM, DRAM, Host memory) and therefore, the method of
+ *             writing the PQE must match the destination memory area
+ *             properties.
  * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling
  *                           dma_alloc_coherent(). This is ASIC function because
  *                           its implementation is not trivial when the driver
@@ -518,7 +522,8 @@ struct hl_asic_funcs {
 	int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
 			u64 kaddress, phys_addr_t paddress, u32 size);
 	void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
-	void (*flush_pq_write)(struct hl_device *hdev, u64 *pq, u64 exp_val);
+	void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
+			struct hl_bd *bd);
 	void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
 					dma_addr_t *dma_handle, gfp_t flag);
 	void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index 2894d89759334..bb76794747279 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -290,23 +290,19 @@ static void int_hw_queue_schedule_job(struct hl_cs_job *job)
 	struct hl_device *hdev = job->cs->ctx->hdev;
 	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
 	struct hl_bd bd;
-	u64 *pi, *pbd = (u64 *) &bd;
+	__le64 *pi;
 
 	bd.ctl = 0;
-	bd.len = __cpu_to_le32(job->job_cb_size);
-	bd.ptr = __cpu_to_le64((u64) (uintptr_t) job->user_cb);
+	bd.len = cpu_to_le32(job->job_cb_size);
+	bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
 
-	pi = (u64 *) (uintptr_t) (q->kernel_address +
+	pi = (__le64 *) (uintptr_t) (q->kernel_address +
 		((q->pi & (q->int_queue_len - 1)) * sizeof(bd)));
 
-	pi[0] = pbd[0];
-	pi[1] = pbd[1];
-
 	q->pi++;
 	q->pi &= ((q->int_queue_len << 1) - 1);
 
-	/* Flush PQ entry write. Relevant only for specific ASICs */
-	hdev->asic_funcs->flush_pq_write(hdev, pi, pbd[0]);
+	hdev->asic_funcs->pqe_write(hdev, pi, &bd);
 
 	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
 }
-- 
2.20.1


  parent reply	other threads:[~2019-08-20 13:41 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-20 13:39 [PATCH AUTOSEL 5.2 01/44] dmaengine: ste_dma40: fix unneeded variable warning Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 02/44] nvme-multipath: revalidate nvme_ns_head gendisk in nvme_validate_ns Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 03/44] afs: Fix the CB.ProbeUuid service handler to reply correctly Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 04/44] afs: Fix loop index mixup in afs_deliver_vl_get_entry_by_name_u() Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 05/44] fs: afs: Fix a possible null-pointer dereference in afs_put_read() Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 06/44] afs: Fix off-by-one in afs_rename() expected data version calculation Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 07/44] afs: Only update d_fsdata if different in afs_d_revalidate() Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 08/44] afs: Fix missing dentry data version updating Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 09/44] intel_th: Use the correct style for SPDX License Identifier Sasha Levin
2019-08-20 14:27   ` Greg Kroah-Hartman
2019-08-20 20:03     ` Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 10/44] nvmet: Fix use-after-free bug when a port is removed Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 11/44] nvmet-loop: Flush nvme_delete_wq when removing the port Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 12/44] nvmet-file: fix nvmet_file_flush() always returning an error Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 13/44] nvme-core: Fix extra device_put() call on error path Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 14/44] nvme: fix a possible deadlock when passthru commands sent to a multipath device Sasha Levin
2019-08-20 13:39 ` [PATCH AUTOSEL 5.2 15/44] nvme-rdma: fix possible use-after-free in connect error flow Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 16/44] nvme: fix controller removal race with scan work Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 17/44] nvme-pci: Fix async probe remove race Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 18/44] soundwire: cadence_master: fix register definition for SLAVE_STATE Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 19/44] soundwire: cadence_master: fix definitions for INTSTAT0/1 Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 20/44] iio: adc: max9611: Fix temperature reading in probe Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 21/44] auxdisplay: panel: need to delete scan_timer when misc_register fails in panel_attach Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 22/44] btrfs: trim: Check the range passed into to prevent overflow Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 23/44] IB/mlx5: Fix implicit MR release flow Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 24/44] dmaengine: stm32-mdma: Fix a possible null-pointer dereference in stm32_mdma_irq_handler() Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 25/44] omap-dma/omap_vout_vrfb: fix off-by-one fi value Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 26/44] iommu/dma: Handle SG length overflow better Sasha Levin
2019-08-20 13:40   ` Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 27/44] dma-direct: don't truncate dma_required_mask to bus addressing capabilities Sasha Levin
2019-08-20 13:40   ` Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 28/44] usb: gadget: composite: Clear "suspended" on reset/disconnect Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 29/44] usb: gadget: mass_storage: Fix races between fsg_disable and fsg_set_alt Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 30/44] habanalabs: fix DRAM usage accounting on context tear down Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 31/44] habanalabs: fix endianness handling for packets from user Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 32/44] habanalabs: fix completion queue handling when host is BE Sasha Levin
2019-08-20 13:40 ` Sasha Levin [this message]
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 34/44] habanalabs: fix device IRQ unmasking for BE host Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 35/44] xen/blkback: fix memory leaks Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 36/44] arm64: cpufeature: Don't treat granule sizes as strict Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 37/44] riscv: fix flush_tlb_range() end address for flush_tlb_page() Sasha Levin
2019-08-20 13:40   ` Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 38/44] i2c: rcar: avoid race when unregistering slave client Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 39/44] i2c: emev2: " Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 40/44] drm/scheduler: use job count instead of peek Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 41/44] drm/ast: Fixed reboot test may cause system hanged Sasha Levin
2019-08-20 13:40   ` Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 42/44] usb: host: fotg2: restart hcd after port reset Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 43/44] tools: hv: fixed Python pep8/flake8 warnings for lsvmbus Sasha Levin
2019-08-20 13:40 ` [PATCH AUTOSEL 5.2 44/44] tools: hv: fix KVP and VSS daemons exit code Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190820134028.10829-33-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=bpsegal20@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=oded.gabbay@gmail.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.