From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	atull@opensource.altera.com,
	Moritz Fischer <moritz.fischer@ettus.com>
Cc: linux-kernel@vger.kernel.org, linux-fpga@vger.kernel.org
Subject: [PATCH v4 fpga 4/4] fpga zynq: Use the scatterlist interface
Date: Wed,  1 Feb 2017 12:48:45 -0700
Message-ID: <1485978525-13676-5-git-send-email-jgunthorpe@obsidianresearch.com>
In-Reply-To: <1485978525-13676-1-git-send-email-jgunthorpe@obsidianresearch.com>

This allows the driver to avoid a high-order coherent DMA allocation
and memory copy. With this patch it can DMA directly from the kernel
pages that the bitfile is stored in.

Since this is now a gather DMA operation, the driver uses the ISR
to feed the chip's DMA queue with each entry from the SGL.

Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Acked-by: Moritz Fischer <moritz.fischer@ettus.com>
---
 drivers/fpga/zynq-fpga.c | 174 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 135 insertions(+), 39 deletions(-)
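
As an aside for readers, a rough caller-side sketch of what the
scatterlist path enables (not part of this patch; the helper name is
hypothetical, and in practice the fpga-mgr core added in patch 3/4
invokes the op rather than a caller doing so directly):

  /* Hypothetical sketch: describe the pages the bitfile already
   * occupies with an sg_table instead of copying into a high-order
   * coherent buffer. Assumes <linux/scatterlist.h> and the .write_sg
   * op signature used below in this patch.
   */
  static int sketch_load_bitfile(struct fpga_manager *mgr,
  				 struct page **pages, unsigned int n_pages,
  				 size_t count)
  {
  	struct sg_table sgt;
  	int ret;

  	/* One SGL over the existing pages; no memcpy needed */
  	ret = sg_alloc_table_from_pages(&sgt, pages, n_pages, 0, count,
  					GFP_KERNEL);
  	if (ret)
  		return ret;

  	/* The fpga-mgr core would drive this through its own entry
  	 * points; called directly here only for brevity.
  	 */
  	ret = mgr->mops->write_sg(mgr, &sgt);

  	sg_free_table(&sgt);
  	return ret;
  }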

diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index c3fc2a231e2810..34cb98139442df 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -30,6 +30,7 @@
 #include <linux/pm.h>
 #include <linux/regmap.h>
 #include <linux/string.h>
+#include <linux/scatterlist.h>
 
 /* Offsets into SLCR regmap */
 
@@ -80,6 +81,7 @@
 
 /* FPGA init status */
 #define STATUS_DMA_Q_F			BIT(31)
+#define STATUS_DMA_Q_E			BIT(30)
 #define STATUS_PCFG_INIT_MASK		BIT(4)
 
 /* Interrupt Status/Mask Register Bit definitions */
@@ -98,12 +100,16 @@
 #define DMA_INVALID_ADDRESS		GENMASK(31, 0)
 /* Used to unlock the dev */
 #define UNLOCK_MASK			0x757bdf0d
-/* Timeout for DMA to complete */
-#define DMA_DONE_TIMEOUT		msecs_to_jiffies(1000)
 /* Timeout for polling reset bits */
 #define INIT_POLL_TIMEOUT		2500000
 /* Delay for polling reset bits */
 #define INIT_POLL_DELAY			20
+/* Signal that this is the last DMA transfer and wait for the AXI and PCAP
+ * to finish before interrupting
+ */
+#define DMA_SRC_LAST_TRANSFER		1
+/* Timeout for DMA completion */
+#define DMA_TIMEOUT_MS			5000
 
 /* Masks for controlling stuff in SLCR */
 /* Disable all Level shifters */
@@ -124,6 +130,11 @@ struct zynq_fpga_priv {
 	void __iomem *io_base;
 	struct regmap *slcr;
 
+	spinlock_t dma_lock;
+	unsigned int dma_elm;
+	unsigned int dma_nelms;
+	struct scatterlist *cur_sg;
+
 	struct completion dma_done;
 };
 
@@ -149,13 +160,80 @@ static inline void zynq_fpga_set_irq(struct zynq_fpga_priv *priv, u32 enable)
 	zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable);
 }
 
+/* Must be called with dma_lock held */
+static void zynq_step_dma(struct zynq_fpga_priv *priv)
+{
+	u32 addr;
+	u32 len;
+	bool first;
+
+	first = priv->dma_elm == 0;
+	while (priv->cur_sg) {
+		/* Feed the DMA queue until it is full. */
+		if (zynq_fpga_read(priv, STATUS_OFFSET) & STATUS_DMA_Q_F)
+			break;
+
+		addr = sg_dma_address(priv->cur_sg);
+		len = sg_dma_len(priv->cur_sg);
+		if (priv->dma_elm + 1 == priv->dma_nelms) {
+			/* The last transfer waits for the PCAP to finish too;
+			 * note this also changes the irq_mask to ignore
+			 * IXR_DMA_DONE_MASK, which ensures we do not trigger
+			 * the completion too early.
+			 */
+			addr |= DMA_SRC_LAST_TRANSFER;
+			priv->cur_sg = NULL;
+		} else {
+			priv->cur_sg = sg_next(priv->cur_sg);
+			priv->dma_elm++;
+		}
+
+		zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, addr);
+		zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, DMA_INVALID_ADDRESS);
+		zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, len / 4);
+		zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
+	}
+
+	/* Once the first transfer is queued we can turn on the ISR; future
+	 * calls to zynq_step_dma will happen from the ISR context. The
+	 * dma_lock spinlock guarantees this handover is done coherently; the
+	 * ISR enable is put at the end to avoid another CPU spinning in the
+	 * ISR on this lock.
+	 */
+	if (first && priv->cur_sg) {
+		zynq_fpga_set_irq(priv,
+				  IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK);
+	} else if (!priv->cur_sg) {
+		/* The last transfer changes to DMA & PCAP mode since we do
+		 * not want to continue until everything has been flushed into
+		 * the PCAP.
+		 */
+		zynq_fpga_set_irq(priv,
+				  IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
+	}
+}
+
 static irqreturn_t zynq_fpga_isr(int irq, void *data)
 {
 	struct zynq_fpga_priv *priv = data;
+	u32 intr_status;
 
-	/* disable DMA and error IRQs */
-	zynq_fpga_set_irq(priv, 0);
+	/* If anything other than DMA completion is reported, something went
+	 * wrong; stop and hand control back to zynq_fpga_ops_write. Otherwise
+	 * progress the DMA.
+	 */
+	spin_lock(&priv->dma_lock);
+	intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
+	if (!(intr_status & IXR_ERROR_FLAGS_MASK) &&
+	    (intr_status & IXR_DMA_DONE_MASK) && priv->cur_sg) {
+		zynq_fpga_write(priv, INT_STS_OFFSET, IXR_DMA_DONE_MASK);
+		zynq_step_dma(priv);
+		spin_unlock(&priv->dma_lock);
+		return IRQ_HANDLED;
+	}
+	spin_unlock(&priv->dma_lock);
 
+	zynq_fpga_set_irq(priv, 0);
 	complete(&priv->dma_done);
 
 	return IRQ_HANDLED;
@@ -266,10 +344,11 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
 	zynq_fpga_write(priv, CTRL_OFFSET,
 			(CTRL_PCAP_PR_MASK | CTRL_PCAP_MODE_MASK | ctrl));
 
-	/* check that we have room in the command queue */
+	/* We expect that the command queue is empty right now. */
 	status = zynq_fpga_read(priv, STATUS_OFFSET);
-	if (status & STATUS_DMA_Q_F) {
-		dev_err(&mgr->dev, "DMA command queue full\n");
+	if ((status & STATUS_DMA_Q_F) ||
+	    (status & STATUS_DMA_Q_E) != STATUS_DMA_Q_E) {
+		dev_err(&mgr->dev, "DMA command queue not empty\n");
 		err = -EBUSY;
 		goto out_err;
 	}
@@ -288,27 +367,36 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
 	return err;
 }
 
-static int zynq_fpga_ops_write(struct fpga_manager *mgr,
-			       const char *buf, size_t count)
+static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt)
 {
 	struct zynq_fpga_priv *priv;
 	const char *why;
 	int err;
-	char *kbuf;
-	size_t in_count;
-	dma_addr_t dma_addr;
-	u32 transfer_length;
 	u32 intr_status;
+	unsigned long timeout;
+	unsigned long flags;
+	struct scatterlist *sg;
+	int i;
 
-	in_count = count;
 	priv = mgr->priv;
 
-	kbuf =
-	    dma_alloc_coherent(mgr->dev.parent, count, &dma_addr, GFP_KERNEL);
-	if (!kbuf)
-		return -ENOMEM;
+	/* The hardware can only DMA multiples of 4 bytes, and it requires the
+	 * starting addresses to be aligned to 64 bits (UG585 pg 212).
+	 */
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		if ((sg->offset % 8) || (sg->length % 4)) {
+			dev_err(&mgr->dev,
+			    "Invalid bitstream, chunks must be aligned\n");
+			return -EINVAL;
+		}
+	}
 
-	memcpy(kbuf, buf, count);
+	priv->dma_nelms =
+	    dma_map_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
+	if (priv->dma_nelms == 0) {
+		dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n");
+		return -ENOMEM;
+	}
 
 	/* enable clock */
 	err = clk_enable(priv->clk);
@@ -316,28 +404,31 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 		goto out_free;
 
 	zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
-
 	reinit_completion(&priv->dma_done);
 
-	/* enable DMA and error IRQs */
-	zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
-
-	/* the +1 in the src addr is used to hold off on DMA_DONE IRQ
-	 * until both AXI and PCAP are done ...
-	 */
-	zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, (u32)(dma_addr) + 1);
-	zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, (u32)DMA_INVALID_ADDRESS);
-
-	/* convert #bytes to #words */
-	transfer_length = (count + 3) / 4;
+	/* zynq_step_dma will turn on interrupts */
+	spin_lock_irqsave(&priv->dma_lock, flags);
+	priv->dma_elm = 0;
+	priv->cur_sg = sgt->sgl;
+	zynq_step_dma(priv);
+	spin_unlock_irqrestore(&priv->dma_lock, flags);
 
-	zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, transfer_length);
-	zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
+	timeout = wait_for_completion_timeout(&priv->dma_done,
+					      msecs_to_jiffies(DMA_TIMEOUT_MS));
 
-	wait_for_completion(&priv->dma_done);
+	spin_lock_irqsave(&priv->dma_lock, flags);
+	zynq_fpga_set_irq(priv, 0);
+	priv->cur_sg = NULL;
+	spin_unlock_irqrestore(&priv->dma_lock, flags);
 
 	intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
-	zynq_fpga_write(priv, INT_STS_OFFSET, intr_status);
+	zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
+
+	/* There doesn't seem to be a way to force-cancel the DMA, so if
+	 * something went wrong we are relying on the hardware to have halted
+	 * the DMA before we get here. If cancellation were possible we could
+	 * use wait_for_completion_interruptible too.
+	 */
 
 	if (intr_status & IXR_ERROR_FLAGS_MASK) {
 		why = "DMA reported error";
@@ -345,8 +436,12 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 		goto out_report;
 	}
 
-	if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
-		why = "DMA did not complete";
+	if (priv->cur_sg ||
+	    !((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
+		if (timeout == 0)
+			why = "DMA timed out";
+		else
+			why = "DMA did not complete";
 		err = -EIO;
 		goto out_report;
 	}
@@ -369,7 +464,7 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 	clk_disable(priv->clk);
 
 out_free:
-	dma_free_coherent(mgr->dev.parent, count, kbuf, dma_addr);
+	dma_unmap_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
 	return err;
 }
 
@@ -433,7 +528,7 @@ static const struct fpga_manager_ops zynq_fpga_ops = {
 	.initial_header_size = 128,
 	.state = zynq_fpga_ops_state,
 	.write_init = zynq_fpga_ops_write_init,
-	.write = zynq_fpga_ops_write,
+	.write_sg = zynq_fpga_ops_write,
 	.write_complete = zynq_fpga_ops_write_complete,
 };
 
@@ -447,6 +542,7 @@ static int zynq_fpga_probe(struct platform_device *pdev)
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
+	spin_lock_init(&priv->dma_lock);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	priv->io_base = devm_ioremap_resource(dev, res);
-- 
2.7.4
