All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V3 0/2] Add support for the IMG hash accelerator
@ 2015-03-06  2:58 James Hartley
  2015-03-06  2:58 ` [PATCH V3 1/2] crypto: Add Imagination Technologies hw " James Hartley
  2015-03-06  2:58 ` [PATCH V3 2/2] Documentation: crypto: Add DT binding info for the img " James Hartley
  0 siblings, 2 replies; 13+ messages in thread
From: James Hartley @ 2015-03-06  2:58 UTC (permalink / raw)
  To: herbert, robh+dt, pawel.moll, mark.rutland, galak,
	andrew.bresticker, ezequiel.garcia
  Cc: linux-crypto, james.hartley

This adds support for the Imagination Technologies hash
accelerator which provides hardware acceleration for
SHA1 SHA244 SHA256 and MD5 hashes.

Tested on silicon, using testmgr.

Changes from V2:
	* This hardware does not support importing a partial hash state, 
	  so the init, update, final and finup have been reworked to use
	  a fallback driver; only digest remains as hardware accelerated.
	* Simplified the driver as a result of the above rework

Changes from V1:
	* Addressed review comments from Andrew Bresticker and 
	  Vladimir Zapolskiy
	* rebased to current linux-next

James Hartley (2):
  crypto: Add Imagination Technologies hw hash accelerator
  Documentation: crypto: Add DT binding info for the img hw hash
    accelerator

 .../devicetree/bindings/crypto/img-hash.txt        |   27 +
 drivers/crypto/Kconfig                             |   14 +
 drivers/crypto/Makefile                            |    2 +
 drivers/crypto/img-hash.c                          | 1037 ++++++++++++++++++++
 4 files changed, 1080 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/crypto/img-hash.txt
 create mode 100644 drivers/crypto/img-hash.c

-- 
1.7.9.5

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash accelerator
  2015-03-06  2:58 [PATCH V3 0/2] Add support for the IMG hash accelerator James Hartley
@ 2015-03-06  2:58 ` James Hartley
  2015-03-09  6:35   ` Stephan Mueller
  2015-03-06  2:58 ` [PATCH V3 2/2] Documentation: crypto: Add DT binding info for the img " James Hartley
  1 sibling, 1 reply; 13+ messages in thread
From: James Hartley @ 2015-03-06  2:58 UTC (permalink / raw)
  To: herbert, robh+dt, pawel.moll, mark.rutland, galak,
	andrew.bresticker, ezequiel.garcia
  Cc: linux-crypto, james.hartley

This adds support for the Imagination Technologies hash
accelerator which provides hardware acceleration for
SHA1 SHA224 SHA256 and MD5 hashes.

Signed-off-by: James Hartley <james.hartley@imgtec.com>
---
 drivers/crypto/Kconfig    |   14 +
 drivers/crypto/Makefile   |    2 +
 drivers/crypto/img-hash.c | 1037 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1053 insertions(+)
 create mode 100644 drivers/crypto/img-hash.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 2fb0fdf..c72223e 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -436,4 +436,18 @@ config CRYPTO_DEV_QCE
 	  hardware. To compile this driver as a module, choose M here. The
 	  module will be called qcrypto.
 
+config CRYPTO_DEV_IMGTEC_HASH
+	depends on MIPS || COMPILE_TEST
+	tristate "Imagination Technologies hardware hash accelerator"
+	select CRYPTO_ALG_API
+	select CRYPTO_MD5
+	select CRYPTO_SHA1
+	select CRYPTO_SHA224
+	select CRYPTO_SHA256
+	select CRYPTO_HASH
+	help
+	  This driver interfaces with the Imagination Technologies
+	  hardware hash accelerator. Supporting MD5/SHA1/SHA224/SHA256
+	  hashing algorithms.
+
 endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 3924f93..fb84be7 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
 obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
 obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
+obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
 obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
 obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
 obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
@@ -25,3 +26,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
 obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
 obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
 obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
+obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
new file mode 100644
index 0000000..94a3a6f
--- /dev/null
+++ b/drivers/crypto/img-hash.c
@@ -0,0 +1,1037 @@
+/*
+ * Copyright (c) 2014 Imagination Technologies
+ * Authors:  Will Thomas, James Hartley
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ *	Interface structure taken from omap-sham driver
+ */
+
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+
+#include <crypto/internal/hash.h>
+#include <crypto/md5.h>
+#include <crypto/sha.h>
+
+#define CR_RESET			0
+#define CR_RESET_SET			1
+#define CR_RESET_UNSET			0
+
+#define CR_MESSAGE_LENGTH_H		0x4
+#define CR_MESSAGE_LENGTH_L		0x8
+
+#define CR_CONTROL			0xc
+#define CR_CONTROL_BYTE_ORDER_3210	0
+#define CR_CONTROL_BYTE_ORDER_0123	1
+#define CR_CONTROL_BYTE_ORDER_2310	2
+#define CR_CONTROL_BYTE_ORDER_1032	3
+#define CR_CONTROL_BYTE_ORDER_SHIFT	8
+#define CR_CONTROL_ALGO_MD5	0
+#define CR_CONTROL_ALGO_SHA1	1
+#define CR_CONTROL_ALGO_SHA224	2
+#define CR_CONTROL_ALGO_SHA256	3
+
+#define CR_INTSTAT			0x10
+#define CR_INTENAB			0x14
+#define CR_INTCLEAR			0x18
+#define CR_INT_RESULTS_AVAILABLE	BIT(0)
+#define CR_INT_NEW_RESULTS_SET		BIT(1)
+#define CR_INT_RESULT_READ_ERR		BIT(2)
+#define CR_INT_MESSAGE_WRITE_ERROR	BIT(3)
+#define CR_INT_STATUS			BIT(8)
+
+#define CR_RESULT_QUEUE		0x1c
+#define CR_RSD0				0x40
+#define CR_CORE_REV			0x50
+#define CR_CORE_DES1		0x60
+#define CR_CORE_DES2		0x70
+
+#define DRIVER_FLAGS_BUSY		BIT(0)
+#define DRIVER_FLAGS_FINAL		BIT(1)
+#define DRIVER_FLAGS_DMA_ACTIVE		BIT(2)
+#define DRIVER_FLAGS_OUTPUT_READY	BIT(3)
+#define DRIVER_FLAGS_INIT		BIT(4)
+#define DRIVER_FLAGS_CPU		BIT(5)
+#define DRIVER_FLAGS_DMA_READY		BIT(6)
+#define DRIVER_FLAGS_ERROR		BIT(7)
+#define DRIVER_FLAGS_SG			BIT(8)
+#define DRIVER_FLAGS_SHA1		BIT(18)
+#define DRIVER_FLAGS_SHA224		BIT(19)
+#define DRIVER_FLAGS_SHA256		BIT(20)
+#define DRIVER_FLAGS_MD5		BIT(21)
+
+#define IMG_HASH_QUEUE_LENGTH		20
+#define IMG_HASH_DMA_THRESHOLD		64
+
+#ifdef __LITTLE_ENDIAN
+#define IMG_HASH_BYTE_ORDER		(CR_CONTROL_BYTE_ORDER_3210)
+#else
+#define IMG_HASH_BYTE_ORDER		(CR_CONTROL_BYTE_ORDER_0123)
+#endif
+
+struct img_hash_dev;
+
+struct img_hash_request_ctx {
+	struct img_hash_dev	*hdev;
+	u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
+	unsigned long		flags;
+	size_t			digsize;
+
+	dma_addr_t		dma_addr;
+	size_t			dma_ct;
+
+	/* sg root */
+	struct scatterlist	*sgfirst;
+	/* walk state */
+	struct scatterlist	*sg;
+	size_t			nents;
+	size_t			offset;
+	unsigned int		total;
+	size_t			sent;
+
+	unsigned long		op;
+
+	size_t			bufcnt;
+	u8 buffer[0] __aligned(sizeof(u32));
+	struct ahash_request	fallback_req;
+};
+
+struct img_hash_ctx {
+	struct img_hash_dev	*hdev;
+	unsigned long		flags;
+	struct crypto_ahash	*fallback;
+};
+
+struct img_hash_dev {
+	struct list_head	list;
+	struct device		*dev;
+	struct clk		*iclk;
+	struct clk		*fclk;
+	int			irq;
+	void __iomem		*io_base;
+
+	phys_addr_t		bus_addr;
+	void __iomem		*cpu_addr;
+
+	spinlock_t		lock;
+	int			err;
+	struct tasklet_struct	done_task;
+	struct tasklet_struct	dma_task;
+
+	unsigned long		flags;
+	struct crypto_queue	queue;
+	struct ahash_request	*req;
+
+	struct dma_chan		*dma_lch;
+};
+
+struct img_hash_drv {
+	struct list_head dev_list;
+	spinlock_t lock;
+};
+
+static struct img_hash_drv img_hash = {
+	.dev_list = LIST_HEAD_INIT(img_hash.dev_list),
+	.lock = __SPIN_LOCK_UNLOCKED(img_hash.lock),
+};
+
+static inline u32 img_hash_read(struct img_hash_dev *hdev, u32 offset)
+{
+	return readl_relaxed(hdev->io_base + offset);
+}
+
+static inline void img_hash_write(struct img_hash_dev *hdev,
+				  u32 offset, u32 value)
+{
+	writel_relaxed(value, hdev->io_base + offset);
+}
+
+static inline u32 img_hash_read_result_queue(struct img_hash_dev *hdev)
+{
+	return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE));
+}
+
+static void img_hash_start(struct img_hash_dev *hdev, bool dma)
+{
+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+	u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT;
+
+	if (ctx->flags & DRIVER_FLAGS_MD5)
+		cr |= CR_CONTROL_ALGO_MD5;
+	else if (ctx->flags & DRIVER_FLAGS_SHA1)
+		cr |= CR_CONTROL_ALGO_SHA1;
+	else if (ctx->flags & DRIVER_FLAGS_SHA224)
+		cr |= CR_CONTROL_ALGO_SHA224;
+	else if (ctx->flags & DRIVER_FLAGS_SHA256)
+		cr |= CR_CONTROL_ALGO_SHA256;
+	dev_dbg(hdev->dev, "Starting hash process\n");
+	img_hash_write(hdev, CR_CONTROL, cr);
+
+	/*
+	 * The hardware block requires two cycles between writing the control
+	 * register and writing the first word of data in non DMA mode, to
+	 * ensure the first data write is not grouped in burst with the control
+	 * register write a read is issued to 'flush' the bus.
+	 */
+	if (!dma)
+		img_hash_read(hdev, CR_CONTROL);
+}
+
+static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf,
+			     size_t length, int final)
+{
+	u32 count, len32;
+	const u32 *buffer = (const u32 *)buf;
+
+	dev_dbg(hdev->dev, "xmit_cpu:  length: %u bytes\n", length);
+
+	if (final)
+		hdev->flags |= DRIVER_FLAGS_FINAL;
+
+	len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+	for (count = 0; count < len32; count++)
+		writel_relaxed(buffer[count], hdev->cpu_addr);
+
+	return -EINPROGRESS;
+}
+
+static void img_hash_dma_callback(void *data)
+{
+	struct img_hash_dev *hdev = (struct img_hash_dev *)data;
+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+	if (ctx->bufcnt) {
+		img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0);
+		ctx->bufcnt = 0;
+	}
+	if (ctx->sg)
+		tasklet_schedule(&hdev->dma_task);
+}
+
+static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg)
+{
+	struct dma_async_tx_descriptor *desc;
+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+	ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
+	if (ctx->dma_ct == 0) {
+		dev_err(hdev->dev, "Invalid DMA sg\n");
+		hdev->err = -EINVAL;
+		return -EINVAL;
+	}
+
+	desc = dmaengine_prep_slave_sg(hdev->dma_lch,
+					sg,
+					ctx->dma_ct,
+					DMA_MEM_TO_DEV,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc) {
+		dev_err(hdev->dev, "Null DMA descriptor\n");
+		hdev->err = -EINVAL;
+		dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
+		return -EINVAL;
+	}
+	desc->callback = img_hash_dma_callback;
+	desc->callback_param = hdev;
+	dmaengine_submit(desc);
+	dma_async_issue_pending(hdev->dma_lch);
+
+	return 0;
+}
+
+static int img_hash_write_via_cpu(struct img_hash_dev *hdev)
+{
+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+	int ret = 0;
+
+	ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx->sg),
+					ctx->buffer, hdev->req->nbytes);
+
+	ctx->total = hdev->req->nbytes;
+	ctx->bufcnt = 0;
+
+	hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL);
+
+	img_hash_start(hdev, false);
+
+	if (ctx->total)
+		ret = img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 1);
+	else
+		ret = 0;
+
+	return ret;
+}
+
+static int img_hash_finish(struct ahash_request *req)
+{
+	struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+
+	if (!req->result)
+		return -EINVAL;
+
+	memcpy(req->result, ctx->digest, ctx->digsize);
+
+	return 0;
+}
+
+static void img_hash_copy_hash(struct ahash_request *req)
+{
+	struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+	u32 *hash = (u32 *)ctx->digest;
+	int i;
+
+	for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--)
+		hash[i] = img_hash_read_result_queue(ctx->hdev);
+}
+
+static void img_hash_finish_req(struct ahash_request *req, int err)
+{
+	struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+	struct img_hash_dev *hdev =  ctx->hdev;
+
+	if (!err) {
+		img_hash_copy_hash(req);
+		if (DRIVER_FLAGS_FINAL & hdev->flags)
+			err = img_hash_finish(req);
+	} else {
+		dev_warn(hdev->dev, "Hash failed with error %d\n", err);
+		ctx->flags |= DRIVER_FLAGS_ERROR;
+	}
+
+	hdev->flags &= ~(DRIVER_FLAGS_DMA_READY | DRIVER_FLAGS_OUTPUT_READY |
+		DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY | DRIVER_FLAGS_FINAL);
+
+	if (req->base.complete)
+		req->base.complete(&req->base, err);
+}
+
+static int img_hash_write_via_dma(struct img_hash_dev *hdev)
+{
+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+	img_hash_start(hdev, true);
+
+	dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total);
+
+	if (!ctx->total)
+		hdev->flags |= DRIVER_FLAGS_FINAL;
+
+	hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL;
+
+	tasklet_schedule(&hdev->dma_task);
+
+	return -EINPROGRESS;
+}
+
+static int img_hash_dma_init(struct img_hash_dev *hdev)
+{
+	struct dma_slave_config dma_conf;
+	int err = -EINVAL;
+
+	hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
+	if (!hdev->dma_lch) {
+		dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.\n");
+		return -EBUSY;
+	}
+	dma_conf.direction = DMA_MEM_TO_DEV;
+	dma_conf.dst_addr = hdev->bus_addr;
+	dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	dma_conf.dst_maxburst = 16;
+	dma_conf.device_fc = false;
+
+	err = dmaengine_slave_config(hdev->dma_lch,  &dma_conf);
+
+	if (err) {
+		dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
+		dma_release_channel(hdev->dma_lch);
+		return err;
+	}
+	return 0;
+}
+
+static void img_hash_dma_task(unsigned long d)
+{
+	struct img_hash_dev *hdev = (struct img_hash_dev *)d;
+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+	u8 *addr;
+	size_t nbytes, bleft, wsend, len, tbc;
+	struct scatterlist tsg;
+
+	if (!ctx->sg)
+		return;
+
+	addr = sg_virt(ctx->sg);
+	nbytes = ctx->sg->length - ctx->offset;
+
+	/* The hash accelerator does not support a data valid mask. This means
+	 * that if each dma (i.e. per page) is not a multiple of 4 bytes, the
+	 * padding bytes in the last word written by that dma would erroneously
+	 * be included in the hash. To avoid this we round down the transfer,
+	 * and add the excess to the start of the next dma. It does not matter
+	 * that the final dma may not be a multiple of 4 bytes as the hashing
+	 * block is programmed to accept the correct number of bytes.
+	 */
+
+	bleft = nbytes % 4;
+	wsend = (nbytes / 4);
+
+	if (wsend) {
+		sg_init_one(&tsg, addr + ctx->offset, wsend * 4);
+		if (img_hash_xmit_dma(hdev, &tsg)) {
+			dev_err(hdev->dev, "DMA failed, falling back to CPU");
+			ctx->flags |= DRIVER_FLAGS_CPU;
+			hdev->err = 0;
+			img_hash_xmit_cpu(hdev, addr + ctx->offset,
+					  wsend * 4, 0);
+			ctx->sent += wsend * 4;
+			wsend = 0;
+		} else {
+			ctx->sent += wsend * 4;
+		}
+	}
+
+	if (bleft) {
+		ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
+						 ctx->buffer, bleft, ctx->sent);
+		tbc = 0;
+		ctx->sg = sg_next(ctx->sg);
+		while (ctx->sg && (ctx->bufcnt < 4)) {
+			len = ctx->sg->length;
+			if (likely(len > (4 - ctx->bufcnt)))
+				len = 4 - ctx->bufcnt;
+			tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
+						 ctx->buffer + ctx->bufcnt, len,
+					ctx->sent + ctx->bufcnt);
+			ctx->bufcnt += tbc;
+			if (tbc >= ctx->sg->length) {
+				ctx->sg = sg_next(ctx->sg);
+				tbc = 0;
+			}
+		}
+
+		ctx->sent += ctx->bufcnt;
+		ctx->offset = tbc;
+
+		if (!wsend)
+			img_hash_dma_callback(hdev);
+	} else {
+		ctx->offset = 0;
+		ctx->sg = sg_next(ctx->sg);
+	}
+}
+
+static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev)
+{
+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+
+	if (ctx->flags & DRIVER_FLAGS_SG)
+		dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct, DMA_TO_DEVICE);
+
+	return 0;
+}
+
+static int img_hash_process_data(struct img_hash_dev *hdev)
+{
+	struct ahash_request *req = hdev->req;
+	struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+	int err = 0;
+
+	ctx->bufcnt = 0;
+
+	if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) {
+		dev_dbg(hdev->dev, "process data request(%d bytes) using DMA\n",
+			req->nbytes);
+		err = img_hash_write_via_dma(hdev);
+	} else {
+		dev_dbg(hdev->dev, "process data request(%d bytes) using CPU\n",
+			req->nbytes);
+		err = img_hash_write_via_cpu(hdev);
+	}
+	return err;
+}
+
+static int img_hash_hw_init(struct img_hash_dev *hdev)
+{
+	unsigned long long nbits;
+	u32 u, l;
+	int ret;
+
+	ret = clk_prepare_enable(hdev->iclk);
+	if (ret)
+		return ret;
+
+	img_hash_write(hdev, CR_RESET, CR_RESET_SET);
+	img_hash_write(hdev, CR_RESET, CR_RESET_UNSET);
+	img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET);
+
+	nbits = (hdev->req->nbytes << 3);
+	u = nbits >> 32;
+	l = nbits;
+	img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u);
+	img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l);
+
+	if (!(DRIVER_FLAGS_INIT & hdev->flags)) {
+		hdev->flags |= DRIVER_FLAGS_INIT;
+		hdev->err = 0;
+	}
+	dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits);
+	return 0;
+}
+
+static int img_hash_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+	rctx->fallback_req.base.flags =	req->base.flags
+		& CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	return crypto_ahash_init(&rctx->fallback_req);
+}
+
+static int img_hash_handle_queue(struct img_hash_dev *hdev,
+				 struct ahash_request *req)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct img_hash_request_ctx *ctx;
+	unsigned long flags;
+	int err = 0, res = 0;
+
+	spin_lock_irqsave(&hdev->lock, flags);
+
+	if (req)
+		res = ahash_enqueue_request(&hdev->queue, req);
+
+	if (DRIVER_FLAGS_BUSY & hdev->flags) {
+		spin_unlock_irqrestore(&hdev->lock, flags);
+		return res;
+	}
+
+	backlog = crypto_get_backlog(&hdev->queue);
+	async_req = crypto_dequeue_request(&hdev->queue);
+	if (async_req)
+		hdev->flags |= DRIVER_FLAGS_BUSY;
+
+	spin_unlock_irqrestore(&hdev->lock, flags);
+
+	if (!async_req)
+		return res;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	req = ahash_request_cast(async_req);
+	hdev->req = req;
+
+	ctx = ahash_request_ctx(req);
+
+	dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n",
+		 ctx->op, req->nbytes);
+
+	err = img_hash_hw_init(hdev);
+
+	if (!err)
+		err = img_hash_process_data(hdev);
+
+	if (err != -EINPROGRESS) {
+		/* done_task will not finish so do it here */
+		img_hash_finish_req(req, err);
+	}
+	return res;
+}
+
+static int img_hash_update(struct ahash_request *req)
+{
+	struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+	rctx->fallback_req.base.flags = req->base.flags
+		& CRYPTO_TFM_REQ_MAY_SLEEP;
+	rctx->fallback_req.nbytes = req->nbytes;
+	rctx->fallback_req.src = req->src;
+
+	return crypto_ahash_update(&rctx->fallback_req);
+}
+
+static int img_hash_final(struct ahash_request *req)
+{
+	struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+	rctx->fallback_req.base.flags = req->base.flags
+		& CRYPTO_TFM_REQ_MAY_SLEEP;
+	rctx->fallback_req.result = req->result;
+
+	return crypto_ahash_final(&rctx->fallback_req);
+}
+
+static int img_hash_finup(struct ahash_request *req)
+{
+	struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+
+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+	rctx->fallback_req.base.flags = req->base.flags
+		& CRYPTO_TFM_REQ_MAY_SLEEP;
+	rctx->fallback_req.nbytes = req->nbytes;
+	rctx->fallback_req.src = req->src;
+	rctx->fallback_req.result = req->result;
+
+	return crypto_ahash_finup(&rctx->fallback_req);
+}
+
+static int img_hash_digest(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
+	struct img_hash_dev *hdev = NULL;
+	struct img_hash_dev *tmp;
+	int err;
+
+	spin_lock_bh(&img_hash.lock);
+	if (!tctx->hdev) {
+		list_for_each_entry(tmp, &img_hash.dev_list, list) {
+			hdev = tmp;
+			break;
+		}
+		tctx->hdev = hdev;
+
+	} else {
+		hdev = tctx->hdev;
+	}
+
+	spin_unlock_bh(&img_hash.lock);
+	ctx->hdev = hdev;
+	ctx->flags = 0;
+	ctx->digsize = crypto_ahash_digestsize(tfm);
+
+	switch (ctx->digsize) {
+	case SHA1_DIGEST_SIZE:
+		ctx->flags |= DRIVER_FLAGS_SHA1;
+		break;
+	case SHA256_DIGEST_SIZE:
+		ctx->flags |= DRIVER_FLAGS_SHA256;
+		break;
+	case SHA224_DIGEST_SIZE:
+		ctx->flags |= DRIVER_FLAGS_SHA224;
+		break;
+	case MD5_DIGEST_SIZE:
+		ctx->flags |= DRIVER_FLAGS_MD5;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ctx->bufcnt = 0;
+	ctx->offset = 0;
+	ctx->sent = 0;
+	ctx->total = req->nbytes;
+	ctx->sg = req->src;
+	ctx->sgfirst = req->src;
+	ctx->nents = sg_nents(ctx->sg);
+
+	err = img_hash_handle_queue(tctx->hdev, req);
+
+	return err;
+}
+
+static int img_hash_cra_init(struct crypto_tfm *tfm)
+{
+	struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	const char *alg_name = crypto_tfm_alg_name(tfm);
+	int err = -ENOMEM;
+
+	ctx->fallback = crypto_alloc_ahash(alg_name, 0,
+					   CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ctx->fallback)) {
+		pr_err("img_hash: Could not load fallback driver.\n");
+		err = PTR_ERR(ctx->fallback);
+		goto err;
+	}
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct img_hash_request_ctx) +
+				 IMG_HASH_DMA_THRESHOLD);
+
+	return 0;
+
+err:
+	return err;
+}
+
+static void img_hash_cra_exit(struct crypto_tfm *tfm)
+{
+	struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_ahash(tctx->fallback);
+}
+
+static irqreturn_t img_irq_handler(int irq, void *dev_id)
+{
+	struct img_hash_dev *hdev = dev_id;
+	u32 reg;
+
+	reg = img_hash_read(hdev, CR_INTSTAT);
+	img_hash_write(hdev, CR_INTCLEAR, reg);
+
+	if (reg & CR_INT_NEW_RESULTS_SET) {
+		dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
+		if (DRIVER_FLAGS_BUSY & hdev->flags) {
+			hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
+			if (!(DRIVER_FLAGS_CPU & hdev->flags))
+				hdev->flags |= DRIVER_FLAGS_DMA_READY;
+			tasklet_schedule(&hdev->done_task);
+		} else {
+			dev_warn(hdev->dev,
+				 "HASH interrupt when no active requests.\n");
+		}
+	} else if (reg & CR_INT_RESULTS_AVAILABLE) {
+		dev_warn(hdev->dev,
+			 "IRQ triggered before the hash had completed\n");
+	} else if (reg & CR_INT_RESULT_READ_ERR) {
+		dev_warn(hdev->dev,
+			 "Attempt to read from an empty result queue\n");
+	} else if (reg & CR_INT_MESSAGE_WRITE_ERROR) {
+		dev_warn(hdev->dev,
+			 "Data written before the hardware was configured\n");
+	}
+	return IRQ_HANDLED;
+}
+
+static struct ahash_alg img_algs[] = {
+	{
+		.init = img_hash_init,
+		.update = img_hash_update,
+		.final = img_hash_final,
+		.finup = img_hash_finup,
+		.digest = img_hash_digest,
+		.halg = {
+			.digestsize = MD5_DIGEST_SIZE,
+			.base = {
+				.cra_name = "md5",
+				.cra_driver_name = "img-md5",
+				.cra_priority = 301,
+				.cra_flags =
+				CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_NEED_FALLBACK,
+				.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct img_hash_ctx),
+				.cra_init = img_hash_cra_init,
+				.cra_exit = img_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = img_hash_init,
+		.update = img_hash_update,
+		.final = img_hash_final,
+		.finup = img_hash_finup,
+		.digest = img_hash_digest,
+		.halg = {
+			.digestsize = SHA1_DIGEST_SIZE,
+			.base = {
+				.cra_name = "sha1",
+				.cra_driver_name = "img-sha1",
+				.cra_priority = 3000,
+				.cra_flags =
+				CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_NEED_FALLBACK,
+				.cra_blocksize = SHA1_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct img_hash_ctx),
+				.cra_init = img_hash_cra_init,
+				.cra_exit = img_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = img_hash_init,
+		.update = img_hash_update,
+		.final = img_hash_final,
+		.finup = img_hash_finup,
+		.digest = img_hash_digest,
+		.halg = {
+			.digestsize = SHA224_DIGEST_SIZE,
+			.base = {
+				.cra_name = "sha224",
+				.cra_driver_name = "img-sha224",
+				.cra_priority = 3000,
+				.cra_flags =
+				CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_NEED_FALLBACK,
+				.cra_blocksize = SHA224_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct img_hash_ctx),
+				.cra_init = img_hash_cra_init,
+				.cra_exit = img_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	},
+	{
+		.init = img_hash_init,
+		.update = img_hash_update,
+		.final = img_hash_final,
+		.finup = img_hash_finup,
+		.digest = img_hash_digest,
+		.halg = {
+			.digestsize = SHA256_DIGEST_SIZE,
+			.base = {
+				.cra_name = "sha256",
+				.cra_driver_name = "img-sha256",
+				.cra_priority = 301,
+				.cra_flags =
+				CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_NEED_FALLBACK,
+				.cra_blocksize = SHA256_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct img_hash_ctx),
+				.cra_init = img_hash_cra_init,
+				.cra_exit = img_hash_cra_exit,
+				.cra_module = THIS_MODULE,
+			}
+		}
+	}
+};
+
+static int img_register_algs(struct img_hash_dev *hdev)
+{
+	int i, err;
+
+	for (i = 0; i < ARRAY_SIZE(img_algs); i++) {
+		err = crypto_register_ahash(&img_algs[i]);
+		if (err)
+			goto err_reg;
+	}
+	return 0;
+
+err_reg:
+	for (; i--; )
+		crypto_unregister_ahash(&img_algs[i]);
+
+	return err;
+}
+
+static int img_unregister_algs(struct img_hash_dev *hdev)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(img_algs); i++)
+		crypto_unregister_ahash(&img_algs[i]);
+	return 0;
+}
+
+static void img_hash_done_task(unsigned long data)
+{
+	struct img_hash_dev *hdev = (struct img_hash_dev *)data;
+	int err = 0;
+
+	if (hdev->err == -EINVAL) {
+		err = hdev->err;
+		goto finish;
+	}
+
+	if (!(DRIVER_FLAGS_BUSY & hdev->flags)) {
+		img_hash_handle_queue(hdev, NULL);
+		return;
+	}
+
+	if (DRIVER_FLAGS_CPU & hdev->flags) {
+		if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
+			hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY;
+			goto finish;
+		}
+	} else if (DRIVER_FLAGS_DMA_READY & hdev->flags) {
+		if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) {
+			hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE;
+			img_hash_write_via_dma_stop(hdev);
+			if (hdev->err) {
+				err = hdev->err;
+				goto finish;
+			}
+		}
+		if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
+			hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
+					DRIVER_FLAGS_OUTPUT_READY);
+			goto finish;
+		}
+	}
+	return;
+
+finish:
+	img_hash_finish_req(hdev->req, err);
+}
+
+static const struct of_device_id img_hash_match[] = {
+	{ .compatible = "img,hash-accelerator" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, img_hash_match)
+
+static int img_hash_probe(struct platform_device *pdev)
+{
+	struct img_hash_dev *hdev;
+	struct device *dev = &pdev->dev;
+	struct resource *hash_res;
+	int err;
+
+	hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL);
+	if (hdev == NULL) {
+		err = -ENOMEM;
+		goto sha_dev_err;
+	}
+	spin_lock_init(&hdev->lock);
+
+	hdev->dev = dev;
+
+	platform_set_drvdata(pdev, hdev);
+
+	INIT_LIST_HEAD(&hdev->list);
+
+	tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned long)hdev);
+	tasklet_init(&hdev->dma_task, img_hash_dma_task, (unsigned long)hdev);
+
+	crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH);
+
+	/* Register bank */
+	hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	hdev->io_base = devm_ioremap_resource(dev, hash_res);
+	if (IS_ERR(hdev->io_base)) {
+		dev_err(dev, "can't ioremap\n");
+		err = PTR_ERR(hdev->io_base);
+		goto hash_io_err;
+	}
+
+	/* Write port (DMA or CPU) */
+	hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!hash_res) {
+		dev_err(dev, "no write port resource info\n");
+		err = -ENODEV;
+		goto res_err;
+	}
+	hdev->bus_addr = hash_res->start;
+	hdev->cpu_addr = devm_ioremap_resource(dev, hash_res);
+	if (IS_ERR(hdev->cpu_addr)) {
+		dev_err(dev, "can't ioremap write port\n");
+		err = PTR_ERR(hdev->cpu_addr);
+		goto res_err;
+	}
+
+	hdev->irq = platform_get_irq(pdev, 0);
+	if (hdev->irq < 0) {
+		dev_err(dev, "no IRQ resource info\n");
+		err = hdev->irq;
+		goto res_err;
+	}
+
+	err = devm_request_irq(dev, hdev->irq, img_irq_handler, 0,
+			       dev_name(dev), hdev);
+	if (err) {
+		dev_err(dev, "unable to request %s irq\n", dev_name(dev));
+		goto res_err;
+	}
+	dev_dbg(dev, "using IRQ channel %d\n", hdev->irq);
+
+	hdev->iclk = devm_clk_get(&pdev->dev, "hash_clk");
+	if (IS_ERR(hdev->iclk)) {
+		dev_err(dev, "clock initialization failed.\n");
+		err = PTR_ERR(hdev->iclk);
+		goto res_err;
+	}
+
+	hdev->fclk = devm_clk_get(&pdev->dev, "hash_reg_clk");
+	if (IS_ERR(hdev->fclk)) {
+		dev_err(dev, "clock initialization failed.\n");
+		err = PTR_ERR(hdev->fclk);
+		goto res_err;
+	}
+
+	err = img_hash_dma_init(hdev);
+	if (err)
+		goto err_dma;
+
+	dev_dbg(dev, "using %s for DMA transfers\n",
+		dma_chan_name(hdev->dma_lch));
+
+	spin_lock(&img_hash.lock);
+	list_add_tail(&hdev->list, &img_hash.dev_list);
+	spin_unlock(&img_hash.lock);
+
+	err = img_register_algs(hdev);
+	if (err)
+		goto err_algs;
+	dev_info(dev, "data bus: 0x%x;\tsize:0x%x\n", hdev->bus_addr, 0x4);
+	dev_info(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator initialized\n");
+
+	return 0;
+
+err_algs:
+	spin_lock(&img_hash.lock);
+	list_del(&hdev->list);
+	spin_unlock(&img_hash.lock);
+	dma_release_channel(hdev->dma_lch);
+err_dma:
+hash_io_err:
+	devm_clk_put(dev, hdev->iclk);
+	devm_clk_put(dev, hdev->fclk);
+res_err:
+	tasklet_kill(&hdev->done_task);
+	tasklet_kill(&hdev->dma_task);
+sha_dev_err:
+	dev_err(dev, "initialization failed.\n");
+	return err;
+}
+
+static int img_hash_remove(struct platform_device *pdev)
+{
+	static struct img_hash_dev *hdev;
+
+	hdev = platform_get_drvdata(pdev);
+	spin_lock(&img_hash.lock);
+	list_del(&hdev->list);
+	spin_unlock(&img_hash.lock);
+
+	img_unregister_algs(hdev);
+
+	tasklet_kill(&hdev->done_task);
+	tasklet_kill(&hdev->dma_task);
+
+	dma_release_channel(hdev->dma_lch);
+
+	clk_disable_unprepare(hdev->iclk);
+	clk_disable_unprepare(hdev->fclk);
+	return 0;
+}
+
+static struct platform_driver img_hash_driver = {
+	.probe		= img_hash_probe,
+	.remove		= img_hash_remove,
+	.driver		= {
+		.name	= "img-hash-accelerator",
+		.of_match_table	= of_match_ptr(img_hash_match),
+	}
+};
+module_platform_driver(img_hash_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator driver");
+MODULE_AUTHOR("Will Thomas.");
+MODULE_AUTHOR("James Hartley.");
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH V3 2/2] Documentation: crypto: Add DT binding info for the img hw hash accelerator
  2015-03-06  2:58 [PATCH V3 0/2] Add support for the IMG hash accelerator James Hartley
  2015-03-06  2:58 ` [PATCH V3 1/2] crypto: Add Imagination Technologies hw " James Hartley
@ 2015-03-06  2:58 ` James Hartley
  1 sibling, 0 replies; 13+ messages in thread
From: James Hartley @ 2015-03-06  2:58 UTC (permalink / raw)
  To: herbert, robh+dt, pawel.moll, mark.rutland, galak,
	andrew.bresticker, ezequiel.garcia
  Cc: linux-crypto, james.hartley

This adds the binding documentation for the Imagination Technologies
hash accelerator that provides hardware acceleration for
SHA1/SHA224/SHA256/MD5 hashes.  This hardware will be present in
the upcoming pistachio SoC.

Signed-off-by: James Hartley <james.hartley@imgtec.com>
---
 .../devicetree/bindings/crypto/img-hash.txt        |   27 ++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/crypto/img-hash.txt

diff --git a/Documentation/devicetree/bindings/crypto/img-hash.txt b/Documentation/devicetree/bindings/crypto/img-hash.txt
new file mode 100644
index 0000000..7adc519
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/img-hash.txt
@@ -0,0 +1,27 @@
+Imagination Technologies hardware hash accelerator
+
+The hash accelerator provides hardware hashing acceleration for
+SHA1, SHA224, SHA256 and MD5 hashes
+
+Required properties:
+
+- compatible : "img,hash-accelerator"
+- reg : Offset and length of the register set for the module, and the DMA port
+- interrupts : The designated IRQ line for the hashing module.
+- dmas : DMA specifier as per Documentation/devicetree/bindings/dma/dma.txt
+- dma-names : Should be "tx"
+- clocks : Clock specifiers
+- clock-names : "hash_clk" Used to clock data through the accelerator
+		"hash_reg_clk" Used to clock the hash block registers
+
+Example:
+
+	hash: hash@18149600 {
+	compatible = "img,hash-accelerator";
+		reg = <0x18149600 0x100, 0x18101100 0x4>;
+		interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
+		dmas = <&dma 8 0xffffffff>;
+		dma-names = "tx";
+		clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph PERIPH_CLK_ROM>;
+		clock-names = "hash_clk, hash_reg_clk";
+	};
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash accelerator
  2015-03-06  2:58 ` [PATCH V3 1/2] crypto: Add Imagination Technologies hw " James Hartley
@ 2015-03-09  6:35   ` Stephan Mueller
  2015-03-10  9:37     ` Herbert Xu
  0 siblings, 1 reply; 13+ messages in thread
From: Stephan Mueller @ 2015-03-09  6:35 UTC (permalink / raw)
  To: James Hartley
  Cc: herbert, robh+dt, pawel.moll, mark.rutland, galak,
	andrew.bresticker, ezequiel.garcia, linux-crypto

Am Freitag, 6. März 2015, 02:58:26 schrieb James Hartley:

Hi James,

>This adds support for the Imagination Technologies hash
>accelerator which provides hardware acceleration for
>SHA1 SHA224 SHA256 and MD5 hashes.
>
>Signed-off-by: James Hartley <james.hartley@imgtec.com>
>---
> drivers/crypto/Kconfig    |   14 +
> drivers/crypto/Makefile   |    2 +
> drivers/crypto/img-hash.c | 1037
>+++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1053
>insertions(+)
> create mode 100644 drivers/crypto/img-hash.c
>
>diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
>index 2fb0fdf..c72223e 100644
>--- a/drivers/crypto/Kconfig
>+++ b/drivers/crypto/Kconfig
>@@ -436,4 +436,18 @@ config CRYPTO_DEV_QCE
> 	  hardware. To compile this driver as a module, choose M here. 
The
> 	  module will be called qcrypto.
>
>+config CRYPTO_DEV_IMGTEC_HASH
>+	depends on MIPS || COMPILE_TEST
>+	tristate "Imagination Technologies hardware hash accelerator"
>+	select CRYPTO_ALG_API
>+	select CRYPTO_MD5
>+	select CRYPTO_SHA1
>+	select CRYPTO_SHA224
>+	select CRYPTO_SHA256
>+	select CRYPTO_HASH
>+	help
>+	  This driver interfaces with the Imagination Technologies
>+	  hardware hash accelerator. Supporting MD5/SHA1/SHA224/SHA256
>+	  hashing algorithms.
>+
> endif # CRYPTO_HW
>diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
>index 3924f93..fb84be7 100644
>--- a/drivers/crypto/Makefile
>+++ b/drivers/crypto/Makefile
>@@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
> obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
> obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
> obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
>+obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
> obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
> obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
> obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
>@@ -25,3 +26,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
> obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
> obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
> obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
>+obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
>diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
>new file mode 100644
>index 0000000..94a3a6f
>--- /dev/null
>+++ b/drivers/crypto/img-hash.c
>@@ -0,0 +1,1037 @@
>+/*
>+ * Copyright (c) 2014 Imagination Technologies
>+ * Authors:  Will Thomas, James Hartley
>+ *
>+ * This program is free software; you can redistribute it and/or
>modify + * it under the terms of the GNU General Public License
>version 2 as published + * by the Free Software Foundation.
>+ *
>+ *	Interface structure taken from omap-sham driver
>+ */
>+
>+#include <linux/clk.h>
>+#include <linux/dmaengine.h>
>+#include <linux/interrupt.h>
>+#include <linux/io.h>
>+#include <linux/kernel.h>
>+#include <linux/module.h>
>+#include <linux/of_device.h>
>+#include <linux/platform_device.h>
>+#include <linux/scatterlist.h>
>+
>+#include <crypto/internal/hash.h>
>+#include <crypto/md5.h>
>+#include <crypto/sha.h>
>+
>+#define CR_RESET			0
>+#define CR_RESET_SET			1
>+#define CR_RESET_UNSET			0
>+
>+#define CR_MESSAGE_LENGTH_H		0x4
>+#define CR_MESSAGE_LENGTH_L		0x8
>+
>+#define CR_CONTROL			0xc
>+#define CR_CONTROL_BYTE_ORDER_3210	0
>+#define CR_CONTROL_BYTE_ORDER_0123	1
>+#define CR_CONTROL_BYTE_ORDER_2310	2
>+#define CR_CONTROL_BYTE_ORDER_1032	3
>+#define CR_CONTROL_BYTE_ORDER_SHIFT	8
>+#define CR_CONTROL_ALGO_MD5	0
>+#define CR_CONTROL_ALGO_SHA1	1
>+#define CR_CONTROL_ALGO_SHA224	2
>+#define CR_CONTROL_ALGO_SHA256	3
>+
>+#define CR_INTSTAT			0x10
>+#define CR_INTENAB			0x14
>+#define CR_INTCLEAR			0x18
>+#define CR_INT_RESULTS_AVAILABLE	BIT(0)
>+#define CR_INT_NEW_RESULTS_SET		BIT(1)
>+#define CR_INT_RESULT_READ_ERR		BIT(2)
>+#define CR_INT_MESSAGE_WRITE_ERROR	BIT(3)
>+#define CR_INT_STATUS			BIT(8)
>+
>+#define CR_RESULT_QUEUE		0x1c
>+#define CR_RSD0				0x40
>+#define CR_CORE_REV			0x50
>+#define CR_CORE_DES1		0x60
>+#define CR_CORE_DES2		0x70
>+
>+#define DRIVER_FLAGS_BUSY		BIT(0)
>+#define DRIVER_FLAGS_FINAL		BIT(1)
>+#define DRIVER_FLAGS_DMA_ACTIVE		BIT(2)
>+#define DRIVER_FLAGS_OUTPUT_READY	BIT(3)
>+#define DRIVER_FLAGS_INIT		BIT(4)
>+#define DRIVER_FLAGS_CPU		BIT(5)
>+#define DRIVER_FLAGS_DMA_READY		BIT(6)
>+#define DRIVER_FLAGS_ERROR		BIT(7)
>+#define DRIVER_FLAGS_SG			BIT(8)
>+#define DRIVER_FLAGS_SHA1		BIT(18)
>+#define DRIVER_FLAGS_SHA224		BIT(19)
>+#define DRIVER_FLAGS_SHA256		BIT(20)
>+#define DRIVER_FLAGS_MD5		BIT(21)
>+
>+#define IMG_HASH_QUEUE_LENGTH		20
>+#define IMG_HASH_DMA_THRESHOLD		64
>+
>+#ifdef __LITTLE_ENDIAN
>+#define IMG_HASH_BYTE_ORDER		(CR_CONTROL_BYTE_ORDER_3210)
>+#else
>+#define IMG_HASH_BYTE_ORDER		(CR_CONTROL_BYTE_ORDER_0123)
>+#endif
>+
>+struct img_hash_dev;
>+
>+struct img_hash_request_ctx {
>+	struct img_hash_dev	*hdev;
>+	u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
>+	unsigned long		flags;
>+	size_t			digsize;
>+
>+	dma_addr_t		dma_addr;
>+	size_t			dma_ct;
>+
>+	/* sg root */
>+	struct scatterlist	*sgfirst;
>+	/* walk state */
>+	struct scatterlist	*sg;
>+	size_t			nents;
>+	size_t			offset;
>+	unsigned int		total;
>+	size_t			sent;
>+
>+	unsigned long		op;
>+
>+	size_t			bufcnt;
>+	u8 buffer[0] __aligned(sizeof(u32));
>+	struct ahash_request	fallback_req;
>+};
>+
>+struct img_hash_ctx {
>+	struct img_hash_dev	*hdev;
>+	unsigned long		flags;
>+	struct crypto_ahash	*fallback;
>+};
>+
>+struct img_hash_dev {
>+	struct list_head	list;
>+	struct device		*dev;
>+	struct clk		*iclk;
>+	struct clk		*fclk;
>+	int			irq;
>+	void __iomem		*io_base;
>+
>+	phys_addr_t		bus_addr;
>+	void __iomem		*cpu_addr;
>+
>+	spinlock_t		lock;
>+	int			err;
>+	struct tasklet_struct	done_task;
>+	struct tasklet_struct	dma_task;
>+
>+	unsigned long		flags;
>+	struct crypto_queue	queue;
>+	struct ahash_request	*req;
>+
>+	struct dma_chan		*dma_lch;
>+};
>+
>+struct img_hash_drv {
>+	struct list_head dev_list;
>+	spinlock_t lock;
>+};
>+
>+static struct img_hash_drv img_hash = {
>+	.dev_list = LIST_HEAD_INIT(img_hash.dev_list),
>+	.lock = __SPIN_LOCK_UNLOCKED(img_hash.lock),
>+};
>+
>+static inline u32 img_hash_read(struct img_hash_dev *hdev, u32 offset)
>+{
>+	return readl_relaxed(hdev->io_base + offset);
>+}
>+
>+static inline void img_hash_write(struct img_hash_dev *hdev,
>+				  u32 offset, u32 value)
>+{
>+	writel_relaxed(value, hdev->io_base + offset);
>+}
>+
>+static inline u32 img_hash_read_result_queue(struct img_hash_dev
>*hdev) +{
>+	return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE));
>+}
>+
>+static void img_hash_start(struct img_hash_dev *hdev, bool dma)
>+{
>+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+	u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT;
>+
>+	if (ctx->flags & DRIVER_FLAGS_MD5)
>+		cr |= CR_CONTROL_ALGO_MD5;
>+	else if (ctx->flags & DRIVER_FLAGS_SHA1)
>+		cr |= CR_CONTROL_ALGO_SHA1;
>+	else if (ctx->flags & DRIVER_FLAGS_SHA224)
>+		cr |= CR_CONTROL_ALGO_SHA224;
>+	else if (ctx->flags & DRIVER_FLAGS_SHA256)
>+		cr |= CR_CONTROL_ALGO_SHA256;
>+	dev_dbg(hdev->dev, "Starting hash process\n");
>+	img_hash_write(hdev, CR_CONTROL, cr);
>+
>+	/*
>+	 * The hardware block requires two cycles between writing the 
control
>+	 * register and writing the first word of data in non DMA mode, 
to
>+	 * ensure the first data write is not grouped in burst with the
>control +	 * register write a read is issued to 'flush' the bus.
>+	 */
>+	if (!dma)
>+		img_hash_read(hdev, CR_CONTROL);
>+}
>+
>+static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf,
>+			     size_t length, int final)
>+{
>+	u32 count, len32;
>+	const u32 *buffer = (const u32 *)buf;
>+
>+	dev_dbg(hdev->dev, "xmit_cpu:  length: %u bytes\n", length);
>+
>+	if (final)
>+		hdev->flags |= DRIVER_FLAGS_FINAL;
>+
>+	len32 = DIV_ROUND_UP(length, sizeof(u32));
>+
>+	for (count = 0; count < len32; count++)
>+		writel_relaxed(buffer[count], hdev->cpu_addr);
>+
>+	return -EINPROGRESS;
>+}
>+
>+static void img_hash_dma_callback(void *data)
>+{
>+	struct img_hash_dev *hdev = (struct img_hash_dev *)data;
>+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+
>+	if (ctx->bufcnt) {
>+		img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0);
>+		ctx->bufcnt = 0;
>+	}
>+	if (ctx->sg)
>+		tasklet_schedule(&hdev->dma_task);
>+}
>+
>+static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct
>scatterlist *sg) +{
>+	struct dma_async_tx_descriptor *desc;
>+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+
>+	ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
>+	if (ctx->dma_ct == 0) {
>+		dev_err(hdev->dev, "Invalid DMA sg\n");
>+		hdev->err = -EINVAL;
>+		return -EINVAL;
>+	}
>+
>+	desc = dmaengine_prep_slave_sg(hdev->dma_lch,
>+					sg,
>+					ctx->dma_ct,
>+					DMA_MEM_TO_DEV,
>+					DMA_PREP_INTERRUPT | 
DMA_CTRL_ACK);
>+	if (!desc) {
>+		dev_err(hdev->dev, "Null DMA descriptor\n");
>+		hdev->err = -EINVAL;
>+		dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
>+		return -EINVAL;
>+	}
>+	desc->callback = img_hash_dma_callback;
>+	desc->callback_param = hdev;
>+	dmaengine_submit(desc);
>+	dma_async_issue_pending(hdev->dma_lch);
>+
>+	return 0;
>+}
>+
>+static int img_hash_write_via_cpu(struct img_hash_dev *hdev)
>+{
>+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+	int ret = 0;
>+
>+	ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx-
>sg),
>+					ctx->buffer, hdev->req->nbytes);
>+
>+	ctx->total = hdev->req->nbytes;
>+	ctx->bufcnt = 0;
>+
>+	hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL);
>+
>+	img_hash_start(hdev, false);
>+
>+	if (ctx->total)
>+		ret = img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 
1);
>+	else
>+		ret = 0;
>+
>+	return ret;
>+}
>+
>+static int img_hash_finish(struct ahash_request *req)
>+{
>+	struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
>+
>+	if (!req->result)
>+		return -EINVAL;
>+
>+	memcpy(req->result, ctx->digest, ctx->digsize);
>+
>+	return 0;
>+}
>+
>+static void img_hash_copy_hash(struct ahash_request *req)
>+{
>+	struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
>+	u32 *hash = (u32 *)ctx->digest;
>+	int i;
>+
>+	for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--)
>+		hash[i] = img_hash_read_result_queue(ctx->hdev);
>+}
>+
>+static void img_hash_finish_req(struct ahash_request *req, int err)
>+{
>+	struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
>+	struct img_hash_dev *hdev =  ctx->hdev;
>+
>+	if (!err) {
>+		img_hash_copy_hash(req);
>+		if (DRIVER_FLAGS_FINAL & hdev->flags)
>+			err = img_hash_finish(req);
>+	} else {
>+		dev_warn(hdev->dev, "Hash failed with error %d\n", err);
>+		ctx->flags |= DRIVER_FLAGS_ERROR;
>+	}
>+
>+	hdev->flags &= ~(DRIVER_FLAGS_DMA_READY | 
DRIVER_FLAGS_OUTPUT_READY |
>+		DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY | 
DRIVER_FLAGS_FINAL); +
>+	if (req->base.complete)
>+		req->base.complete(&req->base, err);
>+}
>+
>+static int img_hash_write_via_dma(struct img_hash_dev *hdev)
>+{
>+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+
>+	img_hash_start(hdev, true);
>+
>+	dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total);
>+
>+	if (!ctx->total)
>+		hdev->flags |= DRIVER_FLAGS_FINAL;
>+
>+	hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL;
>+
>+	tasklet_schedule(&hdev->dma_task);
>+
>+	return -EINPROGRESS;
>+}
>+
>+static int img_hash_dma_init(struct img_hash_dev *hdev)
>+{
>+	struct dma_slave_config dma_conf;
>+	int err = -EINVAL;
>+
>+	hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
>+	if (!hdev->dma_lch) {
>+		dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.
\n");
>+		return -EBUSY;
>+	}
>+	dma_conf.direction = DMA_MEM_TO_DEV;
>+	dma_conf.dst_addr = hdev->bus_addr;
>+	dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
>+	dma_conf.dst_maxburst = 16;
>+	dma_conf.device_fc = false;
>+
>+	err = dmaengine_slave_config(hdev->dma_lch,  &dma_conf);
>+
>+	if (err) {
>+		dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
>+		dma_release_channel(hdev->dma_lch);
>+		return err;
>+	}
>+	return 0;
>+}
>+
>+static void img_hash_dma_task(unsigned long d)
>+{
>+	struct img_hash_dev *hdev = (struct img_hash_dev *)d;
>+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+	u8 *addr;
>+	size_t nbytes, bleft, wsend, len, tbc;
>+	struct scatterlist tsg;
>+
>+	if (!ctx->sg)
>+		return;
>+
>+	addr = sg_virt(ctx->sg);
>+	nbytes = ctx->sg->length - ctx->offset;
>+
>+	/* The hash accelerator does not support a data valid mask. This
>means +	 * that if each dma (i.e. per page) is not a multiple of 
4
>bytes, the +	 * padding bytes in the last word written by that dma
>would erroneously +	 * be included in the hash. To avoid this we 
round
>down the transfer, +	 * and add the excess to the start of the next
>dma. It does not matter +	 * that the final dma may not be a 
multiple
>of 4 bytes as the hashing +	 * block is programmed to accept the
>correct number of bytes. +	 */
>+
>+	bleft = nbytes % 4;
>+	wsend = (nbytes / 4);
>+
>+	if (wsend) {
>+		sg_init_one(&tsg, addr + ctx->offset, wsend * 4);
>+		if (img_hash_xmit_dma(hdev, &tsg)) {
>+			dev_err(hdev->dev, "DMA failed, falling back to 
CPU");
>+			ctx->flags |= DRIVER_FLAGS_CPU;
>+			hdev->err = 0;
>+			img_hash_xmit_cpu(hdev, addr + ctx->offset,
>+					  wsend * 4, 0);
>+			ctx->sent += wsend * 4;
>+			wsend = 0;
>+		} else {
>+			ctx->sent += wsend * 4;
>+		}
>+	}
>+
>+	if (bleft) {
>+		ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx-
>nents,
>+						 ctx->buffer, bleft, 
ctx->sent);
>+		tbc = 0;
>+		ctx->sg = sg_next(ctx->sg);
>+		while (ctx->sg && (ctx->bufcnt < 4)) {
>+			len = ctx->sg->length;
>+			if (likely(len > (4 - ctx->bufcnt)))
>+				len = 4 - ctx->bufcnt;
>+			tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx-
>nents,
>+						 ctx->buffer + ctx-
>bufcnt, len,
>+					ctx->sent + ctx->bufcnt);
>+			ctx->bufcnt += tbc;
>+			if (tbc >= ctx->sg->length) {
>+				ctx->sg = sg_next(ctx->sg);
>+				tbc = 0;
>+			}
>+		}
>+
>+		ctx->sent += ctx->bufcnt;
>+		ctx->offset = tbc;
>+
>+		if (!wsend)
>+			img_hash_dma_callback(hdev);
>+	} else {
>+		ctx->offset = 0;
>+		ctx->sg = sg_next(ctx->sg);
>+	}
>+}
>+
>+static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev)
>+{
>+	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
>+
>+	if (ctx->flags & DRIVER_FLAGS_SG)
>+		dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct, 
DMA_TO_DEVICE);
>+
>+	return 0;
>+}
>+
>+static int img_hash_process_data(struct img_hash_dev *hdev)
>+{
>+	struct ahash_request *req = hdev->req;
>+	struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
>+	int err = 0;
>+
>+	ctx->bufcnt = 0;
>+
>+	if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) {
>+		dev_dbg(hdev->dev, "process data request(%d bytes) using 
DMA\n",
>+			req->nbytes);
>+		err = img_hash_write_via_dma(hdev);
>+	} else {
>+		dev_dbg(hdev->dev, "process data request(%d bytes) using 
CPU\n",
>+			req->nbytes);
>+		err = img_hash_write_via_cpu(hdev);
>+	}
>+	return err;
>+}
>+
>+static int img_hash_hw_init(struct img_hash_dev *hdev)
>+{
>+	unsigned long long nbits;
>+	u32 u, l;
>+	int ret;
>+
>+	ret = clk_prepare_enable(hdev->iclk);
>+	if (ret)
>+		return ret;
>+
>+	img_hash_write(hdev, CR_RESET, CR_RESET_SET);
>+	img_hash_write(hdev, CR_RESET, CR_RESET_UNSET);
>+	img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET);
>+
>+	nbits = (hdev->req->nbytes << 3);
>+	u = nbits >> 32;
>+	l = nbits;
>+	img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u);
>+	img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l);
>+
>+	if (!(DRIVER_FLAGS_INIT & hdev->flags)) {
>+		hdev->flags |= DRIVER_FLAGS_INIT;
>+		hdev->err = 0;
>+	}
>+	dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits);
>+	return 0;
>+}
>+
>+static int img_hash_init(struct ahash_request *req)
>+{
>+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
>+	struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
>+	struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
>+
>+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
>+	rctx->fallback_req.base.flags =	req->base.flags
>+		& CRYPTO_TFM_REQ_MAY_SLEEP;
>+
>+	return crypto_ahash_init(&rctx->fallback_req);
>+}
>+
>+static int img_hash_handle_queue(struct img_hash_dev *hdev,
>+				 struct ahash_request *req)
>+{
>+	struct crypto_async_request *async_req, *backlog;
>+	struct img_hash_request_ctx *ctx;
>+	unsigned long flags;
>+	int err = 0, res = 0;
>+
>+	spin_lock_irqsave(&hdev->lock, flags);
>+
>+	if (req)
>+		res = ahash_enqueue_request(&hdev->queue, req);
>+
>+	if (DRIVER_FLAGS_BUSY & hdev->flags) {
>+		spin_unlock_irqrestore(&hdev->lock, flags);
>+		return res;
>+	}
>+
>+	backlog = crypto_get_backlog(&hdev->queue);
>+	async_req = crypto_dequeue_request(&hdev->queue);
>+	if (async_req)
>+		hdev->flags |= DRIVER_FLAGS_BUSY;
>+
>+	spin_unlock_irqrestore(&hdev->lock, flags);
>+
>+	if (!async_req)
>+		return res;
>+
>+	if (backlog)
>+		backlog->complete(backlog, -EINPROGRESS);
>+
>+	req = ahash_request_cast(async_req);
>+	hdev->req = req;
>+
>+	ctx = ahash_request_ctx(req);
>+
>+	dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n",
>+		 ctx->op, req->nbytes);
>+
>+	err = img_hash_hw_init(hdev);
>+
>+	if (!err)
>+		err = img_hash_process_data(hdev);
>+
>+	if (err != -EINPROGRESS) {
>+		/* done_task will not finish so do it here */
>+		img_hash_finish_req(req, err);
>+	}
>+	return res;
>+}
>+
>+static int img_hash_update(struct ahash_request *req)
>+{
>+	struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
>+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
>+	struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
>+
>+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
>+	rctx->fallback_req.base.flags = req->base.flags
>+		& CRYPTO_TFM_REQ_MAY_SLEEP;
>+	rctx->fallback_req.nbytes = req->nbytes;
>+	rctx->fallback_req.src = req->src;
>+
>+	return crypto_ahash_update(&rctx->fallback_req);
>+}
>+
>+static int img_hash_final(struct ahash_request *req)
>+{
>+	struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
>+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
>+	struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
>+
>+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
>+	rctx->fallback_req.base.flags = req->base.flags
>+		& CRYPTO_TFM_REQ_MAY_SLEEP;
>+	rctx->fallback_req.result = req->result;
>+
>+	return crypto_ahash_final(&rctx->fallback_req);
>+}
>+
>+static int img_hash_finup(struct ahash_request *req)
>+{
>+	struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
>+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
>+	struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
>+
>+	ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
>+	rctx->fallback_req.base.flags = req->base.flags
>+		& CRYPTO_TFM_REQ_MAY_SLEEP;
>+	rctx->fallback_req.nbytes = req->nbytes;
>+	rctx->fallback_req.src = req->src;
>+	rctx->fallback_req.result = req->result;
>+
>+	return crypto_ahash_finup(&rctx->fallback_req);
>+}
>+
>+static int img_hash_digest(struct ahash_request *req)
>+{
>+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
>+	struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm);
>+	struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
>+	struct img_hash_dev *hdev = NULL;
>+	struct img_hash_dev *tmp;
>+	int err;
>+
>+	spin_lock_bh(&img_hash.lock);
>+	if (!tctx->hdev) {
>+		list_for_each_entry(tmp, &img_hash.dev_list, list) {
>+			hdev = tmp;
>+			break;
>+		}
>+		tctx->hdev = hdev;
>+
>+	} else {
>+		hdev = tctx->hdev;
>+	}
>+
>+	spin_unlock_bh(&img_hash.lock);
>+	ctx->hdev = hdev;
>+	ctx->flags = 0;
>+	ctx->digsize = crypto_ahash_digestsize(tfm);
>+
>+	switch (ctx->digsize) {
>+	case SHA1_DIGEST_SIZE:
>+		ctx->flags |= DRIVER_FLAGS_SHA1;
>+		break;
>+	case SHA256_DIGEST_SIZE:
>+		ctx->flags |= DRIVER_FLAGS_SHA256;
>+		break;
>+	case SHA224_DIGEST_SIZE:
>+		ctx->flags |= DRIVER_FLAGS_SHA224;
>+		break;
>+	case MD5_DIGEST_SIZE:
>+		ctx->flags |= DRIVER_FLAGS_MD5;
>+		break;
>+	default:
>+		return -EINVAL;
>+	}
>+
>+	ctx->bufcnt = 0;
>+	ctx->offset = 0;
>+	ctx->sent = 0;
>+	ctx->total = req->nbytes;
>+	ctx->sg = req->src;
>+	ctx->sgfirst = req->src;
>+	ctx->nents = sg_nents(ctx->sg);
>+
>+	err = img_hash_handle_queue(tctx->hdev, req);
>+
>+	return err;
>+}
>+
>+static int img_hash_cra_init(struct crypto_tfm *tfm)
>+{
>+	struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
>+	const char *alg_name = crypto_tfm_alg_name(tfm);
>+	int err = -ENOMEM;
>+
>+	ctx->fallback = crypto_alloc_ahash(alg_name, 0,
>+					   CRYPTO_ALG_NEED_FALLBACK);
>+	if (IS_ERR(ctx->fallback)) {
>+		pr_err("img_hash: Could not load fallback driver.\n");
>+		err = PTR_ERR(ctx->fallback);
>+		goto err;
>+	}
>+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
>+				 sizeof(struct img_hash_request_ctx) +
>+				 IMG_HASH_DMA_THRESHOLD);
>+
>+	return 0;
>+
>+err:
>+	return err;
>+}
>+
>+static void img_hash_cra_exit(struct crypto_tfm *tfm)
>+{
>+	struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
>+
>+	crypto_free_ahash(tctx->fallback);
>+}
>+
>+static irqreturn_t img_irq_handler(int irq, void *dev_id)
>+{
>+	struct img_hash_dev *hdev = dev_id;
>+	u32 reg;
>+
>+	reg = img_hash_read(hdev, CR_INTSTAT);
>+	img_hash_write(hdev, CR_INTCLEAR, reg);
>+
>+	if (reg & CR_INT_NEW_RESULTS_SET) {
>+		dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
>+		if (DRIVER_FLAGS_BUSY & hdev->flags) {
>+			hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
>+			if (!(DRIVER_FLAGS_CPU & hdev->flags))
>+				hdev->flags |= DRIVER_FLAGS_DMA_READY;
>+			tasklet_schedule(&hdev->done_task);
>+		} else {
>+			dev_warn(hdev->dev,
>+				 "HASH interrupt when no active 
requests.\n");
>+		}
>+	} else if (reg & CR_INT_RESULTS_AVAILABLE) {
>+		dev_warn(hdev->dev,
>+			 "IRQ triggered before the hash had 
completed\n");
>+	} else if (reg & CR_INT_RESULT_READ_ERR) {
>+		dev_warn(hdev->dev,
>+			 "Attempt to read from an empty result 
queue\n");
>+	} else if (reg & CR_INT_MESSAGE_WRITE_ERROR) {
>+		dev_warn(hdev->dev,
>+			 "Data written before the hardware was 
configured\n");
>+	}
>+	return IRQ_HANDLED;
>+}
>+
>+static struct ahash_alg img_algs[] = {
>+	{
>+		.init = img_hash_init,
>+		.update = img_hash_update,
>+		.final = img_hash_final,
>+		.finup = img_hash_finup,
>+		.digest = img_hash_digest,
>+		.halg = {
>+			.digestsize = MD5_DIGEST_SIZE,
>+			.base = {
>+				.cra_name = "md5",
>+				.cra_driver_name = "img-md5",
>+				.cra_priority = 301,

Just curious: why do you use such odd priorities of 301 or 3000? IMHO, 
all you need is a priority of more than 100 to "beat" the generic C 
prios. Maybe you also need to beat the standard assembler 
implementations which are routinely at 200 for hashes. So, a prio of 300 
should suffice, should it not?
>+				.cra_flags =
>+				CRYPTO_ALG_ASYNC |
>+				CRYPTO_ALG_NEED_FALLBACK,
>+				.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
>+				.cra_ctxsize = sizeof(struct 
img_hash_ctx),
>+				.cra_init = img_hash_cra_init,
>+				.cra_exit = img_hash_cra_exit,
>+				.cra_module = THIS_MODULE,
>+			}
>+		}
>+	},
>+	{
>+		.init = img_hash_init,
>+		.update = img_hash_update,
>+		.final = img_hash_final,
>+		.finup = img_hash_finup,
>+		.digest = img_hash_digest,
>+		.halg = {
>+			.digestsize = SHA1_DIGEST_SIZE,
>+			.base = {
>+				.cra_name = "sha1",
>+				.cra_driver_name = "img-sha1",
>+				.cra_priority = 3000,
>+				.cra_flags =
>+				CRYPTO_ALG_ASYNC |
>+				CRYPTO_ALG_NEED_FALLBACK,
>+				.cra_blocksize = SHA1_BLOCK_SIZE,
>+				.cra_ctxsize = sizeof(struct 
img_hash_ctx),
>+				.cra_init = img_hash_cra_init,
>+				.cra_exit = img_hash_cra_exit,
>+				.cra_module = THIS_MODULE,
>+			}
>+		}
>+	},
>+	{
>+		.init = img_hash_init,
>+		.update = img_hash_update,
>+		.final = img_hash_final,
>+		.finup = img_hash_finup,
>+		.digest = img_hash_digest,
>+		.halg = {
>+			.digestsize = SHA224_DIGEST_SIZE,
>+			.base = {
>+				.cra_name = "sha224",
>+				.cra_driver_name = "img-sha224",
>+				.cra_priority = 3000,
>+				.cra_flags =
>+				CRYPTO_ALG_ASYNC |
>+				CRYPTO_ALG_NEED_FALLBACK,
>+				.cra_blocksize = SHA224_BLOCK_SIZE,
>+				.cra_ctxsize = sizeof(struct 
img_hash_ctx),
>+				.cra_init = img_hash_cra_init,
>+				.cra_exit = img_hash_cra_exit,
>+				.cra_module = THIS_MODULE,
>+			}
>+		}
>+	},
>+	{
>+		.init = img_hash_init,
>+		.update = img_hash_update,
>+		.final = img_hash_final,
>+		.finup = img_hash_finup,
>+		.digest = img_hash_digest,
>+		.halg = {
>+			.digestsize = SHA256_DIGEST_SIZE,
>+			.base = {
>+				.cra_name = "sha256",
>+				.cra_driver_name = "img-sha256",
>+				.cra_priority = 301,
>+				.cra_flags =
>+				CRYPTO_ALG_ASYNC |
>+				CRYPTO_ALG_NEED_FALLBACK,
>+				.cra_blocksize = SHA256_BLOCK_SIZE,
>+				.cra_ctxsize = sizeof(struct 
img_hash_ctx),
>+				.cra_init = img_hash_cra_init,
>+				.cra_exit = img_hash_cra_exit,
>+				.cra_module = THIS_MODULE,
>+			}
>+		}
>+	}
>+};
>+
>+static int img_register_algs(struct img_hash_dev *hdev)
>+{
>+	int i, err;
>+
>+	for (i = 0; i < ARRAY_SIZE(img_algs); i++) {
>+		err = crypto_register_ahash(&img_algs[i]);
>+		if (err)
>+			goto err_reg;
>+	}
>+	return 0;
>+
>+err_reg:
>+	for (; i--; )
>+		crypto_unregister_ahash(&img_algs[i]);
>+
>+	return err;
>+}
>+
>+static int img_unregister_algs(struct img_hash_dev *hdev)
>+{
>+	int i;
>+
>+	for (i = 0; i < ARRAY_SIZE(img_algs); i++)
>+		crypto_unregister_ahash(&img_algs[i]);
>+	return 0;
>+}
>+
>+static void img_hash_done_task(unsigned long data)
>+{
>+	struct img_hash_dev *hdev = (struct img_hash_dev *)data;
>+	int err = 0;
>+
>+	if (hdev->err == -EINVAL) {
>+		err = hdev->err;
>+		goto finish;
>+	}
>+
>+	if (!(DRIVER_FLAGS_BUSY & hdev->flags)) {
>+		img_hash_handle_queue(hdev, NULL);
>+		return;
>+	}
>+
>+	if (DRIVER_FLAGS_CPU & hdev->flags) {
>+		if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
>+			hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY;
>+			goto finish;
>+		}
>+	} else if (DRIVER_FLAGS_DMA_READY & hdev->flags) {
>+		if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) {
>+			hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE;
>+			img_hash_write_via_dma_stop(hdev);
>+			if (hdev->err) {
>+				err = hdev->err;
>+				goto finish;
>+			}
>+		}
>+		if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
>+			hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
>+					DRIVER_FLAGS_OUTPUT_READY);
>+			goto finish;
>+		}
>+	}
>+	return;
>+
>+finish:
>+	img_hash_finish_req(hdev->req, err);
>+}
>+
>+static const struct of_device_id img_hash_match[] = {
>+	{ .compatible = "img,hash-accelerator" },
>+	{}
>+};
>+MODULE_DEVICE_TABLE(of, img_hash_match)
>+
>+static int img_hash_probe(struct platform_device *pdev)
>+{
>+	struct img_hash_dev *hdev;
>+	struct device *dev = &pdev->dev;
>+	struct resource *hash_res;
>+	int err;
>+
>+	hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL);
>+	if (hdev == NULL) {
>+		err = -ENOMEM;
>+		goto sha_dev_err;
>+	}
>+	spin_lock_init(&hdev->lock);
>+
>+	hdev->dev = dev;
>+
>+	platform_set_drvdata(pdev, hdev);
>+
>+	INIT_LIST_HEAD(&hdev->list);
>+
>+	tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned
>long)hdev); +	tasklet_init(&hdev->dma_task, img_hash_dma_task,
>(unsigned long)hdev); +
>+	crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH);
>+
>+	/* Register bank */
>+	hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>+
>+	hdev->io_base = devm_ioremap_resource(dev, hash_res);
>+	if (IS_ERR(hdev->io_base)) {
>+		dev_err(dev, "can't ioremap\n");
>+		err = PTR_ERR(hdev->io_base);
>+		goto hash_io_err;
>+	}
>+
>+	/* Write port (DMA or CPU) */
>+	hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
>+	if (!hash_res) {
>+		dev_err(dev, "no write port resource info\n");
>+		err = -ENODEV;
>+		goto res_err;
>+	}
>+	hdev->bus_addr = hash_res->start;
>+	hdev->cpu_addr = devm_ioremap_resource(dev, hash_res);
>+	if (IS_ERR(hdev->cpu_addr)) {
>+		dev_err(dev, "can't ioremap write port\n");
>+		err = PTR_ERR(hdev->cpu_addr);
>+		goto res_err;
>+	}
>+
>+	hdev->irq = platform_get_irq(pdev, 0);
>+	if (hdev->irq < 0) {
>+		dev_err(dev, "no IRQ resource info\n");
>+		err = hdev->irq;
>+		goto res_err;
>+	}
>+
>+	err = devm_request_irq(dev, hdev->irq, img_irq_handler, 0,
>+			       dev_name(dev), hdev);
>+	if (err) {
>+		dev_err(dev, "unable to request %s irq\n", 
dev_name(dev));
>+		goto res_err;
>+	}
>+	dev_dbg(dev, "using IRQ channel %d\n", hdev->irq);
>+
>+	hdev->iclk = devm_clk_get(&pdev->dev, "hash_clk");
>+	if (IS_ERR(hdev->iclk)) {
>+		dev_err(dev, "clock initialization failed.\n");
>+		err = PTR_ERR(hdev->iclk);
>+		goto res_err;
>+	}
>+
>+	hdev->fclk = devm_clk_get(&pdev->dev, "hash_reg_clk");
>+	if (IS_ERR(hdev->fclk)) {
>+		dev_err(dev, "clock initialization failed.\n");
>+		err = PTR_ERR(hdev->fclk);
>+		goto res_err;
>+	}
>+
>+	err = img_hash_dma_init(hdev);
>+	if (err)
>+		goto err_dma;
>+
>+	dev_dbg(dev, "using %s for DMA transfers\n",
>+		dma_chan_name(hdev->dma_lch));
>+
>+	spin_lock(&img_hash.lock);
>+	list_add_tail(&hdev->list, &img_hash.dev_list);
>+	spin_unlock(&img_hash.lock);
>+
>+	err = img_register_algs(hdev);
>+	if (err)
>+		goto err_algs;
>+	dev_info(dev, "data bus: 0x%x;\tsize:0x%x\n", hdev->bus_addr, 
0x4);
>+	dev_info(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator
>initialized\n"); +
>+	return 0;
>+
>+err_algs:
>+	spin_lock(&img_hash.lock);
>+	list_del(&hdev->list);
>+	spin_unlock(&img_hash.lock);
>+	dma_release_channel(hdev->dma_lch);
>+err_dma:
>+hash_io_err:
>+	devm_clk_put(dev, hdev->iclk);
>+	devm_clk_put(dev, hdev->fclk);
>+res_err:
>+	tasklet_kill(&hdev->done_task);
>+	tasklet_kill(&hdev->dma_task);
>+sha_dev_err:
>+	dev_err(dev, "initialization failed.\n");
>+	return err;
>+}
>+
>+static int img_hash_remove(struct platform_device *pdev)
>+{
>+	static struct img_hash_dev *hdev;
>+
>+	hdev = platform_get_drvdata(pdev);
>+	spin_lock(&img_hash.lock);
>+	list_del(&hdev->list);
>+	spin_unlock(&img_hash.lock);
>+
>+	img_unregister_algs(hdev);
>+
>+	tasklet_kill(&hdev->done_task);
>+	tasklet_kill(&hdev->dma_task);
>+
>+	dma_release_channel(hdev->dma_lch);
>+
>+	clk_disable_unprepare(hdev->iclk);
>+	clk_disable_unprepare(hdev->fclk);
>+	return 0;
>+}
>+
>+static struct platform_driver img_hash_driver = {
>+	.probe		= img_hash_probe,
>+	.remove		= img_hash_remove,
>+	.driver		= {
>+		.name	= "img-hash-accelerator",
>+		.of_match_table	= of_match_ptr(img_hash_match),
>+	}
>+};
>+module_platform_driver(img_hash_driver);
>+
>+MODULE_LICENSE("GPL v2");
>+MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator driver");
>+MODULE_AUTHOR("Will Thomas.");
>+MODULE_AUTHOR("James Hartley.");


Ciao
Stephan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash accelerator
  2015-03-09  6:35   ` Stephan Mueller
@ 2015-03-10  9:37     ` Herbert Xu
  2015-03-10  9:53       ` James Hartley
  2015-03-10  9:54       ` James Hartley
  0 siblings, 2 replies; 13+ messages in thread
From: Herbert Xu @ 2015-03-10  9:37 UTC (permalink / raw)
  To: Stephan Mueller
  Cc: James Hartley, robh+dt, pawel.moll, mark.rutland, galak,
	andrew.bresticker, ezequiel.garcia, linux-crypto

On Mon, Mar 09, 2015 at 07:35:57AM +0100, Stephan Mueller wrote:
>
> >+static struct ahash_alg img_algs[] = {
> >+	{
> >+		.init = img_hash_init,
> >+		.update = img_hash_update,
> >+		.final = img_hash_final,
> >+		.finup = img_hash_finup,
> >+		.digest = img_hash_digest,
> >+		.halg = {
> >+			.digestsize = MD5_DIGEST_SIZE,
> >+			.base = {
> >+				.cra_name = "md5",
> >+				.cra_driver_name = "img-md5",
> >+				.cra_priority = 301,
> 
> Just curious: why do you use such odd priorities of 301 or 3000? IMHO, 
> all you need is a priority of more than 100 to "beat" the generic C 
> prios. Maybe you also need to beat the standard assembler 
> implementations which are routinely at 200 for hashes. So, a prio of 300 
> should suffice, should it not?

James, can you answer Stephan's question please?

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash accelerator
  2015-03-10  9:37     ` Herbert Xu
@ 2015-03-10  9:53       ` James Hartley
  2015-03-10  9:54       ` James Hartley
  1 sibling, 0 replies; 13+ messages in thread
From: James Hartley @ 2015-03-10  9:53 UTC (permalink / raw)
  To: Herbert Xu, Stephan Mueller
  Cc: robh+dt, pawel.moll, mark.rutland, galak, andrew.bresticker,
	Ezequiel Garcia, linux-crypto


> -----Original Message-----
> From: Herbert Xu [mailto:herbert@gondor.apana.org.au]
> Sent: 10 March 2015 09:37
> To: Stephan Mueller
> Cc: James Hartley; robh+dt@kernel.org; pawel.moll@arm.com;
> mark.rutland@arm.com; galak@codeaurora.org;
> andrew.bresticker@chromium.org; Ezequiel Garcia; linux-
> crypto@vger.kernel.org
> Subject: Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> accelerator
> 
> On Mon, Mar 09, 2015 at 07:35:57AM +0100, Stephan Mueller wrote:
> >
> > >+static struct ahash_alg img_algs[] = {
> > >+	{
> > >+		.init = img_hash_init,
> > >+		.update = img_hash_update,
> > >+		.final = img_hash_final,
> > >+		.finup = img_hash_finup,
> > >+		.digest = img_hash_digest,
> > >+		.halg = {
> > >+			.digestsize = MD5_DIGEST_SIZE,
> > >+			.base = {
> > >+				.cra_name = "md5",
> > >+				.cra_driver_name = "img-md5",
> > >+				.cra_priority = 301,
> >
> > Just curious: why do you use such odd priorities of 301 or 3000? IMHO,
> > all you need is a priority of more than 100 to "beat" the generic C
> > prios. Maybe you also need to beat the standard assembler
> > implementations which are routinely at 200 for hashes. So, a prio of
> > 300 should suffice, should it not?
> 
> James, can you answer Stephan's question please?

Hi Herbert, and Stephan, 

The difficulty here is that the driver was written by a summer placement student who has since left the company, and despite searching our internal commit logs I'm unable to find any reason why 301 and 3000 are used.  I am happy to set them to 300 if that is a sensible figure to use.  

Thanks for the review Stephan!

> 
> Thanks,
> --
> Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page:
> http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

James.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash accelerator
  2015-03-10  9:37     ` Herbert Xu
  2015-03-10  9:53       ` James Hartley
@ 2015-03-10  9:54       ` James Hartley
  2015-03-10  9:55         ` Herbert Xu
  1 sibling, 1 reply; 13+ messages in thread
From: James Hartley @ 2015-03-10  9:54 UTC (permalink / raw)
  To: Herbert Xu, Stephan Mueller
  Cc: robh+dt, pawel.moll, mark.rutland, galak,
	Andrew Bresticker (abrestic@chromium.org),
	Ezequiel Garcia, linux-crypto

Resend with correct email address for Andrew Bresticker.

> -----Original Message-----
> From: James Hartley
> Sent: 10 March 2015 09:53
> To: 'Herbert Xu'; Stephan Mueller
> Cc: robh+dt@kernel.org; pawel.moll@arm.com; mark.rutland@arm.com;
> galak@codeaurora.org; andrew.bresticker@chromium.org; Ezequiel Garcia;
> linux-crypto@vger.kernel.org
> Subject: RE: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> accelerator
> 
> 
> > -----Original Message-----
> > From: Herbert Xu [mailto:herbert@gondor.apana.org.au]
> > Sent: 10 March 2015 09:37
> > To: Stephan Mueller
> > Cc: James Hartley; robh+dt@kernel.org; pawel.moll@arm.com;
> > mark.rutland@arm.com; galak@codeaurora.org;
> > andrew.bresticker@chromium.org; Ezequiel Garcia; linux-
> > crypto@vger.kernel.org
> > Subject: Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> > accelerator
> >
> > On Mon, Mar 09, 2015 at 07:35:57AM +0100, Stephan Mueller wrote:
> > >
> > > >+static struct ahash_alg img_algs[] = {
> > > >+	{
> > > >+		.init = img_hash_init,
> > > >+		.update = img_hash_update,
> > > >+		.final = img_hash_final,
> > > >+		.finup = img_hash_finup,
> > > >+		.digest = img_hash_digest,
> > > >+		.halg = {
> > > >+			.digestsize = MD5_DIGEST_SIZE,
> > > >+			.base = {
> > > >+				.cra_name = "md5",
> > > >+				.cra_driver_name = "img-md5",
> > > >+				.cra_priority = 301,
> > >
> > > Just curious: why do you use such odd priorities of 301 or 3000? IMHO,
> > > all you need is a priority of more than 100 to "beat" the generic C
> > > prios. Maybe you also need to beat the standard assembler
> > > implementations which are routinely at 200 for hashes. So, a prio of
> > > 300 should suffice, should it not?
> >
> > James, can you answer Stephan's question please?
> 
> Hi Herbert, and Stephan,
> 
> The difficulty here is that the driver was written by a summer placement
> student who has since left the company, and despite searching our internal
> commit logs I'm unable to find any reason why 301 and 3000 are used.  I am
> happy to set them to 300 if that is a sensible figure to use.
> 
> Thanks for the review Stephan!
> 
> >
> > Thanks,
> > --
> > Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page:
> > http://gondor.apana.org.au/~herbert/
> > PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
> 
> James.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash accelerator
  2015-03-10  9:54       ` James Hartley
@ 2015-03-10  9:55         ` Herbert Xu
  2015-03-10 12:30           ` James Hartley
  0 siblings, 1 reply; 13+ messages in thread
From: Herbert Xu @ 2015-03-10  9:55 UTC (permalink / raw)
  To: James Hartley
  Cc: Stephan Mueller, robh+dt, pawel.moll, mark.rutland, galak,
	Andrew Bresticker (abrestic@chromium.org),
	Ezequiel Garcia, linux-crypto

On Tue, Mar 10, 2015 at 09:54:49AM +0000, James Hartley wrote:
>
> > Hi Herbert, and Stephan,
> > 
> > The difficulty here is that the driver was written by a summer placement
> > student who has since left the company, and despite searching our internal
> > commit logs I'm unable to find any reason why 301 and 3000 are used.  I am
> > happy to set them to 300 if that is a sensible figure to use.

OK please resend the patches with that change James.

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash accelerator
  2015-03-10  9:55         ` Herbert Xu
@ 2015-03-10 12:30           ` James Hartley
  0 siblings, 0 replies; 13+ messages in thread
From: James Hartley @ 2015-03-10 12:30 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Stephan Mueller, robh+dt, pawel.moll, mark.rutland, galak,
	Andrew Bresticker (abrestic@chromium.org),
	Ezequiel Garcia, linux-crypto


> -----Original Message-----
> From: linux-crypto-owner@vger.kernel.org [mailto:linux-crypto-
> owner@vger.kernel.org] On Behalf Of Herbert Xu
> Sent: 10 March 2015 09:56
> To: James Hartley
> Cc: Stephan Mueller; robh+dt@kernel.org; pawel.moll@arm.com;
> mark.rutland@arm.com; galak@codeaurora.org; Andrew Bresticker
> (abrestic@chromium.org); Ezequiel Garcia; linux-crypto@vger.kernel.org
> Subject: Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> accelerator
> 
> On Tue, Mar 10, 2015 at 09:54:49AM +0000, James Hartley wrote:
> >
> > > Hi Herbert, and Stephan,
> > >
> > > The difficulty here is that the driver was written by a summer
> > > placement student who has since left the company, and despite
> > > searching our internal commit logs I'm unable to find any reason why
> > > 301 and 3000 are used.  I am happy to set them to 300 if that is a
> sensible figure to use.
> 
> OK please resend the patches with that change James.

Will do, I want to address Andrew Bresticker's review comments first though.

Thanks, 
James

> 
> Thanks,
> --
> Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page:
> http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
> --
> To unsubscribe from this list: send the line "unsubscribe linux-crypto" in the
> body of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash accelerator
  2015-03-10 18:02       ` Andrew Bresticker
@ 2015-03-12  0:13         ` James Hartley
  0 siblings, 0 replies; 13+ messages in thread
From: James Hartley @ 2015-03-12  0:13 UTC (permalink / raw)
  To: Andrew Bresticker; +Cc: linux-crypto

Hi Andrew, 

> -----Original Message-----
> From: abrestic@google.com [mailto:abrestic@google.com] On Behalf Of
> Andrew Bresticker
> Sent: 10 March 2015 18:02
> To: James Hartley
> Cc: linux-crypto@vger.kernel.org
> Subject: Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> accelerator
> 
> Hi James,
> 
> >> > +static irqreturn_t img_irq_handler(int irq, void *dev_id) {
> >> > +       struct img_hash_dev *hdev = dev_id;
> >> > +       u32 reg;
> >> > +
> >> > +       reg = img_hash_read(hdev, CR_INTSTAT);
> >> > +       img_hash_write(hdev, CR_INTCLEAR, reg);
> >> > +
> >> > +       if (reg & CR_INT_NEW_RESULTS_SET) {
> >> > +               dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
> >> > +               if (DRIVER_FLAGS_BUSY & hdev->flags) {
> >> > +                       hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
> >> > +                       if (!(DRIVER_FLAGS_CPU & hdev->flags))
> >> > +                               hdev->flags |= DRIVER_FLAGS_DMA_READY;
> >> > +                       tasklet_schedule(&hdev->done_task);
> >>
> >> Since this done_task only gets scheduled from here, why not make this
> >> a threaded IRQ handler and just do the work here instead of
> >> separating it between a hard IRQ handler and a tasklet?
> >
> > What is the advantage of doing that?  i.e is this simple case worth setting up
> an extra thread?
> 
> I believe threaded IRQ handlers are now preferred to using a hard IRQ
> handler + tasklet when possible, partly because people tend to screw up
> tasklet usage.

Fair enough, but I'd like to leave this as an enhancement for when I have some
extra bandwidth if it's not essential (same for the runtime PM).  

> 
> >> > +               err = PTR_ERR(hdev->io_base);
> >> > +               goto hash_io_err;
> >> > +       }
> >> > +
> >> > +       /* Write port (DMA or CPU) */
> >> > +       hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> >> > +       if (!hash_res) {
> >> > +               dev_err(dev, "no write port resource info\n");
> >> > +               err = -ENODEV;
> >> > +               goto res_err;
> >> > +       }
> >> > +       hdev->bus_addr = hash_res->start;
> >>
> >> Maybe set this after devm_ioremap_resource() succeeds and avoid the
> >> extra error check?
> >
> > Not quite following you here - which check are you saying can be removed?
> 
> You can remove the if (hash_res) check if you set hdev->bus_addr after
> devm_ioremap_resource().

I see what you mean - done.

> 
> Thanks,
> Andrew

Thanks, 
James.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash accelerator
  2015-03-10 15:31     ` James Hartley
@ 2015-03-10 18:02       ` Andrew Bresticker
  2015-03-12  0:13         ` James Hartley
  0 siblings, 1 reply; 13+ messages in thread
From: Andrew Bresticker @ 2015-03-10 18:02 UTC (permalink / raw)
  To: James Hartley; +Cc: linux-crypto

Hi James,

>> > +static irqreturn_t img_irq_handler(int irq, void *dev_id) {
>> > +       struct img_hash_dev *hdev = dev_id;
>> > +       u32 reg;
>> > +
>> > +       reg = img_hash_read(hdev, CR_INTSTAT);
>> > +       img_hash_write(hdev, CR_INTCLEAR, reg);
>> > +
>> > +       if (reg & CR_INT_NEW_RESULTS_SET) {
>> > +               dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
>> > +               if (DRIVER_FLAGS_BUSY & hdev->flags) {
>> > +                       hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
>> > +                       if (!(DRIVER_FLAGS_CPU & hdev->flags))
>> > +                               hdev->flags |= DRIVER_FLAGS_DMA_READY;
>> > +                       tasklet_schedule(&hdev->done_task);
>>
>> Since this done_task only gets scheduled from here, why not make this a
>> threaded IRQ handler and just do the work here instead of separating it
>> between a hard IRQ handler and a tasklet?
>
> What is the advantage of doing that?  i.e is this simple case worth setting up an extra thread?

I believe threaded IRQ handlers are now preferred to using a hard IRQ
handler + tasklet when possible, partly because people tend to screw
up tasklet usage.

>> > +               err = PTR_ERR(hdev->io_base);
>> > +               goto hash_io_err;
>> > +       }
>> > +
>> > +       /* Write port (DMA or CPU) */
>> > +       hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
>> > +       if (!hash_res) {
>> > +               dev_err(dev, "no write port resource info\n");
>> > +               err = -ENODEV;
>> > +               goto res_err;
>> > +       }
>> > +       hdev->bus_addr = hash_res->start;
>>
>> Maybe set this after devm_ioremap_resource() succeeds and avoid the extra
>> error check?
>
> Not quite following you here - which check are you saying can be removed?

You can remove the if (hash_res) check if you set hdev->bus_addr after
devm_ioremap_resource().

Thanks,
Andrew

^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash accelerator
  2015-03-09  5:42   ` [PATCH V3 1/2] crypto: Add Imagination Technologies " Andrew Bresticker
@ 2015-03-10 15:31     ` James Hartley
  2015-03-10 18:02       ` Andrew Bresticker
  0 siblings, 1 reply; 13+ messages in thread
From: James Hartley @ 2015-03-10 15:31 UTC (permalink / raw)
  To: Andrew Bresticker; +Cc: linux-crypto

Hi Andrew

> -----Original Message-----
> From: abrestic@google.com [mailto:abrestic@google.com] On Behalf Of
> Andrew Bresticker
> Sent: 09 March 2015 05:42
> To: James Hartley
> Cc: linux-crypto@vger.kernel.org
> Subject: Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash
> accelerator
> 
> Hi James,
> 
> On Thu, Mar 5, 2015 at 7:01 PM, James Hartley <james.hartley@imgtec.com>
> wrote:
> > This adds support for the Imagination Technologies hash accelerator
> > which provides hardware acceleration for
> > SHA1 SHA224 SHA256 and MD5 hashes.
> >
> > Signed-off-by: James Hartley <james.hartley@imgtec.com>
> 
> Some general comments below, I'll leave review of the crypto-specific stuff to
> Herbert.
> 
> > diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index
> > 3924f93..fb84be7 100644
> > --- a/drivers/crypto/Makefile
> > +++ b/drivers/crypto/Makefile
> > @@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
> >  obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
> >  obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
> >  obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
> > +obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
> >  obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
> >  obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
> >  obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o @@ -25,3 +26,4 @@
> > obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
> >  obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
> >  obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
> >  obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
> > +obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
> 
> Unrelated change - perhaps a bad merge conflict resolution?

Yep, now resolved, thanks.

> 
> > diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c new
> > file mode 100644 index 0000000..94a3a6f
> > --- /dev/null
> > +++ b/drivers/crypto/img-hash.c
> > @@ -0,0 +1,1037 @@
> > +/*
> > + * Copyright (c) 2014 Imagination Technologies
> > + * Authors:  Will Thomas, James Hartley
> > + *
> > + * This program is free software; you can redistribute it and/or
> > +modify
> > + * it under the terms of the GNU General Public License version 2 as
> > +published
> > + * by the Free Software Foundation.
> > + *
> > + *     Interface structure taken from omap-sham driver
> > + */
> > +
> > +#include <linux/clk.h>
> > +#include <linux/dmaengine.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/io.h>
> > +#include <linux/kernel.h>
> > +#include <linux/module.h>
> > +#include <linux/of_device.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/scatterlist.h>
> > +
> > +#include <crypto/internal/hash.h>
> > +#include <crypto/md5.h>
> > +#include <crypto/sha.h>
> > +
> > +#define CR_RESET                       0
> > +#define CR_RESET_SET                   1
> > +#define CR_RESET_UNSET                 0
> > +
> > +#define CR_MESSAGE_LENGTH_H            0x4
> > +#define CR_MESSAGE_LENGTH_L            0x8
> > +
> > +#define CR_CONTROL                     0xc
> > +#define CR_CONTROL_BYTE_ORDER_3210     0
> > +#define CR_CONTROL_BYTE_ORDER_0123     1
> > +#define CR_CONTROL_BYTE_ORDER_2310     2
> > +#define CR_CONTROL_BYTE_ORDER_1032     3
> > +#define CR_CONTROL_BYTE_ORDER_SHIFT    8
> > +#define CR_CONTROL_ALGO_MD5    0
> > +#define CR_CONTROL_ALGO_SHA1   1
> > +#define CR_CONTROL_ALGO_SHA224 2
> > +#define CR_CONTROL_ALGO_SHA256 3
> > +
> > +#define CR_INTSTAT                     0x10
> > +#define CR_INTENAB                     0x14
> > +#define CR_INTCLEAR                    0x18
> > +#define CR_INT_RESULTS_AVAILABLE       BIT(0)
> > +#define CR_INT_NEW_RESULTS_SET         BIT(1)
> > +#define CR_INT_RESULT_READ_ERR         BIT(2)
> > +#define CR_INT_MESSAGE_WRITE_ERROR     BIT(3)
> > +#define CR_INT_STATUS                  BIT(8)
> > +
> > +#define CR_RESULT_QUEUE                0x1c
> > +#define CR_RSD0                                0x40
> > +#define CR_CORE_REV                    0x50
> > +#define CR_CORE_DES1           0x60
> > +#define CR_CORE_DES2           0x70
> > +
> > +#define DRIVER_FLAGS_BUSY              BIT(0)
> > +#define DRIVER_FLAGS_FINAL             BIT(1)
> > +#define DRIVER_FLAGS_DMA_ACTIVE                BIT(2)
> > +#define DRIVER_FLAGS_OUTPUT_READY      BIT(3)
> > +#define DRIVER_FLAGS_INIT              BIT(4)
> > +#define DRIVER_FLAGS_CPU               BIT(5)
> > +#define DRIVER_FLAGS_DMA_READY         BIT(6)
> > +#define DRIVER_FLAGS_ERROR             BIT(7)
> > +#define DRIVER_FLAGS_SG                        BIT(8)
> > +#define DRIVER_FLAGS_SHA1              BIT(18)
> > +#define DRIVER_FLAGS_SHA224            BIT(19)
> > +#define DRIVER_FLAGS_SHA256            BIT(20)
> > +#define DRIVER_FLAGS_MD5               BIT(21)
> > +
> > +#define IMG_HASH_QUEUE_LENGTH          20
> > +#define IMG_HASH_DMA_THRESHOLD         64
> > +
> > +#ifdef __LITTLE_ENDIAN
> > +#define IMG_HASH_BYTE_ORDER            (CR_CONTROL_BYTE_ORDER_3210)
> > +#else
> > +#define IMG_HASH_BYTE_ORDER            (CR_CONTROL_BYTE_ORDER_0123)
> > +#endif
> 
> Unnecessary parenthesis.

Fixed

> 
> > +struct img_hash_dev;
> > +
> > +struct img_hash_request_ctx {
> > +       struct img_hash_dev     *hdev;
> > +       u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
> > +       unsigned long           flags;
> > +       size_t                  digsize;
> > +
> > +       dma_addr_t              dma_addr;
> > +       size_t                  dma_ct;
> > +
> > +       /* sg root */
> > +       struct scatterlist      *sgfirst;
> > +       /* walk state */
> > +       struct scatterlist      *sg;
> > +       size_t                  nents;
> > +       size_t                  offset;
> > +       unsigned int            total;
> > +       size_t                  sent;
> > +
> > +       unsigned long           op;
> > +
> > +       size_t                  bufcnt;
> > +       u8 buffer[0] __aligned(sizeof(u32));
> > +       struct ahash_request    fallback_req;
> > +};
> > +
> > +struct img_hash_ctx {
> > +       struct img_hash_dev     *hdev;
> > +       unsigned long           flags;
> > +       struct crypto_ahash     *fallback;
> > +};
> > +
> > +struct img_hash_dev {
> > +       struct list_head        list;
> > +       struct device           *dev;
> > +       struct clk              *iclk;
> > +       struct clk              *fclk;
> 
> Maybe make these names a little more obvious so it's clear which clock is
> which.

Agreed - done

> 
> > +       int                     irq;
> 
> This isn't used outside of probe(), so it need not be carried around in this
> struct.

Done

> 
> > +       void __iomem            *io_base;
> > +
> > +       phys_addr_t             bus_addr;
> > +       void __iomem            *cpu_addr;
> > +
> > +       spinlock_t              lock;
> > +       int                     err;
> > +       struct tasklet_struct   done_task;
> > +       struct tasklet_struct   dma_task;
> > +
> > +       unsigned long           flags;
> > +       struct crypto_queue     queue;
> > +       struct ahash_request    *req;
> > +
> > +       struct dma_chan         *dma_lch;
> > +};
> > +
> > +struct img_hash_drv {
> > +       struct list_head dev_list;
> > +       spinlock_t lock;
> > +};
> > +
> > +static struct img_hash_drv img_hash = {
> > +       .dev_list = LIST_HEAD_INIT(img_hash.dev_list),
> > +       .lock = __SPIN_LOCK_UNLOCKED(img_hash.lock),
> > +};
> > +
> > +static inline u32 img_hash_read(struct img_hash_dev *hdev, u32
> > +offset) {
> > +       return readl_relaxed(hdev->io_base + offset); }
> > +
> > +static inline void img_hash_write(struct img_hash_dev *hdev,
> > +                                 u32 offset, u32 value) {
> > +       writel_relaxed(value, hdev->io_base + offset); }
> > +
> > +static inline u32 img_hash_read_result_queue(struct img_hash_dev
> > +*hdev) {
> > +       return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE)); }
> > +
> > +static void img_hash_start(struct img_hash_dev *hdev, bool dma) {
> > +       struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > +       u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT;
> > +
> > +       if (ctx->flags & DRIVER_FLAGS_MD5)
> > +               cr |= CR_CONTROL_ALGO_MD5;
> > +       else if (ctx->flags & DRIVER_FLAGS_SHA1)
> > +               cr |= CR_CONTROL_ALGO_SHA1;
> > +       else if (ctx->flags & DRIVER_FLAGS_SHA224)
> > +               cr |= CR_CONTROL_ALGO_SHA224;
> > +       else if (ctx->flags & DRIVER_FLAGS_SHA256)
> > +               cr |= CR_CONTROL_ALGO_SHA256;
> > +       dev_dbg(hdev->dev, "Starting hash process\n");
> > +       img_hash_write(hdev, CR_CONTROL, cr);
> > +
> > +       /*
> > +        * The hardware block requires two cycles between writing the control
> > +        * register and writing the first word of data in non DMA mode, to
> > +        * ensure the first data write is not grouped in burst with the control
> > +        * register write a read is issued to 'flush' the bus.
> > +        */
> > +       if (!dma)
> > +               img_hash_read(hdev, CR_CONTROL); }
> > +
> > +static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf,
> > +                            size_t length, int final) {
> > +       u32 count, len32;
> > +       const u32 *buffer = (const u32 *)buf;
> > +
> > +       dev_dbg(hdev->dev, "xmit_cpu:  length: %u bytes\n", length);
> > +
> > +       if (final)
> > +               hdev->flags |= DRIVER_FLAGS_FINAL;
> > +
> > +       len32 = DIV_ROUND_UP(length, sizeof(u32));
> > +
> > +       for (count = 0; count < len32; count++)
> > +               writel_relaxed(buffer[count], hdev->cpu_addr);
> > +
> > +       return -EINPROGRESS;
> > +}
> > +
> > +static void img_hash_dma_callback(void *data) {
> > +       struct img_hash_dev *hdev = (struct img_hash_dev *)data;
> > +       struct img_hash_request_ctx *ctx =
> > +ahash_request_ctx(hdev->req);
> > +
> > +       if (ctx->bufcnt) {
> > +               img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0);
> > +               ctx->bufcnt = 0;
> > +       }
> > +       if (ctx->sg)
> > +               tasklet_schedule(&hdev->dma_task);
> > +}
> > +
> > +static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct
> > +scatterlist *sg) {
> > +       struct dma_async_tx_descriptor *desc;
> > +       struct img_hash_request_ctx *ctx =
> > +ahash_request_ctx(hdev->req);
> > +
> > +       ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
> > +       if (ctx->dma_ct == 0) {
> > +               dev_err(hdev->dev, "Invalid DMA sg\n");
> > +               hdev->err = -EINVAL;
> > +               return -EINVAL;
> > +       }
> > +
> > +       desc = dmaengine_prep_slave_sg(hdev->dma_lch,
> > +                                       sg,
> 
> Line this up (and the rest of this statement) with the open paren of
> dmaengine_prep_slave_sg().

Done

> 
> > +                                       ctx->dma_ct,
> > +                                       DMA_MEM_TO_DEV,
> > +                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> > +       if (!desc) {
> > +               dev_err(hdev->dev, "Null DMA descriptor\n");
> > +               hdev->err = -EINVAL;
> > +               dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
> > +               return -EINVAL;
> > +       }
> > +       desc->callback = img_hash_dma_callback;
> > +       desc->callback_param = hdev;
> > +       dmaengine_submit(desc);
> > +       dma_async_issue_pending(hdev->dma_lch);
> > +
> > +       return 0;
> > +}
> > +
> > +static int img_hash_write_via_cpu(struct img_hash_dev *hdev) {
> > +       struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > +       int ret = 0;
> > +
> > +       ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx->sg),
> > +                                       ctx->buffer,
> > + hdev->req->nbytes);
> > +
> > +       ctx->total = hdev->req->nbytes;
> > +       ctx->bufcnt = 0;
> > +
> > +       hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL);
> > +
> > +       img_hash_start(hdev, false);
> > +
> > +       if (ctx->total)
> > +               ret = img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 1);
> > +       else
> > +               ret = 0;
> 
> The else block here (and possibly the entire check) seems unnecessary.
> img_hash_xmit_cpu() won't do anything if the length is 0.

Yes you're right, I've removed the check, and the temporary variable.

> 
> > +
> > +       return ret;
> > +}
> > +
> > +static int img_hash_finish(struct ahash_request *req) {
> > +       struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > +
> > +       if (!req->result)
> > +               return -EINVAL;
> > +
> > +       memcpy(req->result, ctx->digest, ctx->digsize);
> > +
> > +       return 0;
> > +}
> > +
> > +static void img_hash_copy_hash(struct ahash_request *req) {
> > +       struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > +       u32 *hash = (u32 *)ctx->digest;
> > +       int i;
> > +
> > +       for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--)
> > +               hash[i] = img_hash_read_result_queue(ctx->hdev);
> > +}
> > +
> > +static void img_hash_finish_req(struct ahash_request *req, int err) {
> > +       struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > +       struct img_hash_dev *hdev =  ctx->hdev;
> > +
> > +       if (!err) {
> > +               img_hash_copy_hash(req);
> > +               if (DRIVER_FLAGS_FINAL & hdev->flags)
> > +                       err = img_hash_finish(req);
> > +       } else {
> > +               dev_warn(hdev->dev, "Hash failed with error %d\n", err);
> > +               ctx->flags |= DRIVER_FLAGS_ERROR;
> > +       }
> > +
> > +       hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
> DRIVER_FLAGS_OUTPUT_READY |
> > +               DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY |
> > + DRIVER_FLAGS_FINAL);
> > +
> > +       if (req->base.complete)
> > +               req->base.complete(&req->base, err); }
> > +
> > +static int img_hash_write_via_dma(struct img_hash_dev *hdev) {
> > +       struct img_hash_request_ctx *ctx =
> > +ahash_request_ctx(hdev->req);
> > +
> > +       img_hash_start(hdev, true);
> > +
> > +       dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total);
> > +
> > +       if (!ctx->total)
> > +               hdev->flags |= DRIVER_FLAGS_FINAL;
> > +
> > +       hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL;
> > +
> > +       tasklet_schedule(&hdev->dma_task);
> > +
> > +       return -EINPROGRESS;
> > +}
> > +
> > +static int img_hash_dma_init(struct img_hash_dev *hdev) {
> > +       struct dma_slave_config dma_conf;
> > +       int err = -EINVAL;
> > +
> > +       hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
> > +       if (!hdev->dma_lch) {
> > +               dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.\n");
> > +               return -EBUSY;
> > +       }
> > +       dma_conf.direction = DMA_MEM_TO_DEV;
> > +       dma_conf.dst_addr = hdev->bus_addr;
> > +       dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> > +       dma_conf.dst_maxburst = 16;
> > +       dma_conf.device_fc = false;
> > +
> > +       err = dmaengine_slave_config(hdev->dma_lch,  &dma_conf);
> > +
> > +       if (err) {
> > +               dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
> > +               dma_release_channel(hdev->dma_lch);
> > +               return err;
> > +       }
> > +       return 0;
> 
> nit: I would expect there to be a newline before the return rather than before
> the if().

Done

> 
> > +}
> > +
> > +static void img_hash_dma_task(unsigned long d) {
> > +       struct img_hash_dev *hdev = (struct img_hash_dev *)d;
> > +       struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> > +       u8 *addr;
> > +       size_t nbytes, bleft, wsend, len, tbc;
> > +       struct scatterlist tsg;
> > +
> > +       if (!ctx->sg)
> > +               return;
> > +
> > +       addr = sg_virt(ctx->sg);
> > +       nbytes = ctx->sg->length - ctx->offset;
> > +
> > +       /* The hash accelerator does not support a data valid mask. This
> means
> > +        * that if each dma (i.e. per page) is not a multiple of 4 bytes, the
> > +        * padding bytes in the last word written by that dma would
> erroneously
> > +        * be included in the hash. To avoid this we round down the transfer,
> > +        * and add the excess to the start of the next dma. It does not matter
> > +        * that the final dma may not be a multiple of 4 bytes as the hashing
> > +        * block is programmed to accept the correct number of bytes.
> > +        */
> 
> nit: Multi-line comment style is:
> 
> /*
>  * blah blah blah
>  */

Fixed

> 
> > +
> > +       bleft = nbytes % 4;
> > +       wsend = (nbytes / 4);
> > +
> > +       if (wsend) {
> > +               sg_init_one(&tsg, addr + ctx->offset, wsend * 4);
> > +               if (img_hash_xmit_dma(hdev, &tsg)) {
> > +                       dev_err(hdev->dev, "DMA failed, falling back to CPU");
> > +                       ctx->flags |= DRIVER_FLAGS_CPU;
> > +                       hdev->err = 0;
> > +                       img_hash_xmit_cpu(hdev, addr + ctx->offset,
> > +                                         wsend * 4, 0);
> > +                       ctx->sent += wsend * 4;
> > +                       wsend = 0;
> > +               } else {
> > +                       ctx->sent += wsend * 4;
> > +               }
> > +       }
> > +
> > +       if (bleft) {
> > +               ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
> > +                                                ctx->buffer, bleft, ctx->sent);
> > +               tbc = 0;
> > +               ctx->sg = sg_next(ctx->sg);
> > +               while (ctx->sg && (ctx->bufcnt < 4)) {
> > +                       len = ctx->sg->length;
> > +                       if (likely(len > (4 - ctx->bufcnt)))
> > +                               len = 4 - ctx->bufcnt;
> > +                       tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
> > +                                                ctx->buffer + ctx->bufcnt, len,
> > +                                       ctx->sent + ctx->bufcnt);
> > +                       ctx->bufcnt += tbc;
> > +                       if (tbc >= ctx->sg->length) {
> > +                               ctx->sg = sg_next(ctx->sg);
> > +                               tbc = 0;
> > +                       }
> > +               }
> > +
> > +               ctx->sent += ctx->bufcnt;
> > +               ctx->offset = tbc;
> > +
> > +               if (!wsend)
> > +                       img_hash_dma_callback(hdev);
> > +       } else {
> > +               ctx->offset = 0;
> > +               ctx->sg = sg_next(ctx->sg);
> > +       }
> > +}
> > +
> > +static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev) {
> > +       struct img_hash_request_ctx *ctx =
> > +ahash_request_ctx(hdev->req);
> > +
> > +       if (ctx->flags & DRIVER_FLAGS_SG)
> > +               dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct,
> > + DMA_TO_DEVICE);
> > +
> > +       return 0;
> > +}
> > +
> > +static int img_hash_process_data(struct img_hash_dev *hdev) {
> > +       struct ahash_request *req = hdev->req;
> > +       struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > +       int err = 0;
> > +
> > +       ctx->bufcnt = 0;
> > +
> > +       if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) {
> > +               dev_dbg(hdev->dev, "process data request(%d bytes) using
> DMA\n",
> > +                       req->nbytes);
> > +               err = img_hash_write_via_dma(hdev);
> > +       } else {
> > +               dev_dbg(hdev->dev, "process data request(%d bytes) using
> CPU\n",
> > +                       req->nbytes);
> > +               err = img_hash_write_via_cpu(hdev);
> > +       }
> > +       return err;
> > +}
> > +
> > +static int img_hash_hw_init(struct img_hash_dev *hdev) {
> > +       unsigned long long nbits;
> > +       u32 u, l;
> > +       int ret;
> > +
> > +       ret = clk_prepare_enable(hdev->iclk);
> 
> It looks like there's no corresponding clk_disable_unprepare() to this, so the
> prepare/enable counts will just keep increasing.  Perhaps it would be better
> to manage enabling/disabling the clocks with runtime PM?

Yes it probably would be - I'll look at doing that.


> 
> > +       if (ret)
> > +               return ret;
> > +
> > +       img_hash_write(hdev, CR_RESET, CR_RESET_SET);
> > +       img_hash_write(hdev, CR_RESET, CR_RESET_UNSET);
> > +       img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET);
> > +
> > +       nbits = (hdev->req->nbytes << 3);
> > +       u = nbits >> 32;
> > +       l = nbits;
> > +       img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u);
> > +       img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l);
> > +
> > +       if (!(DRIVER_FLAGS_INIT & hdev->flags)) {
> > +               hdev->flags |= DRIVER_FLAGS_INIT;
> > +               hdev->err = 0;
> > +       }
> > +       dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits);
> > +       return 0;
> > +}
> > +
> > +static int img_hash_init(struct ahash_request *req) {
> > +       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > +       struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> > +       struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> > +
> > +       ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> > +       rctx->fallback_req.base.flags = req->base.flags
> > +               & CRYPTO_TFM_REQ_MAY_SLEEP;
> > +
> > +       return crypto_ahash_init(&rctx->fallback_req);
> > +}
> > +
> > +static int img_hash_handle_queue(struct img_hash_dev *hdev,
> > +                                struct ahash_request *req) {
> > +       struct crypto_async_request *async_req, *backlog;
> > +       struct img_hash_request_ctx *ctx;
> > +       unsigned long flags;
> > +       int err = 0, res = 0;
> > +
> > +       spin_lock_irqsave(&hdev->lock, flags);
> > +
> > +       if (req)
> > +               res = ahash_enqueue_request(&hdev->queue, req);
> > +
> > +       if (DRIVER_FLAGS_BUSY & hdev->flags) {
> > +               spin_unlock_irqrestore(&hdev->lock, flags);
> > +               return res;
> > +       }
> > +
> > +       backlog = crypto_get_backlog(&hdev->queue);
> > +       async_req = crypto_dequeue_request(&hdev->queue);
> > +       if (async_req)
> > +               hdev->flags |= DRIVER_FLAGS_BUSY;
> > +
> > +       spin_unlock_irqrestore(&hdev->lock, flags);
> > +
> > +       if (!async_req)
> > +               return res;
> > +
> > +       if (backlog)
> > +               backlog->complete(backlog, -EINPROGRESS);
> > +
> > +       req = ahash_request_cast(async_req);
> > +       hdev->req = req;
> > +
> > +       ctx = ahash_request_ctx(req);
> > +
> > +       dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n",
> > +                ctx->op, req->nbytes);
> > +
> > +       err = img_hash_hw_init(hdev);
> > +
> > +       if (!err)
> > +               err = img_hash_process_data(hdev);
> > +
> > +       if (err != -EINPROGRESS) {
> > +               /* done_task will not finish so do it here */
> > +               img_hash_finish_req(req, err);
> > +       }
> > +       return res;
> > +}
> > +
> > +static int img_hash_update(struct ahash_request *req) {
> > +       struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> > +       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > +       struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> > +
> > +       ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> > +       rctx->fallback_req.base.flags = req->base.flags
> > +               & CRYPTO_TFM_REQ_MAY_SLEEP;
> > +       rctx->fallback_req.nbytes = req->nbytes;
> > +       rctx->fallback_req.src = req->src;
> > +
> > +       return crypto_ahash_update(&rctx->fallback_req);
> > +}
> > +
> > +static int img_hash_final(struct ahash_request *req) {
> > +       struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> > +       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > +       struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> > +
> > +       ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> > +       rctx->fallback_req.base.flags = req->base.flags
> > +               & CRYPTO_TFM_REQ_MAY_SLEEP;
> > +       rctx->fallback_req.result = req->result;
> > +
> > +       return crypto_ahash_final(&rctx->fallback_req);
> > +}
> > +
> > +static int img_hash_finup(struct ahash_request *req) {
> > +       struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> > +       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > +       struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> > +
> > +       ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> > +       rctx->fallback_req.base.flags = req->base.flags
> > +               & CRYPTO_TFM_REQ_MAY_SLEEP;
> > +       rctx->fallback_req.nbytes = req->nbytes;
> > +       rctx->fallback_req.src = req->src;
> > +       rctx->fallback_req.result = req->result;
> > +
> > +       return crypto_ahash_finup(&rctx->fallback_req);
> > +}
> > +
> > +static int img_hash_digest(struct ahash_request *req) {
> > +       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> > +       struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm);
> > +       struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> > +       struct img_hash_dev *hdev = NULL;
> > +       struct img_hash_dev *tmp;
> > +       int err;
> > +
> > +       spin_lock_bh(&img_hash.lock);
> 
> Why spin_{lock,unlock}_bh() here?

Changed to spin_{lock, unlock}(), I don't see any reason why it should be _bh.

> 
> > +       if (!tctx->hdev) {
> > +               list_for_each_entry(tmp, &img_hash.dev_list, list) {
> > +                       hdev = tmp;
> > +                       break;
> > +               }
> > +               tctx->hdev = hdev;
> > +
> > +       } else {
> > +               hdev = tctx->hdev;
> > +       }
> > +
> > +       spin_unlock_bh(&img_hash.lock);
> > +       ctx->hdev = hdev;
> > +       ctx->flags = 0;
> > +       ctx->digsize = crypto_ahash_digestsize(tfm);
> > +
> > +       switch (ctx->digsize) {
> > +       case SHA1_DIGEST_SIZE:
> > +               ctx->flags |= DRIVER_FLAGS_SHA1;
> > +               break;
> > +       case SHA256_DIGEST_SIZE:
> > +               ctx->flags |= DRIVER_FLAGS_SHA256;
> > +               break;
> > +       case SHA224_DIGEST_SIZE:
> > +               ctx->flags |= DRIVER_FLAGS_SHA224;
> > +               break;
> > +       case MD5_DIGEST_SIZE:
> > +               ctx->flags |= DRIVER_FLAGS_MD5;
> > +               break;
> > +       default:
> > +               return -EINVAL;
> > +       }
> > +
> > +       ctx->bufcnt = 0;
> > +       ctx->offset = 0;
> > +       ctx->sent = 0;
> > +       ctx->total = req->nbytes;
> > +       ctx->sg = req->src;
> > +       ctx->sgfirst = req->src;
> > +       ctx->nents = sg_nents(ctx->sg);
> > +
> > +       err = img_hash_handle_queue(tctx->hdev, req);
> > +
> > +       return err;
> > +}
> > +
> > +static int img_hash_cra_init(struct crypto_tfm *tfm) {
> > +       struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
> > +       const char *alg_name = crypto_tfm_alg_name(tfm);
> > +       int err = -ENOMEM;
> > +
> > +       ctx->fallback = crypto_alloc_ahash(alg_name, 0,
> > +                                          CRYPTO_ALG_NEED_FALLBACK);
> > +       if (IS_ERR(ctx->fallback)) {
> > +               pr_err("img_hash: Could not load fallback driver.\n");
> > +               err = PTR_ERR(ctx->fallback);
> > +               goto err;
> > +       }
> > +       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
> > +                                sizeof(struct img_hash_request_ctx) +
> > +                                IMG_HASH_DMA_THRESHOLD);
> > +
> > +       return 0;
> > +
> > +err:
> > +       return err;
> > +}
> > +
> > +static void img_hash_cra_exit(struct crypto_tfm *tfm) {
> > +       struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
> > +
> > +       crypto_free_ahash(tctx->fallback);
> > +}
> > +
> > +static irqreturn_t img_irq_handler(int irq, void *dev_id) {
> > +       struct img_hash_dev *hdev = dev_id;
> > +       u32 reg;
> > +
> > +       reg = img_hash_read(hdev, CR_INTSTAT);
> > +       img_hash_write(hdev, CR_INTCLEAR, reg);
> > +
> > +       if (reg & CR_INT_NEW_RESULTS_SET) {
> > +               dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
> > +               if (DRIVER_FLAGS_BUSY & hdev->flags) {
> > +                       hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
> > +                       if (!(DRIVER_FLAGS_CPU & hdev->flags))
> > +                               hdev->flags |= DRIVER_FLAGS_DMA_READY;
> > +                       tasklet_schedule(&hdev->done_task);
> 
> Since this done_task only gets scheduled from here, why not make this a
> threaded IRQ handler and just do the work here instead of separating it
> between a hard IRQ handler and a tasklet?

What is the advantage of doing that?  i.e is this simple case worth setting up an extra thread?

> 
> > +               } else {
> > +                       dev_warn(hdev->dev,
> > +                                "HASH interrupt when no active requests.\n");
> > +               }
> > +       } else if (reg & CR_INT_RESULTS_AVAILABLE) {
> > +               dev_warn(hdev->dev,
> > +                        "IRQ triggered before the hash had completed\n");
> > +       } else if (reg & CR_INT_RESULT_READ_ERR) {
> > +               dev_warn(hdev->dev,
> > +                        "Attempt to read from an empty result queue\n");
> > +       } else if (reg & CR_INT_MESSAGE_WRITE_ERROR) {
> > +               dev_warn(hdev->dev,
> > +                        "Data written before the hardware was configured\n");
> > +       }
> > +       return IRQ_HANDLED;
> > +}
> > +
> > +static struct ahash_alg img_algs[] = {
> > +       {
> > +               .init = img_hash_init,
> > +               .update = img_hash_update,
> > +               .final = img_hash_final,
> > +               .finup = img_hash_finup,
> > +               .digest = img_hash_digest,
> > +               .halg = {
> > +                       .digestsize = MD5_DIGEST_SIZE,
> > +                       .base = {
> > +                               .cra_name = "md5",
> > +                               .cra_driver_name = "img-md5",
> > +                               .cra_priority = 301,
> > +                               .cra_flags =
> > +                               CRYPTO_ALG_ASYNC |
> > +                               CRYPTO_ALG_NEED_FALLBACK,
> > +                               .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
> > +                               .cra_ctxsize = sizeof(struct img_hash_ctx),
> > +                               .cra_init = img_hash_cra_init,
> > +                               .cra_exit = img_hash_cra_exit,
> > +                               .cra_module = THIS_MODULE,
> > +                       }
> > +               }
> > +       },
> > +       {
> > +               .init = img_hash_init,
> > +               .update = img_hash_update,
> > +               .final = img_hash_final,
> > +               .finup = img_hash_finup,
> > +               .digest = img_hash_digest,
> > +               .halg = {
> > +                       .digestsize = SHA1_DIGEST_SIZE,
> > +                       .base = {
> > +                               .cra_name = "sha1",
> > +                               .cra_driver_name = "img-sha1",
> > +                               .cra_priority = 3000,
> > +                               .cra_flags =
> > +                               CRYPTO_ALG_ASYNC |
> > +                               CRYPTO_ALG_NEED_FALLBACK,
> > +                               .cra_blocksize = SHA1_BLOCK_SIZE,
> > +                               .cra_ctxsize = sizeof(struct img_hash_ctx),
> > +                               .cra_init = img_hash_cra_init,
> > +                               .cra_exit = img_hash_cra_exit,
> > +                               .cra_module = THIS_MODULE,
> > +                       }
> > +               }
> > +       },
> > +       {
> > +               .init = img_hash_init,
> > +               .update = img_hash_update,
> > +               .final = img_hash_final,
> > +               .finup = img_hash_finup,
> > +               .digest = img_hash_digest,
> > +               .halg = {
> > +                       .digestsize = SHA224_DIGEST_SIZE,
> > +                       .base = {
> > +                               .cra_name = "sha224",
> > +                               .cra_driver_name = "img-sha224",
> > +                               .cra_priority = 3000,
> > +                               .cra_flags =
> > +                               CRYPTO_ALG_ASYNC |
> > +                               CRYPTO_ALG_NEED_FALLBACK,
> > +                               .cra_blocksize = SHA224_BLOCK_SIZE,
> > +                               .cra_ctxsize = sizeof(struct img_hash_ctx),
> > +                               .cra_init = img_hash_cra_init,
> > +                               .cra_exit = img_hash_cra_exit,
> > +                               .cra_module = THIS_MODULE,
> > +                       }
> > +               }
> > +       },
> > +       {
> > +               .init = img_hash_init,
> > +               .update = img_hash_update,
> > +               .final = img_hash_final,
> > +               .finup = img_hash_finup,
> > +               .digest = img_hash_digest,
> > +               .halg = {
> > +                       .digestsize = SHA256_DIGEST_SIZE,
> > +                       .base = {
> > +                               .cra_name = "sha256",
> > +                               .cra_driver_name = "img-sha256",
> > +                               .cra_priority = 301,
> > +                               .cra_flags =
> > +                               CRYPTO_ALG_ASYNC |
> > +                               CRYPTO_ALG_NEED_FALLBACK,
> > +                               .cra_blocksize = SHA256_BLOCK_SIZE,
> > +                               .cra_ctxsize = sizeof(struct img_hash_ctx),
> > +                               .cra_init = img_hash_cra_init,
> > +                               .cra_exit = img_hash_cra_exit,
> > +                               .cra_module = THIS_MODULE,
> > +                       }
> > +               }
> > +       }
> > +};
> > +
> > +static int img_register_algs(struct img_hash_dev *hdev) {
> > +       int i, err;
> > +
> > +       for (i = 0; i < ARRAY_SIZE(img_algs); i++) {
> > +               err = crypto_register_ahash(&img_algs[i]);
> > +               if (err)
> > +                       goto err_reg;
> > +       }
> > +       return 0;
> > +
> > +err_reg:
> > +       for (; i--; )
> > +               crypto_unregister_ahash(&img_algs[i]);
> > +
> > +       return err;
> > +}
> > +
> > +static int img_unregister_algs(struct img_hash_dev *hdev) {
> > +       int i;
> > +
> > +       for (i = 0; i < ARRAY_SIZE(img_algs); i++)
> > +               crypto_unregister_ahash(&img_algs[i]);
> > +       return 0;
> > +}
> > +
> > +static void img_hash_done_task(unsigned long data) {
> > +       struct img_hash_dev *hdev = (struct img_hash_dev *)data;
> > +       int err = 0;
> > +
> > +       if (hdev->err == -EINVAL) {
> > +               err = hdev->err;
> > +               goto finish;
> > +       }
> > +
> > +       if (!(DRIVER_FLAGS_BUSY & hdev->flags)) {
> > +               img_hash_handle_queue(hdev, NULL);
> > +               return;
> > +       }
> > +
> > +       if (DRIVER_FLAGS_CPU & hdev->flags) {
> > +               if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
> > +                       hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY;
> > +                       goto finish;
> > +               }
> > +       } else if (DRIVER_FLAGS_DMA_READY & hdev->flags) {
> > +               if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) {
> > +                       hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE;
> > +                       img_hash_write_via_dma_stop(hdev);
> > +                       if (hdev->err) {
> > +                               err = hdev->err;
> > +                               goto finish;
> > +                       }
> > +               }
> > +               if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
> > +                       hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
> > +                                       DRIVER_FLAGS_OUTPUT_READY);
> > +                       goto finish;
> > +               }
> > +       }
> > +       return;
> > +
> > +finish:
> > +       img_hash_finish_req(hdev->req, err); }
> > +
> > +static const struct of_device_id img_hash_match[] = {
> > +       { .compatible = "img,hash-accelerator" },
> > +       {}
> > +};
> > +MODULE_DEVICE_TABLE(of, img_hash_match)
> > +
> > +static int img_hash_probe(struct platform_device *pdev) {
> > +       struct img_hash_dev *hdev;
> > +       struct device *dev = &pdev->dev;
> > +       struct resource *hash_res;
> > +       int err;
> > +
> > +       hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL);
> > +       if (hdev == NULL) {
> > +               err = -ENOMEM;
> > +               goto sha_dev_err;
> 
> Why not just "return -ENOMEM" here?  There should be no cleanup
> necessary at this point?

Done

> 
> > +       }
> > +       spin_lock_init(&hdev->lock);
> > +
> > +       hdev->dev = dev;
> > +
> > +       platform_set_drvdata(pdev, hdev);
> > +
> > +       INIT_LIST_HEAD(&hdev->list);
> > +
> > +       tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned
> long)hdev);
> > +       tasklet_init(&hdev->dma_task, img_hash_dma_task, (unsigned
> > + long)hdev);
> > +
> > +       crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH);
> > +
> > +       /* Register bank */
> > +       hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +
> > +       hdev->io_base = devm_ioremap_resource(dev, hash_res);
> > +       if (IS_ERR(hdev->io_base)) {
> > +               dev_err(dev, "can't ioremap\n");
> 
> nit: When printing error messages, it's helpful to print the error code as well.

Done

> 
> > +               err = PTR_ERR(hdev->io_base);
> > +               goto hash_io_err;
> > +       }
> > +
> > +       /* Write port (DMA or CPU) */
> > +       hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> > +       if (!hash_res) {
> > +               dev_err(dev, "no write port resource info\n");
> > +               err = -ENODEV;
> > +               goto res_err;
> > +       }
> > +       hdev->bus_addr = hash_res->start;
> 
> Maybe set this after devm_ioremap_resource() succeeds and avoid the extra
> error check?

Not quite following you here - which check are you saying can be removed?

> 
> > +       hdev->cpu_addr = devm_ioremap_resource(dev, hash_res);
> > +       if (IS_ERR(hdev->cpu_addr)) {
> > +               dev_err(dev, "can't ioremap write port\n");
> > +               err = PTR_ERR(hdev->cpu_addr);
> > +               goto res_err;
> > +       }
> > +
> > +       hdev->irq = platform_get_irq(pdev, 0);
> > +       if (hdev->irq < 0) {
> > +               dev_err(dev, "no IRQ resource info\n");
> > +               err = hdev->irq;
> > +               goto res_err;
> > +       }
> > +
> > +       err = devm_request_irq(dev, hdev->irq, img_irq_handler, 0,
> > +                              dev_name(dev), hdev);
> > +       if (err) {
> > +               dev_err(dev, "unable to request %s irq\n",
> > + dev_name(dev));
> 
> dev_* already prints dev_name().

Yes, removed it

> 
> > +               goto res_err;
> > +       }
> > +       dev_dbg(dev, "using IRQ channel %d\n", hdev->irq);
> > +
> > +       hdev->iclk = devm_clk_get(&pdev->dev, "hash_clk");
> > +       if (IS_ERR(hdev->iclk)) {
> > +               dev_err(dev, "clock initialization failed.\n");
> > +               err = PTR_ERR(hdev->iclk);
> > +               goto res_err;
> > +       }
> > +
> > +       hdev->fclk = devm_clk_get(&pdev->dev, "hash_reg_clk");
> 
> I don't see this clock getting enabled anywhere.

Good catch - now added to img_hash_hw_init.  

> 
> > +       if (IS_ERR(hdev->fclk)) {
> > +               dev_err(dev, "clock initialization failed.\n");
> > +               err = PTR_ERR(hdev->fclk);
> > +               goto res_err;
> > +       }
> > +
> > +       err = img_hash_dma_init(hdev);
> > +       if (err)
> > +               goto err_dma;
> > +
> > +       dev_dbg(dev, "using %s for DMA transfers\n",
> > +               dma_chan_name(hdev->dma_lch));
> > +
> > +       spin_lock(&img_hash.lock);
> > +       list_add_tail(&hdev->list, &img_hash.dev_list);
> > +       spin_unlock(&img_hash.lock);
> > +
> > +       err = img_register_algs(hdev);
> > +       if (err)
> > +               goto err_algs;
> > +       dev_info(dev, "data bus: 0x%x;\tsize:0x%x\n", hdev->bus_addr,
> > + 0x4);
> 
> dev_dbg()? (or maybe not at all)

Removed it

> 
> > +       dev_info(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator
> > + initialized\n");
> > +
> > +       return 0;
> > +
> > +err_algs:
> > +       spin_lock(&img_hash.lock);
> > +       list_del(&hdev->list);
> > +       spin_unlock(&img_hash.lock);
> > +       dma_release_channel(hdev->dma_lch);
> > +err_dma:
> > +hash_io_err:
> > +       devm_clk_put(dev, hdev->iclk);
> > +       devm_clk_put(dev, hdev->fclk);
> 
> Since the clocks were acquired via the devm_* API, there's no need to
> explicitly put() them.

Done

> 
> > +res_err:
> > +       tasklet_kill(&hdev->done_task);
> > +       tasklet_kill(&hdev->dma_task);
> > +sha_dev_err:
> > +       dev_err(dev, "initialization failed.\n");
> 
> The driver core will print an error message if a device probe fails (for
> reasons other than -ENODEV, at least), so there's no need to print here.

Removed

> 
> > +       return err;
> > +}
> > +
> > +static int img_hash_remove(struct platform_device *pdev) {
> > +       static struct img_hash_dev *hdev;
> > +
> > +       hdev = platform_get_drvdata(pdev);
> > +       spin_lock(&img_hash.lock);
> > +       list_del(&hdev->list);
> > +       spin_unlock(&img_hash.lock);
> > +
> > +       img_unregister_algs(hdev);
> > +
> > +       tasklet_kill(&hdev->done_task);
> > +       tasklet_kill(&hdev->dma_task);
> > +
> > +       dma_release_channel(hdev->dma_lch);
> > +
> > +       clk_disable_unprepare(hdev->iclk);
> > +       clk_disable_unprepare(hdev->fclk);
> 
> Unbalanced prepare_enable()/disable_unprepare().

Now enabled in img_hash_hw_init.  

> 
> > +       return 0;
> > +}
> > +
> > +static struct platform_driver img_hash_driver = {
> > +       .probe          = img_hash_probe,
> > +       .remove         = img_hash_remove,
> > +       .driver         = {
> > +               .name   = "img-hash-accelerator",
> > +               .of_match_table = of_match_ptr(img_hash_match),
> > +       }
> > +};
> > +module_platform_driver(img_hash_driver);
> > +
> > +MODULE_LICENSE("GPL v2");
> > +MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator
> > +driver"); MODULE_AUTHOR("Will Thomas."); MODULE_AUTHOR("James
> > +Hartley.");
> 
> It's helpful to include email addresses in the MODULE_AUTHOR tags.

I can do that for my email address, but not for Will's

Thanks for the review Andrew!

James.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V3 1/2] crypto: Add Imagination Technologies hw hash accelerator
       [not found] ` <1425610912-9930-2-git-send-email-james.hartley@imgtec.com>
@ 2015-03-09  5:42   ` Andrew Bresticker
  2015-03-10 15:31     ` James Hartley
  0 siblings, 1 reply; 13+ messages in thread
From: Andrew Bresticker @ 2015-03-09  5:42 UTC (permalink / raw)
  To: James Hartley; +Cc: linux-crypto

Hi James,

On Thu, Mar 5, 2015 at 7:01 PM, James Hartley <james.hartley@imgtec.com> wrote:
> This adds support for the Imagination Technologies hash
> accelerator which provides hardware acceleration for
> SHA1 SHA224 SHA256 and MD5 hashes.
>
> Signed-off-by: James Hartley <james.hartley@imgtec.com>

Some general comments below, I'll leave review of the crypto-specific
stuff to Herbert.

> diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
> index 3924f93..fb84be7 100644
> --- a/drivers/crypto/Makefile
> +++ b/drivers/crypto/Makefile
> @@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
>  obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
>  obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
>  obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
> +obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
>  obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
>  obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
>  obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
> @@ -25,3 +26,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
>  obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
>  obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
>  obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
> +obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/

Unrelated change - perhaps a bad merge conflict resolution?

> diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
> new file mode 100644
> index 0000000..94a3a6f
> --- /dev/null
> +++ b/drivers/crypto/img-hash.c
> @@ -0,0 +1,1037 @@
> +/*
> + * Copyright (c) 2014 Imagination Technologies
> + * Authors:  Will Thomas, James Hartley
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as published
> + * by the Free Software Foundation.
> + *
> + *     Interface structure taken from omap-sham driver
> + */
> +
> +#include <linux/clk.h>
> +#include <linux/dmaengine.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/of_device.h>
> +#include <linux/platform_device.h>
> +#include <linux/scatterlist.h>
> +
> +#include <crypto/internal/hash.h>
> +#include <crypto/md5.h>
> +#include <crypto/sha.h>
> +
> +#define CR_RESET                       0
> +#define CR_RESET_SET                   1
> +#define CR_RESET_UNSET                 0
> +
> +#define CR_MESSAGE_LENGTH_H            0x4
> +#define CR_MESSAGE_LENGTH_L            0x8
> +
> +#define CR_CONTROL                     0xc
> +#define CR_CONTROL_BYTE_ORDER_3210     0
> +#define CR_CONTROL_BYTE_ORDER_0123     1
> +#define CR_CONTROL_BYTE_ORDER_2310     2
> +#define CR_CONTROL_BYTE_ORDER_1032     3
> +#define CR_CONTROL_BYTE_ORDER_SHIFT    8
> +#define CR_CONTROL_ALGO_MD5    0
> +#define CR_CONTROL_ALGO_SHA1   1
> +#define CR_CONTROL_ALGO_SHA224 2
> +#define CR_CONTROL_ALGO_SHA256 3
> +
> +#define CR_INTSTAT                     0x10
> +#define CR_INTENAB                     0x14
> +#define CR_INTCLEAR                    0x18
> +#define CR_INT_RESULTS_AVAILABLE       BIT(0)
> +#define CR_INT_NEW_RESULTS_SET         BIT(1)
> +#define CR_INT_RESULT_READ_ERR         BIT(2)
> +#define CR_INT_MESSAGE_WRITE_ERROR     BIT(3)
> +#define CR_INT_STATUS                  BIT(8)
> +
> +#define CR_RESULT_QUEUE                0x1c
> +#define CR_RSD0                                0x40
> +#define CR_CORE_REV                    0x50
> +#define CR_CORE_DES1           0x60
> +#define CR_CORE_DES2           0x70
> +
> +#define DRIVER_FLAGS_BUSY              BIT(0)
> +#define DRIVER_FLAGS_FINAL             BIT(1)
> +#define DRIVER_FLAGS_DMA_ACTIVE                BIT(2)
> +#define DRIVER_FLAGS_OUTPUT_READY      BIT(3)
> +#define DRIVER_FLAGS_INIT              BIT(4)
> +#define DRIVER_FLAGS_CPU               BIT(5)
> +#define DRIVER_FLAGS_DMA_READY         BIT(6)
> +#define DRIVER_FLAGS_ERROR             BIT(7)
> +#define DRIVER_FLAGS_SG                        BIT(8)
> +#define DRIVER_FLAGS_SHA1              BIT(18)
> +#define DRIVER_FLAGS_SHA224            BIT(19)
> +#define DRIVER_FLAGS_SHA256            BIT(20)
> +#define DRIVER_FLAGS_MD5               BIT(21)
> +
> +#define IMG_HASH_QUEUE_LENGTH          20
> +#define IMG_HASH_DMA_THRESHOLD         64
> +
> +#ifdef __LITTLE_ENDIAN
> +#define IMG_HASH_BYTE_ORDER            (CR_CONTROL_BYTE_ORDER_3210)
> +#else
> +#define IMG_HASH_BYTE_ORDER            (CR_CONTROL_BYTE_ORDER_0123)
> +#endif

Unnecessary parenthesis.

> +struct img_hash_dev;
> +
> +struct img_hash_request_ctx {
> +       struct img_hash_dev     *hdev;
> +       u8 digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
> +       unsigned long           flags;
> +       size_t                  digsize;
> +
> +       dma_addr_t              dma_addr;
> +       size_t                  dma_ct;
> +
> +       /* sg root */
> +       struct scatterlist      *sgfirst;
> +       /* walk state */
> +       struct scatterlist      *sg;
> +       size_t                  nents;
> +       size_t                  offset;
> +       unsigned int            total;
> +       size_t                  sent;
> +
> +       unsigned long           op;
> +
> +       size_t                  bufcnt;
> +       u8 buffer[0] __aligned(sizeof(u32));
> +       struct ahash_request    fallback_req;
> +};
> +
> +struct img_hash_ctx {
> +       struct img_hash_dev     *hdev;
> +       unsigned long           flags;
> +       struct crypto_ahash     *fallback;
> +};
> +
> +struct img_hash_dev {
> +       struct list_head        list;
> +       struct device           *dev;
> +       struct clk              *iclk;
> +       struct clk              *fclk;

Maybe make these names a little more obvious so it's clear which clock is which.

> +       int                     irq;

This isn't used outside of probe(), so it need not be carried around
in this struct.

> +       void __iomem            *io_base;
> +
> +       phys_addr_t             bus_addr;
> +       void __iomem            *cpu_addr;
> +
> +       spinlock_t              lock;
> +       int                     err;
> +       struct tasklet_struct   done_task;
> +       struct tasklet_struct   dma_task;
> +
> +       unsigned long           flags;
> +       struct crypto_queue     queue;
> +       struct ahash_request    *req;
> +
> +       struct dma_chan         *dma_lch;
> +};
> +
> +struct img_hash_drv {
> +       struct list_head dev_list;
> +       spinlock_t lock;
> +};
> +
> +static struct img_hash_drv img_hash = {
> +       .dev_list = LIST_HEAD_INIT(img_hash.dev_list),
> +       .lock = __SPIN_LOCK_UNLOCKED(img_hash.lock),
> +};
> +
> +static inline u32 img_hash_read(struct img_hash_dev *hdev, u32 offset)
> +{
> +       return readl_relaxed(hdev->io_base + offset);
> +}
> +
> +static inline void img_hash_write(struct img_hash_dev *hdev,
> +                                 u32 offset, u32 value)
> +{
> +       writel_relaxed(value, hdev->io_base + offset);
> +}
> +
> +static inline u32 img_hash_read_result_queue(struct img_hash_dev *hdev)
> +{
> +       return be32_to_cpu(img_hash_read(hdev, CR_RESULT_QUEUE));
> +}
> +
> +static void img_hash_start(struct img_hash_dev *hdev, bool dma)
> +{
> +       struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> +       u32 cr = IMG_HASH_BYTE_ORDER << CR_CONTROL_BYTE_ORDER_SHIFT;
> +
> +       if (ctx->flags & DRIVER_FLAGS_MD5)
> +               cr |= CR_CONTROL_ALGO_MD5;
> +       else if (ctx->flags & DRIVER_FLAGS_SHA1)
> +               cr |= CR_CONTROL_ALGO_SHA1;
> +       else if (ctx->flags & DRIVER_FLAGS_SHA224)
> +               cr |= CR_CONTROL_ALGO_SHA224;
> +       else if (ctx->flags & DRIVER_FLAGS_SHA256)
> +               cr |= CR_CONTROL_ALGO_SHA256;
> +       dev_dbg(hdev->dev, "Starting hash process\n");
> +       img_hash_write(hdev, CR_CONTROL, cr);
> +
> +       /*
> +        * The hardware block requires two cycles between writing the control
> +        * register and writing the first word of data in non DMA mode, to
> +        * ensure the first data write is not grouped in burst with the control
> +        * register write a read is issued to 'flush' the bus.
> +        */
> +       if (!dma)
> +               img_hash_read(hdev, CR_CONTROL);
> +}
> +
> +static int img_hash_xmit_cpu(struct img_hash_dev *hdev, const u8 *buf,
> +                            size_t length, int final)
> +{
> +       u32 count, len32;
> +       const u32 *buffer = (const u32 *)buf;
> +
> +       dev_dbg(hdev->dev, "xmit_cpu:  length: %u bytes\n", length);
> +
> +       if (final)
> +               hdev->flags |= DRIVER_FLAGS_FINAL;
> +
> +       len32 = DIV_ROUND_UP(length, sizeof(u32));
> +
> +       for (count = 0; count < len32; count++)
> +               writel_relaxed(buffer[count], hdev->cpu_addr);
> +
> +       return -EINPROGRESS;
> +}
> +
> +static void img_hash_dma_callback(void *data)
> +{
> +       struct img_hash_dev *hdev = (struct img_hash_dev *)data;
> +       struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> +
> +       if (ctx->bufcnt) {
> +               img_hash_xmit_cpu(hdev, ctx->buffer, ctx->bufcnt, 0);
> +               ctx->bufcnt = 0;
> +       }
> +       if (ctx->sg)
> +               tasklet_schedule(&hdev->dma_task);
> +}
> +
> +static int img_hash_xmit_dma(struct img_hash_dev *hdev, struct scatterlist *sg)
> +{
> +       struct dma_async_tx_descriptor *desc;
> +       struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> +
> +       ctx->dma_ct = dma_map_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
> +       if (ctx->dma_ct == 0) {
> +               dev_err(hdev->dev, "Invalid DMA sg\n");
> +               hdev->err = -EINVAL;
> +               return -EINVAL;
> +       }
> +
> +       desc = dmaengine_prep_slave_sg(hdev->dma_lch,
> +                                       sg,

Line this up (and the rest of this statement) with the open paren of
dmaengine_prep_slave_sg().

> +                                       ctx->dma_ct,
> +                                       DMA_MEM_TO_DEV,
> +                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> +       if (!desc) {
> +               dev_err(hdev->dev, "Null DMA descriptor\n");
> +               hdev->err = -EINVAL;
> +               dma_unmap_sg(hdev->dev, sg, 1, DMA_MEM_TO_DEV);
> +               return -EINVAL;
> +       }
> +       desc->callback = img_hash_dma_callback;
> +       desc->callback_param = hdev;
> +       dmaengine_submit(desc);
> +       dma_async_issue_pending(hdev->dma_lch);
> +
> +       return 0;
> +}
> +
> +static int img_hash_write_via_cpu(struct img_hash_dev *hdev)
> +{
> +       struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> +       int ret = 0;
> +
> +       ctx->bufcnt = sg_copy_to_buffer(hdev->req->src, sg_nents(ctx->sg),
> +                                       ctx->buffer, hdev->req->nbytes);
> +
> +       ctx->total = hdev->req->nbytes;
> +       ctx->bufcnt = 0;
> +
> +       hdev->flags |= (DRIVER_FLAGS_CPU | DRIVER_FLAGS_FINAL);
> +
> +       img_hash_start(hdev, false);
> +
> +       if (ctx->total)
> +               ret = img_hash_xmit_cpu(hdev, ctx->buffer, ctx->total, 1);
> +       else
> +               ret = 0;

The else block here (and possibly the entire check) seems unnecessary.
img_hash_xmit_cpu() won't do anything if the length is 0.

> +
> +       return ret;
> +}
> +
> +static int img_hash_finish(struct ahash_request *req)
> +{
> +       struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> +
> +       if (!req->result)
> +               return -EINVAL;
> +
> +       memcpy(req->result, ctx->digest, ctx->digsize);
> +
> +       return 0;
> +}
> +
> +static void img_hash_copy_hash(struct ahash_request *req)
> +{
> +       struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> +       u32 *hash = (u32 *)ctx->digest;
> +       int i;
> +
> +       for (i = (ctx->digsize / sizeof(u32)) - 1; i >= 0; i--)
> +               hash[i] = img_hash_read_result_queue(ctx->hdev);
> +}
> +
> +static void img_hash_finish_req(struct ahash_request *req, int err)
> +{
> +       struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> +       struct img_hash_dev *hdev =  ctx->hdev;
> +
> +       if (!err) {
> +               img_hash_copy_hash(req);
> +               if (DRIVER_FLAGS_FINAL & hdev->flags)
> +                       err = img_hash_finish(req);
> +       } else {
> +               dev_warn(hdev->dev, "Hash failed with error %d\n", err);
> +               ctx->flags |= DRIVER_FLAGS_ERROR;
> +       }
> +
> +       hdev->flags &= ~(DRIVER_FLAGS_DMA_READY | DRIVER_FLAGS_OUTPUT_READY |
> +               DRIVER_FLAGS_CPU | DRIVER_FLAGS_BUSY | DRIVER_FLAGS_FINAL);
> +
> +       if (req->base.complete)
> +               req->base.complete(&req->base, err);
> +}
> +
> +static int img_hash_write_via_dma(struct img_hash_dev *hdev)
> +{
> +       struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> +
> +       img_hash_start(hdev, true);
> +
> +       dev_dbg(hdev->dev, "xmit dma size: %d\n", ctx->total);
> +
> +       if (!ctx->total)
> +               hdev->flags |= DRIVER_FLAGS_FINAL;
> +
> +       hdev->flags |= DRIVER_FLAGS_DMA_ACTIVE | DRIVER_FLAGS_FINAL;
> +
> +       tasklet_schedule(&hdev->dma_task);
> +
> +       return -EINPROGRESS;
> +}
> +
> +static int img_hash_dma_init(struct img_hash_dev *hdev)
> +{
> +       struct dma_slave_config dma_conf;
> +       int err = -EINVAL;
> +
> +       hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
> +       if (!hdev->dma_lch) {
> +               dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.\n");
> +               return -EBUSY;
> +       }
> +       dma_conf.direction = DMA_MEM_TO_DEV;
> +       dma_conf.dst_addr = hdev->bus_addr;
> +       dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
> +       dma_conf.dst_maxburst = 16;
> +       dma_conf.device_fc = false;
> +
> +       err = dmaengine_slave_config(hdev->dma_lch,  &dma_conf);
> +
> +       if (err) {
> +               dev_err(hdev->dev, "Couldn't configure DMA slave.\n");
> +               dma_release_channel(hdev->dma_lch);
> +               return err;
> +       }
> +       return 0;

nit: I would expect there to be a newline before the return rather
than before the if().

> +}
> +
> +static void img_hash_dma_task(unsigned long d)
> +{
> +       struct img_hash_dev *hdev = (struct img_hash_dev *)d;
> +       struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> +       u8 *addr;
> +       size_t nbytes, bleft, wsend, len, tbc;
> +       struct scatterlist tsg;
> +
> +       if (!ctx->sg)
> +               return;
> +
> +       addr = sg_virt(ctx->sg);
> +       nbytes = ctx->sg->length - ctx->offset;
> +
> +       /* The hash accelerator does not support a data valid mask. This means
> +        * that if each dma (i.e. per page) is not a multiple of 4 bytes, the
> +        * padding bytes in the last word written by that dma would erroneously
> +        * be included in the hash. To avoid this we round down the transfer,
> +        * and add the excess to the start of the next dma. It does not matter
> +        * that the final dma may not be a multiple of 4 bytes as the hashing
> +        * block is programmed to accept the correct number of bytes.
> +        */

nit: Multi-line comment style is:

/*
 * blah blah blah
 */

> +
> +       bleft = nbytes % 4;
> +       wsend = (nbytes / 4);
> +
> +       if (wsend) {
> +               sg_init_one(&tsg, addr + ctx->offset, wsend * 4);
> +               if (img_hash_xmit_dma(hdev, &tsg)) {
> +                       dev_err(hdev->dev, "DMA failed, falling back to CPU");
> +                       ctx->flags |= DRIVER_FLAGS_CPU;
> +                       hdev->err = 0;
> +                       img_hash_xmit_cpu(hdev, addr + ctx->offset,
> +                                         wsend * 4, 0);
> +                       ctx->sent += wsend * 4;
> +                       wsend = 0;
> +               } else {
> +                       ctx->sent += wsend * 4;
> +               }
> +       }
> +
> +       if (bleft) {
> +               ctx->bufcnt = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
> +                                                ctx->buffer, bleft, ctx->sent);
> +               tbc = 0;
> +               ctx->sg = sg_next(ctx->sg);
> +               while (ctx->sg && (ctx->bufcnt < 4)) {
> +                       len = ctx->sg->length;
> +                       if (likely(len > (4 - ctx->bufcnt)))
> +                               len = 4 - ctx->bufcnt;
> +                       tbc = sg_pcopy_to_buffer(ctx->sgfirst, ctx->nents,
> +                                                ctx->buffer + ctx->bufcnt, len,
> +                                       ctx->sent + ctx->bufcnt);
> +                       ctx->bufcnt += tbc;
> +                       if (tbc >= ctx->sg->length) {
> +                               ctx->sg = sg_next(ctx->sg);
> +                               tbc = 0;
> +                       }
> +               }
> +
> +               ctx->sent += ctx->bufcnt;
> +               ctx->offset = tbc;
> +
> +               if (!wsend)
> +                       img_hash_dma_callback(hdev);
> +       } else {
> +               ctx->offset = 0;
> +               ctx->sg = sg_next(ctx->sg);
> +       }
> +}
> +
> +static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev)
> +{
> +       struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
> +
> +       if (ctx->flags & DRIVER_FLAGS_SG)
> +               dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct, DMA_TO_DEVICE);
> +
> +       return 0;
> +}
> +
> +static int img_hash_process_data(struct img_hash_dev *hdev)
> +{
> +       struct ahash_request *req = hdev->req;
> +       struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> +       int err = 0;
> +
> +       ctx->bufcnt = 0;
> +
> +       if (req->nbytes >= IMG_HASH_DMA_THRESHOLD) {
> +               dev_dbg(hdev->dev, "process data request(%d bytes) using DMA\n",
> +                       req->nbytes);
> +               err = img_hash_write_via_dma(hdev);
> +       } else {
> +               dev_dbg(hdev->dev, "process data request(%d bytes) using CPU\n",
> +                       req->nbytes);
> +               err = img_hash_write_via_cpu(hdev);
> +       }
> +       return err;
> +}
> +
> +static int img_hash_hw_init(struct img_hash_dev *hdev)
> +{
> +       unsigned long long nbits;
> +       u32 u, l;
> +       int ret;
> +
> +       ret = clk_prepare_enable(hdev->iclk);

It looks like there's no corresponding clk_disable_unprepare() to
this, so the prepare/enable counts will just keep increasing.  Perhaps
it would be better to manage enabling/disabling the clocks with
runtime PM?

> +       if (ret)
> +               return ret;
> +
> +       img_hash_write(hdev, CR_RESET, CR_RESET_SET);
> +       img_hash_write(hdev, CR_RESET, CR_RESET_UNSET);
> +       img_hash_write(hdev, CR_INTENAB, CR_INT_NEW_RESULTS_SET);
> +
> +       nbits = (hdev->req->nbytes << 3);
> +       u = nbits >> 32;
> +       l = nbits;
> +       img_hash_write(hdev, CR_MESSAGE_LENGTH_H, u);
> +       img_hash_write(hdev, CR_MESSAGE_LENGTH_L, l);
> +
> +       if (!(DRIVER_FLAGS_INIT & hdev->flags)) {
> +               hdev->flags |= DRIVER_FLAGS_INIT;
> +               hdev->err = 0;
> +       }
> +       dev_dbg(hdev->dev, "hw initialized, nbits: %llx\n", nbits);
> +       return 0;
> +}
> +
> +static int img_hash_init(struct ahash_request *req)
> +{
> +       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> +       struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> +       struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> +
> +       ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> +       rctx->fallback_req.base.flags = req->base.flags
> +               & CRYPTO_TFM_REQ_MAY_SLEEP;
> +
> +       return crypto_ahash_init(&rctx->fallback_req);
> +}
> +
> +static int img_hash_handle_queue(struct img_hash_dev *hdev,
> +                                struct ahash_request *req)
> +{
> +       struct crypto_async_request *async_req, *backlog;
> +       struct img_hash_request_ctx *ctx;
> +       unsigned long flags;
> +       int err = 0, res = 0;
> +
> +       spin_lock_irqsave(&hdev->lock, flags);
> +
> +       if (req)
> +               res = ahash_enqueue_request(&hdev->queue, req);
> +
> +       if (DRIVER_FLAGS_BUSY & hdev->flags) {
> +               spin_unlock_irqrestore(&hdev->lock, flags);
> +               return res;
> +       }
> +
> +       backlog = crypto_get_backlog(&hdev->queue);
> +       async_req = crypto_dequeue_request(&hdev->queue);
> +       if (async_req)
> +               hdev->flags |= DRIVER_FLAGS_BUSY;
> +
> +       spin_unlock_irqrestore(&hdev->lock, flags);
> +
> +       if (!async_req)
> +               return res;
> +
> +       if (backlog)
> +               backlog->complete(backlog, -EINPROGRESS);
> +
> +       req = ahash_request_cast(async_req);
> +       hdev->req = req;
> +
> +       ctx = ahash_request_ctx(req);
> +
> +       dev_info(hdev->dev, "processing req, op: %lu, bytes: %d\n",
> +                ctx->op, req->nbytes);
> +
> +       err = img_hash_hw_init(hdev);
> +
> +       if (!err)
> +               err = img_hash_process_data(hdev);
> +
> +       if (err != -EINPROGRESS) {
> +               /* done_task will not finish so do it here */
> +               img_hash_finish_req(req, err);
> +       }
> +       return res;
> +}
> +
> +static int img_hash_update(struct ahash_request *req)
> +{
> +       struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> +       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> +       struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> +
> +       ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> +       rctx->fallback_req.base.flags = req->base.flags
> +               & CRYPTO_TFM_REQ_MAY_SLEEP;
> +       rctx->fallback_req.nbytes = req->nbytes;
> +       rctx->fallback_req.src = req->src;
> +
> +       return crypto_ahash_update(&rctx->fallback_req);
> +}
> +
> +static int img_hash_final(struct ahash_request *req)
> +{
> +       struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> +       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> +       struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> +
> +       ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> +       rctx->fallback_req.base.flags = req->base.flags
> +               & CRYPTO_TFM_REQ_MAY_SLEEP;
> +       rctx->fallback_req.result = req->result;
> +
> +       return crypto_ahash_final(&rctx->fallback_req);
> +}
> +
> +static int img_hash_finup(struct ahash_request *req)
> +{
> +       struct img_hash_request_ctx *rctx = ahash_request_ctx(req);
> +       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> +       struct img_hash_ctx *ctx = crypto_ahash_ctx(tfm);
> +
> +       ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback);
> +       rctx->fallback_req.base.flags = req->base.flags
> +               & CRYPTO_TFM_REQ_MAY_SLEEP;
> +       rctx->fallback_req.nbytes = req->nbytes;
> +       rctx->fallback_req.src = req->src;
> +       rctx->fallback_req.result = req->result;
> +
> +       return crypto_ahash_finup(&rctx->fallback_req);
> +}
> +
> +static int img_hash_digest(struct ahash_request *req)
> +{
> +       struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
> +       struct img_hash_ctx *tctx = crypto_ahash_ctx(tfm);
> +       struct img_hash_request_ctx *ctx = ahash_request_ctx(req);
> +       struct img_hash_dev *hdev = NULL;
> +       struct img_hash_dev *tmp;
> +       int err;
> +
> +       spin_lock_bh(&img_hash.lock);

Why spin_{lock,unlock}_bh() here?

> +       if (!tctx->hdev) {
> +               list_for_each_entry(tmp, &img_hash.dev_list, list) {
> +                       hdev = tmp;
> +                       break;
> +               }
> +               tctx->hdev = hdev;
> +
> +       } else {
> +               hdev = tctx->hdev;
> +       }
> +
> +       spin_unlock_bh(&img_hash.lock);
> +       ctx->hdev = hdev;
> +       ctx->flags = 0;
> +       ctx->digsize = crypto_ahash_digestsize(tfm);
> +
> +       switch (ctx->digsize) {
> +       case SHA1_DIGEST_SIZE:
> +               ctx->flags |= DRIVER_FLAGS_SHA1;
> +               break;
> +       case SHA256_DIGEST_SIZE:
> +               ctx->flags |= DRIVER_FLAGS_SHA256;
> +               break;
> +       case SHA224_DIGEST_SIZE:
> +               ctx->flags |= DRIVER_FLAGS_SHA224;
> +               break;
> +       case MD5_DIGEST_SIZE:
> +               ctx->flags |= DRIVER_FLAGS_MD5;
> +               break;
> +       default:
> +               return -EINVAL;
> +       }
> +
> +       ctx->bufcnt = 0;
> +       ctx->offset = 0;
> +       ctx->sent = 0;
> +       ctx->total = req->nbytes;
> +       ctx->sg = req->src;
> +       ctx->sgfirst = req->src;
> +       ctx->nents = sg_nents(ctx->sg);
> +
> +       err = img_hash_handle_queue(tctx->hdev, req);
> +
> +       return err;
> +}
> +
> +static int img_hash_cra_init(struct crypto_tfm *tfm)
> +{
> +       struct img_hash_ctx *ctx = crypto_tfm_ctx(tfm);
> +       const char *alg_name = crypto_tfm_alg_name(tfm);
> +       int err = -ENOMEM;
> +
> +       ctx->fallback = crypto_alloc_ahash(alg_name, 0,
> +                                          CRYPTO_ALG_NEED_FALLBACK);
> +       if (IS_ERR(ctx->fallback)) {
> +               pr_err("img_hash: Could not load fallback driver.\n");
> +               err = PTR_ERR(ctx->fallback);
> +               goto err;
> +       }
> +       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
> +                                sizeof(struct img_hash_request_ctx) +
> +                                IMG_HASH_DMA_THRESHOLD);
> +
> +       return 0;
> +
> +err:
> +       return err;
> +}
> +
> +static void img_hash_cra_exit(struct crypto_tfm *tfm)
> +{
> +       struct img_hash_ctx *tctx = crypto_tfm_ctx(tfm);
> +
> +       crypto_free_ahash(tctx->fallback);
> +}
> +
> +static irqreturn_t img_irq_handler(int irq, void *dev_id)
> +{
> +       struct img_hash_dev *hdev = dev_id;
> +       u32 reg;
> +
> +       reg = img_hash_read(hdev, CR_INTSTAT);
> +       img_hash_write(hdev, CR_INTCLEAR, reg);
> +
> +       if (reg & CR_INT_NEW_RESULTS_SET) {
> +               dev_dbg(hdev->dev, "IRQ CR_INT_NEW_RESULTS_SET\n");
> +               if (DRIVER_FLAGS_BUSY & hdev->flags) {
> +                       hdev->flags |= DRIVER_FLAGS_OUTPUT_READY;
> +                       if (!(DRIVER_FLAGS_CPU & hdev->flags))
> +                               hdev->flags |= DRIVER_FLAGS_DMA_READY;
> +                       tasklet_schedule(&hdev->done_task);

Since this done_task only gets scheduled from here, why not make this
a threaded IRQ handler and just do the work here instead of separating
it between a hard IRQ handler and a tasklet?

> +               } else {
> +                       dev_warn(hdev->dev,
> +                                "HASH interrupt when no active requests.\n");
> +               }
> +       } else if (reg & CR_INT_RESULTS_AVAILABLE) {
> +               dev_warn(hdev->dev,
> +                        "IRQ triggered before the hash had completed\n");
> +       } else if (reg & CR_INT_RESULT_READ_ERR) {
> +               dev_warn(hdev->dev,
> +                        "Attempt to read from an empty result queue\n");
> +       } else if (reg & CR_INT_MESSAGE_WRITE_ERROR) {
> +               dev_warn(hdev->dev,
> +                        "Data written before the hardware was configured\n");
> +       }
> +       return IRQ_HANDLED;
> +}
> +
> +static struct ahash_alg img_algs[] = {
> +       {
> +               .init = img_hash_init,
> +               .update = img_hash_update,
> +               .final = img_hash_final,
> +               .finup = img_hash_finup,
> +               .digest = img_hash_digest,
> +               .halg = {
> +                       .digestsize = MD5_DIGEST_SIZE,
> +                       .base = {
> +                               .cra_name = "md5",
> +                               .cra_driver_name = "img-md5",
> +                               .cra_priority = 301,
> +                               .cra_flags =
> +                               CRYPTO_ALG_ASYNC |
> +                               CRYPTO_ALG_NEED_FALLBACK,
> +                               .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
> +                               .cra_ctxsize = sizeof(struct img_hash_ctx),
> +                               .cra_init = img_hash_cra_init,
> +                               .cra_exit = img_hash_cra_exit,
> +                               .cra_module = THIS_MODULE,
> +                       }
> +               }
> +       },
> +       {
> +               .init = img_hash_init,
> +               .update = img_hash_update,
> +               .final = img_hash_final,
> +               .finup = img_hash_finup,
> +               .digest = img_hash_digest,
> +               .halg = {
> +                       .digestsize = SHA1_DIGEST_SIZE,
> +                       .base = {
> +                               .cra_name = "sha1",
> +                               .cra_driver_name = "img-sha1",
> +                               .cra_priority = 3000,
> +                               .cra_flags =
> +                               CRYPTO_ALG_ASYNC |
> +                               CRYPTO_ALG_NEED_FALLBACK,
> +                               .cra_blocksize = SHA1_BLOCK_SIZE,
> +                               .cra_ctxsize = sizeof(struct img_hash_ctx),
> +                               .cra_init = img_hash_cra_init,
> +                               .cra_exit = img_hash_cra_exit,
> +                               .cra_module = THIS_MODULE,
> +                       }
> +               }
> +       },
> +       {
> +               .init = img_hash_init,
> +               .update = img_hash_update,
> +               .final = img_hash_final,
> +               .finup = img_hash_finup,
> +               .digest = img_hash_digest,
> +               .halg = {
> +                       .digestsize = SHA224_DIGEST_SIZE,
> +                       .base = {
> +                               .cra_name = "sha224",
> +                               .cra_driver_name = "img-sha224",
> +                               .cra_priority = 3000,
> +                               .cra_flags =
> +                               CRYPTO_ALG_ASYNC |
> +                               CRYPTO_ALG_NEED_FALLBACK,
> +                               .cra_blocksize = SHA224_BLOCK_SIZE,
> +                               .cra_ctxsize = sizeof(struct img_hash_ctx),
> +                               .cra_init = img_hash_cra_init,
> +                               .cra_exit = img_hash_cra_exit,
> +                               .cra_module = THIS_MODULE,
> +                       }
> +               }
> +       },
> +       {
> +               .init = img_hash_init,
> +               .update = img_hash_update,
> +               .final = img_hash_final,
> +               .finup = img_hash_finup,
> +               .digest = img_hash_digest,
> +               .halg = {
> +                       .digestsize = SHA256_DIGEST_SIZE,
> +                       .base = {
> +                               .cra_name = "sha256",
> +                               .cra_driver_name = "img-sha256",
> +                               .cra_priority = 301,
> +                               .cra_flags =
> +                               CRYPTO_ALG_ASYNC |
> +                               CRYPTO_ALG_NEED_FALLBACK,
> +                               .cra_blocksize = SHA256_BLOCK_SIZE,
> +                               .cra_ctxsize = sizeof(struct img_hash_ctx),
> +                               .cra_init = img_hash_cra_init,
> +                               .cra_exit = img_hash_cra_exit,
> +                               .cra_module = THIS_MODULE,
> +                       }
> +               }
> +       }
> +};
> +
> +static int img_register_algs(struct img_hash_dev *hdev)
> +{
> +       int i, err;
> +
> +       for (i = 0; i < ARRAY_SIZE(img_algs); i++) {
> +               err = crypto_register_ahash(&img_algs[i]);
> +               if (err)
> +                       goto err_reg;
> +       }
> +       return 0;
> +
> +err_reg:
> +       for (; i--; )
> +               crypto_unregister_ahash(&img_algs[i]);
> +
> +       return err;
> +}
> +
> +static int img_unregister_algs(struct img_hash_dev *hdev)
> +{
> +       int i;
> +
> +       for (i = 0; i < ARRAY_SIZE(img_algs); i++)
> +               crypto_unregister_ahash(&img_algs[i]);
> +       return 0;
> +}
> +
> +static void img_hash_done_task(unsigned long data)
> +{
> +       struct img_hash_dev *hdev = (struct img_hash_dev *)data;
> +       int err = 0;
> +
> +       if (hdev->err == -EINVAL) {
> +               err = hdev->err;
> +               goto finish;
> +       }
> +
> +       if (!(DRIVER_FLAGS_BUSY & hdev->flags)) {
> +               img_hash_handle_queue(hdev, NULL);
> +               return;
> +       }
> +
> +       if (DRIVER_FLAGS_CPU & hdev->flags) {
> +               if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
> +                       hdev->flags &= ~DRIVER_FLAGS_OUTPUT_READY;
> +                       goto finish;
> +               }
> +       } else if (DRIVER_FLAGS_DMA_READY & hdev->flags) {
> +               if (DRIVER_FLAGS_DMA_ACTIVE & hdev->flags) {
> +                       hdev->flags &= ~DRIVER_FLAGS_DMA_ACTIVE;
> +                       img_hash_write_via_dma_stop(hdev);
> +                       if (hdev->err) {
> +                               err = hdev->err;
> +                               goto finish;
> +                       }
> +               }
> +               if (DRIVER_FLAGS_OUTPUT_READY & hdev->flags) {
> +                       hdev->flags &= ~(DRIVER_FLAGS_DMA_READY |
> +                                       DRIVER_FLAGS_OUTPUT_READY);
> +                       goto finish;
> +               }
> +       }
> +       return;
> +
> +finish:
> +       img_hash_finish_req(hdev->req, err);
> +}
> +
> +static const struct of_device_id img_hash_match[] = {
> +       { .compatible = "img,hash-accelerator" },
> +       {}
> +};
> +MODULE_DEVICE_TABLE(of, img_hash_match)
> +
> +static int img_hash_probe(struct platform_device *pdev)
> +{
> +       struct img_hash_dev *hdev;
> +       struct device *dev = &pdev->dev;
> +       struct resource *hash_res;
> +       int err;
> +
> +       hdev = devm_kzalloc(dev, sizeof(*hdev), GFP_KERNEL);
> +       if (hdev == NULL) {
> +               err = -ENOMEM;
> +               goto sha_dev_err;

Why not just "return -ENOMEM" here?  There should be no cleanup
necessary at this point?

> +       }
> +       spin_lock_init(&hdev->lock);
> +
> +       hdev->dev = dev;
> +
> +       platform_set_drvdata(pdev, hdev);
> +
> +       INIT_LIST_HEAD(&hdev->list);
> +
> +       tasklet_init(&hdev->done_task, img_hash_done_task, (unsigned long)hdev);
> +       tasklet_init(&hdev->dma_task, img_hash_dma_task, (unsigned long)hdev);
> +
> +       crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH);
> +
> +       /* Register bank */
> +       hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +
> +       hdev->io_base = devm_ioremap_resource(dev, hash_res);
> +       if (IS_ERR(hdev->io_base)) {
> +               dev_err(dev, "can't ioremap\n");

nit: When printing error messages, it's helpful to print the error code as well.

> +               err = PTR_ERR(hdev->io_base);
> +               goto hash_io_err;
> +       }
> +
> +       /* Write port (DMA or CPU) */
> +       hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +       if (!hash_res) {
> +               dev_err(dev, "no write port resource info\n");
> +               err = -ENODEV;
> +               goto res_err;
> +       }
> +       hdev->bus_addr = hash_res->start;

Maybe set this after devm_ioremap_resource() succeeds and avoid the
extra error check?

> +       hdev->cpu_addr = devm_ioremap_resource(dev, hash_res);
> +       if (IS_ERR(hdev->cpu_addr)) {
> +               dev_err(dev, "can't ioremap write port\n");
> +               err = PTR_ERR(hdev->cpu_addr);
> +               goto res_err;
> +       }
> +
> +       hdev->irq = platform_get_irq(pdev, 0);
> +       if (hdev->irq < 0) {
> +               dev_err(dev, "no IRQ resource info\n");
> +               err = hdev->irq;
> +               goto res_err;
> +       }
> +
> +       err = devm_request_irq(dev, hdev->irq, img_irq_handler, 0,
> +                              dev_name(dev), hdev);
> +       if (err) {
> +               dev_err(dev, "unable to request %s irq\n", dev_name(dev));

dev_* already prints dev_name().

> +               goto res_err;
> +       }
> +       dev_dbg(dev, "using IRQ channel %d\n", hdev->irq);
> +
> +       hdev->iclk = devm_clk_get(&pdev->dev, "hash_clk");
> +       if (IS_ERR(hdev->iclk)) {
> +               dev_err(dev, "clock initialization failed.\n");
> +               err = PTR_ERR(hdev->iclk);
> +               goto res_err;
> +       }
> +
> +       hdev->fclk = devm_clk_get(&pdev->dev, "hash_reg_clk");

I don't see this clock getting enabled anywhere.

> +       if (IS_ERR(hdev->fclk)) {
> +               dev_err(dev, "clock initialization failed.\n");
> +               err = PTR_ERR(hdev->fclk);
> +               goto res_err;
> +       }
> +
> +       err = img_hash_dma_init(hdev);
> +       if (err)
> +               goto err_dma;
> +
> +       dev_dbg(dev, "using %s for DMA transfers\n",
> +               dma_chan_name(hdev->dma_lch));
> +
> +       spin_lock(&img_hash.lock);
> +       list_add_tail(&hdev->list, &img_hash.dev_list);
> +       spin_unlock(&img_hash.lock);
> +
> +       err = img_register_algs(hdev);
> +       if (err)
> +               goto err_algs;
> +       dev_info(dev, "data bus: 0x%x;\tsize:0x%x\n", hdev->bus_addr, 0x4);

dev_dbg()? (or maybe not at all)

> +       dev_info(dev, "Img MD5/SHA1/SHA224/SHA256 Hardware accelerator initialized\n");
> +
> +       return 0;
> +
> +err_algs:
> +       spin_lock(&img_hash.lock);
> +       list_del(&hdev->list);
> +       spin_unlock(&img_hash.lock);
> +       dma_release_channel(hdev->dma_lch);
> +err_dma:
> +hash_io_err:
> +       devm_clk_put(dev, hdev->iclk);
> +       devm_clk_put(dev, hdev->fclk);

Since the clocks were acquired via the devm_* API, there's no need to
explicitly put() them.

> +res_err:
> +       tasklet_kill(&hdev->done_task);
> +       tasklet_kill(&hdev->dma_task);
> +sha_dev_err:
> +       dev_err(dev, "initialization failed.\n");

The driver core will print an error message if a device probe fails
(for reasons other than -ENODEV, at least), so there's no need to
print here.

> +       return err;
> +}
> +
> +static int img_hash_remove(struct platform_device *pdev)
> +{
> +       static struct img_hash_dev *hdev;
> +
> +       hdev = platform_get_drvdata(pdev);
> +       spin_lock(&img_hash.lock);
> +       list_del(&hdev->list);
> +       spin_unlock(&img_hash.lock);
> +
> +       img_unregister_algs(hdev);
> +
> +       tasklet_kill(&hdev->done_task);
> +       tasklet_kill(&hdev->dma_task);
> +
> +       dma_release_channel(hdev->dma_lch);
> +
> +       clk_disable_unprepare(hdev->iclk);
> +       clk_disable_unprepare(hdev->fclk);

Unbalanced prepare_enable()/disable_unprepare().

> +       return 0;
> +}
> +
> +static struct platform_driver img_hash_driver = {
> +       .probe          = img_hash_probe,
> +       .remove         = img_hash_remove,
> +       .driver         = {
> +               .name   = "img-hash-accelerator",
> +               .of_match_table = of_match_ptr(img_hash_match),
> +       }
> +};
> +module_platform_driver(img_hash_driver);
> +
> +MODULE_LICENSE("GPL v2");
> +MODULE_DESCRIPTION("Imgtec SHA1/224/256 & MD5 hw accelerator driver");
> +MODULE_AUTHOR("Will Thomas.");
> +MODULE_AUTHOR("James Hartley.");

It's helpful to include email addresses in the MODULE_AUTHOR tags.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2015-03-12  0:13 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-06  2:58 [PATCH V3 0/2] Add support for the IMG hash accelerator James Hartley
2015-03-06  2:58 ` [PATCH V3 1/2] crypto: Add Imagination Technologies hw " James Hartley
2015-03-09  6:35   ` Stephan Mueller
2015-03-10  9:37     ` Herbert Xu
2015-03-10  9:53       ` James Hartley
2015-03-10  9:54       ` James Hartley
2015-03-10  9:55         ` Herbert Xu
2015-03-10 12:30           ` James Hartley
2015-03-06  2:58 ` [PATCH V3 2/2] Documentation: crypto: Add DT binding info for the img " James Hartley
     [not found] <1425610912-9930-1-git-send-email-james.hartley@imgtec.com>
     [not found] ` <1425610912-9930-2-git-send-email-james.hartley@imgtec.com>
2015-03-09  5:42   ` [PATCH V3 1/2] crypto: Add Imagination Technologies " Andrew Bresticker
2015-03-10 15:31     ` James Hartley
2015-03-10 18:02       ` Andrew Bresticker
2015-03-12  0:13         ` James Hartley

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.