linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 0/5] crypto: hisilicon - Improve SEC performance
@ 2020-03-02  6:15 Zaibo Xu
  2020-03-02  6:15 ` [PATCH v2 1/5] crypto: hisilicon - Use one workqueue per qm instead of per qp Zaibo Xu
                   ` (4 more replies)
  0 siblings, 5 replies; 18+ messages in thread
From: Zaibo Xu @ 2020-03-02  6:15 UTC (permalink / raw)
  To: herbert, davem
  Cc: linux-crypto, linuxarm, wangzhou1, tanghui20, yekai13,
	liulongfang, qianweili, zhangwei375, fanghao11, forest.zhouchang

From: liulongfang <liulongfang@huawei.com>

Improve SEC throughput by allocating a workqueue for each device
instead of one workqueue for all SEC devices. What's more,
when IOMMU translation is turned on, a plat buffer (pbuffer)
will be reserved, to which small packets (<512Bytes) are copied.
This avoids DMA mapping of small user packets and improves
performance.

This series is based on:
git://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git

Changes v1 -> v2:
	- Split pbuf patch into two patches.
	- Move 'use_pbuf' from 'qp_ctx' to TFM request.
	- Misc fixes on coding style.

Shukun Tan (1):
  crypto: hisilicon - Use one workqueue per qm instead of per qp

liulongfang (3):
  crypto: hisilicon/sec2 - Add iommu status check
  crypto: hisilicon/sec2 - Update IV and MAC operation
  crypto: hisilicon/sec2 - Add pbuffer mode for SEC driver

yekai13 (1):
  crypto: hisilicon/sec2 - Add workqueue for SEC driver.

 drivers/crypto/hisilicon/qm.c              |  38 ++---
 drivers/crypto/hisilicon/qm.h              |   5 +-
 drivers/crypto/hisilicon/sec2/sec.h        |   7 +
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 242 ++++++++++++++++++++++++-----
 drivers/crypto/hisilicon/sec2/sec_main.c   |  45 +++++-
 5 files changed, 274 insertions(+), 63 deletions(-)

-- 
2.8.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 1/5] crypto: hisilicon - Use one workqueue per qm instead of per qp
  2020-03-02  6:15 [PATCH v2 0/5] crypto: hisilicon - Improve SEC performance Zaibo Xu
@ 2020-03-02  6:15 ` Zaibo Xu
  2020-03-02 11:39   ` Jonathan Cameron
  2020-03-02  6:15 ` [PATCH v2 2/5] crypto: hisilicon/sec2 - Add workqueue for SEC driver Zaibo Xu
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 18+ messages in thread
From: Zaibo Xu @ 2020-03-02  6:15 UTC (permalink / raw)
  To: herbert, davem
  Cc: linux-crypto, linuxarm, wangzhou1, tanghui20, yekai13,
	liulongfang, qianweili, zhangwei375, fanghao11, forest.zhouchang

From: Shukun Tan <tanshukun1@huawei.com>

So many work queues are not needed. Using one workqueue
per QM will reduce the number of kworker threads as well as
reduce CPU usage. This would not degrade any performance.

Signed-off-by: Shukun Tan <tanshukun1@huawei.com>
---
 drivers/crypto/hisilicon/qm.c | 38 +++++++++++++++-----------------------
 drivers/crypto/hisilicon/qm.h |  5 +++--
 2 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index ad7146a..13b0a6f 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -494,17 +494,9 @@ static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
 	}
 }
 
-static void qm_qp_work_func(struct work_struct *work)
+static void qm_work_process(struct work_struct *work)
 {
-	struct hisi_qp *qp;
-
-	qp = container_of(work, struct hisi_qp, work);
-	qm_poll_qp(qp, qp->qm);
-}
-
-static irqreturn_t qm_irq_handler(int irq, void *data)
-{
-	struct hisi_qm *qm = data;
+	struct hisi_qm *qm = container_of(work, struct hisi_qm, work);
 	struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
 	struct hisi_qp *qp;
 	int eqe_num = 0;
@@ -513,7 +505,7 @@ static irqreturn_t qm_irq_handler(int irq, void *data)
 		eqe_num++;
 		qp = qm_to_hisi_qp(qm, eqe);
 		if (qp)
-			queue_work(qp->wq, &qp->work);
+			qm_poll_qp(qp, qm);
 
 		if (qm->status.eq_head == QM_Q_DEPTH - 1) {
 			qm->status.eqc_phase = !qm->status.eqc_phase;
@@ -531,6 +523,16 @@ static irqreturn_t qm_irq_handler(int irq, void *data)
 	}
 
 	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
+}
+
+static irqreturn_t do_qm_irq(int irq, void *data)
+{
+	struct hisi_qm *qm = (struct hisi_qm *)data;
+
+	if (qm->wq)
+		queue_work(qm->wq, &qm->work);
+	else
+		schedule_work(&qm->work);
 
 	return IRQ_HANDLED;
 }
@@ -540,7 +542,7 @@ static irqreturn_t qm_irq(int irq, void *data)
 	struct hisi_qm *qm = data;
 
 	if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
-		return qm_irq_handler(irq, data);
+		return do_qm_irq(irq, data);
 
 	dev_err(&qm->pdev->dev, "invalid int source\n");
 	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
@@ -1159,20 +1161,9 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
 
 	qp->qp_id = qp_id;
 	qp->alg_type = alg_type;
-	INIT_WORK(&qp->work, qm_qp_work_func);
-	qp->wq = alloc_workqueue("hisi_qm", WQ_UNBOUND | WQ_HIGHPRI |
-				 WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0);
-	if (!qp->wq) {
-		ret = -EFAULT;
-		goto err_free_qp_mem;
-	}
 
 	return qp;
 
-err_free_qp_mem:
-	if (qm->use_dma_api)
-		dma_free_coherent(dev, qp->qdma.size, qp->qdma.va,
-				  qp->qdma.dma);
 err_clear_bit:
 	write_lock(&qm->qps_lock);
 	qm->qp_array[qp_id] = NULL;
@@ -1704,6 +1695,7 @@ int hisi_qm_init(struct hisi_qm *qm)
 	qm->qp_in_used = 0;
 	mutex_init(&qm->mailbox_lock);
 	rwlock_init(&qm->qps_lock);
+	INIT_WORK(&qm->work, qm_work_process);
 
 	dev_dbg(dev, "init qm %s with %s\n", pdev->is_physfn ? "pf" : "vf",
 		qm->use_dma_api ? "dma api" : "iommu api");
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
index 1a4f208..c72c2e6 100644
--- a/drivers/crypto/hisilicon/qm.h
+++ b/drivers/crypto/hisilicon/qm.h
@@ -183,6 +183,9 @@ struct hisi_qm {
 	u32 error_mask;
 	u32 msi_mask;
 
+	struct workqueue_struct *wq;
+	struct work_struct work;
+
 	const char *algs;
 	bool use_dma_api;
 	bool use_sva;
@@ -219,8 +222,6 @@ struct hisi_qp {
 	void *qp_ctx;
 	void (*req_cb)(struct hisi_qp *qp, void *data);
 	void (*event_cb)(struct hisi_qp *qp);
-	struct work_struct work;
-	struct workqueue_struct *wq;
 
 	struct hisi_qm *qm;
 	u16 pasid;
-- 
2.8.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 2/5] crypto: hisilicon/sec2 - Add workqueue for SEC driver.
  2020-03-02  6:15 [PATCH v2 0/5] crypto: hisilicon - Improve SEC performance Zaibo Xu
  2020-03-02  6:15 ` [PATCH v2 1/5] crypto: hisilicon - Use one workqueue per qm instead of per qp Zaibo Xu
@ 2020-03-02  6:15 ` Zaibo Xu
  2020-03-02 11:51   ` Jonathan Cameron
  2020-03-02  6:15 ` [PATCH v2 3/5] crypto: hisilicon/sec2 - Add iommu status check Zaibo Xu
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 18+ messages in thread
From: Zaibo Xu @ 2020-03-02  6:15 UTC (permalink / raw)
  To: herbert, davem
  Cc: linux-crypto, linuxarm, wangzhou1, tanghui20, yekai13,
	liulongfang, qianweili, zhangwei375, fanghao11, forest.zhouchang

From: yekai13 <yekai13@huawei.com>

Allocate one workqueue for each QM instead of one for all QMs.
During sec2 performance testing, we found that the throughput
of the SEC engine can be increased to the hardware limit, so
we added this scheme.

Signed-off-by: yekai13 <yekai13@huawei.com>
Signed-off-by: liulongfang <liulongfang@huawei.com>
---
 drivers/crypto/hisilicon/sec2/sec_main.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 3767fdb..ebafc1c 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -774,12 +774,24 @@ static void sec_qm_uninit(struct hisi_qm *qm)
 
 static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec)
 {
+	int ret;
+
+	qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_CPU_INTENSIVE |
+		WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus(),
+		pci_name(qm->pdev));
+	if (!qm->wq) {
+		pci_err(qm->pdev, "fail to alloc workqueue\n");
+		return -ENOMEM;
+	}
+
 	if (qm->fun_type == QM_HW_PF) {
 		qm->qp_base = SEC_PF_DEF_Q_BASE;
 		qm->qp_num = pf_q_num;
 		qm->debug.curr_qm_qp_num = pf_q_num;
 
-		return sec_pf_probe_init(sec);
+		ret = sec_pf_probe_init(sec);
+		if (ret)
+			goto err_probe_uninit;
 	} else if (qm->fun_type == QM_HW_VF) {
 		/*
 		 * have no way to get qm configure in VM in v1 hardware,
@@ -792,18 +804,26 @@ static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec)
 			qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM;
 		} else if (qm->ver == QM_HW_V2) {
 			/* v2 starts to support get vft by mailbox */
-			return hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
+			ret = hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
+			if (ret)
+				goto err_probe_uninit;
 		}
 	} else {
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err_probe_uninit;
 	}
 
 	return 0;
+err_probe_uninit:
+	destroy_workqueue(qm->wq);
+	return ret;
 }
 
 static void sec_probe_uninit(struct hisi_qm *qm)
 {
 	hisi_qm_dev_err_uninit(qm);
+
+	destroy_workqueue(qm->wq);
 }
 
 static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
-- 
2.8.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 3/5] crypto: hisilicon/sec2 - Add iommu status check
  2020-03-02  6:15 [PATCH v2 0/5] crypto: hisilicon - Improve SEC performance Zaibo Xu
  2020-03-02  6:15 ` [PATCH v2 1/5] crypto: hisilicon - Use one workqueue per qm instead of per qp Zaibo Xu
  2020-03-02  6:15 ` [PATCH v2 2/5] crypto: hisilicon/sec2 - Add workqueue for SEC driver Zaibo Xu
@ 2020-03-02  6:15 ` Zaibo Xu
  2020-03-02 11:54   ` Jonathan Cameron
  2020-03-02  6:15 ` [PATCH v2 4/5] crypto: hisilicon/sec2 - Update IV and MAC operation Zaibo Xu
  2020-03-02  6:15 ` [PATCH v2 5/5] crypto: hisilicon/sec2 - Add pbuffer mode for SEC driver Zaibo Xu
  4 siblings, 1 reply; 18+ messages in thread
From: Zaibo Xu @ 2020-03-02  6:15 UTC (permalink / raw)
  To: herbert, davem
  Cc: linux-crypto, linuxarm, wangzhou1, tanghui20, yekai13,
	liulongfang, qianweili, zhangwei375, fanghao11, forest.zhouchang

From: liulongfang <liulongfang@huawei.com>

In order to improve the performance of small packets (<512Bytes)
in the SMMU translation scenario, we need to identify the type of
IOMMU in the SEC probe to process small packets by a different method.

Signed-off-by: liulongfang <liulongfang@huawei.com>
Reviewed-by: Zaibo Xu <xuzaibo@huawei.com>
---
 drivers/crypto/hisilicon/sec2/sec.h      |  1 +
 drivers/crypto/hisilicon/sec2/sec_main.c | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index 13e2d8d..eab0d22 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -165,6 +165,7 @@ struct sec_dev {
 	struct list_head list;
 	struct sec_debug debug;
 	u32 ctx_q_num;
+	bool iommu_used;
 	u32 num_vfs;
 	unsigned long status;
 };
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index ebafc1c..6466d90 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -7,6 +7,7 @@
 #include <linux/debugfs.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -826,6 +827,23 @@ static void sec_probe_uninit(struct hisi_qm *qm)
 	destroy_workqueue(qm->wq);
 }
 
+static void sec_iommu_used_check(struct sec_dev *sec)
+{
+	struct iommu_domain *domain;
+	struct device *dev = &sec->qm.pdev->dev;
+
+	domain = iommu_get_domain_for_dev(dev);
+
+	/* Check if iommu is used */
+	sec->iommu_used = false;
+	if (domain) {
+		if (domain->type & __IOMMU_DOMAIN_PAGING)
+			sec->iommu_used = true;
+		dev_info(dev, "SMMU Opened, the iommu type = %u\n",
+			domain->type);
+	}
+}
+
 static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct sec_dev *sec;
@@ -839,6 +857,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pci_set_drvdata(pdev, sec);
 
 	sec->ctx_q_num = ctx_q_num;
+	sec_iommu_used_check(sec);
 
 	qm = &sec->qm;
 
-- 
2.8.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 4/5] crypto: hisilicon/sec2 - Update IV and MAC operation
  2020-03-02  6:15 [PATCH v2 0/5] crypto: hisilicon - Improve SEC performance Zaibo Xu
                   ` (2 preceding siblings ...)
  2020-03-02  6:15 ` [PATCH v2 3/5] crypto: hisilicon/sec2 - Add iommu status check Zaibo Xu
@ 2020-03-02  6:15 ` Zaibo Xu
  2020-03-02 11:58   ` Jonathan Cameron
  2020-03-02  6:15 ` [PATCH v2 5/5] crypto: hisilicon/sec2 - Add pbuffer mode for SEC driver Zaibo Xu
  4 siblings, 1 reply; 18+ messages in thread
From: Zaibo Xu @ 2020-03-02  6:15 UTC (permalink / raw)
  To: herbert, davem
  Cc: linux-crypto, linuxarm, wangzhou1, tanghui20, yekai13,
	liulongfang, qianweili, zhangwei375, fanghao11, forest.zhouchang

From: liulongfang <liulongfang@huawei.com>

We have updated the handling of the IV and MAC address
to prepare for the pbuf patch.

Signed-off-by: liulongfang <liulongfang@huawei.com>
Signed-off-by: Zaibo Xu <xuzaibo@huawei.com>
---
 drivers/crypto/hisilicon/sec2/sec.h        |  2 +
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 73 +++++++++++++++++-------------
 2 files changed, 43 insertions(+), 32 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index eab0d22..e67b416 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -23,6 +23,8 @@ struct sec_cipher_req {
 	dma_addr_t c_in_dma;
 	struct hisi_acc_hw_sgl *c_out;
 	dma_addr_t c_out_dma;
+	u8 *c_ivin;
+	dma_addr_t c_ivin_dma;
 	struct skcipher_request *sk_req;
 	u32 c_len;
 	bool encrypt;
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index acd1550..1eeaa74 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -46,6 +46,7 @@
 #define SEC_CIPHER_AUTH		0xfe
 #define SEC_AUTH_CIPHER		0x1
 #define SEC_MAX_MAC_LEN		64
+#define SEC_MAX_AAD_LEN		65535
 #define SEC_TOTAL_MAC_SZ	(SEC_MAX_MAC_LEN * QM_Q_DEPTH)
 #define SEC_SQE_LEN_RATE	4
 #define SEC_SQE_CFLAG		2
@@ -110,12 +111,12 @@ static void sec_free_req_id(struct sec_req *req)
 	mutex_unlock(&qp_ctx->req_lock);
 }
 
-static int sec_aead_verify(struct sec_req *req, struct sec_qp_ctx *qp_ctx)
+static int sec_aead_verify(struct sec_req *req)
 {
 	struct aead_request *aead_req = req->aead_req.aead_req;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req);
-	u8 *mac_out = qp_ctx->res[req->req_id].out_mac;
 	size_t authsize = crypto_aead_authsize(tfm);
+	u8 *mac_out = req->aead_req.out_mac;
 	u8 *mac = mac_out + SEC_MAX_MAC_LEN;
 	struct scatterlist *sgl = aead_req->src;
 	size_t sz;
@@ -163,7 +164,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
 	}
 
 	if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt)
-		err = sec_aead_verify(req, qp_ctx);
+		err = sec_aead_verify(req);
 
 	atomic64_inc(&ctx->sec->debug.dfx.recv_cnt);
 
@@ -259,11 +260,11 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
 	if (ctx->alg_type == SEC_AEAD) {
 		ret = sec_alloc_mac_resource(dev, res);
 		if (ret)
-			goto get_fail;
+			goto alloc_fail;
 	}
 
 	return 0;
-get_fail:
+alloc_fail:
 	sec_free_civ_resource(dev, res);
 
 	return ret;
@@ -590,11 +591,21 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC)
 GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS)
 GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC)
 
-static int sec_cipher_map(struct device *dev, struct sec_req *req,
+static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
 			  struct scatterlist *src, struct scatterlist *dst)
 {
 	struct sec_cipher_req *c_req = &req->c_req;
+	struct sec_aead_req *a_req = &req->aead_req;
 	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+	struct sec_alg_res *res = &qp_ctx->res[req->req_id];
+	struct device *dev = SEC_CTX_DEV(ctx);
+
+	c_req->c_ivin = res->c_ivin;
+	c_req->c_ivin_dma = res->c_ivin_dma;
+	if (ctx->alg_type == SEC_AEAD) {
+		a_req->out_mac = res->out_mac;
+		a_req->out_mac_dma = res->out_mac_dma;
+	}
 
 	c_req->c_in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src,
 						    qp_ctx->c_in_pool,
@@ -625,29 +636,30 @@ static int sec_cipher_map(struct device *dev, struct sec_req *req,
 	return 0;
 }
 
-static void sec_cipher_unmap(struct device *dev, struct sec_cipher_req *req,
+static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req,
 			     struct scatterlist *src, struct scatterlist *dst)
 {
+	struct sec_cipher_req *c_req = &req->c_req;
+	struct device *dev = SEC_CTX_DEV(ctx);
+
 	if (dst != src)
-		hisi_acc_sg_buf_unmap(dev, src, req->c_in);
+		hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
 
-	hisi_acc_sg_buf_unmap(dev, dst, req->c_out);
+	hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
 }
 
 static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
 {
 	struct skcipher_request *sq = req->c_req.sk_req;
 
-	return sec_cipher_map(SEC_CTX_DEV(ctx), req, sq->src, sq->dst);
+	return sec_cipher_map(ctx, req, sq->src, sq->dst);
 }
 
 static void sec_skcipher_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req)
 {
-	struct device *dev = SEC_CTX_DEV(ctx);
-	struct sec_cipher_req *c_req = &req->c_req;
-	struct skcipher_request *sk_req = c_req->sk_req;
+	struct skcipher_request *sq = req->c_req.sk_req;
 
-	sec_cipher_unmap(dev, c_req, sk_req->src, sk_req->dst);
+	sec_cipher_unmap(ctx, req, sq->src, sq->dst);
 }
 
 static int sec_aead_aes_set_key(struct sec_cipher_ctx *c_ctx,
@@ -758,16 +770,14 @@ static int sec_aead_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
 {
 	struct aead_request *aq = req->aead_req.aead_req;
 
-	return sec_cipher_map(SEC_CTX_DEV(ctx), req, aq->src, aq->dst);
+	return sec_cipher_map(ctx, req, aq->src, aq->dst);
 }
 
 static void sec_aead_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req)
 {
-	struct device *dev = SEC_CTX_DEV(ctx);
-	struct sec_cipher_req *cq = &req->c_req;
 	struct aead_request *aq = req->aead_req.aead_req;
 
-	sec_cipher_unmap(dev, cq, aq->src, aq->dst);
+	sec_cipher_unmap(ctx, req, aq->src, aq->dst);
 }
 
 static int sec_request_transfer(struct sec_ctx *ctx, struct sec_req *req)
@@ -800,9 +810,9 @@ static void sec_request_untransfer(struct sec_ctx *ctx, struct sec_req *req)
 static void sec_skcipher_copy_iv(struct sec_ctx *ctx, struct sec_req *req)
 {
 	struct skcipher_request *sk_req = req->c_req.sk_req;
-	u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin;
+	struct sec_cipher_req *c_req = &req->c_req;
 
-	memcpy(c_ivin, sk_req->iv, ctx->c_ctx.ivsize);
+	memcpy(c_req->c_ivin, sk_req->iv, ctx->c_ctx.ivsize);
 }
 
 static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
@@ -817,8 +827,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
 	memset(sec_sqe, 0, sizeof(struct sec_sqe));
 
 	sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma);
-	sec_sqe->type2.c_ivin_addr =
-		cpu_to_le64(req->qp_ctx->res[req->req_id].c_ivin_dma);
+	sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma);
 	sec_sqe->type2.data_src_addr = cpu_to_le64(c_req->c_in_dma);
 	sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma);
 
@@ -903,9 +912,9 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req,
 static void sec_aead_copy_iv(struct sec_ctx *ctx, struct sec_req *req)
 {
 	struct aead_request *aead_req = req->aead_req.aead_req;
-	u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin;
+	struct sec_cipher_req *c_req = &req->c_req;
 
-	memcpy(c_ivin, aead_req->iv, ctx->c_ctx.ivsize);
+	memcpy(c_req->c_ivin, aead_req->iv, ctx->c_ctx.ivsize);
 }
 
 static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
@@ -938,8 +947,7 @@ static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
 
 	sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen);
 
-	sec_sqe->type2.mac_addr =
-		cpu_to_le64(req->qp_ctx->res[req->req_id].out_mac_dma);
+	sec_sqe->type2.mac_addr = cpu_to_le64(a_req->out_mac_dma);
 }
 
 static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
@@ -963,6 +971,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err)
 {
 	struct aead_request *a_req = req->aead_req.aead_req;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(a_req);
+	struct sec_aead_req *aead_req = &req->aead_req;
 	struct sec_cipher_req *c_req = &req->c_req;
 	size_t authsize = crypto_aead_authsize(tfm);
 	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
@@ -978,7 +987,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err)
 		struct scatterlist *sgl = a_req->dst;
 
 		sz = sg_pcopy_from_buffer(sgl, sg_nents(sgl),
-					  qp_ctx->res[req->req_id].out_mac,
+					  aead_req->out_mac,
 					  authsize, a_req->cryptlen +
 					  a_req->assoclen);
 
@@ -1030,6 +1039,7 @@ static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req)
 
 static int sec_process(struct sec_ctx *ctx, struct sec_req *req)
 {
+	struct sec_cipher_req *c_req = &req->c_req;
 	int ret;
 
 	ret = sec_request_init(ctx, req);
@@ -1056,12 +1066,10 @@ static int sec_process(struct sec_ctx *ctx, struct sec_req *req)
 	/* As failing, restore the IV from user */
 	if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt) {
 		if (ctx->alg_type == SEC_SKCIPHER)
-			memcpy(req->c_req.sk_req->iv,
-			       req->qp_ctx->res[req->req_id].c_ivin,
+			memcpy(req->c_req.sk_req->iv, c_req->c_ivin,
 			       ctx->c_ctx.ivsize);
 		else
-			memcpy(req->aead_req.aead_req->iv,
-			       req->qp_ctx->res[req->req_id].c_ivin,
+			memcpy(req->aead_req.aead_req->iv, c_req->c_ivin,
 			       ctx->c_ctx.ivsize);
 	}
 
@@ -1320,7 +1328,8 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	size_t authsize = crypto_aead_authsize(tfm);
 
-	if (unlikely(!req->src || !req->dst || !req->cryptlen)) {
+	if (unlikely(!req->src || !req->dst || !req->cryptlen ||
+		req->assoclen > SEC_MAX_AAD_LEN)) {
 		dev_err(SEC_CTX_DEV(ctx), "aead input param error!\n");
 		return -EINVAL;
 	}
-- 
2.8.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 5/5] crypto: hisilicon/sec2 - Add pbuffer mode for SEC driver
  2020-03-02  6:15 [PATCH v2 0/5] crypto: hisilicon - Improve SEC performance Zaibo Xu
                   ` (3 preceding siblings ...)
  2020-03-02  6:15 ` [PATCH v2 4/5] crypto: hisilicon/sec2 - Update IV and MAC operation Zaibo Xu
@ 2020-03-02  6:15 ` Zaibo Xu
  2020-03-02 12:49   ` Jonathan Cameron
  4 siblings, 1 reply; 18+ messages in thread
From: Zaibo Xu @ 2020-03-02  6:15 UTC (permalink / raw)
  To: herbert, davem
  Cc: linux-crypto, linuxarm, wangzhou1, tanghui20, yekai13,
	liulongfang, qianweili, zhangwei375, fanghao11, forest.zhouchang

From: liulongfang <liulongfang@huawei.com>

In the scenario of SMMU translation, the SEC performance of short messages
(<512Bytes) cannot meet our expectations. To avoid this, we reserve the
plat buffer (PBUF) memory for small packets when creating a TFM.

Signed-off-by: liulongfang <liulongfang@huawei.com>
Signed-off-by: Zaibo Xu <xuzaibo@huawei.com>
---
 drivers/crypto/hisilicon/sec2/sec.h        |   4 +
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 173 ++++++++++++++++++++++++++++-
 2 files changed, 172 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index e67b416..a73d82c 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -11,6 +11,8 @@
 
 /* Algorithm resource per hardware SEC queue */
 struct sec_alg_res {
+	u8 *pbuf;
+	dma_addr_t pbuf_dma;
 	u8 *c_ivin;
 	dma_addr_t c_ivin_dma;
 	u8 *out_mac;
@@ -50,6 +52,7 @@ struct sec_req {
 
 	/* Status of the SEC request */
 	bool fake_busy;
+	bool use_pbuf;
 };
 
 /**
@@ -130,6 +133,7 @@ struct sec_ctx {
 	atomic_t dec_qcyclic;
 
 	enum sec_alg_type alg_type;
+	bool pbuf_supported;
 	struct sec_cipher_ctx c_ctx;
 	struct sec_auth_ctx a_ctx;
 };
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 1eeaa74..3136ada 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -48,6 +48,19 @@
 #define SEC_MAX_MAC_LEN		64
 #define SEC_MAX_AAD_LEN		65535
 #define SEC_TOTAL_MAC_SZ	(SEC_MAX_MAC_LEN * QM_Q_DEPTH)
+
+#define SEC_PBUF_SZ			512
+#define SEC_PBUF_IV_OFFSET		SEC_PBUF_SZ
+#define SEC_PBUF_MAC_OFFSET		(SEC_PBUF_SZ + SEC_IV_SIZE)
+#define SEC_PBUF_PKG		(SEC_PBUF_SZ + SEC_IV_SIZE +	\
+			SEC_MAX_MAC_LEN * 2)
+#define SEC_PBUF_NUM		(PAGE_SIZE / SEC_PBUF_PKG)
+#define SEC_PBUF_PAGE_NUM	(QM_Q_DEPTH / SEC_PBUF_NUM)
+#define SEC_PBUF_LEFT_SZ	(SEC_PBUF_PKG * (QM_Q_DEPTH -	\
+			SEC_PBUF_PAGE_NUM * SEC_PBUF_NUM))
+#define SEC_TOTAL_PBUF_SZ	(PAGE_SIZE * SEC_PBUF_PAGE_NUM +	\
+			SEC_PBUF_LEFT_SZ)
+
 #define SEC_SQE_LEN_RATE	4
 #define SEC_SQE_CFLAG		2
 #define SEC_SQE_AEAD_FLAG	3
@@ -246,6 +259,50 @@ static void sec_free_mac_resource(struct device *dev, struct sec_alg_res *res)
 				  res->out_mac, res->out_mac_dma);
 }
 
+static void sec_free_pbuf_resource(struct device *dev, struct sec_alg_res *res)
+{
+	if (res->pbuf)
+		dma_free_coherent(dev, SEC_TOTAL_PBUF_SZ,
+				  res->pbuf, res->pbuf_dma);
+}
+
+/*
+ * To improve performance, pbuffer is used for
+ * small packets (< 512Bytes) as IOMMU translation using.
+ */
+static int sec_alloc_pbuf_resource(struct device *dev, struct sec_alg_res *res)
+{
+	int pbuf_page_offset;
+	int i, j, k;
+
+	res->pbuf = dma_alloc_coherent(dev, SEC_TOTAL_PBUF_SZ,
+				&res->pbuf_dma, GFP_KERNEL);
+	if (!res->pbuf)
+		return -ENOMEM;
+
+	/*
+	 * SEC_PBUF_PKG contains data pbuf, iv and
+	 * out_mac : <SEC_PBUF|SEC_IV|SEC_MAC>
+	 * Every PAGE contains six SEC_PBUF_PKG
+	 * The sec_qp_ctx contains QM_Q_DEPTH numbers of SEC_PBUF_PKG
+	 * So we need SEC_PBUF_PAGE_NUM numbers of PAGE
+	 * for the SEC_TOTAL_PBUF_SZ
+	 */
+	for (i = 0; i <= SEC_PBUF_PAGE_NUM; i++) {
+		pbuf_page_offset = PAGE_SIZE * i;
+		for (j = 0; j < SEC_PBUF_NUM; j++) {
+			k = i * SEC_PBUF_NUM + j;
+			if (k == QM_Q_DEPTH)
+				break;
+			res[k].pbuf = res->pbuf +
+				j * SEC_PBUF_PKG + pbuf_page_offset;
+			res[k].pbuf_dma = res->pbuf_dma +
+				j * SEC_PBUF_PKG + pbuf_page_offset;
+		}
+	}
+	return 0;
+}
+
 static int sec_alg_resource_alloc(struct sec_ctx *ctx,
 				  struct sec_qp_ctx *qp_ctx)
 {
@@ -262,6 +319,13 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
 		if (ret)
 			goto alloc_fail;
 	}
+	if (ctx->pbuf_supported) {
+		ret = sec_alloc_pbuf_resource(dev, res);
+		if (ret) {
+			dev_err(dev, "fail to alloc pbuf dma resource!\n");
+			goto alloc_fail;
+		}
+	}
 
 	return 0;
 alloc_fail:
@@ -279,6 +343,8 @@ static void sec_alg_resource_free(struct sec_ctx *ctx,
 
 	if (ctx->alg_type == SEC_AEAD)
 		sec_free_mac_resource(dev, qp_ctx->res);
+	if (ctx->pbuf_supported)
+		sec_free_pbuf_resource(dev, qp_ctx->res);
 }
 
 static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
@@ -369,6 +435,8 @@ static int sec_ctx_base_init(struct sec_ctx *ctx)
 	ctx->sec = sec;
 	ctx->hlf_q_num = sec->ctx_q_num >> 1;
 
+	ctx->pbuf_supported = ctx->sec->iommu_used;
+
 	/* Half of queue depth is taken as fake requests limit in the queue. */
 	ctx->fake_req_limit = QM_Q_DEPTH >> 1;
 	ctx->qp_ctx = kcalloc(sec->ctx_q_num, sizeof(struct sec_qp_ctx),
@@ -591,6 +659,66 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC)
 GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS)
 GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC)
 
+static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req,
+			struct scatterlist *src)
+{
+	struct aead_request *aead_req = req->aead_req.aead_req;
+	struct sec_cipher_req *c_req = &req->c_req;
+	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+	struct device *dev = SEC_CTX_DEV(ctx);
+	int copy_size, pbuf_length;
+	int req_id = req->req_id;
+
+	if (ctx->alg_type == SEC_AEAD)
+		copy_size = aead_req->cryptlen + aead_req->assoclen;
+	else
+		copy_size = c_req->c_len;
+
+	pbuf_length = sg_copy_to_buffer(src, sg_nents(src),
+				qp_ctx->res[req_id].pbuf,
+				copy_size);
+
+	if (unlikely(pbuf_length != copy_size)) {
+		dev_err(dev, "copy src data to pbuf error!\n");
+		return -EINVAL;
+	}
+
+	c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma;
+
+	if (!c_req->c_in_dma) {
+		dev_err(dev, "fail to set pbuffer address!\n");
+		return -ENOMEM;
+	}
+
+	c_req->c_out_dma = c_req->c_in_dma;
+
+	return 0;
+}
+
+static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req,
+			struct scatterlist *dst)
+{
+	struct aead_request *aead_req = req->aead_req.aead_req;
+	struct sec_cipher_req *c_req = &req->c_req;
+	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+	struct device *dev = SEC_CTX_DEV(ctx);
+	int copy_size, pbuf_length;
+	int req_id = req->req_id;
+
+	if (ctx->alg_type == SEC_AEAD)
+		copy_size = c_req->c_len + aead_req->assoclen;
+	else
+		copy_size = c_req->c_len;
+
+	pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst),
+				qp_ctx->res[req_id].pbuf,
+				copy_size);
+
+	if (unlikely(pbuf_length != copy_size))
+		dev_err(dev, "copy pbuf data to dst error!\n");
+
+}
+
 static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
 			  struct scatterlist *src, struct scatterlist *dst)
 {
@@ -599,7 +727,20 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
 	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
 	struct sec_alg_res *res = &qp_ctx->res[req->req_id];
 	struct device *dev = SEC_CTX_DEV(ctx);
+	int ret;
+
+	if (req->use_pbuf) {
+		ret = sec_cipher_pbuf_map(ctx, req, src);
+		c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET;
+		c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET;
+		if (ctx->alg_type == SEC_AEAD) {
+			a_req->out_mac = res->pbuf + SEC_PBUF_MAC_OFFSET;
+			a_req->out_mac_dma = res->pbuf_dma +
+					SEC_PBUF_MAC_OFFSET;
+		}
 
+		return ret;
+	}
 	c_req->c_ivin = res->c_ivin;
 	c_req->c_ivin_dma = res->c_ivin_dma;
 	if (ctx->alg_type == SEC_AEAD) {
@@ -642,10 +783,14 @@ static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req,
 	struct sec_cipher_req *c_req = &req->c_req;
 	struct device *dev = SEC_CTX_DEV(ctx);
 
-	if (dst != src)
-		hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
+	if (req->use_pbuf) {
+		sec_cipher_pbuf_unmap(ctx, req, dst);
+	} else {
+		if (dst != src)
+			hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
 
-	hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
+		hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
+	}
 }
 
 static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
@@ -844,7 +989,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
 		cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET;
 	sec_sqe->type_cipher_auth = bd_type | cipher;
 
-	sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
+	if (req->use_pbuf)
+		sa_type = SEC_PBUF << SEC_SRC_SGL_OFFSET;
+	else
+		sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
 	scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET;
 	if (c_req->c_in_dma != c_req->c_out_dma)
 		de = 0x1 << SEC_DE_OFFSET;
@@ -852,7 +1000,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
 	sec_sqe->sds_sa_type = (de | scene | sa_type);
 
 	/* Just set DST address type */
-	da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
+	if (req->use_pbuf)
+		da_type = SEC_PBUF << SEC_DST_SGL_OFFSET;
+	else
+		da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
 	sec_sqe->sdm_addr_type |= da_type;
 
 	sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len);
@@ -1215,6 +1366,12 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
 		return -EINVAL;
 	}
 	sreq->c_req.c_len = sk_req->cryptlen;
+
+	if (ctx->pbuf_supported && sk_req->cryptlen <= SEC_PBUF_SZ)
+		sreq->use_pbuf = true;
+	else
+		sreq->use_pbuf = false;
+
 	if (c_alg == SEC_CALG_3DES) {
 		if (unlikely(sk_req->cryptlen & (DES3_EDE_BLOCK_SIZE - 1))) {
 			dev_err(dev, "skcipher 3des input length error!\n");
@@ -1334,6 +1491,12 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
 		return -EINVAL;
 	}
 
+	if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <=
+		SEC_PBUF_SZ)
+		sreq->use_pbuf = true;
+	else
+		sreq->use_pbuf = false;
+
 	/* Support AES only */
 	if (unlikely(c_alg != SEC_CALG_AES)) {
 		dev_err(SEC_CTX_DEV(ctx), "aead crypto alg error!\n");
-- 
2.8.1


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/5] crypto: hisilicon - Use one workqueue per qm instead of per qp
  2020-03-02  6:15 ` [PATCH v2 1/5] crypto: hisilicon - Use one workqueue per qm instead of per qp Zaibo Xu
@ 2020-03-02 11:39   ` Jonathan Cameron
  2020-03-03  1:38     ` Xu Zaibo
  0 siblings, 1 reply; 18+ messages in thread
From: Jonathan Cameron @ 2020-03-02 11:39 UTC (permalink / raw)
  To: Zaibo Xu
  Cc: herbert, davem, qianweili, tanghui20, forest.zhouchang, linuxarm,
	zhangwei375, yekai13, linux-crypto

On Mon, 2 Mar 2020 14:15:12 +0800
Zaibo Xu <xuzaibo@huawei.com> wrote:

> From: Shukun Tan <tanshukun1@huawei.com>
> 
> So many work queues are not needed. Using one workqueue
> per QM will reduce the number of kworker threads as well as
> reduce CPU usage. This does not degrade performance.
> 
> Signed-off-by: Shukun Tan <tanshukun1@huawei.com>

Hi, this is more or less fine I think other than:

1) Needs a sign off from xuzaibo to reflect the handling of the patch.
2) The description doesn't mention that we aren't actually creating the
   per QM workqueue in this patch (it comes later in the series)
3) The fallback to the system workqueue needs documentation inline.

So tidy those up for v3 and I'm happy.
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Thanks,

Jonathan

> ---
>  drivers/crypto/hisilicon/qm.c | 38 +++++++++++++++-----------------------
>  drivers/crypto/hisilicon/qm.h |  5 +++--
>  2 files changed, 18 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
> index ad7146a..13b0a6f 100644
> --- a/drivers/crypto/hisilicon/qm.c
> +++ b/drivers/crypto/hisilicon/qm.c
> @@ -494,17 +494,9 @@ static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
>  	}
>  }
>  
> -static void qm_qp_work_func(struct work_struct *work)
> +static void qm_work_process(struct work_struct *work)
>  {
> -	struct hisi_qp *qp;
> -
> -	qp = container_of(work, struct hisi_qp, work);
> -	qm_poll_qp(qp, qp->qm);
> -}
> -
> -static irqreturn_t qm_irq_handler(int irq, void *data)
> -{
> -	struct hisi_qm *qm = data;
> +	struct hisi_qm *qm = container_of(work, struct hisi_qm, work);
>  	struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
>  	struct hisi_qp *qp;
>  	int eqe_num = 0;
> @@ -513,7 +505,7 @@ static irqreturn_t qm_irq_handler(int irq, void *data)
>  		eqe_num++;
>  		qp = qm_to_hisi_qp(qm, eqe);
>  		if (qp)
> -			queue_work(qp->wq, &qp->work);
> +			qm_poll_qp(qp, qm);
>  
>  		if (qm->status.eq_head == QM_Q_DEPTH - 1) {
>  			qm->status.eqc_phase = !qm->status.eqc_phase;
> @@ -531,6 +523,16 @@ static irqreturn_t qm_irq_handler(int irq, void *data)
>  	}
>  
>  	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
> +}
> +
> +static irqreturn_t do_qm_irq(int irq, void *data)
> +{
> +	struct hisi_qm *qm = (struct hisi_qm *)data;
> +
> +	if (qm->wq)
> +		queue_work(qm->wq, &qm->work);
> +	else
> +		schedule_work(&qm->work);

This subtle difference between these two could do with an explanatory
comment.

From an initial look I'm not actually seeing qm->wq being set anywhere?
Ah it's in a later patch. Please add comment to say that in the introduction.





>  
>  	return IRQ_HANDLED;
>  }
> @@ -540,7 +542,7 @@ static irqreturn_t qm_irq(int irq, void *data)
>  	struct hisi_qm *qm = data;
>  
>  	if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
> -		return qm_irq_handler(irq, data);
> +		return do_qm_irq(irq, data);
>  
>  	dev_err(&qm->pdev->dev, "invalid int source\n");
>  	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
> @@ -1159,20 +1161,9 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
>  
>  	qp->qp_id = qp_id;
>  	qp->alg_type = alg_type;
> -	INIT_WORK(&qp->work, qm_qp_work_func);
> -	qp->wq = alloc_workqueue("hisi_qm", WQ_UNBOUND | WQ_HIGHPRI |
> -				 WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0);
> -	if (!qp->wq) {
> -		ret = -EFAULT;
> -		goto err_free_qp_mem;
> -	}
>  
>  	return qp;
>  
> -err_free_qp_mem:
> -	if (qm->use_dma_api)
> -		dma_free_coherent(dev, qp->qdma.size, qp->qdma.va,
> -				  qp->qdma.dma);
>  err_clear_bit:
>  	write_lock(&qm->qps_lock);
>  	qm->qp_array[qp_id] = NULL;
> @@ -1704,6 +1695,7 @@ int hisi_qm_init(struct hisi_qm *qm)
>  	qm->qp_in_used = 0;
>  	mutex_init(&qm->mailbox_lock);
>  	rwlock_init(&qm->qps_lock);
> +	INIT_WORK(&qm->work, qm_work_process);
>  
>  	dev_dbg(dev, "init qm %s with %s\n", pdev->is_physfn ? "pf" : "vf",
>  		qm->use_dma_api ? "dma api" : "iommu api");
> diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
> index 1a4f208..c72c2e6 100644
> --- a/drivers/crypto/hisilicon/qm.h
> +++ b/drivers/crypto/hisilicon/qm.h
> @@ -183,6 +183,9 @@ struct hisi_qm {
>  	u32 error_mask;
>  	u32 msi_mask;
>  
> +	struct workqueue_struct *wq;
> +	struct work_struct work;
> +
>  	const char *algs;
>  	bool use_dma_api;
>  	bool use_sva;
> @@ -219,8 +222,6 @@ struct hisi_qp {
>  	void *qp_ctx;
>  	void (*req_cb)(struct hisi_qp *qp, void *data);
>  	void (*event_cb)(struct hisi_qp *qp);
> -	struct work_struct work;
> -	struct workqueue_struct *wq;
>  
>  	struct hisi_qm *qm;
>  	u16 pasid;



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 2/5] crypto: hisilicon/sec2 - Add workqueue for SEC driver.
  2020-03-02  6:15 ` [PATCH v2 2/5] crypto: hisilicon/sec2 - Add workqueue for SEC driver Zaibo Xu
@ 2020-03-02 11:51   ` Jonathan Cameron
  2020-03-03  1:35     ` Xu Zaibo
  0 siblings, 1 reply; 18+ messages in thread
From: Jonathan Cameron @ 2020-03-02 11:51 UTC (permalink / raw)
  To: Zaibo Xu
  Cc: herbert, davem, qianweili, tanghui20, forest.zhouchang, linuxarm,
	zhangwei375, yekai13, linux-crypto

On Mon, 2 Mar 2020 14:15:13 +0800
Zaibo Xu <xuzaibo@huawei.com> wrote:

> From: yekai13 <yekai13@huawei.com>
> 
> Allocate one workqueue for each QM instead of one for all QMs.
> During sec2 performance testing, we found that with this scheme
> the throughput of the SEC engine can be increased to the hardware
> limit, so we adopted it.
> 
> Signed-off-by: yekai13 <yekai13@huawei.com>
> Signed-off-by: liulongfang <liulongfang@huawei.com>

That first sign off needs fixing.  Needs to be a real name.

Also missing xuzaibo's sign-off.

A question inline that might be worth a follow up patch.

With signoffs fixed

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

Thanks,

Jonathan

> ---
>  drivers/crypto/hisilicon/sec2/sec_main.c | 26 +++++++++++++++++++++++---
>  1 file changed, 23 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
> index 3767fdb..ebafc1c 100644
> --- a/drivers/crypto/hisilicon/sec2/sec_main.c
> +++ b/drivers/crypto/hisilicon/sec2/sec_main.c
> @@ -774,12 +774,24 @@ static void sec_qm_uninit(struct hisi_qm *qm)
>  
>  static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec)
>  {
> +	int ret;
> +
> +	qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_CPU_INTENSIVE |
> +		WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus(),
> +		pci_name(qm->pdev));

I appreciate that you have the same parameters here as were originally in
qm.c, but I would like to fully understand why some of these flags are set.

Perhaps a comment for each of them?  I'm not sure I'd consider the work
to be done in this work queue CPU_INTENSIVE for example.

This could be a follow up patch though as not actually related to this
change.

> +	if (!qm->wq) {
> +		pci_err(qm->pdev, "fail to alloc workqueue\n");
> +		return -ENOMEM;
> +	}
> +
>  	if (qm->fun_type == QM_HW_PF) {
>  		qm->qp_base = SEC_PF_DEF_Q_BASE;
>  		qm->qp_num = pf_q_num;
>  		qm->debug.curr_qm_qp_num = pf_q_num;
>  
> -		return sec_pf_probe_init(sec);
> +		ret = sec_pf_probe_init(sec);
> +		if (ret)
> +			goto err_probe_uninit;
>  	} else if (qm->fun_type == QM_HW_VF) {
>  		/*
>  		 * have no way to get qm configure in VM in v1 hardware,
> @@ -792,18 +804,26 @@ static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec)
>  			qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM;
>  		} else if (qm->ver == QM_HW_V2) {
>  			/* v2 starts to support get vft by mailbox */
> -			return hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
> +			ret = hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
> +			if (ret)
> +				goto err_probe_uninit;
>  		}
>  	} else {
> -		return -ENODEV;
> +		ret = -ENODEV;
> +		goto err_probe_uninit;
>  	}
>  
>  	return 0;
> +err_probe_uninit:
> +	destroy_workqueue(qm->wq);
> +	return ret;
>  }
>  
>  static void sec_probe_uninit(struct hisi_qm *qm)
>  {
>  	hisi_qm_dev_err_uninit(qm);
> +
> +	destroy_workqueue(qm->wq);
>  }
>  
>  static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 3/5] crypto: hisilicon/sec2 - Add iommu status check
  2020-03-02  6:15 ` [PATCH v2 3/5] crypto: hisilicon/sec2 - Add iommu status check Zaibo Xu
@ 2020-03-02 11:54   ` Jonathan Cameron
  2020-03-03  1:25     ` Xu Zaibo
  2020-03-03  2:16     ` Yunsheng Lin
  0 siblings, 2 replies; 18+ messages in thread
From: Jonathan Cameron @ 2020-03-02 11:54 UTC (permalink / raw)
  To: Zaibo Xu
  Cc: herbert, davem, qianweili, tanghui20, forest.zhouchang, linuxarm,
	zhangwei375, yekai13, linux-crypto

On Mon, 2 Mar 2020 14:15:14 +0800
Zaibo Xu <xuzaibo@huawei.com> wrote:

> From: liulongfang <liulongfang@huawei.com>
> 
> In order to improve the performance of small packets (<512 bytes)
> in the SMMU translation scenario, we need to identify the type of IOMMU
> in the SEC probe so that small packets can be processed by a different method.
> 
> Signed-off-by: liulongfang <liulongfang@huawei.com>
> Reviewed-by: Zaibo Xu <xuzaibo@huawei.com>

This looks like what we ended up with for the SECv1 driver.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
>  drivers/crypto/hisilicon/sec2/sec.h      |  1 +
>  drivers/crypto/hisilicon/sec2/sec_main.c | 19 +++++++++++++++++++
>  2 files changed, 20 insertions(+)
> 
> diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
> index 13e2d8d..eab0d22 100644
> --- a/drivers/crypto/hisilicon/sec2/sec.h
> +++ b/drivers/crypto/hisilicon/sec2/sec.h
> @@ -165,6 +165,7 @@ struct sec_dev {
>  	struct list_head list;
>  	struct sec_debug debug;
>  	u32 ctx_q_num;
> +	bool iommu_used;
>  	u32 num_vfs;
>  	unsigned long status;
>  };
> diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
> index ebafc1c..6466d90 100644
> --- a/drivers/crypto/hisilicon/sec2/sec_main.c
> +++ b/drivers/crypto/hisilicon/sec2/sec_main.c
> @@ -7,6 +7,7 @@
>  #include <linux/debugfs.h>
>  #include <linux/init.h>
>  #include <linux/io.h>
> +#include <linux/iommu.h>
>  #include <linux/kernel.h>
>  #include <linux/module.h>
>  #include <linux/pci.h>
> @@ -826,6 +827,23 @@ static void sec_probe_uninit(struct hisi_qm *qm)
>  	destroy_workqueue(qm->wq);
>  }
>  
> +static void sec_iommu_used_check(struct sec_dev *sec)
> +{
> +	struct iommu_domain *domain;
> +	struct device *dev = &sec->qm.pdev->dev;
> +
> +	domain = iommu_get_domain_for_dev(dev);
> +
> +	/* Check if iommu is used */
> +	sec->iommu_used = false;
> +	if (domain) {
> +		if (domain->type & __IOMMU_DOMAIN_PAGING)
> +			sec->iommu_used = true;
> +		dev_info(dev, "SMMU Opened, the iommu type = %u\n",
> +			domain->type);
> +	}
> +}
> +
>  static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  {
>  	struct sec_dev *sec;
> @@ -839,6 +857,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	pci_set_drvdata(pdev, sec);
>  
>  	sec->ctx_q_num = ctx_q_num;
> +	sec_iommu_used_check(sec);
>  
>  	qm = &sec->qm;
>  



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 4/5] crypto: hisilicon/sec2 - Update IV and MAC operation
  2020-03-02  6:15 ` [PATCH v2 4/5] crypto: hisilicon/sec2 - Update IV and MAC operation Zaibo Xu
@ 2020-03-02 11:58   ` Jonathan Cameron
  2020-03-03  1:24     ` Xu Zaibo
  0 siblings, 1 reply; 18+ messages in thread
From: Jonathan Cameron @ 2020-03-02 11:58 UTC (permalink / raw)
  To: Zaibo Xu
  Cc: herbert, davem, qianweili, tanghui20, forest.zhouchang, linuxarm,
	zhangwei375, yekai13, linux-crypto

On Mon, 2 Mar 2020 14:15:15 +0800
Zaibo Xu <xuzaibo@huawei.com> wrote:

> From: liulongfang <liulongfang@huawei.com>
> 
> We have updated the handling of the IV and MAC address
> to prepare for the pbuf patch.
> 
> Signed-off-by: liulongfang <liulongfang@huawei.com>
> Signed-off-by: Zaibo Xu <xuzaibo@huawei.com>
Trivial comment inline.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> ---
>  drivers/crypto/hisilicon/sec2/sec.h        |  2 +
>  drivers/crypto/hisilicon/sec2/sec_crypto.c | 73 +++++++++++++++++-------------
>  2 files changed, 43 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
> index eab0d22..e67b416 100644
> --- a/drivers/crypto/hisilicon/sec2/sec.h
> +++ b/drivers/crypto/hisilicon/sec2/sec.h
> @@ -23,6 +23,8 @@ struct sec_cipher_req {
>  	dma_addr_t c_in_dma;
>  	struct hisi_acc_hw_sgl *c_out;
>  	dma_addr_t c_out_dma;
> +	u8 *c_ivin;
> +	dma_addr_t c_ivin_dma;
>  	struct skcipher_request *sk_req;
>  	u32 c_len;
>  	bool encrypt;
> diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
> index acd1550..1eeaa74 100644
> --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
> +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
> @@ -46,6 +46,7 @@
>  #define SEC_CIPHER_AUTH		0xfe
>  #define SEC_AUTH_CIPHER		0x1
>  #define SEC_MAX_MAC_LEN		64
> +#define SEC_MAX_AAD_LEN		65535
>  #define SEC_TOTAL_MAC_SZ	(SEC_MAX_MAC_LEN * QM_Q_DEPTH)
>  #define SEC_SQE_LEN_RATE	4
>  #define SEC_SQE_CFLAG		2
> @@ -110,12 +111,12 @@ static void sec_free_req_id(struct sec_req *req)
>  	mutex_unlock(&qp_ctx->req_lock);
>  }
>  
> -static int sec_aead_verify(struct sec_req *req, struct sec_qp_ctx *qp_ctx)
> +static int sec_aead_verify(struct sec_req *req)
>  {
>  	struct aead_request *aead_req = req->aead_req.aead_req;
>  	struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req);
> -	u8 *mac_out = qp_ctx->res[req->req_id].out_mac;
>  	size_t authsize = crypto_aead_authsize(tfm);
> +	u8 *mac_out = req->aead_req.out_mac;
>  	u8 *mac = mac_out + SEC_MAX_MAC_LEN;
>  	struct scatterlist *sgl = aead_req->src;
>  	size_t sz;
> @@ -163,7 +164,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
>  	}
>  
>  	if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt)
> -		err = sec_aead_verify(req, qp_ctx);
> +		err = sec_aead_verify(req);
>  
>  	atomic64_inc(&ctx->sec->debug.dfx.recv_cnt);
>  
> @@ -259,11 +260,11 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
>  	if (ctx->alg_type == SEC_AEAD) {
>  		ret = sec_alloc_mac_resource(dev, res);
>  		if (ret)
> -			goto get_fail;
> +			goto alloc_fail;

This looks like an unrelated change. 
At very least should be mentioned as an additional cleanup in the patch intro.

>  	}
>  
>  	return 0;
> -get_fail:
> +alloc_fail:
>  	sec_free_civ_resource(dev, res);
>  
>  	return ret;
> @@ -590,11 +591,21 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC)
>  GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS)
>  GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC)
>  
> -static int sec_cipher_map(struct device *dev, struct sec_req *req,
> +static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
>  			  struct scatterlist *src, struct scatterlist *dst)
>  {
>  	struct sec_cipher_req *c_req = &req->c_req;
> +	struct sec_aead_req *a_req = &req->aead_req;
>  	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
> +	struct sec_alg_res *res = &qp_ctx->res[req->req_id];
> +	struct device *dev = SEC_CTX_DEV(ctx);
> +
> +	c_req->c_ivin = res->c_ivin;
> +	c_req->c_ivin_dma = res->c_ivin_dma;
> +	if (ctx->alg_type == SEC_AEAD) {
> +		a_req->out_mac = res->out_mac;
> +		a_req->out_mac_dma = res->out_mac_dma;
> +	}
>  
>  	c_req->c_in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src,
>  						    qp_ctx->c_in_pool,
> @@ -625,29 +636,30 @@ static int sec_cipher_map(struct device *dev, struct sec_req *req,
>  	return 0;
>  }
>  
> -static void sec_cipher_unmap(struct device *dev, struct sec_cipher_req *req,
> +static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req,
>  			     struct scatterlist *src, struct scatterlist *dst)
>  {
> +	struct sec_cipher_req *c_req = &req->c_req;
> +	struct device *dev = SEC_CTX_DEV(ctx);
> +
>  	if (dst != src)
> -		hisi_acc_sg_buf_unmap(dev, src, req->c_in);
> +		hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
>  
> -	hisi_acc_sg_buf_unmap(dev, dst, req->c_out);
> +	hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
>  }
>  
>  static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
>  {
>  	struct skcipher_request *sq = req->c_req.sk_req;
>  
> -	return sec_cipher_map(SEC_CTX_DEV(ctx), req, sq->src, sq->dst);
> +	return sec_cipher_map(ctx, req, sq->src, sq->dst);
>  }
>  
>  static void sec_skcipher_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req)
>  {
> -	struct device *dev = SEC_CTX_DEV(ctx);
> -	struct sec_cipher_req *c_req = &req->c_req;
> -	struct skcipher_request *sk_req = c_req->sk_req;
> +	struct skcipher_request *sq = req->c_req.sk_req;
>  
> -	sec_cipher_unmap(dev, c_req, sk_req->src, sk_req->dst);
> +	sec_cipher_unmap(ctx, req, sq->src, sq->dst);
>  }
>  
>  static int sec_aead_aes_set_key(struct sec_cipher_ctx *c_ctx,
> @@ -758,16 +770,14 @@ static int sec_aead_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
>  {
>  	struct aead_request *aq = req->aead_req.aead_req;
>  
> -	return sec_cipher_map(SEC_CTX_DEV(ctx), req, aq->src, aq->dst);
> +	return sec_cipher_map(ctx, req, aq->src, aq->dst);
>  }
>  
>  static void sec_aead_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req)
>  {
> -	struct device *dev = SEC_CTX_DEV(ctx);
> -	struct sec_cipher_req *cq = &req->c_req;
>  	struct aead_request *aq = req->aead_req.aead_req;
>  
> -	sec_cipher_unmap(dev, cq, aq->src, aq->dst);
> +	sec_cipher_unmap(ctx, req, aq->src, aq->dst);
>  }
>  
>  static int sec_request_transfer(struct sec_ctx *ctx, struct sec_req *req)
> @@ -800,9 +810,9 @@ static void sec_request_untransfer(struct sec_ctx *ctx, struct sec_req *req)
>  static void sec_skcipher_copy_iv(struct sec_ctx *ctx, struct sec_req *req)
>  {
>  	struct skcipher_request *sk_req = req->c_req.sk_req;
> -	u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin;
> +	struct sec_cipher_req *c_req = &req->c_req;
>  
> -	memcpy(c_ivin, sk_req->iv, ctx->c_ctx.ivsize);
> +	memcpy(c_req->c_ivin, sk_req->iv, ctx->c_ctx.ivsize);
>  }
>  
>  static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
> @@ -817,8 +827,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
>  	memset(sec_sqe, 0, sizeof(struct sec_sqe));
>  
>  	sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma);
> -	sec_sqe->type2.c_ivin_addr =
> -		cpu_to_le64(req->qp_ctx->res[req->req_id].c_ivin_dma);
> +	sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma);
>  	sec_sqe->type2.data_src_addr = cpu_to_le64(c_req->c_in_dma);
>  	sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma);
>  
> @@ -903,9 +912,9 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req,
>  static void sec_aead_copy_iv(struct sec_ctx *ctx, struct sec_req *req)
>  {
>  	struct aead_request *aead_req = req->aead_req.aead_req;
> -	u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin;
> +	struct sec_cipher_req *c_req = &req->c_req;
>  
> -	memcpy(c_ivin, aead_req->iv, ctx->c_ctx.ivsize);
> +	memcpy(c_req->c_ivin, aead_req->iv, ctx->c_ctx.ivsize);
>  }
>  
>  static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
> @@ -938,8 +947,7 @@ static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
>  
>  	sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen);
>  
> -	sec_sqe->type2.mac_addr =
> -		cpu_to_le64(req->qp_ctx->res[req->req_id].out_mac_dma);
> +	sec_sqe->type2.mac_addr = cpu_to_le64(a_req->out_mac_dma);
>  }
>  
>  static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
> @@ -963,6 +971,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err)
>  {
>  	struct aead_request *a_req = req->aead_req.aead_req;
>  	struct crypto_aead *tfm = crypto_aead_reqtfm(a_req);
> +	struct sec_aead_req *aead_req = &req->aead_req;
>  	struct sec_cipher_req *c_req = &req->c_req;
>  	size_t authsize = crypto_aead_authsize(tfm);
>  	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
> @@ -978,7 +987,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err)
>  		struct scatterlist *sgl = a_req->dst;
>  
>  		sz = sg_pcopy_from_buffer(sgl, sg_nents(sgl),
> -					  qp_ctx->res[req->req_id].out_mac,
> +					  aead_req->out_mac,
>  					  authsize, a_req->cryptlen +
>  					  a_req->assoclen);
>  
> @@ -1030,6 +1039,7 @@ static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req)
>  
>  static int sec_process(struct sec_ctx *ctx, struct sec_req *req)
>  {
> +	struct sec_cipher_req *c_req = &req->c_req;
>  	int ret;
>  
>  	ret = sec_request_init(ctx, req);
> @@ -1056,12 +1066,10 @@ static int sec_process(struct sec_ctx *ctx, struct sec_req *req)
>  	/* As failing, restore the IV from user */
>  	if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt) {
>  		if (ctx->alg_type == SEC_SKCIPHER)
> -			memcpy(req->c_req.sk_req->iv,
> -			       req->qp_ctx->res[req->req_id].c_ivin,
> +			memcpy(req->c_req.sk_req->iv, c_req->c_ivin,
>  			       ctx->c_ctx.ivsize);
>  		else
> -			memcpy(req->aead_req.aead_req->iv,
> -			       req->qp_ctx->res[req->req_id].c_ivin,
> +			memcpy(req->aead_req.aead_req->iv, c_req->c_ivin,
>  			       ctx->c_ctx.ivsize);
>  	}
>  
> @@ -1320,7 +1328,8 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
>  	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
>  	size_t authsize = crypto_aead_authsize(tfm);
>  
> -	if (unlikely(!req->src || !req->dst || !req->cryptlen)) {
> +	if (unlikely(!req->src || !req->dst || !req->cryptlen ||
> +		req->assoclen > SEC_MAX_AAD_LEN)) {
>  		dev_err(SEC_CTX_DEV(ctx), "aead input param error!\n");
>  		return -EINVAL;
>  	}



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 5/5] crypto: hisilicon/sec2 - Add pbuffer mode for SEC driver
  2020-03-02  6:15 ` [PATCH v2 5/5] crypto: hisilicon/sec2 - Add pbuffer mode for SEC driver Zaibo Xu
@ 2020-03-02 12:49   ` Jonathan Cameron
  2020-03-03  1:22     ` Xu Zaibo
  0 siblings, 1 reply; 18+ messages in thread
From: Jonathan Cameron @ 2020-03-02 12:49 UTC (permalink / raw)
  To: Zaibo Xu
  Cc: herbert, davem, qianweili, tanghui20, forest.zhouchang, linuxarm,
	zhangwei375, yekai13, linux-crypto

On Mon, 2 Mar 2020 14:15:16 +0800
Zaibo Xu <xuzaibo@huawei.com> wrote:

> From: liulongfang <liulongfang@huawei.com>
> 
> In the scenario of SMMU translation, the SEC performance of short messages
> (<512Bytes) cannot meet our expectations. To avoid this, we reserve the
> plat buffer (PBUF) memory for small packets when creating TFM.
> 
> Signed-off-by: liulongfang <liulongfang@huawei.com>
> Signed-off-by: Zaibo Xu <xuzaibo@huawei.com>

Hmm. I guess we pay the price for this performance trick in memory usage.
Oh well, if it becomes a problem we can always look at more clever solutions.

Looks good to me.  Fix up liulongfang's sign off..

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> ---
>  drivers/crypto/hisilicon/sec2/sec.h        |   4 +
>  drivers/crypto/hisilicon/sec2/sec_crypto.c | 173 ++++++++++++++++++++++++++++-
>  2 files changed, 172 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
> index e67b416..a73d82c 100644
> --- a/drivers/crypto/hisilicon/sec2/sec.h
> +++ b/drivers/crypto/hisilicon/sec2/sec.h
> @@ -11,6 +11,8 @@
>  
>  /* Algorithm resource per hardware SEC queue */
>  struct sec_alg_res {
> +	u8 *pbuf;
> +	dma_addr_t pbuf_dma;
>  	u8 *c_ivin;
>  	dma_addr_t c_ivin_dma;
>  	u8 *out_mac;
> @@ -50,6 +52,7 @@ struct sec_req {
>  
>  	/* Status of the SEC request */
>  	bool fake_busy;
> +	bool use_pbuf;
>  };
>  
>  /**
> @@ -130,6 +133,7 @@ struct sec_ctx {
>  	atomic_t dec_qcyclic;
>  
>  	enum sec_alg_type alg_type;
> +	bool pbuf_supported;
>  	struct sec_cipher_ctx c_ctx;
>  	struct sec_auth_ctx a_ctx;
>  };
> diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
> index 1eeaa74..3136ada 100644
> --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
> +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
> @@ -48,6 +48,19 @@
>  #define SEC_MAX_MAC_LEN		64
>  #define SEC_MAX_AAD_LEN		65535
>  #define SEC_TOTAL_MAC_SZ	(SEC_MAX_MAC_LEN * QM_Q_DEPTH)
> +
> +#define SEC_PBUF_SZ			512
> +#define SEC_PBUF_IV_OFFSET		SEC_PBUF_SZ
> +#define SEC_PBUF_MAC_OFFSET		(SEC_PBUF_SZ + SEC_IV_SIZE)
> +#define SEC_PBUF_PKG		(SEC_PBUF_SZ + SEC_IV_SIZE +	\
> +			SEC_MAX_MAC_LEN * 2)
> +#define SEC_PBUF_NUM		(PAGE_SIZE / SEC_PBUF_PKG)
> +#define SEC_PBUF_PAGE_NUM	(QM_Q_DEPTH / SEC_PBUF_NUM)
> +#define SEC_PBUF_LEFT_SZ	(SEC_PBUF_PKG * (QM_Q_DEPTH -	\
> +			SEC_PBUF_PAGE_NUM * SEC_PBUF_NUM))
> +#define SEC_TOTAL_PBUF_SZ	(PAGE_SIZE * SEC_PBUF_PAGE_NUM +	\
> +			SEC_PBUF_LEFT_SZ)
> +
>  #define SEC_SQE_LEN_RATE	4
>  #define SEC_SQE_CFLAG		2
>  #define SEC_SQE_AEAD_FLAG	3
> @@ -246,6 +259,50 @@ static void sec_free_mac_resource(struct device *dev, struct sec_alg_res *res)
>  				  res->out_mac, res->out_mac_dma);
>  }
>  
> +static void sec_free_pbuf_resource(struct device *dev, struct sec_alg_res *res)
> +{
> +	if (res->pbuf)
> +		dma_free_coherent(dev, SEC_TOTAL_PBUF_SZ,
> +				  res->pbuf, res->pbuf_dma);
> +}
> +
> +/*
> + * To improve performance, pbuffer is used for
> + * small packets (< 512Bytes) as IOMMU translation using.
> + */
> +static int sec_alloc_pbuf_resource(struct device *dev, struct sec_alg_res *res)
> +{
> +	int pbuf_page_offset;
> +	int i, j, k;
> +
> +	res->pbuf = dma_alloc_coherent(dev, SEC_TOTAL_PBUF_SZ,
> +				&res->pbuf_dma, GFP_KERNEL);
> +	if (!res->pbuf)
> +		return -ENOMEM;
> +
> +	/*
> +	 * SEC_PBUF_PKG contains data pbuf, iv and
> +	 * out_mac : <SEC_PBUF|SEC_IV|SEC_MAC>
> +	 * Every PAGE contains six SEC_PBUF_PKG
> +	 * The sec_qp_ctx contains QM_Q_DEPTH numbers of SEC_PBUF_PKG
> +	 * So we need SEC_PBUF_PAGE_NUM numbers of PAGE
> +	 * for the SEC_TOTAL_PBUF_SZ
> +	 */
> +	for (i = 0; i <= SEC_PBUF_PAGE_NUM; i++) {
> +		pbuf_page_offset = PAGE_SIZE * i;
> +		for (j = 0; j < SEC_PBUF_NUM; j++) {
> +			k = i * SEC_PBUF_NUM + j;
> +			if (k == QM_Q_DEPTH)
> +				break;
> +			res[k].pbuf = res->pbuf +
> +				j * SEC_PBUF_PKG + pbuf_page_offset;
> +			res[k].pbuf_dma = res->pbuf_dma +
> +				j * SEC_PBUF_PKG + pbuf_page_offset;
> +		}
> +	}
> +	return 0;
> +}
> +
>  static int sec_alg_resource_alloc(struct sec_ctx *ctx,
>  				  struct sec_qp_ctx *qp_ctx)
>  {
> @@ -262,6 +319,13 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
>  		if (ret)
>  			goto alloc_fail;
>  	}
> +	if (ctx->pbuf_supported) {
> +		ret = sec_alloc_pbuf_resource(dev, res);
> +		if (ret) {
> +			dev_err(dev, "fail to alloc pbuf dma resource!\n");
> +			goto alloc_fail;
> +		}
> +	}
>  
>  	return 0;
>  alloc_fail:
> @@ -279,6 +343,8 @@ static void sec_alg_resource_free(struct sec_ctx *ctx,
>  
>  	if (ctx->alg_type == SEC_AEAD)
>  		sec_free_mac_resource(dev, qp_ctx->res);
> +	if (ctx->pbuf_supported)
> +		sec_free_pbuf_resource(dev, qp_ctx->res);
Is the ordering right here?  Seems like this is allocated after the mac_resource
so should be freed before it.

I would prefer specific gotos for resource_alloc failure cases to make it clear
that different cleanup is needed if we fail allocating this from doing the
mac_resource allocation.


>  }
>  
>  static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
> @@ -369,6 +435,8 @@ static int sec_ctx_base_init(struct sec_ctx *ctx)
>  	ctx->sec = sec;
>  	ctx->hlf_q_num = sec->ctx_q_num >> 1;
>  
> +	ctx->pbuf_supported = ctx->sec->iommu_used;
> +
>  	/* Half of queue depth is taken as fake requests limit in the queue. */
>  	ctx->fake_req_limit = QM_Q_DEPTH >> 1;
>  	ctx->qp_ctx = kcalloc(sec->ctx_q_num, sizeof(struct sec_qp_ctx),
> @@ -591,6 +659,66 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC)
>  GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS)
>  GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC)
>  
> +static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req,
> +			struct scatterlist *src)
> +{
> +	struct aead_request *aead_req = req->aead_req.aead_req;
> +	struct sec_cipher_req *c_req = &req->c_req;
> +	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
> +	struct device *dev = SEC_CTX_DEV(ctx);
> +	int copy_size, pbuf_length;
> +	int req_id = req->req_id;
> +
> +	if (ctx->alg_type == SEC_AEAD)
> +		copy_size = aead_req->cryptlen + aead_req->assoclen;
> +	else
> +		copy_size = c_req->c_len;
> +
> +	pbuf_length = sg_copy_to_buffer(src, sg_nents(src),
> +				qp_ctx->res[req_id].pbuf,
> +				copy_size);
> +
> +	if (unlikely(pbuf_length != copy_size)) {
> +		dev_err(dev, "copy src data to pbuf error!\n");
> +		return -EINVAL;
> +	}
> +
> +	c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma;
> +
> +	if (!c_req->c_in_dma) {
> +		dev_err(dev, "fail to set pbuffer address!\n");
> +		return -ENOMEM;
> +	}
> +
> +	c_req->c_out_dma = c_req->c_in_dma;
> +
> +	return 0;
> +}
> +
> +static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req,
> +			struct scatterlist *dst)
> +{
> +	struct aead_request *aead_req = req->aead_req.aead_req;
> +	struct sec_cipher_req *c_req = &req->c_req;
> +	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
> +	struct device *dev = SEC_CTX_DEV(ctx);
> +	int copy_size, pbuf_length;
> +	int req_id = req->req_id;
> +
> +	if (ctx->alg_type == SEC_AEAD)
> +		copy_size = c_req->c_len + aead_req->assoclen;
> +	else
> +		copy_size = c_req->c_len;
> +
> +	pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst),
> +				qp_ctx->res[req_id].pbuf,
> +				copy_size);
> +
> +	if (unlikely(pbuf_length != copy_size))
> +		dev_err(dev, "copy pbuf data to dst error!\n");
> +
> +}
> +
>  static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
>  			  struct scatterlist *src, struct scatterlist *dst)
>  {
> @@ -599,7 +727,20 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
>  	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
>  	struct sec_alg_res *res = &qp_ctx->res[req->req_id];
>  	struct device *dev = SEC_CTX_DEV(ctx);
> +	int ret;
> +
> +	if (req->use_pbuf) {
> +		ret = sec_cipher_pbuf_map(ctx, req, src);
> +		c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET;
> +		c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET;
> +		if (ctx->alg_type == SEC_AEAD) {
> +			a_req->out_mac = res->pbuf + SEC_PBUF_MAC_OFFSET;
> +			a_req->out_mac_dma = res->pbuf_dma +
> +					SEC_PBUF_MAC_OFFSET;
> +		}
>  
> +		return ret;
> +	}
>  	c_req->c_ivin = res->c_ivin;
>  	c_req->c_ivin_dma = res->c_ivin_dma;
>  	if (ctx->alg_type == SEC_AEAD) {
> @@ -642,10 +783,14 @@ static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req,
>  	struct sec_cipher_req *c_req = &req->c_req;
>  	struct device *dev = SEC_CTX_DEV(ctx);
>  
> -	if (dst != src)
> -		hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
> +	if (req->use_pbuf) {
> +		sec_cipher_pbuf_unmap(ctx, req, dst);
> +	} else {
> +		if (dst != src)
> +			hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
>  
> -	hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
> +		hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
> +	}
>  }
>  
>  static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
> @@ -844,7 +989,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
>  		cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET;
>  	sec_sqe->type_cipher_auth = bd_type | cipher;
>  
> -	sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
> +	if (req->use_pbuf)
> +		sa_type = SEC_PBUF << SEC_SRC_SGL_OFFSET;
> +	else
> +		sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
>  	scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET;
>  	if (c_req->c_in_dma != c_req->c_out_dma)
>  		de = 0x1 << SEC_DE_OFFSET;
> @@ -852,7 +1000,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
>  	sec_sqe->sds_sa_type = (de | scene | sa_type);
>  
>  	/* Just set DST address type */
> -	da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
> +	if (req->use_pbuf)
> +		da_type = SEC_PBUF << SEC_DST_SGL_OFFSET;
> +	else
> +		da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
>  	sec_sqe->sdm_addr_type |= da_type;
>  
>  	sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len);
> @@ -1215,6 +1366,12 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
>  		return -EINVAL;
>  	}
>  	sreq->c_req.c_len = sk_req->cryptlen;
> +
> +	if (ctx->pbuf_supported && sk_req->cryptlen <= SEC_PBUF_SZ)
> +		sreq->use_pbuf = true;
> +	else
> +		sreq->use_pbuf = false;
> +
>  	if (c_alg == SEC_CALG_3DES) {
>  		if (unlikely(sk_req->cryptlen & (DES3_EDE_BLOCK_SIZE - 1))) {
>  			dev_err(dev, "skcipher 3des input length error!\n");
> @@ -1334,6 +1491,12 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
>  		return -EINVAL;
>  	}
>  
> +	if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <=
> +		SEC_PBUF_SZ)
> +		sreq->use_pbuf = true;
> +	else
> +		sreq->use_pbuf = false;
> +
>  	/* Support AES only */
>  	if (unlikely(c_alg != SEC_CALG_AES)) {
>  		dev_err(SEC_CTX_DEV(ctx), "aead crypto alg error!\n");



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 5/5] crypto: hisilicon/sec2 - Add pbuffer mode for SEC driver
  2020-03-02 12:49   ` Jonathan Cameron
@ 2020-03-03  1:22     ` Xu Zaibo
  0 siblings, 0 replies; 18+ messages in thread
From: Xu Zaibo @ 2020-03-03  1:22 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: herbert, davem, qianweili, tanghui20, forest.zhouchang, linuxarm,
	zhangwei375, yekai13, linux-crypto

Hi,
On 2020/3/2 20:49, Jonathan Cameron wrote:
> On Mon, 2 Mar 2020 14:15:16 +0800
> Zaibo Xu <xuzaibo@huawei.com> wrote:
>
>> From: liulongfang <liulongfang@huawei.com>
>>
>> In the scenario of SMMU translation, the SEC performance of short messages
>> (<512Bytes) cannot meet our expectations. To avoid this, we reserve the
>> plat buffer (PBUF) memory for small packets when creating TFM.
>>
>> Signed-off-by: liulongfang <liulongfang@huawei.com>
>> Signed-off-by: Zaibo Xu <xuzaibo@huawei.com>
> Hmm. I guess we pay the price for this performance trick in memory usage.
> Oh well, if it becomes a problem we can always look at more clever solutions.
>
> Looks good to me.  Fix up liulongfang's sign off..
>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Okay. And we have done the performance test with a DMA pool, which shows
worse performance: about a -5.5% drop in throughput at 512-byte packets
with 16 kthreads.
>
>> ---
>>   drivers/crypto/hisilicon/sec2/sec.h        |   4 +
>>   drivers/crypto/hisilicon/sec2/sec_crypto.c | 173 ++++++++++++++++++++++++++++-
>>   2 files changed, 172 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
>> index e67b416..a73d82c 100644
>> --- a/drivers/crypto/hisilicon/sec2/sec.h
>> +++ b/drivers/crypto/hisilicon/sec2/sec.h
>> @@ -11,6 +11,8 @@
>>   
>>   /* Algorithm resource per hardware SEC queue */
>>   struct sec_alg_res {
>> +	u8 *pbuf;
>> +	dma_addr_t pbuf_dma;
>>   	u8 *c_ivin;
>>   	dma_addr_t c_ivin_dma;
>>   	u8 *out_mac;
>> @@ -50,6 +52,7 @@ struct sec_req {
>>   
>>   	/* Status of the SEC request */
>>   	bool fake_busy;
>> +	bool use_pbuf;
>>   };
>>   
>>   /**
>> @@ -130,6 +133,7 @@ struct sec_ctx {
>>   	atomic_t dec_qcyclic;
>>   
>>   	enum sec_alg_type alg_type;
>> +	bool pbuf_supported;
>>   	struct sec_cipher_ctx c_ctx;
>>   	struct sec_auth_ctx a_ctx;
>>   };
>> diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
>> index 1eeaa74..3136ada 100644
>> --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
>> +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
>> @@ -48,6 +48,19 @@
>>   #define SEC_MAX_MAC_LEN		64
>>   #define SEC_MAX_AAD_LEN		65535
>>   #define SEC_TOTAL_MAC_SZ	(SEC_MAX_MAC_LEN * QM_Q_DEPTH)
>> +
>> +#define SEC_PBUF_SZ			512
>> +#define SEC_PBUF_IV_OFFSET		SEC_PBUF_SZ
>> +#define SEC_PBUF_MAC_OFFSET		(SEC_PBUF_SZ + SEC_IV_SIZE)
>> +#define SEC_PBUF_PKG		(SEC_PBUF_SZ + SEC_IV_SIZE +	\
>> +			SEC_MAX_MAC_LEN * 2)
>> +#define SEC_PBUF_NUM		(PAGE_SIZE / SEC_PBUF_PKG)
>> +#define SEC_PBUF_PAGE_NUM	(QM_Q_DEPTH / SEC_PBUF_NUM)
>> +#define SEC_PBUF_LEFT_SZ	(SEC_PBUF_PKG * (QM_Q_DEPTH -	\
>> +			SEC_PBUF_PAGE_NUM * SEC_PBUF_NUM))
>> +#define SEC_TOTAL_PBUF_SZ	(PAGE_SIZE * SEC_PBUF_PAGE_NUM +	\
>> +			SEC_PBUF_LEFT_SZ)
>> +
>>   #define SEC_SQE_LEN_RATE	4
>>   #define SEC_SQE_CFLAG		2
>>   #define SEC_SQE_AEAD_FLAG	3
>> @@ -246,6 +259,50 @@ static void sec_free_mac_resource(struct device *dev, struct sec_alg_res *res)
>>   				  res->out_mac, res->out_mac_dma);
>>   }
>>   
>> +static void sec_free_pbuf_resource(struct device *dev, struct sec_alg_res *res)
>> +{
>> +	if (res->pbuf)
>> +		dma_free_coherent(dev, SEC_TOTAL_PBUF_SZ,
>> +				  res->pbuf, res->pbuf_dma);
>> +}
>> +
>> +/*
>> + * To improve performance, pbuffer is used for
>> + * small packets (< 512Bytes) as IOMMU translation using.
>> + */
>> +static int sec_alloc_pbuf_resource(struct device *dev, struct sec_alg_res *res)
>> +{
>> +	int pbuf_page_offset;
>> +	int i, j, k;
>> +
>> +	res->pbuf = dma_alloc_coherent(dev, SEC_TOTAL_PBUF_SZ,
>> +				&res->pbuf_dma, GFP_KERNEL);
>> +	if (!res->pbuf)
>> +		return -ENOMEM;
>> +
>> +	/*
>> +	 * SEC_PBUF_PKG contains data pbuf, iv and
>> +	 * out_mac : <SEC_PBUF|SEC_IV|SEC_MAC>
>> +	 * Every PAGE contains six SEC_PBUF_PKG
>> +	 * The sec_qp_ctx contains QM_Q_DEPTH numbers of SEC_PBUF_PKG
>> +	 * So we need SEC_PBUF_PAGE_NUM numbers of PAGE
>> +	 * for the SEC_TOTAL_PBUF_SZ
>> +	 */
>> +	for (i = 0; i <= SEC_PBUF_PAGE_NUM; i++) {
>> +		pbuf_page_offset = PAGE_SIZE * i;
>> +		for (j = 0; j < SEC_PBUF_NUM; j++) {
>> +			k = i * SEC_PBUF_NUM + j;
>> +			if (k == QM_Q_DEPTH)
>> +				break;
>> +			res[k].pbuf = res->pbuf +
>> +				j * SEC_PBUF_PKG + pbuf_page_offset;
>> +			res[k].pbuf_dma = res->pbuf_dma +
>> +				j * SEC_PBUF_PKG + pbuf_page_offset;
>> +		}
>> +	}
>> +	return 0;
>> +}
>> +
>>   static int sec_alg_resource_alloc(struct sec_ctx *ctx,
>>   				  struct sec_qp_ctx *qp_ctx)
>>   {
>> @@ -262,6 +319,13 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
>>   		if (ret)
>>   			goto alloc_fail;
>>   	}
>> +	if (ctx->pbuf_supported) {
>> +		ret = sec_alloc_pbuf_resource(dev, res);
>> +		if (ret) {
>> +			dev_err(dev, "fail to alloc pbuf dma resource!\n");
>> +			goto alloc_fail;
>> +		}
>> +	}
>>   
>>   	return 0;
>>   alloc_fail:
>> @@ -279,6 +343,8 @@ static void sec_alg_resource_free(struct sec_ctx *ctx,
>>   
>>   	if (ctx->alg_type == SEC_AEAD)
>>   		sec_free_mac_resource(dev, qp_ctx->res);
>> +	if (ctx->pbuf_supported)
>> +		sec_free_pbuf_resource(dev, qp_ctx->res);
> Is the ordering right here?  Seems like this is allocated after the mac_resource
> so should be freed before it.
>
> I would prefer specific gotos for resource_alloc failure cases to make it clear
> that different cleanup is needed if we fail allocating this from doing the
> mac_resource allocation.
Yes, agree. 'sec_alg_resource_alloc' also has a problem.

Cheers,
Zaibo

.
>
>>   }
>>   
>>   static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
>> @@ -369,6 +435,8 @@ static int sec_ctx_base_init(struct sec_ctx *ctx)
>>   	ctx->sec = sec;
>>   	ctx->hlf_q_num = sec->ctx_q_num >> 1;
>>   
>> +	ctx->pbuf_supported = ctx->sec->iommu_used;
>> +
>>   	/* Half of queue depth is taken as fake requests limit in the queue. */
>>   	ctx->fake_req_limit = QM_Q_DEPTH >> 1;
>>   	ctx->qp_ctx = kcalloc(sec->ctx_q_num, sizeof(struct sec_qp_ctx),
>> @@ -591,6 +659,66 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC)
>>   GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS)
>>   GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC)
>>   
>> +static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req,
>> +			struct scatterlist *src)
>> +{
>> +	struct aead_request *aead_req = req->aead_req.aead_req;
>> +	struct sec_cipher_req *c_req = &req->c_req;
>> +	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
>> +	struct device *dev = SEC_CTX_DEV(ctx);
>> +	int copy_size, pbuf_length;
>> +	int req_id = req->req_id;
>> +
>> +	if (ctx->alg_type == SEC_AEAD)
>> +		copy_size = aead_req->cryptlen + aead_req->assoclen;
>> +	else
>> +		copy_size = c_req->c_len;
>> +
>> +	pbuf_length = sg_copy_to_buffer(src, sg_nents(src),
>> +				qp_ctx->res[req_id].pbuf,
>> +				copy_size);
>> +
>> +	if (unlikely(pbuf_length != copy_size)) {
>> +		dev_err(dev, "copy src data to pbuf error!\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma;
>> +
>> +	if (!c_req->c_in_dma) {
>> +		dev_err(dev, "fail to set pbuffer address!\n");
>> +		return -ENOMEM;
>> +	}
>> +
>> +	c_req->c_out_dma = c_req->c_in_dma;
>> +
>> +	return 0;
>> +}
>> +
>> +static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req,
>> +			struct scatterlist *dst)
>> +{
>> +	struct aead_request *aead_req = req->aead_req.aead_req;
>> +	struct sec_cipher_req *c_req = &req->c_req;
>> +	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
>> +	struct device *dev = SEC_CTX_DEV(ctx);
>> +	int copy_size, pbuf_length;
>> +	int req_id = req->req_id;
>> +
>> +	if (ctx->alg_type == SEC_AEAD)
>> +		copy_size = c_req->c_len + aead_req->assoclen;
>> +	else
>> +		copy_size = c_req->c_len;
>> +
>> +	pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst),
>> +				qp_ctx->res[req_id].pbuf,
>> +				copy_size);
>> +
>> +	if (unlikely(pbuf_length != copy_size))
>> +		dev_err(dev, "copy pbuf data to dst error!\n");
>> +
>> +}
>> +
>>   static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
>>   			  struct scatterlist *src, struct scatterlist *dst)
>>   {
>> @@ -599,7 +727,20 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
>>   	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
>>   	struct sec_alg_res *res = &qp_ctx->res[req->req_id];
>>   	struct device *dev = SEC_CTX_DEV(ctx);
>> +	int ret;
>> +
>> +	if (req->use_pbuf) {
>> +		ret = sec_cipher_pbuf_map(ctx, req, src);
>> +		c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET;
>> +		c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET;
>> +		if (ctx->alg_type == SEC_AEAD) {
>> +			a_req->out_mac = res->pbuf + SEC_PBUF_MAC_OFFSET;
>> +			a_req->out_mac_dma = res->pbuf_dma +
>> +					SEC_PBUF_MAC_OFFSET;
>> +		}
>>   
>> +		return ret;
>> +	}
>>   	c_req->c_ivin = res->c_ivin;
>>   	c_req->c_ivin_dma = res->c_ivin_dma;
>>   	if (ctx->alg_type == SEC_AEAD) {
>> @@ -642,10 +783,14 @@ static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req,
>>   	struct sec_cipher_req *c_req = &req->c_req;
>>   	struct device *dev = SEC_CTX_DEV(ctx);
>>   
>> -	if (dst != src)
>> -		hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
>> +	if (req->use_pbuf) {
>> +		sec_cipher_pbuf_unmap(ctx, req, dst);
>> +	} else {
>> +		if (dst != src)
>> +			hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
>>   
>> -	hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
>> +		hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
>> +	}
>>   }
>>   
>>   static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
>> @@ -844,7 +989,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
>>   		cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET;
>>   	sec_sqe->type_cipher_auth = bd_type | cipher;
>>   
>> -	sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
>> +	if (req->use_pbuf)
>> +		sa_type = SEC_PBUF << SEC_SRC_SGL_OFFSET;
>> +	else
>> +		sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
>>   	scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET;
>>   	if (c_req->c_in_dma != c_req->c_out_dma)
>>   		de = 0x1 << SEC_DE_OFFSET;
>> @@ -852,7 +1000,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
>>   	sec_sqe->sds_sa_type = (de | scene | sa_type);
>>   
>>   	/* Just set DST address type */
>> -	da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
>> +	if (req->use_pbuf)
>> +		da_type = SEC_PBUF << SEC_DST_SGL_OFFSET;
>> +	else
>> +		da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
>>   	sec_sqe->sdm_addr_type |= da_type;
>>   
>>   	sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len);
>> @@ -1215,6 +1366,12 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
>>   		return -EINVAL;
>>   	}
>>   	sreq->c_req.c_len = sk_req->cryptlen;
>> +
>> +	if (ctx->pbuf_supported && sk_req->cryptlen <= SEC_PBUF_SZ)
>> +		sreq->use_pbuf = true;
>> +	else
>> +		sreq->use_pbuf = false;
>> +
>>   	if (c_alg == SEC_CALG_3DES) {
>>   		if (unlikely(sk_req->cryptlen & (DES3_EDE_BLOCK_SIZE - 1))) {
>>   			dev_err(dev, "skcipher 3des input length error!\n");
>> @@ -1334,6 +1491,12 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
>>   		return -EINVAL;
>>   	}
>>   
>> +	if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <=
>> +		SEC_PBUF_SZ)
>> +		sreq->use_pbuf = true;
>> +	else
>> +		sreq->use_pbuf = false;
>> +
>>   	/* Support AES only */
>>   	if (unlikely(c_alg != SEC_CALG_AES)) {
>>   		dev_err(SEC_CTX_DEV(ctx), "aead crypto alg error!\n");
>
> .
>



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 4/5] crypto: hisilicon/sec2 - Update IV and MAC operation
  2020-03-02 11:58   ` Jonathan Cameron
@ 2020-03-03  1:24     ` Xu Zaibo
  0 siblings, 0 replies; 18+ messages in thread
From: Xu Zaibo @ 2020-03-03  1:24 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: herbert, davem, qianweili, tanghui20, forest.zhouchang, linuxarm,
	zhangwei375, yekai13, linux-crypto

Hi,

On 2020/3/2 19:58, Jonathan Cameron wrote:
> On Mon, 2 Mar 2020 14:15:15 +0800
> Zaibo Xu <xuzaibo@huawei.com> wrote:
>
>> From: liulongfang <liulongfang@huawei.com>
>>
>> We have updated the operation method of IV and MAC address
>> to prepare for pbuf patch.
>>
>> Signed-off-by: liulongfang <liulongfang@huawei.com>
>> Signed-off-by: Zaibo Xu <xuzaibo@huawei.com>
> Trivial comment inline.
>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Okay.
>
>> ---
>>   drivers/crypto/hisilicon/sec2/sec.h        |  2 +
>>   drivers/crypto/hisilicon/sec2/sec_crypto.c | 73 +++++++++++++++++-------------
>>   2 files changed, 43 insertions(+), 32 deletions(-)
>>
>> diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
>> index eab0d22..e67b416 100644
>> --- a/drivers/crypto/hisilicon/sec2/sec.h
>> +++ b/drivers/crypto/hisilicon/sec2/sec.h
>> @@ -23,6 +23,8 @@ struct sec_cipher_req {
>>   	dma_addr_t c_in_dma;
>>   	struct hisi_acc_hw_sgl *c_out;
>>   	dma_addr_t c_out_dma;
>> +	u8 *c_ivin;
>> +	dma_addr_t c_ivin_dma;
>>   	struct skcipher_request *sk_req;
>>   	u32 c_len;
>>   	bool encrypt;
>> diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
>> index acd1550..1eeaa74 100644
>> --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
>> +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
>> @@ -46,6 +46,7 @@
>>   #define SEC_CIPHER_AUTH		0xfe
>>   #define SEC_AUTH_CIPHER		0x1
>>   #define SEC_MAX_MAC_LEN		64
>> +#define SEC_MAX_AAD_LEN		65535
>>   #define SEC_TOTAL_MAC_SZ	(SEC_MAX_MAC_LEN * QM_Q_DEPTH)
>>   #define SEC_SQE_LEN_RATE	4
>>   #define SEC_SQE_CFLAG		2
>> @@ -110,12 +111,12 @@ static void sec_free_req_id(struct sec_req *req)
>>   	mutex_unlock(&qp_ctx->req_lock);
>>   }
>>   
>> -static int sec_aead_verify(struct sec_req *req, struct sec_qp_ctx *qp_ctx)
>> +static int sec_aead_verify(struct sec_req *req)
>>   {
>>   	struct aead_request *aead_req = req->aead_req.aead_req;
>>   	struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req);
>> -	u8 *mac_out = qp_ctx->res[req->req_id].out_mac;
>>   	size_t authsize = crypto_aead_authsize(tfm);
>> +	u8 *mac_out = req->aead_req.out_mac;
>>   	u8 *mac = mac_out + SEC_MAX_MAC_LEN;
>>   	struct scatterlist *sgl = aead_req->src;
>>   	size_t sz;
>> @@ -163,7 +164,7 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
>>   	}
>>   
>>   	if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt)
>> -		err = sec_aead_verify(req, qp_ctx);
>> +		err = sec_aead_verify(req);
>>   
>>   	atomic64_inc(&ctx->sec->debug.dfx.recv_cnt);
>>   
>> @@ -259,11 +260,11 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
>>   	if (ctx->alg_type == SEC_AEAD) {
>>   		ret = sec_alloc_mac_resource(dev, res);
>>   		if (ret)
>> -			goto get_fail;
>> +			goto alloc_fail;
> This looks like an unrelated change.
> At very least should be mentioned as an additional cleanup in the patch intro.
Yes, will add it.

Thanks,
Zaibo

.
>
>>   	}
>>   
>>   	return 0;
>> -get_fail:
>> +alloc_fail:
>>   	sec_free_civ_resource(dev, res);
>>   
>>   	return ret;
>> @@ -590,11 +591,21 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC)
>>   GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS)
>>   GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC)
>>   
>> -static int sec_cipher_map(struct device *dev, struct sec_req *req,
>> +static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
>>   			  struct scatterlist *src, struct scatterlist *dst)
>>   {
>>   	struct sec_cipher_req *c_req = &req->c_req;
>> +	struct sec_aead_req *a_req = &req->aead_req;
>>   	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
>> +	struct sec_alg_res *res = &qp_ctx->res[req->req_id];
>> +	struct device *dev = SEC_CTX_DEV(ctx);
>> +
>> +	c_req->c_ivin = res->c_ivin;
>> +	c_req->c_ivin_dma = res->c_ivin_dma;
>> +	if (ctx->alg_type == SEC_AEAD) {
>> +		a_req->out_mac = res->out_mac;
>> +		a_req->out_mac_dma = res->out_mac_dma;
>> +	}
>>   
>>   	c_req->c_in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src,
>>   						    qp_ctx->c_in_pool,
>> @@ -625,29 +636,30 @@ static int sec_cipher_map(struct device *dev, struct sec_req *req,
>>   	return 0;
>>   }
>>   
>> -static void sec_cipher_unmap(struct device *dev, struct sec_cipher_req *req,
>> +static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req,
>>   			     struct scatterlist *src, struct scatterlist *dst)
>>   {
>> +	struct sec_cipher_req *c_req = &req->c_req;
>> +	struct device *dev = SEC_CTX_DEV(ctx);
>> +
>>   	if (dst != src)
>> -		hisi_acc_sg_buf_unmap(dev, src, req->c_in);
>> +		hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
>>   
>> -	hisi_acc_sg_buf_unmap(dev, dst, req->c_out);
>> +	hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
>>   }
>>   
>>   static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
>>   {
>>   	struct skcipher_request *sq = req->c_req.sk_req;
>>   
>> -	return sec_cipher_map(SEC_CTX_DEV(ctx), req, sq->src, sq->dst);
>> +	return sec_cipher_map(ctx, req, sq->src, sq->dst);
>>   }
>>   
>>   static void sec_skcipher_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req)
>>   {
>> -	struct device *dev = SEC_CTX_DEV(ctx);
>> -	struct sec_cipher_req *c_req = &req->c_req;
>> -	struct skcipher_request *sk_req = c_req->sk_req;
>> +	struct skcipher_request *sq = req->c_req.sk_req;
>>   
>> -	sec_cipher_unmap(dev, c_req, sk_req->src, sk_req->dst);
>> +	sec_cipher_unmap(ctx, req, sq->src, sq->dst);
>>   }
>>   
>>   static int sec_aead_aes_set_key(struct sec_cipher_ctx *c_ctx,
>> @@ -758,16 +770,14 @@ static int sec_aead_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
>>   {
>>   	struct aead_request *aq = req->aead_req.aead_req;
>>   
>> -	return sec_cipher_map(SEC_CTX_DEV(ctx), req, aq->src, aq->dst);
>> +	return sec_cipher_map(ctx, req, aq->src, aq->dst);
>>   }
>>   
>>   static void sec_aead_sgl_unmap(struct sec_ctx *ctx, struct sec_req *req)
>>   {
>> -	struct device *dev = SEC_CTX_DEV(ctx);
>> -	struct sec_cipher_req *cq = &req->c_req;
>>   	struct aead_request *aq = req->aead_req.aead_req;
>>   
>> -	sec_cipher_unmap(dev, cq, aq->src, aq->dst);
>> +	sec_cipher_unmap(ctx, req, aq->src, aq->dst);
>>   }
>>   
>>   static int sec_request_transfer(struct sec_ctx *ctx, struct sec_req *req)
>> @@ -800,9 +810,9 @@ static void sec_request_untransfer(struct sec_ctx *ctx, struct sec_req *req)
>>   static void sec_skcipher_copy_iv(struct sec_ctx *ctx, struct sec_req *req)
>>   {
>>   	struct skcipher_request *sk_req = req->c_req.sk_req;
>> -	u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin;
>> +	struct sec_cipher_req *c_req = &req->c_req;
>>   
>> -	memcpy(c_ivin, sk_req->iv, ctx->c_ctx.ivsize);
>> +	memcpy(c_req->c_ivin, sk_req->iv, ctx->c_ctx.ivsize);
>>   }
>>   
>>   static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
>> @@ -817,8 +827,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
>>   	memset(sec_sqe, 0, sizeof(struct sec_sqe));
>>   
>>   	sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma);
>> -	sec_sqe->type2.c_ivin_addr =
>> -		cpu_to_le64(req->qp_ctx->res[req->req_id].c_ivin_dma);
>> +	sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma);
>>   	sec_sqe->type2.data_src_addr = cpu_to_le64(c_req->c_in_dma);
>>   	sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma);
>>   
>> @@ -903,9 +912,9 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req,
>>   static void sec_aead_copy_iv(struct sec_ctx *ctx, struct sec_req *req)
>>   {
>>   	struct aead_request *aead_req = req->aead_req.aead_req;
>> -	u8 *c_ivin = req->qp_ctx->res[req->req_id].c_ivin;
>> +	struct sec_cipher_req *c_req = &req->c_req;
>>   
>> -	memcpy(c_ivin, aead_req->iv, ctx->c_ctx.ivsize);
>> +	memcpy(c_req->c_ivin, aead_req->iv, ctx->c_ctx.ivsize);
>>   }
>>   
>>   static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
>> @@ -938,8 +947,7 @@ static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
>>   
>>   	sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen);
>>   
>> -	sec_sqe->type2.mac_addr =
>> -		cpu_to_le64(req->qp_ctx->res[req->req_id].out_mac_dma);
>> +	sec_sqe->type2.mac_addr = cpu_to_le64(a_req->out_mac_dma);
>>   }
>>   
>>   static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
>> @@ -963,6 +971,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err)
>>   {
>>   	struct aead_request *a_req = req->aead_req.aead_req;
>>   	struct crypto_aead *tfm = crypto_aead_reqtfm(a_req);
>> +	struct sec_aead_req *aead_req = &req->aead_req;
>>   	struct sec_cipher_req *c_req = &req->c_req;
>>   	size_t authsize = crypto_aead_authsize(tfm);
>>   	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
>> @@ -978,7 +987,7 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err)
>>   		struct scatterlist *sgl = a_req->dst;
>>   
>>   		sz = sg_pcopy_from_buffer(sgl, sg_nents(sgl),
>> -					  qp_ctx->res[req->req_id].out_mac,
>> +					  aead_req->out_mac,
>>   					  authsize, a_req->cryptlen +
>>   					  a_req->assoclen);
>>   
>> @@ -1030,6 +1039,7 @@ static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req)
>>   
>>   static int sec_process(struct sec_ctx *ctx, struct sec_req *req)
>>   {
>> +	struct sec_cipher_req *c_req = &req->c_req;
>>   	int ret;
>>   
>>   	ret = sec_request_init(ctx, req);
>> @@ -1056,12 +1066,10 @@ static int sec_process(struct sec_ctx *ctx, struct sec_req *req)
>>   	/* As failing, restore the IV from user */
>>   	if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt) {
>>   		if (ctx->alg_type == SEC_SKCIPHER)
>> -			memcpy(req->c_req.sk_req->iv,
>> -			       req->qp_ctx->res[req->req_id].c_ivin,
>> +			memcpy(req->c_req.sk_req->iv, c_req->c_ivin,
>>   			       ctx->c_ctx.ivsize);
>>   		else
>> -			memcpy(req->aead_req.aead_req->iv,
>> -			       req->qp_ctx->res[req->req_id].c_ivin,
>> +			memcpy(req->aead_req.aead_req->iv, c_req->c_ivin,
>>   			       ctx->c_ctx.ivsize);
>>   	}
>>   
>> @@ -1320,7 +1328,8 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
>>   	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
>>   	size_t authsize = crypto_aead_authsize(tfm);
>>   
>> -	if (unlikely(!req->src || !req->dst || !req->cryptlen)) {
>> +	if (unlikely(!req->src || !req->dst || !req->cryptlen ||
>> +		req->assoclen > SEC_MAX_AAD_LEN)) {
>>   		dev_err(SEC_CTX_DEV(ctx), "aead input param error!\n");
>>   		return -EINVAL;
>>   	}
>
> .
>



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 3/5] crypto: hisilicon/sec2 - Add iommu status check
  2020-03-02 11:54   ` Jonathan Cameron
@ 2020-03-03  1:25     ` Xu Zaibo
  2020-03-03  2:16     ` Yunsheng Lin
  1 sibling, 0 replies; 18+ messages in thread
From: Xu Zaibo @ 2020-03-03  1:25 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: herbert, davem, qianweili, tanghui20, forest.zhouchang, linuxarm,
	zhangwei375, yekai13, linux-crypto

Hi,


On 2020/3/2 19:54, Jonathan Cameron wrote:
> On Mon, 2 Mar 2020 14:15:14 +0800
> Zaibo Xu <xuzaibo@huawei.com> wrote:
>
>> From: liulongfang <liulongfang@huawei.com>
>>
>> In order to improve performance of small packets (<512Bytes)
>> in SMMU translation scenario,We need to identify the type of IOMMU
>> in the SEC probe to process small packets by a different method.
>>
>> Signed-off-by: liulongfang <liulongfang@huawei.com>
>> Reviewed-by: Zaibo Xu <xuzaibo@huawei.com>
> This looks like what we ended up with for the SECv1 driver.
Yes.
>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Okay

.
>> ---
>>   drivers/crypto/hisilicon/sec2/sec.h      |  1 +
>>   drivers/crypto/hisilicon/sec2/sec_main.c | 19 +++++++++++++++++++
>>   2 files changed, 20 insertions(+)
>>
>> diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
>> index 13e2d8d..eab0d22 100644
>> --- a/drivers/crypto/hisilicon/sec2/sec.h
>> +++ b/drivers/crypto/hisilicon/sec2/sec.h
>> @@ -165,6 +165,7 @@ struct sec_dev {
>>   	struct list_head list;
>>   	struct sec_debug debug;
>>   	u32 ctx_q_num;
>> +	bool iommu_used;
>>   	u32 num_vfs;
>>   	unsigned long status;
>>   };
>> diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
>> index ebafc1c..6466d90 100644
>> --- a/drivers/crypto/hisilicon/sec2/sec_main.c
>> +++ b/drivers/crypto/hisilicon/sec2/sec_main.c
>> @@ -7,6 +7,7 @@
>>   #include <linux/debugfs.h>
>>   #include <linux/init.h>
>>   #include <linux/io.h>
>> +#include <linux/iommu.h>
>>   #include <linux/kernel.h>
>>   #include <linux/module.h>
>>   #include <linux/pci.h>
>> @@ -826,6 +827,23 @@ static void sec_probe_uninit(struct hisi_qm *qm)
>>   	destroy_workqueue(qm->wq);
>>   }
>>   
>> +static void sec_iommu_used_check(struct sec_dev *sec)
>> +{
>> +	struct iommu_domain *domain;
>> +	struct device *dev = &sec->qm.pdev->dev;
>> +
>> +	domain = iommu_get_domain_for_dev(dev);
>> +
>> +	/* Check if iommu is used */
>> +	sec->iommu_used = false;
>> +	if (domain) {
>> +		if (domain->type & __IOMMU_DOMAIN_PAGING)
>> +			sec->iommu_used = true;
>> +		dev_info(dev, "SMMU Opened, the iommu type = %u\n",
>> +			domain->type);
>> +	}
>> +}
>> +
>>   static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>   {
>>   	struct sec_dev *sec;
>> @@ -839,6 +857,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>   	pci_set_drvdata(pdev, sec);
>>   
>>   	sec->ctx_q_num = ctx_q_num;
>> +	sec_iommu_used_check(sec);
>>   
>>   	qm = &sec->qm;
>>   
>
> .
>



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 2/5] crypto: hisilicon/sec2 - Add workqueue for SEC driver.
  2020-03-02 11:51   ` Jonathan Cameron
@ 2020-03-03  1:35     ` Xu Zaibo
  0 siblings, 0 replies; 18+ messages in thread
From: Xu Zaibo @ 2020-03-03  1:35 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: herbert, davem, qianweili, tanghui20, forest.zhouchang, linuxarm,
	zhangwei375, yekai13, linux-crypto

Hi,
On 2020/3/2 19:51, Jonathan Cameron wrote:
> On Mon, 2 Mar 2020 14:15:13 +0800
> Zaibo Xu <xuzaibo@huawei.com> wrote:
>
>> From: yekai13 <yekai13@huawei.com>
>>
>> Allocate one workqueue for each QM instead of one for all QMs,
>> we found the throughput of SEC engine can be increased to
>> the hardware limit throughput during testing sec2 performance.
>> so we added this scheme.
>>
>> Signed-off-by: yekai13 <yekai13@huawei.com>
>> Signed-off-by: liulongfang <liulongfang@huawei.com>
> That first sign off needs fixing.  Needs to be a real name.
Okay, will fix it.
>
> Also missing xuzaibo's sign offf.
>
> A question inline that might be worth a follow up patch.
>
> With signoffs fixed
>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Okay.
>
> Thanks,
>
> Jonathan
>
>> ---
>>   drivers/crypto/hisilicon/sec2/sec_main.c | 26 +++++++++++++++++++++++---
>>   1 file changed, 23 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
>> index 3767fdb..ebafc1c 100644
>> --- a/drivers/crypto/hisilicon/sec2/sec_main.c
>> +++ b/drivers/crypto/hisilicon/sec2/sec_main.c
>> @@ -774,12 +774,24 @@ static void sec_qm_uninit(struct hisi_qm *qm)
>>   
>>   static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec)
>>   {
>> +	int ret;
>> +
>> +	qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_CPU_INTENSIVE |
>> +		WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus(),
>> +		pci_name(qm->pdev));
> I appreciate that you have the same parameters here as were originally in
> qm.c, but I would like to fully understand why some of these flags are set.
>
> Perhaps a comment for each of them?  I'm not sure I'd consider the work
> to be done in this work queue CPU_INTENSIVE for example.
Okay. I think this is borrowed from dm-crypt's workqueue flags :)
>
> This could be a follow up patch though as not actually related to this
> change.
This change is to improve the throughput when running multiple threads.
With only one workqueue shared by all the QMs, that workqueue becomes the
bottleneck.

Thanks,
Zaibo

.
>
>> +	if (!qm->wq) {
>> +		pci_err(qm->pdev, "fail to alloc workqueue\n");
>> +		return -ENOMEM;
>> +	}
>> +
>>   	if (qm->fun_type == QM_HW_PF) {
>>   		qm->qp_base = SEC_PF_DEF_Q_BASE;
>>   		qm->qp_num = pf_q_num;
>>   		qm->debug.curr_qm_qp_num = pf_q_num;
>>   
>> -		return sec_pf_probe_init(sec);
>> +		ret = sec_pf_probe_init(sec);
>> +		if (ret)
>> +			goto err_probe_uninit;
>>   	} else if (qm->fun_type == QM_HW_VF) {
>>   		/*
>>   		 * have no way to get qm configure in VM in v1 hardware,
>> @@ -792,18 +804,26 @@ static int sec_probe_init(struct hisi_qm *qm, struct sec_dev *sec)
>>   			qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM;
>>   		} else if (qm->ver == QM_HW_V2) {
>>   			/* v2 starts to support get vft by mailbox */
>> -			return hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
>> +			ret = hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
>> +			if (ret)
>> +				goto err_probe_uninit;
>>   		}
>>   	} else {
>> -		return -ENODEV;
>> +		ret = -ENODEV;
>> +		goto err_probe_uninit;
>>   	}
>>   
>>   	return 0;
>> +err_probe_uninit:
>> +	destroy_workqueue(qm->wq);
>> +	return ret;
>>   }
>>   
>>   static void sec_probe_uninit(struct hisi_qm *qm)
>>   {
>>   	hisi_qm_dev_err_uninit(qm);
>> +
>> +	destroy_workqueue(qm->wq);
>>   }
>>   
>>   static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>
> .
>



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/5] crypto: hisilicon - Use one workqueue per qm instead of per qp
  2020-03-02 11:39   ` Jonathan Cameron
@ 2020-03-03  1:38     ` Xu Zaibo
  0 siblings, 0 replies; 18+ messages in thread
From: Xu Zaibo @ 2020-03-03  1:38 UTC (permalink / raw)
  To: Jonathan Cameron
  Cc: herbert, davem, qianweili, tanghui20, forest.zhouchang, linuxarm,
	zhangwei375, yekai13, linux-crypto

Hi,
On 2020/3/2 19:39, Jonathan Cameron wrote:
> On Mon, 2 Mar 2020 14:15:12 +0800
> Zaibo Xu <xuzaibo@huawei.com> wrote:
>
>> From: Shukun Tan <tanshukun1@huawei.com>
>>
>> Because so many work queues are not needed. Using one workqueue
>> per QM will reduce the number of kworker threads as well as
>> reducing usage of CPU. This would not degrade any performance.
>>
>> Signed-off-by: Shukun Tan <tanshukun1@huawei.com>
> Hi, this is more or less fine I think other than:
>
> 1) Needs a sign off from xuzaibo to reflect the handling of the patch.
Okay
> 2) The description doesn't mention that we aren't actually creating the
>     per QM workqueue in this patch (it comes later in the series)
yes, will add description.
> 3) The fallback to the system workqueue needs documentation inline.
>
> So tidy those up for v3 and I'm happy.
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>
Okay, no problem.
>
>> ---
>>   drivers/crypto/hisilicon/qm.c | 38 +++++++++++++++-----------------------
>>   drivers/crypto/hisilicon/qm.h |  5 +++--
>>   2 files changed, 18 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
>> index ad7146a..13b0a6f 100644
>> --- a/drivers/crypto/hisilicon/qm.c
>> +++ b/drivers/crypto/hisilicon/qm.c
>> @@ -494,17 +494,9 @@ static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
>>   	}
>>   }
>>   
>> -static void qm_qp_work_func(struct work_struct *work)
>> +static void qm_work_process(struct work_struct *work)
>>   {
>> -	struct hisi_qp *qp;
>> -
>> -	qp = container_of(work, struct hisi_qp, work);
>> -	qm_poll_qp(qp, qp->qm);
>> -}
>> -
>> -static irqreturn_t qm_irq_handler(int irq, void *data)
>> -{
>> -	struct hisi_qm *qm = data;
>> +	struct hisi_qm *qm = container_of(work, struct hisi_qm, work);
>>   	struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
>>   	struct hisi_qp *qp;
>>   	int eqe_num = 0;
>> @@ -513,7 +505,7 @@ static irqreturn_t qm_irq_handler(int irq, void *data)
>>   		eqe_num++;
>>   		qp = qm_to_hisi_qp(qm, eqe);
>>   		if (qp)
>> -			queue_work(qp->wq, &qp->work);
>> +			qm_poll_qp(qp, qm);
>>   
>>   		if (qm->status.eq_head == QM_Q_DEPTH - 1) {
>>   			qm->status.eqc_phase = !qm->status.eqc_phase;
>> @@ -531,6 +523,16 @@ static irqreturn_t qm_irq_handler(int irq, void *data)
>>   	}
>>   
>>   	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
>> +}
>> +
>> +static irqreturn_t do_qm_irq(int irq, void *data)
>> +{
>> +	struct hisi_qm *qm = (struct hisi_qm *)data;
>> +
>> +	if (qm->wq)
>> +		queue_work(qm->wq, &qm->work);
>> +	else
>> +		schedule_work(&qm->work);
> This subtle difference between these two could do with an explanatory
> comment.
>
> >From an initial look I'm not actually seeing qm->wq being set anywhere?
> Ah it's in a later patch. Please add comment to say that in the introduction.
Agree, will add comments here in V3.

Thanks,

Zaibo

.
>
>
>
>
>
>>   
>>   	return IRQ_HANDLED;
>>   }
>> @@ -540,7 +542,7 @@ static irqreturn_t qm_irq(int irq, void *data)
>>   	struct hisi_qm *qm = data;
>>   
>>   	if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
>> -		return qm_irq_handler(irq, data);
>> +		return do_qm_irq(irq, data);
>>   
>>   	dev_err(&qm->pdev->dev, "invalid int source\n");
>>   	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
>> @@ -1159,20 +1161,9 @@ struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
>>   
>>   	qp->qp_id = qp_id;
>>   	qp->alg_type = alg_type;
>> -	INIT_WORK(&qp->work, qm_qp_work_func);
>> -	qp->wq = alloc_workqueue("hisi_qm", WQ_UNBOUND | WQ_HIGHPRI |
>> -				 WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0);
>> -	if (!qp->wq) {
>> -		ret = -EFAULT;
>> -		goto err_free_qp_mem;
>> -	}
>>   
>>   	return qp;
>>   
>> -err_free_qp_mem:
>> -	if (qm->use_dma_api)
>> -		dma_free_coherent(dev, qp->qdma.size, qp->qdma.va,
>> -				  qp->qdma.dma);
>>   err_clear_bit:
>>   	write_lock(&qm->qps_lock);
>>   	qm->qp_array[qp_id] = NULL;
>> @@ -1704,6 +1695,7 @@ int hisi_qm_init(struct hisi_qm *qm)
>>   	qm->qp_in_used = 0;
>>   	mutex_init(&qm->mailbox_lock);
>>   	rwlock_init(&qm->qps_lock);
>> +	INIT_WORK(&qm->work, qm_work_process);
>>   
>>   	dev_dbg(dev, "init qm %s with %s\n", pdev->is_physfn ? "pf" : "vf",
>>   		qm->use_dma_api ? "dma api" : "iommu api");
>> diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
>> index 1a4f208..c72c2e6 100644
>> --- a/drivers/crypto/hisilicon/qm.h
>> +++ b/drivers/crypto/hisilicon/qm.h
>> @@ -183,6 +183,9 @@ struct hisi_qm {
>>   	u32 error_mask;
>>   	u32 msi_mask;
>>   
>> +	struct workqueue_struct *wq;
>> +	struct work_struct work;
>> +
>>   	const char *algs;
>>   	bool use_dma_api;
>>   	bool use_sva;
>> @@ -219,8 +222,6 @@ struct hisi_qp {
>>   	void *qp_ctx;
>>   	void (*req_cb)(struct hisi_qp *qp, void *data);
>>   	void (*event_cb)(struct hisi_qp *qp);
>> -	struct work_struct work;
>> -	struct workqueue_struct *wq;
>>   
>>   	struct hisi_qm *qm;
>>   	u16 pasid;
>
> .
>



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 3/5] crypto: hisilicon/sec2 - Add iommu status check
  2020-03-02 11:54   ` Jonathan Cameron
  2020-03-03  1:25     ` Xu Zaibo
@ 2020-03-03  2:16     ` Yunsheng Lin
  2020-03-03  2:22       ` Xu Zaibo
  1 sibling, 1 reply; 18+ messages in thread
From: Yunsheng Lin @ 2020-03-03  2:16 UTC (permalink / raw)
  To: Jonathan Cameron, Zaibo Xu
  Cc: qianweili, herbert, tanghui20, forest.zhouchang, linuxarm,
	zhangwei375, yekai13, davem, linux-crypto

On 2020/3/2 19:54, Jonathan Cameron wrote:
> On Mon, 2 Mar 2020 14:15:14 +0800
> Zaibo Xu <xuzaibo@huawei.com> wrote:
> 
>> From: liulongfang <liulongfang@huawei.com>
>>
>> In order to improve performance of small packets (<512Bytes)
>> in SMMU translation scenario,We need to identify the type of IOMMU

nit: space after ','. and We -> we for lower case?

>> in the SEC probe to process small packets by a different method.
>>
>> Signed-off-by: liulongfang <liulongfang@huawei.com>
>> Reviewed-by: Zaibo Xu <xuzaibo@huawei.com>
> 
> This looks like what we ended up with for the SECv1 driver.
> 
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> ---
>>  drivers/crypto/hisilicon/sec2/sec.h      |  1 +
>>  drivers/crypto/hisilicon/sec2/sec_main.c | 19 +++++++++++++++++++
>>  2 files changed, 20 insertions(+)
>>
>> diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
>> index 13e2d8d..eab0d22 100644
>> --- a/drivers/crypto/hisilicon/sec2/sec.h
>> +++ b/drivers/crypto/hisilicon/sec2/sec.h
>> @@ -165,6 +165,7 @@ struct sec_dev {
>>  	struct list_head list;
>>  	struct sec_debug debug;
>>  	u32 ctx_q_num;
>> +	bool iommu_used;
>>  	u32 num_vfs;
>>  	unsigned long status;
>>  };
>> diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
>> index ebafc1c..6466d90 100644
>> --- a/drivers/crypto/hisilicon/sec2/sec_main.c
>> +++ b/drivers/crypto/hisilicon/sec2/sec_main.c
>> @@ -7,6 +7,7 @@
>>  #include <linux/debugfs.h>
>>  #include <linux/init.h>
>>  #include <linux/io.h>
>> +#include <linux/iommu.h>
>>  #include <linux/kernel.h>
>>  #include <linux/module.h>
>>  #include <linux/pci.h>
>> @@ -826,6 +827,23 @@ static void sec_probe_uninit(struct hisi_qm *qm)
>>  	destroy_workqueue(qm->wq);
>>  }
>>  
>> +static void sec_iommu_used_check(struct sec_dev *sec)
>> +{
>> +	struct iommu_domain *domain;
>> +	struct device *dev = &sec->qm.pdev->dev;
>> +
>> +	domain = iommu_get_domain_for_dev(dev);
>> +
>> +	/* Check if iommu is used */
>> +	sec->iommu_used = false;
>> +	if (domain) {
>> +		if (domain->type & __IOMMU_DOMAIN_PAGING)
>> +			sec->iommu_used = true;
>> +		dev_info(dev, "SMMU Opened, the iommu type = %u\n",
>> +			domain->type);
>> +	}
>> +}
>> +
>>  static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>  {
>>  	struct sec_dev *sec;
>> @@ -839,6 +857,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>  	pci_set_drvdata(pdev, sec);
>>  
>>  	sec->ctx_q_num = ctx_q_num;
>> +	sec_iommu_used_check(sec);
>>  
>>  	qm = &sec->qm;
>>  
> 
> 
> _______________________________________________
> Linuxarm mailing list
> Linuxarm@huawei.com
> http://hulk.huawei.com/mailman/listinfo/linuxarm
> 
> .
> 


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 3/5] crypto: hisilicon/sec2 - Add iommu status check
  2020-03-03  2:16     ` Yunsheng Lin
@ 2020-03-03  2:22       ` Xu Zaibo
  0 siblings, 0 replies; 18+ messages in thread
From: Xu Zaibo @ 2020-03-03  2:22 UTC (permalink / raw)
  To: Yunsheng Lin, Jonathan Cameron
  Cc: qianweili, herbert, tanghui20, forest.zhouchang, linuxarm,
	zhangwei375, yekai13, davem, linux-crypto


On 2020/3/3 10:16, Yunsheng Lin wrote:
> On 2020/3/2 19:54, Jonathan Cameron wrote:
>> On Mon, 2 Mar 2020 14:15:14 +0800
>> Zaibo Xu <xuzaibo@huawei.com> wrote:
>>
>>> From: liulongfang <liulongfang@huawei.com>
>>>
>>> In order to improve performance of small packets (<512Bytes)
>>> in SMMU translation scenario,We need to identify the type of IOMMU
> nit: space after ','. and We -> we for lower case?
yes, indeed.

Thanks,
Zaibo

.
>
>>> in the SEC probe to process small packets by a different method.
>>>
>>> Signed-off-by: liulongfang <liulongfang@huawei.com>
>>> Reviewed-by: Zaibo Xu <xuzaibo@huawei.com>
>> This looks like what we ended up with for the SECv1 driver.
>>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>> ---
>>>   drivers/crypto/hisilicon/sec2/sec.h      |  1 +
>>>   drivers/crypto/hisilicon/sec2/sec_main.c | 19 +++++++++++++++++++
>>>   2 files changed, 20 insertions(+)
>>>
>>> diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
>>> index 13e2d8d..eab0d22 100644
>>> --- a/drivers/crypto/hisilicon/sec2/sec.h
>>> +++ b/drivers/crypto/hisilicon/sec2/sec.h
>>> @@ -165,6 +165,7 @@ struct sec_dev {
>>>   	struct list_head list;
>>>   	struct sec_debug debug;
>>>   	u32 ctx_q_num;
>>> +	bool iommu_used;
>>>   	u32 num_vfs;
>>>   	unsigned long status;
>>>   };
>>> diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
>>> index ebafc1c..6466d90 100644
>>> --- a/drivers/crypto/hisilicon/sec2/sec_main.c
>>> +++ b/drivers/crypto/hisilicon/sec2/sec_main.c
>>> @@ -7,6 +7,7 @@
>>>   #include <linux/debugfs.h>
>>>   #include <linux/init.h>
>>>   #include <linux/io.h>
>>> +#include <linux/iommu.h>
>>>   #include <linux/kernel.h>
>>>   #include <linux/module.h>
>>>   #include <linux/pci.h>
>>> @@ -826,6 +827,23 @@ static void sec_probe_uninit(struct hisi_qm *qm)
>>>   	destroy_workqueue(qm->wq);
>>>   }
>>>   
>>> +static void sec_iommu_used_check(struct sec_dev *sec)
>>> +{
>>> +	struct iommu_domain *domain;
>>> +	struct device *dev = &sec->qm.pdev->dev;
>>> +
>>> +	domain = iommu_get_domain_for_dev(dev);
>>> +
>>> +	/* Check if iommu is used */
>>> +	sec->iommu_used = false;
>>> +	if (domain) {
>>> +		if (domain->type & __IOMMU_DOMAIN_PAGING)
>>> +			sec->iommu_used = true;
>>> +		dev_info(dev, "SMMU Opened, the iommu type = %u\n",
>>> +			domain->type);
>>> +	}
>>> +}
>>> +
>>>   static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>   {
>>>   	struct sec_dev *sec;
>>> @@ -839,6 +857,7 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>   	pci_set_drvdata(pdev, sec);
>>>   
>>>   	sec->ctx_q_num = ctx_q_num;
>>> +	sec_iommu_used_check(sec);
>>>   
>>>   	qm = &sec->qm;
>>>   
>>
>> _______________________________________________
>> Linuxarm mailing list
>> Linuxarm@huawei.com
>> http://hulk.huawei.com/mailman/listinfo/linuxarm
>>
>> .
>>
> .
>



^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2020-03-03  2:22 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-02  6:15 [PATCH v2 0/5] crypto: hisilicon - Improve SEC performance Zaibo Xu
2020-03-02  6:15 ` [PATCH v2 1/5] crypto: hisilicon - Use one workqueue per qm instead of per qp Zaibo Xu
2020-03-02 11:39   ` Jonathan Cameron
2020-03-03  1:38     ` Xu Zaibo
2020-03-02  6:15 ` [PATCH v2 2/5] crypto: hisilicon/sec2 - Add workqueue for SEC driver Zaibo Xu
2020-03-02 11:51   ` Jonathan Cameron
2020-03-03  1:35     ` Xu Zaibo
2020-03-02  6:15 ` [PATCH v2 3/5] crypto: hisilicon/sec2 - Add iommu status check Zaibo Xu
2020-03-02 11:54   ` Jonathan Cameron
2020-03-03  1:25     ` Xu Zaibo
2020-03-03  2:16     ` Yunsheng Lin
2020-03-03  2:22       ` Xu Zaibo
2020-03-02  6:15 ` [PATCH v2 4/5] crypto: hisilicon/sec2 - Update IV and MAC operation Zaibo Xu
2020-03-02 11:58   ` Jonathan Cameron
2020-03-03  1:24     ` Xu Zaibo
2020-03-02  6:15 ` [PATCH v2 5/5] crypto: hisilicon/sec2 - Add pbuffer mode for SEC driver Zaibo Xu
2020-03-02 12:49   ` Jonathan Cameron
2020-03-03  1:22     ` Xu Zaibo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).