From: Cheng Xu <chengyou@linux.alibaba.com>
To: jgg@ziepe.ca, dledford@redhat.com
Cc: leon@kernel.org, linux-rdma@vger.kernel.org,
	KaiShen@linux.alibaba.com, chengyou@linux.alibaba.com,
	tonylu@linux.alibaba.com, BMT@zurich.ibm.com
Subject: [PATCH for-next v4 06/12] RDMA/erdma: Add event queue implementation
Date: Mon, 14 Mar 2022 14:47:33 +0800	[thread overview]
Message-ID: <20220314064739.81647-7-chengyou@linux.alibaba.com> (raw)
In-Reply-To: <20220314064739.81647-1-chengyou@linux.alibaba.com>

Event queues (EQs) are the main notification mechanism from the erdma
hardware to its driver. Each erdma device has two kinds of EQs: an
asynchronous EQ (AEQ) and completion EQs (CEQs). Every device has one
AEQ, which is used to report RDMA asynchronous events, and up to 32
CEQs (numbered CEQ0 to CEQ31). CEQ0 is used to report cmdq completion
events, and the remaining CEQs report RDMA completion events.
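
Both EQ types consume entries with an owner-bit scheme. A simplified,
illustrative sketch of that scheme (names shortened; OWNER_MASK stands
in for ERDMA_CEQE_HDR_O_MASK, and the real logic lives in
get_next_valid_eqe() below):

	/* Illustrative helper only, not part of the driver's API. */
	static void *eq_poll_one(struct erdma_eq *eq)
	{
		/* depth is a power of two; hardware flips the owner bit
		 * it writes into each EQE every time the queue wraps.
		 */
		u64 *eqe = eq->qbuf + ((eq->ci & (eq->depth - 1)) << EQE_SHIFT);

		if (FIELD_GET(OWNER_MASK, READ_ONCE(*eqe)) != eq->owner)
			return NULL;	/* entry not written yet */

		dma_rmb();	/* read payload only after the owner check */
		if ((++eq->ci & (eq->depth - 1)) == 0)
			eq->owner = !eq->owner;	/* flip expected owner on wrap */

		return eqe;
	}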

Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
---
 drivers/infiniband/hw/erdma/erdma_eq.c | 347 +++++++++++++++++++++++++
 1 file changed, 347 insertions(+)
 create mode 100644 drivers/infiniband/hw/erdma/erdma_eq.c

diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c
new file mode 100644
index 000000000000..45d58d40dc6f
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_eq.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/*          Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "erdma.h"
+#include "erdma_cm.h"
+#include "erdma_hw.h"
+#include "erdma_verbs.h"
+
+#define MAX_POLL_CHUNK_SIZE 16
+
+void notify_eq(struct erdma_eq *eq)
+{
+	u64 db_data = FIELD_PREP(ERDMA_EQDB_CI_MASK, eq->ci) |
+		      FIELD_PREP(ERDMA_EQDB_ARM_MASK, 1);
+
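+	/*
+	 * Publish the new CI and arm state to the doorbell record in
+	 * host memory, then ring the EQ doorbell register.
+	 */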
+	*eq->db_record = db_data;
+	writeq(db_data, eq->db_addr);
+
+	atomic64_inc(&eq->notify_num);
+}
+
+static void *get_eq_entry(struct erdma_eq *eq, u16 idx)
+{
+	idx &= (eq->depth - 1);
+
+	return eq->qbuf + (idx << EQE_SHIFT);
+}
+
+void *get_next_valid_eqe(struct erdma_eq *eq)
+{
+	u64 *eqe = (u64 *)get_eq_entry(eq, eq->ci);
+	u64 val = READ_ONCE(*eqe);
+
+	if (FIELD_GET(ERDMA_CEQE_HDR_O_MASK, val) == eq->owner) {
+		dma_rmb();
+		eq->ci++;
+		if ((eq->ci & (eq->depth - 1)) == 0)
+			eq->owner = !eq->owner;
+
+		atomic64_inc(&eq->event_num);
+		return eqe;
+	}
+
+	return NULL;
+}
+
+void erdma_aeq_event_handler(struct erdma_dev *dev)
+{
+	struct erdma_aeqe *aeqe;
+	u32 cqn, qpn;
+	struct erdma_qp *qp;
+	struct erdma_cq *cq;
+	struct ib_event event;
+	u32 poll_cnt = 0;
+
+	memset(&event, 0, sizeof(event));
+
+	while (poll_cnt < MAX_POLL_CHUNK_SIZE) {
+		aeqe = (struct erdma_aeqe *)get_next_valid_eqe(&dev->aeq);
+		if (!aeqe)
+			break;
+
+		poll_cnt++;
+
+		if (FIELD_GET(ERDMA_AEQE_HDR_TYPE_MASK, aeqe->hdr) ==
+		    ERDMA_AE_TYPE_CQ_ERR) {
+			cqn = aeqe->event_data0;
+			cq = find_cq_by_cqn(dev, cqn);
+			if (!cq)
+				continue;
+			event.device = cq->ibcq.device;
+			event.element.cq = &cq->ibcq;
+			event.event = IB_EVENT_CQ_ERR;
+			if (cq->ibcq.event_handler)
+				cq->ibcq.event_handler(&event,
+						       cq->ibcq.cq_context);
+		} else {
+			qpn = aeqe->event_data0;
+			qp = find_qp_by_qpn(dev, qpn);
+			if (!qp)
+				continue;
+
+			event.device = qp->ibqp.device;
+			event.element.qp = &qp->ibqp;
+			event.event = IB_EVENT_QP_FATAL;
+			if (qp->ibqp.event_handler)
+				qp->ibqp.event_handler(&event,
+						       qp->ibqp.qp_context);
+		}
+	}
+
+	notify_eq(&dev->aeq);
+}
+
+int erdma_aeq_init(struct erdma_dev *dev)
+{
+	struct erdma_eq *eq = &dev->aeq;
+	u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT;
+
+	eq->qbuf = dma_alloc_coherent(&dev->pdev->dev,
+				      WARPPED_BUFSIZE(buf_size),
+				      &eq->qbuf_dma_addr,
+				      GFP_KERNEL | __GFP_ZERO);
+	if (!eq->qbuf)
+		return -ENOMEM;
+
+	spin_lock_init(&eq->lock);
+	atomic64_set(&eq->event_num, 0);
+	atomic64_set(&eq->notify_num, 0);
+
+	eq->depth = ERDMA_DEFAULT_EQ_DEPTH;
+	eq->db_addr = (u64 __iomem *)(dev->func_bar + ERDMA_REGS_AEQ_DB_REG);
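+	/*
+	 * The 8-byte doorbell record is placed right after the EQE array;
+	 * its DMA address is reported to the device below via
+	 * ERDMA_AEQ_DB_HOST_ADDR_REG.
+	 */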
+	eq->db_record = (u64 *)(eq->qbuf + buf_size);
+	eq->owner = 1;
+
+	erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_H_REG,
+			  upper_32_bits(eq->qbuf_dma_addr));
+	erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_L_REG,
+			  lower_32_bits(eq->qbuf_dma_addr));
+	erdma_reg_write32(dev, ERDMA_REGS_AEQ_DEPTH_REG, eq->depth);
+	erdma_reg_write64(dev, ERDMA_AEQ_DB_HOST_ADDR_REG,
+			  eq->qbuf_dma_addr + buf_size);
+
+	return 0;
+}
+
+void erdma_aeq_destroy(struct erdma_dev *dev)
+{
+	struct erdma_eq *eq = &dev->aeq;
+	u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT;
+
+	dma_free_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), eq->qbuf,
+			  eq->qbuf_dma_addr);
+}
+
+void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb)
+{
+	int cqn;
+	struct erdma_cq *cq;
+	struct erdma_dev *dev = ceq_cb->dev;
+	u32 poll_cnt = 0;
+	u64 *hdr;
+
+	if (!ceq_cb->ready)
+		return;
+
+	while (poll_cnt < MAX_POLL_CHUNK_SIZE) {
+		hdr = (u64 *)get_next_valid_eqe(&ceq_cb->eq);
+		if (!hdr)
+			break;
+
+		poll_cnt++;
+		cqn = FIELD_GET(ERDMA_CEQE_HDR_CQN_MASK, READ_ONCE(*hdr));
+
+		cq = find_cq_by_cqn(dev, cqn);
+		if (!cq)
+			continue;
+
+		if (rdma_is_kernel_res(&cq->ibcq.res))
+			cq->kern_cq.cmdsn++;
+
+		if (cq->ibcq.comp_handler)
+			cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+	}
+
+	notify_eq(&ceq_cb->eq);
+}
+
+static irqreturn_t erdma_intr_ceq_handler(int irq, void *data)
+{
+	struct erdma_eq_cb *ceq_cb = data;
+
+	tasklet_schedule(&ceq_cb->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+static void erdma_intr_ceq_task(unsigned long data)
+{
+	erdma_ceq_completion_handler((struct erdma_eq_cb *)data);
+}
+
+static int erdma_set_ceq_irq(struct erdma_dev *dev, u16 ceqn)
+{
+	struct erdma_eq_cb *eqc = &dev->ceqs[ceqn];
+	cpumask_t affinity_hint_mask;
+	u32 cpu;
+	int err;
+
+	snprintf(eqc->irq_name, ERDMA_IRQNAME_SIZE, "erdma-ceq%u@pci:%s",
+		ceqn, pci_name(dev->pdev));
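+	/* MSI-X vectors for CEQs start at 1; vector 0 is used by the cmdq EQ. */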
+	eqc->msix_vector = pci_irq_vector(dev->pdev, ceqn + 1);
+
+	tasklet_init(&dev->ceqs[ceqn].tasklet, erdma_intr_ceq_task,
+		     (unsigned long)&dev->ceqs[ceqn]);
+
+	cpumask_clear(&affinity_hint_mask);
+	cpu = cpumask_local_spread(ceqn + 1, dev->attrs.numa_node);
+	cpumask_set_cpu(cpu, &affinity_hint_mask);
+
+	err = request_irq(eqc->msix_vector, erdma_intr_ceq_handler, 0,
+			  eqc->irq_name, eqc);
+	if (err) {
+		dev_err(&dev->pdev->dev, "failed to request_irq(%d)\n", err);
+		return err;
+	}
+
+	irq_set_affinity_hint(eqc->msix_vector, &affinity_hint_mask);
+
+	return 0;
+}
+
+static void erdma_free_ceq_irq(struct erdma_dev *dev, u16 ceqn)
+{
+	struct erdma_eq_cb *eqc = &dev->ceqs[ceqn];
+
+	irq_set_affinity_hint(eqc->msix_vector, NULL);
+	free_irq(eqc->msix_vector, eqc);
+}
+
+static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq)
+{
+	struct erdma_cmdq_create_eq_req req;
+	dma_addr_t db_info_dma_addr;
+
+	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+				CMDQ_OPCODE_CREATE_EQ);
+	req.eqn = eqn;
+	req.depth = ilog2(eq->depth);
+	req.qbuf_addr = eq->qbuf_dma_addr;
+	req.qtype = 1; /* CEQ */
+	/* Vector index is the same as EQN. */
+	req.vector_idx = eqn;
+	db_info_dma_addr = eq->qbuf_dma_addr + (eq->depth << EQE_SHIFT);
+	req.db_dma_addr_l = lower_32_bits(db_info_dma_addr);
+	req.db_dma_addr_h = upper_32_bits(db_info_dma_addr);
+
+	return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req,
+				   sizeof(struct erdma_cmdq_create_eq_req),
+				   NULL, NULL);
+}
+
+static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn)
+{
+	struct erdma_eq *eq = &dev->ceqs[ceqn].eq;
+	u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT;
+	int ret;
+
+	eq->qbuf = dma_alloc_coherent(&dev->pdev->dev,
+				      WARPPED_BUFSIZE(buf_size),
+				      &eq->qbuf_dma_addr,
+				      GFP_KERNEL | __GFP_ZERO);
+	if (!eq->qbuf)
+		return -ENOMEM;
+
+	spin_lock_init(&eq->lock);
+	atomic64_set(&eq->event_num, 0);
+	atomic64_set(&eq->notify_num, 0);
+
+	eq->depth = ERDMA_DEFAULT_EQ_DEPTH;
+	eq->db_addr = (u64 __iomem *)(dev->func_bar +
+				      ERDMA_REGS_CEQ_DB_BASE_REG +
+				      (ceqn + 1) * 8);
+	eq->db_record = (u64 *)(eq->qbuf + buf_size);
+	eq->ci = 0;
+	eq->owner = 1;
+	dev->ceqs[ceqn].dev = dev;
+
+	/* CEQ indexed from 1, 0 rsvd for CMDQ-EQ. */
+	ret = create_eq_cmd(dev, ceqn + 1, eq);
+	dev->ceqs[ceqn].ready = ret ? false : true;
+
+	return ret;
+}
+
+static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn)
+{
+	struct erdma_eq *eq = &dev->ceqs[ceqn].eq;
+	u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT;
+	struct erdma_cmdq_destroy_eq_req req;
+	int err;
+
+	dev->ceqs[ceqn].ready = 0;
+
+	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+				CMDQ_OPCODE_DESTROY_EQ);
+	/* CEQ indexed from 1, 0 rsvd for CMDQ-EQ. */
+	req.eqn = ceqn + 1;
+	req.qtype = 1;
+	req.vector_idx = ceqn + 1;
+
+	err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL,
+				  NULL);
+	if (err)
+		return;
+
+	dma_free_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), eq->qbuf,
+			  eq->qbuf_dma_addr);
+}
+
+int erdma_ceqs_init(struct erdma_dev *dev)
+{
+	u32 i, j;
+	int err = 0;
+
+	for (i = 0; i < dev->attrs.irq_num - 1; i++) {
+		err = erdma_ceq_init_one(dev, i);
+		if (err)
+			goto out_err;
+
+		err = erdma_set_ceq_irq(dev, i);
+		if (err) {
+			erdma_ceq_uninit_one(dev, i);
+			goto out_err;
+		}
+	}
+
+	return 0;
+
+out_err:
+	for (j = 0; j < i; j++) {
+		erdma_free_ceq_irq(dev, j);
+		erdma_ceq_uninit_one(dev, j);
+	}
+
+	return err;
+}
+
+void erdma_ceqs_uninit(struct erdma_dev *dev)
+{
+	u32 i;
+
+	for (i = 0; i < dev->attrs.irq_num - 1; i++) {
+		erdma_free_ceq_irq(dev, i);
+		erdma_ceq_uninit_one(dev, i);
+	}
+}
-- 
2.27.0

