From: Boris Pismenny <borisp@nvidia.com>
To: <dsahern@gmail.com>, <kuba@kernel.org>, <davem@davemloft.net>,
<saeedm@nvidia.com>, <hch@lst.de>, <sagi@grimberg.me>,
<axboe@fb.com>, <kbusch@kernel.org>, <viro@zeniv.linux.org.uk>,
<edumazet@google.com>, <smalin@marvell.com>
Cc: <boris.pismenny@gmail.com>, <linux-nvme@lists.infradead.org>,
<netdev@vger.kernel.org>, <benishay@nvidia.com>,
<ogerlitz@nvidia.com>, <yorayz@nvidia.com>,
Boris Pismenny <borisp@mellanox.com>,
Or Gerlitz <ogerlitz@mellanox.com>,
Yoray Zack <yorayz@mellanox.com>
Subject: [PATCH v5 net-next 14/36] net/mlx5e: NVMEoTCP use KLM UMRs
Date: Thu, 22 Jul 2021 14:03:03 +0300 [thread overview]
Message-ID: <20210722110325.371-15-borisp@nvidia.com> (raw)
In-Reply-To: <20210722110325.371-1-borisp@nvidia.com>
From: Ben Ben-Ishay <benishay@nvidia.com>
NVMEoTCP offload uses buffer registration for ddp operation,
every request comprises from SG list that might have elements with size > 4K,
thus the appropriate way to perform buffer registration is with KLM UMRs.
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Ben Ben-Ishay <benishay@nvidia.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Yoray Zack <yorayz@mellanox.com>
---
drivers/net/ethernet/mellanox/mlx5/core/en.h | 5 +-
.../net/ethernet/mellanox/mlx5/core/en/txrx.h | 3 +
.../mellanox/mlx5/core/en_accel/nvmeotcp.c | 116 ++++++++++++++++++
.../mlx5/core/en_accel/nvmeotcp_utils.h | 12 ++
.../net/ethernet/mellanox/mlx5/core/en_rx.c | 12 +-
5 files changed, 145 insertions(+), 3 deletions(-)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp_utils.h
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 5bc38002d136..d88ecd04d5da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -238,7 +238,10 @@ struct mlx5e_umr_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_umr_ctrl_seg uctrl;
struct mlx5_mkey_seg mkc;
- struct mlx5_mtt inline_mtts[0];
+ union {
+ struct mlx5_mtt inline_mtts[0];
+ struct mlx5_klm inline_klms[0];
+ };
};
extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 055c3bc23733..529745aac7e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -41,6 +41,9 @@ enum mlx5e_icosq_wqe_type {
MLX5E_ICOSQ_WQE_SET_PSV_TLS,
MLX5E_ICOSQ_WQE_GET_PSV_TLS,
#endif
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+ MLX5E_ICOSQ_WQE_UMR_NVME_TCP,
+#endif
};
/* General */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c
index 04e88042b243..7fc3b13b1b35 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c
@@ -4,6 +4,7 @@
#include <linux/netdevice.h>
#include <linux/idr.h>
#include "en_accel/nvmeotcp.h"
+#include "en_accel/nvmeotcp_utils.h"
#include "en_accel/fs_tcp.h"
#include "en/txrx.h"
@@ -19,6 +20,121 @@ static const struct rhashtable_params rhash_queues = {
.max_size = MAX_NVMEOTCP_QUEUES,
};
+static void
+fill_nvmeotcp_klm_wqe(struct mlx5e_nvmeotcp_queue *queue,
+ struct mlx5e_umr_wqe *wqe, u16 ccid, u32 klm_entries,
+ u16 klm_offset)
+{
+ struct scatterlist *sgl_mkey;
+ u32 lkey, i;
+
+ lkey = queue->priv->mdev->mlx5e_res.mkey.key;
+ for (i = 0; i < klm_entries; i++) {
+ sgl_mkey = &queue->ccid_table[ccid].sgl[i + klm_offset];
+ wqe->inline_klms[i].bcount = cpu_to_be32(sgl_mkey->length);
+ wqe->inline_klms[i].key = cpu_to_be32(lkey);
+ wqe->inline_klms[i].va = cpu_to_be64(sgl_mkey->dma_address);
+ }
+
+ for (; i < ALIGN(klm_entries, KLM_ALIGNMENT); i++) {
+ wqe->inline_klms[i].bcount = 0;
+ wqe->inline_klms[i].key = 0;
+ wqe->inline_klms[i].va = 0;
+ }
+}
+
+static void
+build_nvmeotcp_klm_umr(struct mlx5e_nvmeotcp_queue *queue,
+ struct mlx5e_umr_wqe *wqe, u16 ccid, int klm_entries,
+ u32 klm_offset, u32 len)
+{
+ u32 id = queue->ccid_table[ccid].klm_mkey.key;
+ struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
+ struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+ struct mlx5_mkey_seg *mkc = &wqe->mkc;
+
+ u32 sqn = queue->sq->icosq.sqn;
+ u16 pc = queue->sq->icosq.pc;
+
+ cseg->opmod_idx_opcode = cpu_to_be32((pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+ MLX5_OPCODE_UMR);
+ cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+ MLX5E_KLM_UMR_DS_CNT(ALIGN(klm_entries, KLM_ALIGNMENT)));
+ cseg->general_id = cpu_to_be32(id);
+
+ if (!klm_offset) {
+ ucseg->mkey_mask |= cpu_to_be64(MLX5_MKEY_MASK_XLT_OCT_SIZE |
+ MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_FREE);
+ mkc->xlt_oct_size = cpu_to_be32(ALIGN(len, KLM_ALIGNMENT));
+ mkc->len = cpu_to_be64(queue->ccid_table[ccid].size);
+ }
+
+ ucseg->flags = MLX5_UMR_INLINE | MLX5_UMR_TRANSLATION_OFFSET_EN;
+ ucseg->xlt_octowords = cpu_to_be16(ALIGN(klm_entries, KLM_ALIGNMENT));
+ ucseg->xlt_offset = cpu_to_be16(klm_offset);
+ fill_nvmeotcp_klm_wqe(queue, wqe, ccid, klm_entries, klm_offset);
+}
+
+static void
+mlx5e_nvmeotcp_fill_wi(struct mlx5e_nvmeotcp_queue *nvmeotcp_queue,
+ struct mlx5e_icosq *sq, u32 wqe_bbs, u16 pi)
+{
+ struct mlx5e_icosq_wqe_info *wi = &sq->db.wqe_info[pi];
+
+ wi->num_wqebbs = wqe_bbs;
+ wi->wqe_type = MLX5E_ICOSQ_WQE_UMR_NVME_TCP;
+}
+
+static void
+post_klm_wqe(struct mlx5e_nvmeotcp_queue *queue,
+ u16 ccid,
+ u32 klm_length,
+ u32 *klm_offset)
+{
+ struct mlx5e_icosq *sq = &queue->sq->icosq;
+ u32 wqe_bbs, cur_klm_entries;
+ struct mlx5e_umr_wqe *wqe;
+ u16 pi, wqe_sz;
+
+ cur_klm_entries = min_t(int, queue->max_klms_per_wqe,
+ klm_length - *klm_offset);
+ wqe_sz = MLX5E_KLM_UMR_WQE_SZ(ALIGN(cur_klm_entries, KLM_ALIGNMENT));
+ wqe_bbs = DIV_ROUND_UP(wqe_sz, MLX5_SEND_WQE_BB);
+ pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
+ wqe = MLX5E_NVMEOTCP_FETCH_KLM_WQE(sq, pi);
+ mlx5e_nvmeotcp_fill_wi(queue, sq, wqe_bbs, pi);
+ build_nvmeotcp_klm_umr(queue, wqe, ccid, cur_klm_entries, *klm_offset,
+ klm_length);
+ *klm_offset += cur_klm_entries;
+ sq->pc += wqe_bbs;
+ sq->doorbell_cseg = &wqe->ctrl;
+}
+
+static int
+mlx5e_nvmeotcp_post_klm_wqe(struct mlx5e_nvmeotcp_queue *queue,
+ u16 ccid,
+ u32 klm_length)
+{
+ u32 klm_offset = 0, wqes, wqe_sz, max_wqe_bbs, i, room;
+ struct mlx5e_icosq *sq = &queue->sq->icosq;
+
+ /* TODO: set stricter wqe_sz; using max for now */
+ wqes = DIV_ROUND_UP(klm_length, queue->max_klms_per_wqe);
+ wqe_sz = MLX5E_KLM_UMR_WQE_SZ(queue->max_klms_per_wqe);
+
+ max_wqe_bbs = DIV_ROUND_UP(wqe_sz, MLX5_SEND_WQE_BB);
+
+ room = mlx5e_stop_room_for_wqe(max_wqe_bbs) * wqes;
+ if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room)))
+ return -ENOSPC;
+
+ for (i = 0; i < wqes; i++)
+ post_klm_wqe(queue, ccid, klm_length, &klm_offset);
+
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
+ return 0;
+}
+
static int
mlx5e_nvmeotcp_offload_limits(struct net_device *netdev,
struct ulp_ddp_limits *limits)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp_utils.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp_utils.h
new file mode 100644
index 000000000000..329e114d6571
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp_utils.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+#ifndef __MLX5E_NVMEOTCP_UTILS_H__
+#define __MLX5E_NVMEOTCP_UTILS_H__
+
+#include "en.h"
+
+#define MLX5E_NVMEOTCP_FETCH_KLM_WQE(sq, pi) \
+ ((struct mlx5e_umr_wqe *)\
+ mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_umr_wqe)))
+
+#endif /* __MLX5E_NVMEOTCP_UTILS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 3c65fd0bcf31..9d821facbca4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -615,16 +615,20 @@ void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq)
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
sqcc += wi->num_wqebbs;
-#ifdef CONFIG_MLX5_EN_TLS
switch (wi->wqe_type) {
+#ifdef CONFIG_MLX5_EN_TLS
case MLX5E_ICOSQ_WQE_SET_PSV_TLS:
mlx5e_ktls_handle_ctx_completion(wi);
break;
case MLX5E_ICOSQ_WQE_GET_PSV_TLS:
mlx5e_ktls_handle_get_psv_completion(wi, sq);
break;
- }
#endif
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+ case MLX5E_ICOSQ_WQE_UMR_NVME_TCP:
+ break;
+#endif
+ }
}
sq->cc = sqcc;
}
@@ -694,6 +698,10 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
case MLX5E_ICOSQ_WQE_GET_PSV_TLS:
mlx5e_ktls_handle_get_psv_completion(wi, sq);
break;
+#endif
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+ case MLX5E_ICOSQ_WQE_UMR_NVME_TCP:
+ break;
#endif
default:
netdev_WARN_ONCE(cq->netdev,
--
2.24.1
next prev parent reply other threads:[~2021-07-22 11:05 UTC|newest]
Thread overview: 62+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-22 11:02 [PATCH v5 net-next 00/36] nvme-tcp receive and tarnsmit offloads Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 01/36] net: Introduce direct data placement tcp offload Boris Pismenny
2021-07-22 11:26 ` Eric Dumazet
2021-07-22 12:18 ` Boris Pismenny
2021-07-22 13:10 ` Eric Dumazet
2021-07-22 13:33 ` Boris Pismenny
2021-07-22 13:39 ` Eric Dumazet
2021-07-22 14:02 ` Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 02/36] iov_iter: DDP copy to iter/pages Boris Pismenny
2021-07-22 13:31 ` Christoph Hellwig
2021-07-22 20:23 ` Boris Pismenny
2021-07-23 5:03 ` Christoph Hellwig
2021-07-23 5:21 ` Al Viro
2021-08-04 14:13 ` Or Gerlitz
2021-08-10 13:29 ` Or Gerlitz
2021-07-22 20:55 ` Al Viro
2021-07-22 11:02 ` [PATCH v5 net-next 03/36] net: skb copy(+hash) iterators for DDP offloads Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 04/36] net/tls: expose get_netdev_for_sock Boris Pismenny
2021-07-23 6:06 ` Christoph Hellwig
2021-08-04 13:26 ` Or Gerlitz
[not found] ` <20210804072918.17ba9cff@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com>
2021-08-04 15:07 ` Or Gerlitz
2021-08-10 13:25 ` Or Gerlitz
2021-07-22 11:02 ` [PATCH v5 net-next 05/36] nvme-tcp: Add DDP offload control path Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 06/36] nvme-tcp: Add DDP data-path Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 07/36] nvme-tcp: RX DDGST offload Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 08/36] nvme-tcp: Deal with netdevice DOWN events Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 09/36] net/mlx5: Header file changes for nvme-tcp offload Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 10/36] net/mlx5: Add 128B CQE for NVMEoTCP offload Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 11/36] net/mlx5e: TCP flow steering for nvme-tcp Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 12/36] net/mlx5e: NVMEoTCP offload initialization Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 13/36] net/mlx5e: KLM UMR helper macros Boris Pismenny
2021-07-22 11:03 ` Boris Pismenny [this message]
2021-07-22 11:03 ` [PATCH v5 net-next 15/36] net/mlx5e: NVMEoTCP queue init/teardown Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 16/36] net/mlx5e: NVMEoTCP async ddp invalidation Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 17/36] net/mlx5e: NVMEoTCP ddp setup and resync Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 18/36] net/mlx5e: NVMEoTCP, data-path for DDP+DDGST offload Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 19/36] net/mlx5e: NVMEoTCP statistics Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 20/36] Documentation: add ULP DDP offload documentation Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 21/36] net: drop ULP DDP HW offload feature if no CSUM offload feature Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 22/36] net: Add ulp_ddp_pdu_info struct Boris Pismenny
2021-07-23 19:42 ` Sagi Grimberg
2021-07-22 11:03 ` [PATCH v5 net-next 23/36] net: Add to ulp_ddp support for fallback flow Boris Pismenny
2021-07-23 6:09 ` Christoph Hellwig
2021-07-22 11:03 ` [PATCH v5 net-next 24/36] net: Add MSG_DDP_CRC flag Boris Pismenny
2021-07-22 14:23 ` Eric Dumazet
2021-07-22 11:03 ` [PATCH v5 net-next 25/36] nvme-tcp: TX DDGST offload Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 26/36] nvme-tcp: Mapping between Tx NVMEoTCP pdu and TCP sequence Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 27/36] mlx5e: make preparation in TLS code for NVMEoTCP CRC Tx offload Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 28/36] mlx5: Add sq state test bit for nvmeotcp Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 29/36] mlx5: Add support to NETIF_F_HW_TCP_DDP_CRC_TX feature Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 30/36] net/mlx5e: NVMEoTCP DDGST TX offload TIS Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 31/36] net/mlx5e: NVMEoTCP DDGST Tx offload queue init/teardown Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 32/36] net/mlx5e: NVMEoTCP DDGST TX BSF and PSV Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 33/36] net/mlx5e: NVMEoTCP DDGST TX Data path Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 34/36] net/mlx5e: NVMEoTCP DDGST TX handle OOO packets Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 35/36] net/mlx5e: NVMEoTCP DDGST TX offload optimization Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 36/36] net/mlx5e: NVMEoTCP DDGST TX statistics Boris Pismenny
2021-07-23 5:56 ` [PATCH v5 net-next 00/36] nvme-tcp receive and tarnsmit offloads Christoph Hellwig
2021-07-23 19:58 ` Sagi Grimberg
2021-08-04 13:51 ` Or Gerlitz
2021-08-06 19:46 ` Sagi Grimberg
2021-08-10 13:37 ` Or Gerlitz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210722110325.371-15-borisp@nvidia.com \
--to=borisp@nvidia.com \
--cc=axboe@fb.com \
--cc=benishay@nvidia.com \
--cc=boris.pismenny@gmail.com \
--cc=borisp@mellanox.com \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=edumazet@google.com \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=kuba@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=netdev@vger.kernel.org \
--cc=ogerlitz@mellanox.com \
--cc=ogerlitz@nvidia.com \
--cc=saeedm@nvidia.com \
--cc=sagi@grimberg.me \
--cc=smalin@marvell.com \
--cc=viro@zeniv.linux.org.uk \
--cc=yorayz@mellanox.com \
--cc=yorayz@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).