* [PATCH] nvmet-tcp: Don't map pages which can't come from HIGHMEM
@ 2022-08-29 7:54 Sagi Grimberg
2022-08-30 21:40 ` Fabio M. De Francesco
0 siblings, 1 reply; 2+ messages in thread
From: Sagi Grimberg @ 2022-08-29 7:54 UTC (permalink / raw)
To: linux-nvme
Cc: linux-kernel, Fabio M . De Francesco, Christoph Hellwig,
Keith Busch, Chaitanya Kulkarni, James Smart, Ira Weiny,
Venkataramanan Anirudh
From: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
kmap() is being deprecated in favor of kmap_local_page().[1]
There are two main problems with kmap(): (1) It comes with an overhead as
mapping space is restricted and protected by a global lock for
synchronization and (2) it also requires global TLB invalidation when the
kmap’s pool wraps and it might block when the mapping space is fully
utilized until a slot becomes available.
The pages which will be mapped are allocated in nvmet_tcp_map_data(),
using the GFP_KERNEL flag. This assures that they cannot come from
HIGHMEM. This imply that a straight page_address() can replace the kmap()
of sg_page(sg) in nvmet_tcp_map_pdu_iovec(). As a side effect, we might
also delete the field "nr_mapped" from struct "nvmet_tcp_cmd" because,
after removing the kmap() calls, there would be no longer any need of it.
In addition, there is no reason to use a kvec for the command receive
data buffers iovec, use a bio_vec instead and let iov_iter handle the
buffer mapping and data copy.
Test with blktests on a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with
HIGHMEM64GB enabled.
[1] "[PATCH] checkpatch: Add kmap and kmap_atomic to the deprecated
list" https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com/
Cc: Chaitanya Kulkarni <chaitanyak@nvidia.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Keith Busch <kbusch@kernel.org>
Suggested-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
[sagi: added bio_vec plus minor naming changes]
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/target/tcp.c | 44 ++++++++++++---------------------------
1 file changed, 13 insertions(+), 31 deletions(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index dc3b4dc8fe08..43594e0d609c 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -77,9 +77,8 @@ struct nvmet_tcp_cmd {
u32 pdu_len;
u32 pdu_recv;
int sg_idx;
- int nr_mapped;
struct msghdr recv_msg;
- struct kvec *iov;
+ struct bio_vec *iov;
u32 flags;
struct list_head entry;
@@ -167,7 +166,6 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops;
static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd);
-static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd);
static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
struct nvmet_tcp_cmd *cmd)
@@ -301,35 +299,21 @@ static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu)
static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd)
{
- WARN_ON(unlikely(cmd->nr_mapped > 0));
-
kfree(cmd->iov);
sgl_free(cmd->req.sg);
cmd->iov = NULL;
cmd->req.sg = NULL;
}
-static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
-{
- struct scatterlist *sg;
- int i;
-
- sg = &cmd->req.sg[cmd->sg_idx];
-
- for (i = 0; i < cmd->nr_mapped; i++)
- kunmap(sg_page(&sg[i]));
-
- cmd->nr_mapped = 0;
-}
-
-static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
+static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd)
{
- struct kvec *iov = cmd->iov;
+ struct bio_vec *iov = cmd->iov;
struct scatterlist *sg;
u32 length, offset, sg_offset;
+ int nr_pages;
length = cmd->pdu_len;
- cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE);
+ nr_pages = DIV_ROUND_UP(length, PAGE_SIZE);
offset = cmd->rbytes_done;
cmd->sg_idx = offset / PAGE_SIZE;
sg_offset = offset % PAGE_SIZE;
@@ -338,8 +322,9 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
while (length) {
u32 iov_len = min_t(u32, length, sg->length - sg_offset);
- iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset;
- iov->iov_len = iov_len;
+ iov->bv_page = sg_page(sg);
+ iov->bv_len = sg->length;
+ iov->bv_offset = sg->offset + sg_offset;
length -= iov_len;
sg = sg_next(sg);
@@ -347,8 +332,8 @@ static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
sg_offset = 0;
}
- iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov,
- cmd->nr_mapped, cmd->pdu_len);
+ iov_iter_bvec(&cmd->recv_msg.msg_iter, READ, cmd->iov,
+ nr_pages, cmd->pdu_len);
}
static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
@@ -926,7 +911,7 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
}
queue->rcv_state = NVMET_TCP_RECV_DATA;
- nvmet_tcp_map_pdu_iovec(cmd);
+ nvmet_tcp_build_pdu_iovec(cmd);
cmd->flags |= NVMET_TCP_F_INIT_FAILED;
}
@@ -952,7 +937,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
cmd->pdu_len = le32_to_cpu(data->data_length);
cmd->pdu_recv = 0;
- nvmet_tcp_map_pdu_iovec(cmd);
+ nvmet_tcp_build_pdu_iovec(cmd);
queue->cmd = cmd;
queue->rcv_state = NVMET_TCP_RECV_DATA;
@@ -1021,7 +1006,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
if (nvmet_tcp_need_data_in(queue->cmd)) {
if (nvmet_tcp_has_inline_data(queue->cmd)) {
queue->rcv_state = NVMET_TCP_RECV_DATA;
- nvmet_tcp_map_pdu_iovec(queue->cmd);
+ nvmet_tcp_build_pdu_iovec(queue->cmd);
return 0;
}
/* send back R2T */
@@ -1141,7 +1126,6 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
cmd->rbytes_done += ret;
}
- nvmet_tcp_unmap_pdu_iovec(cmd);
if (queue->data_digest) {
nvmet_tcp_prep_recv_ddgst(cmd);
return 0;
@@ -1411,7 +1395,6 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue)
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
{
nvmet_req_uninit(&cmd->req);
- nvmet_tcp_unmap_pdu_iovec(cmd);
nvmet_tcp_free_cmd_buffers(cmd);
}
@@ -1424,7 +1407,6 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
if (nvmet_tcp_need_data_in(cmd))
nvmet_req_uninit(&cmd->req);
- nvmet_tcp_unmap_pdu_iovec(cmd);
nvmet_tcp_free_cmd_buffers(cmd);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] nvmet-tcp: Don't map pages which can't come from HIGHMEM
2022-08-29 7:54 [PATCH] nvmet-tcp: Don't map pages which can't come from HIGHMEM Sagi Grimberg
@ 2022-08-30 21:40 ` Fabio M. De Francesco
0 siblings, 0 replies; 2+ messages in thread
From: Fabio M. De Francesco @ 2022-08-30 21:40 UTC (permalink / raw)
To: linux-nvme, Sagi Grimberg
Cc: linux-kernel, Christoph Hellwig, Keith Busch, Chaitanya Kulkarni,
James Smart, Ira Weiny, Venkataramanan Anirudh, Al Viro
On lunedì 29 agosto 2022 09:54:01 CEST Sagi Grimberg wrote:
> From: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
>
> kmap() is being deprecated in favor of kmap_local_page().[1]
>
> There are two main problems with kmap(): (1) It comes with an overhead as
> mapping space is restricted and protected by a global lock for
> synchronization and (2) it also requires global TLB invalidation when the
> kmap’s pool wraps and it might block when the mapping space is fully
> utilized until a slot becomes available.
>
> The pages which will be mapped are allocated in nvmet_tcp_map_data(),
> using the GFP_KERNEL flag. This assures that they cannot come from
> HIGHMEM. This imply that a straight page_address() can replace the kmap()
> of sg_page(sg) in nvmet_tcp_map_pdu_iovec(). As a side effect, we might
> also delete the field "nr_mapped" from struct "nvmet_tcp_cmd" because,
> after removing the kmap() calls, there would be no longer any need of it.
>
> In addition, there is no reason to use a kvec for the command receive
> data buffers iovec, use a bio_vec instead and let iov_iter handle the
> buffer mapping and data copy.
>
> Test with blktests on a QEMU/KVM x86_32 VM, 6GB RAM, booting a kernel with
> HIGHMEM64GB enabled.
>
> [1] "[PATCH] checkpatch: Add kmap and kmap_atomic to the deprecated
> list" https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com/
>
> Cc: Chaitanya Kulkarni <chaitanyak@nvidia.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Keith Busch <kbusch@kernel.org>
> Suggested-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
> [sagi: added bio_vec plus minor naming changes]
> Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
> ---
> drivers/nvme/target/tcp.c | 44 ++++++++++++---------------------------
> 1 file changed, 13 insertions(+), 31 deletions(-)
Hi Sagi,
Thanks for changing the code according to the suggestions from Christoph and
Al.
Differently from what I just wrote while thanking Al, and after Ira made me
notice that you are one of the maintainers, I decided to leave everything like
you did.
The only exceptions we'll be about sending a v2 with the "Suggested-by" tags
from Christoph and Al. In the meantime checkpatch warned that the alignment of
"nr_pages" doesn't match the open parenthesis, so I'm changing it too.
Again thanks,
Fabio
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-08-30 21:40 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-29 7:54 [PATCH] nvmet-tcp: Don't map pages which can't come from HIGHMEM Sagi Grimberg
2022-08-30 21:40 ` Fabio M. De Francesco
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).