All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Howells <dhowells@redhat.com>
To: netdev@vger.kernel.org
Cc: David Howells <dhowells@redhat.com>,
	Alexander Duyck <alexander.duyck@gmail.com>,
	"David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Willem de Bruijn <willemdebruijn.kernel@gmail.com>,
	David Ahern <dsahern@kernel.org>,
	Matthew Wilcox <willy@infradead.org>,
	Jens Axboe <axboe@kernel.dk>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Ilya Dryomov <idryomov@gmail.com>, Xiubo Li <xiubli@redhat.com>,
	Jeff Layton <jlayton@kernel.org>,
	ceph-devel@vger.kernel.org
Subject: [PATCH net-next v4 04/15] ceph: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage()
Date: Fri, 23 Jun 2023 12:44:14 +0100	[thread overview]
Message-ID: <20230623114425.2150536-5-dhowells@redhat.com> (raw)
In-Reply-To: <20230623114425.2150536-1-dhowells@redhat.com>

Use sendmsg() and MSG_SPLICE_PAGES rather than sendpage in ceph when
transmitting data.  For the moment, this can only transmit one page at a
time because of the architecture of net/ceph/, but if
write_partial_message_data() can be given a bvec[] at a time by the
iteration code, this would allow pages to be sent in a batch.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Xiubo Li <xiubli@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: "David S. Miller" <davem@davemloft.net>
cc: Eric Dumazet <edumazet@google.com>
cc: Jakub Kicinski <kuba@kernel.org>
cc: Paolo Abeni <pabeni@redhat.com>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
cc: ceph-devel@vger.kernel.org
cc: netdev@vger.kernel.org
---
 net/ceph/messenger_v2.c | 91 +++++++++--------------------------------
 1 file changed, 19 insertions(+), 72 deletions(-)

diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index 301a991dc6a6..87ac97073e75 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -117,91 +117,38 @@ static int ceph_tcp_recv(struct ceph_connection *con)
 	return ret;
 }
 
-static int do_sendmsg(struct socket *sock, struct iov_iter *it)
-{
-	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
-	int ret;
-
-	msg.msg_iter = *it;
-	while (iov_iter_count(it)) {
-		ret = sock_sendmsg(sock, &msg);
-		if (ret <= 0) {
-			if (ret == -EAGAIN)
-				ret = 0;
-			return ret;
-		}
-
-		iov_iter_advance(it, ret);
-	}
-
-	WARN_ON(msg_data_left(&msg));
-	return 1;
-}
-
-static int do_try_sendpage(struct socket *sock, struct iov_iter *it)
-{
-	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
-	struct bio_vec bv;
-	int ret;
-
-	if (WARN_ON(!iov_iter_is_bvec(it)))
-		return -EINVAL;
-
-	while (iov_iter_count(it)) {
-		/* iov_iter_iovec() for ITER_BVEC */
-		bvec_set_page(&bv, it->bvec->bv_page,
-			      min(iov_iter_count(it),
-				  it->bvec->bv_len - it->iov_offset),
-			      it->bvec->bv_offset + it->iov_offset);
-
-		/*
-		 * sendpage cannot properly handle pages with
-		 * page_count == 0, we need to fall back to sendmsg if
-		 * that's the case.
-		 *
-		 * Same goes for slab pages: skb_can_coalesce() allows
-		 * coalescing neighboring slab objects into a single frag
-		 * which triggers one of hardened usercopy checks.
-		 */
-		if (sendpage_ok(bv.bv_page)) {
-			ret = sock->ops->sendpage(sock, bv.bv_page,
-						  bv.bv_offset, bv.bv_len,
-						  CEPH_MSG_FLAGS);
-		} else {
-			iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, bv.bv_len);
-			ret = sock_sendmsg(sock, &msg);
-		}
-		if (ret <= 0) {
-			if (ret == -EAGAIN)
-				ret = 0;
-			return ret;
-		}
-
-		iov_iter_advance(it, ret);
-	}
-
-	return 1;
-}
-
 /*
  * Write as much as possible.  The socket is expected to be corked,
- * so we don't bother with MSG_MORE/MSG_SENDPAGE_NOTLAST here.
+ * so we don't bother with MSG_MORE here.
  *
  * Return:
- *   1 - done, nothing (else) to write
+ *  >0 - done, nothing (else) to write
  *   0 - socket is full, need to wait
  *  <0 - error
  */
 static int ceph_tcp_send(struct ceph_connection *con)
 {
+	struct msghdr msg = {
+		.msg_iter	= con->v2.out_iter,
+		.msg_flags	= CEPH_MSG_FLAGS,
+	};
 	int ret;
 
+	if (WARN_ON(!iov_iter_is_bvec(&con->v2.out_iter)))
+		return -EINVAL;
+
+	if (con->v2.out_iter_sendpage)
+		msg.msg_flags |= MSG_SPLICE_PAGES;
+
 	dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
 	     iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
-	if (con->v2.out_iter_sendpage)
-		ret = do_try_sendpage(con->sock, &con->v2.out_iter);
-	else
-		ret = do_sendmsg(con->sock, &con->v2.out_iter);
+
+	ret = sock_sendmsg(con->sock, &msg);
+	if (ret > 0)
+		iov_iter_advance(&con->v2.out_iter, ret);
+	else if (ret == -EAGAIN)
+		ret = 0;
+
 	dout("%s con %p ret %d left %zu\n", __func__, con, ret,
 	     iov_iter_count(&con->v2.out_iter));
 	return ret;


  parent reply	other threads:[~2023-06-23 11:45 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-23 11:44 [PATCH net-next v4 00/15] splice, net: Switch over users of sendpage() and remove it David Howells
2023-06-23 11:44 ` [PATCH net-next v4 01/15] tcp_bpf, smc, tls, espintcp, siw: Reduce MSG_SENDPAGE_NOTLAST usage David Howells
2023-06-23 11:44 ` [PATCH net-next v4 02/15] net: Use sendmsg(MSG_SPLICE_PAGES) not sendpage in skb_send_sock() David Howells
2023-06-23 11:44 ` [PATCH net-next v4 03/15] ceph: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage David Howells
2023-06-23 17:00   ` Jakub Kicinski
2023-06-23 19:43   ` David Howells
2023-06-23 11:44 ` David Howells [this message]
2023-06-23 11:44 ` [PATCH net-next v4 05/15] rds: " David Howells
2023-06-23 11:44 ` [PATCH net-next v4 06/15] dlm: " David Howells
2023-06-23 11:44   ` [Cluster-devel] " David Howells
2023-06-23 11:44 ` [PATCH net-next v4 07/15] nvme-tcp: Use sendmsg(MSG_SPLICE_PAGES) rather then sendpage David Howells
2023-06-23 11:44 ` [PATCH net-next v4 08/15] nvmet-tcp: " David Howells
2023-06-23 11:44 ` [PATCH net-next v4 09/15] smc: Drop smc_sendpage() in favour of smc_sendmsg() + MSG_SPLICE_PAGES David Howells
2023-06-23 11:44 ` [PATCH net-next v4 10/15] drbd: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage() David Howells
2023-06-23 11:44 ` [PATCH net-next v4 11/15] iscsi: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage David Howells
2023-06-23 16:03   ` Mike Christie
2023-06-23 19:45   ` David Howells
2023-06-23 11:44 ` [PATCH net-next v4 12/15] ocfs2: Fix use of slab data with sendpage David Howells
2023-06-23 11:44   ` [Ocfs2-devel] " David Howells via Ocfs2-devel
2023-06-23 11:44 ` [PATCH net-next v4 13/15] ocfs2: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage() David Howells
2023-06-23 11:44   ` [Ocfs2-devel] " David Howells via Ocfs2-devel
2023-06-23 11:44 ` [PATCH net-next v4 14/15] sock: Remove ->sendpage*() in favour of sendmsg(MSG_SPLICE_PAGES) David Howells
2023-06-23 11:44   ` David Howells
2023-06-23 11:44   ` David Howells
2023-06-23 11:44   ` David Howells
2023-06-23 11:44 ` [PATCH net-next v4 15/15] net: Kill MSG_SENDPAGE_NOTLAST David Howells
2023-06-23 11:44   ` David Howells
2023-06-23 11:44   ` David Howells
2023-06-23 11:44   ` David Howells

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230623114425.2150536-5-dhowells@redhat.com \
    --to=dhowells@redhat.com \
    --cc=alexander.duyck@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=ceph-devel@vger.kernel.org \
    --cc=davem@davemloft.net \
    --cc=dsahern@kernel.org \
    --cc=edumazet@google.com \
    --cc=idryomov@gmail.com \
    --cc=jlayton@kernel.org \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=willemdebruijn.kernel@gmail.com \
    --cc=willy@infradead.org \
    --cc=xiubli@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.