From: xiubli@redhat.com
To: idryomov@gmail.com, ceph-devel@vger.kernel.org
Cc: jlayton@kernel.org, lhenriques@suse.de, vshankar@redhat.com,
	mchangir@redhat.com, Al Viro <viro@zeniv.linux.org.uk>,
	David Howells <dhowells@redhat.com>, Xiubo Li <xiubli@redhat.com>
Subject: [PATCH v16 10/68] libceph: add new iov_iter-based ceph_msg_data_type and ceph_osd_data_type
Date: Mon, 27 Feb 2023 11:27:15 +0800
Message-ID: <20230227032813.337906-11-xiubli@redhat.com>
In-Reply-To: <20230227032813.337906-1-xiubli@redhat.com>

From: Jeff Layton <jlayton@kernel.org>

Add an iov_iter to the unions in ceph_msg_data and ceph_msg_data_cursor.
Instead of requiring a list of pages or bvecs, we can just use an
iov_iter directly and avoid extra allocations.
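
For example, a caller that already has its data described by an
iov_iter can attach it to a request without building a page array.
A minimal sketch (not part of this patch; "req", "bvecs", "nr_bvecs"
and "len" are placeholder variables):

	struct iov_iter iter;

	/*
	 * Wrap an existing bio_vec array in an iov_iter.  ITER_DEST
	 * marks it as a read destination; any iterator over pinned
	 * pages would work the same way.
	 */
	iov_iter_bvec(&iter, ITER_DEST, bvecs, nr_bvecs, len);

	/* Attach the iterator to op 0 of the OSD request. */
	osd_req_op_extent_osd_iter(req, 0, &iter);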

We assume that the pages represented by the iter are pinned such that
they shouldn't incur page faults, which is the case for the iov_iters
created by netfs.

While working on this, Al Viro informed me that he was going to change
iov_iter_get_pages to auto-advance the iterator, as that pattern is
more or less required for ITER_PIPE anyway. The iov_iter_get_pages2
variant used here already behaves that way, so the _next op records in
the "lastlen" field how far the iterator has been advanced beyond what
the caller has consumed.

In the event that _next is called twice without an intervening
_advance, we revert the iov_iter by the remaining lastlen before
calling iov_iter_get_pages2.
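
As a concrete example (the numbers are illustrative, not from the
patch): if _next pins a 4096-byte page and the messenger then consumes
only 1024 bytes, iov_iter_get_pages2 has already advanced the iter by
4096, so _advance(1024) just drops lastlen to 3072. A subsequent _next
first reverts the iter by those 3072 bytes, so the same page is
re-pinned at the correct offset. Once consumption catches up with the
pinned length, lastlen reaches 0 and _advance moves the iter normally.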

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 include/linux/ceph/messenger.h  |  8 ++++
 include/linux/ceph/osd_client.h |  4 ++
 net/ceph/messenger.c            | 78 +++++++++++++++++++++++++++++++++
 net/ceph/osd_client.c           | 27 ++++++++++++
 4 files changed, 117 insertions(+)

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 9fd7255172ad..2eaaabbe98cb 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -123,6 +123,7 @@ enum ceph_msg_data_type {
 	CEPH_MSG_DATA_BIO,	/* data source/destination is a bio list */
 #endif /* CONFIG_BLOCK */
 	CEPH_MSG_DATA_BVECS,	/* data source/destination is a bio_vec array */
+	CEPH_MSG_DATA_ITER,	/* data source/destination is an iov_iter */
 };
 
 #ifdef CONFIG_BLOCK
@@ -224,6 +225,7 @@ struct ceph_msg_data {
 			bool		own_pages;
 		};
 		struct ceph_pagelist	*pagelist;
+		struct iov_iter		iter;
 	};
 };
 
@@ -248,6 +250,10 @@ struct ceph_msg_data_cursor {
 			struct page	*page;		/* page from list */
 			size_t		offset;		/* bytes from list */
 		};
+		struct {
+			struct iov_iter		iov_iter;
+			unsigned int		lastlen;
+		};
 	};
 };
 
@@ -605,6 +611,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
 #endif /* CONFIG_BLOCK */
 void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
 			     struct ceph_bvec_iter *bvec_pos);
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+			    struct iov_iter *iter);
 
 struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
 			       gfp_t flags, bool can_fail);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 460881c93f9a..680b4a70f6d4 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -108,6 +108,7 @@ enum ceph_osd_data_type {
 	CEPH_OSD_DATA_TYPE_BIO,
 #endif /* CONFIG_BLOCK */
 	CEPH_OSD_DATA_TYPE_BVECS,
+	CEPH_OSD_DATA_TYPE_ITER,
 };
 
 struct ceph_osd_data {
@@ -131,6 +132,7 @@ struct ceph_osd_data {
 			struct ceph_bvec_iter	bvec_pos;
 			u32			num_bvecs;
 		};
+		struct iov_iter		iter;
 	};
 };
 
@@ -501,6 +503,8 @@ void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
 void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
 					 unsigned int which,
 					 struct ceph_bvec_iter *bvec_pos);
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+				unsigned int which, struct iov_iter *iter);
 
 extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
 					unsigned int which,
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 7403a3a133cd..45f61c471923 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -965,6 +965,63 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
 	return true;
 }
 
+static void ceph_msg_data_iter_cursor_init(struct ceph_msg_data_cursor *cursor,
+					size_t length)
+{
+	struct ceph_msg_data *data = cursor->data;
+
+	cursor->iov_iter = data->iter;
+	cursor->lastlen = 0;
+	iov_iter_truncate(&cursor->iov_iter, length);
+	cursor->resid = iov_iter_count(&cursor->iov_iter);
+}
+
+static struct page *ceph_msg_data_iter_next(struct ceph_msg_data_cursor *cursor,
+						size_t *page_offset,
+						size_t *length)
+{
+	struct page *page;
+	ssize_t len;
+
+	if (cursor->lastlen)
+		iov_iter_revert(&cursor->iov_iter, cursor->lastlen);
+
+	len = iov_iter_get_pages2(&cursor->iov_iter, &page, PAGE_SIZE,
+				  1, page_offset);
+	BUG_ON(len < 0);
+
+	cursor->lastlen = len;
+
+	/*
+	 * FIXME: The assumption is that the pages represented by the iov_iter
+	 *	  are pinned, with the references held by the upper-level
+	 *	  callers, or by virtue of being under writeback. Eventually,
+	 *	  we'll get an iov_iter_get_pages2 variant that doesn't take page
+	 *	  refs. Until then, just put the page ref.
+	 */
+	VM_BUG_ON_PAGE(!PageWriteback(page) && page_count(page) < 2, page);
+	put_page(page);
+
+	*length = min_t(size_t, len, cursor->resid);
+	return page;
+}
+
+static bool ceph_msg_data_iter_advance(struct ceph_msg_data_cursor *cursor,
+					size_t bytes)
+{
+	BUG_ON(bytes > cursor->resid);
+	cursor->resid -= bytes;
+
+	if (bytes < cursor->lastlen) {
+		cursor->lastlen -= bytes;
+	} else {
+		iov_iter_advance(&cursor->iov_iter, bytes - cursor->lastlen);
+		cursor->lastlen = 0;
+	}
+
+	return cursor->resid;
+}
+
 /*
  * Message data is handled (sent or received) in pieces, where each
  * piece resides on a single page.  The network layer might not
@@ -992,6 +1049,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
 	case CEPH_MSG_DATA_BVECS:
 		ceph_msg_data_bvecs_cursor_init(cursor, length);
 		break;
+	case CEPH_MSG_DATA_ITER:
+		ceph_msg_data_iter_cursor_init(cursor, length);
+		break;
 	case CEPH_MSG_DATA_NONE:
 	default:
 		/* BUG(); */
@@ -1039,6 +1099,9 @@ struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
 	case CEPH_MSG_DATA_BVECS:
 		page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
 		break;
+	case CEPH_MSG_DATA_ITER:
+		page = ceph_msg_data_iter_next(cursor, page_offset, length);
+		break;
 	case CEPH_MSG_DATA_NONE:
 	default:
 		page = NULL;
@@ -1077,6 +1140,9 @@ void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes)
 	case CEPH_MSG_DATA_BVECS:
 		new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
 		break;
+	case CEPH_MSG_DATA_ITER:
+		new_piece = ceph_msg_data_iter_advance(cursor, bytes);
+		break;
 	case CEPH_MSG_DATA_NONE:
 	default:
 		BUG();
@@ -1875,6 +1941,18 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
 }
 EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
 
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+			    struct iov_iter *iter)
+{
+	struct ceph_msg_data *data;
+
+	data = ceph_msg_data_add(msg);
+	data->type = CEPH_MSG_DATA_ITER;
+	data->iter = *iter;
+
+	msg->data_length += iov_iter_count(&data->iter);
+}
+
 /*
  * construct a new message with given type, size
  * the new msg has a ref count of 1.
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 8534ca9c39b9..9138abc07f45 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -171,6 +171,13 @@ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
 	osd_data->num_bvecs = num_bvecs;
 }
 
+static void ceph_osd_iter_init(struct ceph_osd_data *osd_data,
+			       struct iov_iter *iter)
+{
+	osd_data->type = CEPH_OSD_DATA_TYPE_ITER;
+	osd_data->iter = *iter;
+}
+
 static struct ceph_osd_data *
 osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
 {
@@ -264,6 +271,22 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
 }
 EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
 
+/**
+ * osd_req_op_extent_osd_iter - Set up an operation with an iterator buffer
+ * @osd_req: The request to set up
+ * @which: Index of the operation in which to set the iter
+ * @iter: The buffer iterator
+ */
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+				unsigned int which, struct iov_iter *iter)
+{
+	struct ceph_osd_data *osd_data;
+
+	osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+	ceph_osd_iter_init(osd_data, iter);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_iter);
+
 static void osd_req_op_cls_request_info_pagelist(
 			struct ceph_osd_request *osd_req,
 			unsigned int which, struct ceph_pagelist *pagelist)
@@ -346,6 +369,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
 #endif /* CONFIG_BLOCK */
 	case CEPH_OSD_DATA_TYPE_BVECS:
 		return osd_data->bvec_pos.iter.bi_size;
+	case CEPH_OSD_DATA_TYPE_ITER:
+		return iov_iter_count(&osd_data->iter);
 	default:
 		WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
 		return 0;
@@ -954,6 +979,8 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
 #endif
 	} else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) {
 		ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos);
+	} else if (osd_data->type == CEPH_OSD_DATA_TYPE_ITER) {
+		ceph_msg_data_add_iter(msg, &osd_data->iter);
 	} else {
 		BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
 	}
-- 
2.31.1


Thread overview: 84+ messages
2023-02-27  3:27 [PATCH v16 00/68] ceph+fscrypt: full support xiubli
2023-02-27  3:27 ` [PATCH v16 01/68] libceph: add spinlock around osd->o_requests xiubli
2023-02-27  3:27 ` [PATCH v16 02/68] libceph: define struct ceph_sparse_extent and add some helpers xiubli
2023-02-27  3:27 ` [PATCH v16 03/68] libceph: add sparse read support to msgr2 crc state machine xiubli
2023-02-27  3:27 ` [PATCH v16 04/68] libceph: add sparse read support to OSD client xiubli
2023-02-27  3:27 ` [PATCH v16 05/68] libceph: support sparse reads on msgr2 secure codepath xiubli
2023-02-27  3:27 ` [PATCH v16 06/68] libceph: add sparse read support to msgr1 xiubli
2023-02-27  3:27 ` [PATCH v16 07/68] ceph: add new mount option to enable sparse reads xiubli
2023-02-27  3:27 ` [PATCH v16 08/68] ceph: preallocate inode for ops that may create one xiubli
2023-02-27  3:27 ` [PATCH v16 09/68] ceph: make ceph_msdc_build_path use ref-walk xiubli
2023-02-27  3:27 ` xiubli [this message]
2023-02-27  3:27 ` [PATCH v16 11/68] ceph: use osd_req_op_extent_osd_iter for netfs reads xiubli
2023-02-27  3:27 ` [PATCH v16 12/68] ceph: fscrypt_auth handling for ceph xiubli
2023-02-27  3:27 ` [PATCH v16 13/68] ceph: ensure that we accept a new context from MDS for new inodes xiubli
2023-02-27  3:27 ` [PATCH v16 14/68] ceph: add support for fscrypt_auth/fscrypt_file to cap messages xiubli
2023-02-27  3:27 ` [PATCH v16 15/68] ceph: implement -o test_dummy_encryption mount option xiubli
2023-02-27  3:27 ` [PATCH v16 16/68] ceph: decode alternate_name in lease info xiubli
2023-02-27  3:27 ` [PATCH v16 17/68] ceph: add fscrypt ioctls xiubli
2023-02-27  3:27 ` [PATCH v16 18/68] ceph: make the ioctl cmd more readable in debug log xiubli
2023-02-27  3:27 ` [PATCH v16 19/68] ceph: add base64 endcoding routines for encrypted names xiubli
2023-02-27  3:27 ` [PATCH v16 20/68] ceph: add encrypted fname handling to ceph_mdsc_build_path xiubli
2023-02-27  3:27 ` [PATCH v16 21/68] ceph: send altname in MClientRequest xiubli
2023-02-27  3:27 ` [PATCH v16 22/68] ceph: encode encrypted name in dentry release xiubli
2023-02-27  3:27 ` [PATCH v16 23/68] ceph: properly set DCACHE_NOKEY_NAME flag in lookup xiubli
2023-02-27  3:27 ` [PATCH v16 24/68] ceph: set DCACHE_NOKEY_NAME in atomic open xiubli
2023-02-27  3:27 ` [PATCH v16 25/68] ceph: make d_revalidate call fscrypt revalidator for encrypted dentries xiubli
2023-03-07 18:53   ` Luís Henriques
2023-03-08  1:50     ` Xiubo Li
2023-03-08  9:29       ` Luís Henriques
2023-03-08 10:42         ` Xiubo Li
2023-03-08 17:14           ` Luís Henriques
2023-03-08 17:54             ` Jeff Layton
2023-03-08 18:30               ` Luís Henriques
2023-03-08 19:32                 ` Jeff Layton
2023-03-09  9:52                   ` Luís Henriques
2023-03-09  7:06             ` Xiubo Li
2023-03-09  9:55               ` Luís Henriques
2023-03-09 11:41                 ` Xiubo Li
2023-02-27  3:27 ` [PATCH v16 26/68] ceph: add helpers for converting names for userland presentation xiubli
2023-02-27  3:27 ` [PATCH v16 27/68] ceph: fix base64 encoded name's length check in ceph_fname_to_usr() xiubli
2023-02-27  3:27 ` [PATCH v16 28/68] ceph: add fscrypt support to ceph_fill_trace xiubli
2023-02-27  3:27 ` [PATCH v16 29/68] ceph: pass the request to parse_reply_info_readdir() xiubli
2023-02-27  3:27 ` [PATCH v16 30/68] ceph: add ceph_encode_encrypted_dname() helper xiubli
2023-02-27  3:27 ` [PATCH v16 31/68] ceph: add support to readdir for encrypted filenames xiubli
2023-02-27  3:27 ` [PATCH v16 32/68] ceph: create symlinks with encrypted and base64-encoded targets xiubli
2023-02-27  3:27 ` [PATCH v16 33/68] ceph: make ceph_get_name decrypt filenames xiubli
2023-02-27  3:27 ` [PATCH v16 34/68] ceph: add a new ceph.fscrypt.auth vxattr xiubli
2023-02-27  3:27 ` [PATCH v16 35/68] ceph: add some fscrypt guardrails xiubli
2023-02-27  3:27 ` [PATCH v16 36/68] ceph: allow encrypting a directory while not having Ax caps xiubli
2023-02-27  3:27 ` [PATCH v16 37/68] ceph: mark directory as non-complete after loading key xiubli
2023-02-27  3:27 ` [PATCH v16 38/68] ceph: don't allow changing layout on encrypted files/directories xiubli
2023-02-27  3:27 ` [PATCH v16 39/68] libceph: add CEPH_OSD_OP_ASSERT_VER support xiubli
2023-02-27  3:27 ` [PATCH v16 40/68] ceph: size handling for encrypted inodes in cap updates xiubli
2023-02-27  3:27 ` [PATCH v16 41/68] ceph: fscrypt_file field handling in MClientRequest messages xiubli
2023-02-27  3:27 ` [PATCH v16 42/68] ceph: get file size from fscrypt_file when present in inode traces xiubli
2023-02-27  3:27 ` [PATCH v16 43/68] ceph: handle fscrypt fields in cap messages from MDS xiubli
2023-02-27  3:27 ` [PATCH v16 44/68] ceph: update WARN_ON message to pr_warn xiubli
2023-02-27  3:27 ` [PATCH v16 45/68] ceph: add __ceph_get_caps helper support xiubli
2023-02-27  3:27 ` [PATCH v16 46/68] ceph: add __ceph_sync_read " xiubli
2023-02-27  3:27 ` [PATCH v16 47/68] ceph: add object version support for sync read xiubli
2023-02-27  3:27 ` [PATCH v16 48/68] ceph: add infrastructure for file encryption and decryption xiubli
2023-02-27  3:27 ` [PATCH v16 49/68] ceph: add truncate size handling support for fscrypt xiubli
2023-02-27  3:27 ` [PATCH v16 50/68] libceph: allow ceph_osdc_new_request to accept a multi-op read xiubli
2023-02-27  3:27 ` [PATCH v16 51/68] ceph: disable fallocate for encrypted inodes xiubli
2023-02-27  3:27 ` [PATCH v16 52/68] ceph: disable copy offload on " xiubli
2023-02-27  3:27 ` [PATCH v16 53/68] ceph: don't use special DIO path for " xiubli
2023-02-27  3:27 ` [PATCH v16 54/68] ceph: align data in pages in ceph_sync_write xiubli
2023-02-27  3:28 ` [PATCH v16 55/68] ceph: add read/modify/write to ceph_sync_write xiubli
2023-02-27  3:28 ` [PATCH v16 56/68] ceph: plumb in decryption during sync reads xiubli
2023-02-27  3:28 ` [PATCH v16 57/68] ceph: add fscrypt decryption support to ceph_netfs_issue_op xiubli
2023-02-27  3:28 ` [PATCH v16 58/68] ceph: set i_blkbits to crypto block size for encrypted inodes xiubli
2023-02-27  3:28 ` [PATCH v16 59/68] ceph: add encryption support to writepage xiubli
2023-02-27  3:28 ` [PATCH v16 60/68] ceph: fscrypt support for writepages xiubli
2023-02-27  3:28 ` [PATCH v16 61/68] ceph: invalidate pages when doing direct/sync writes xiubli
2023-02-27  3:28 ` [PATCH v16 62/68] ceph: add support for encrypted snapshot names xiubli
2023-02-27  3:28 ` [PATCH v16 63/68] ceph: add support for handling " xiubli
2023-02-27  3:28 ` [PATCH v16 64/68] ceph: update documentation regarding snapshot naming limitations xiubli
2023-02-27  3:28 ` [PATCH v16 65/68] ceph: prevent snapshots to be created in encrypted locked directories xiubli
2023-02-27  3:28 ` [PATCH v16 66/68] ceph: report STATX_ATTR_ENCRYPTED on encrypted inodes xiubli
2023-02-27  3:28 ` [PATCH v16 67/68] libceph: defer removing the req from osdc just after req->r_callback xiubli
2023-02-27  3:28 ` [PATCH v16 68/68] ceph: drop the messages from MDS when unmounting xiubli
2023-02-27  9:27 ` [PATCH v16 00/68] ceph+fscrypt: full support Luís Henriques
2023-02-27  9:58   ` Xiubo Li
2023-02-27 10:30     ` Luís Henriques
