All of lore.kernel.org
 help / color / mirror / Atom feed
From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
	Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 29/50] lustre: llite: Move free user pages
Date: Sun, 20 Mar 2022 09:30:43 -0400	[thread overview]
Message-ID: <1647783064-20688-30-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1647783064-20688-1-git-send-email-jsimmons@infradead.org>

From: Patrick Farrell <pfarrell@whamcloud.com>

It is incorrect to release our reference on the user pages
before we're done with them - We need to keep it until the
i/o is complete, otherwise we access them after releasing
our reference.  This has not caused any known bugs so far,
but it's still wrong.

So only drop these references when we free the aio struct,
which is only freed once i/o is complete.

Also rename free_user_pages to release_user_pages, because
it does not free them - it just releases our reference.

This also helps performance by moving free_user_pages to
the daemon threads.  This is a 5-10% boost.

This patch reduces i/o time in ms/GiB by:
Write: 18 ms/GiB
ead: 19 ms/GiB

Totals:
Write: 180 ms/GiB
Read: 178 ms/GiB

mpirun -np 1  $IOR -w -r -t 64M -b 64G -o ./iorfile --posix.odirect

With previous patches in series:
write           5183 MiB/s
read            5201 MiB/s

Plus this patch:
write           5702 MiB/s
read            5756 MiB/s

WC-bug-id: https://jira.whamcloud.com/browse/LU-13799
Lustre-commit: 7f9b8465bc1125e51 ("LU-13799 llite: Move free user pages")
Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/39443
Reviewed-by: Yingjin Qian <qian@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 fs/lustre/include/cl_object.h | 18 +++++++++++++++
 fs/lustre/llite/rw26.c        | 52 +++++++++----------------------------------
 fs/lustre/obdclass/cl_io.c    | 21 ++++++++++++++++-
 3 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/fs/lustre/include/cl_object.h b/fs/lustre/include/cl_object.h
index 9815b19..af708cc 100644
--- a/fs/lustre/include/cl_object.h
+++ b/fs/lustre/include/cl_object.h
@@ -2620,6 +2620,21 @@ struct cl_sync_io {
 	struct cl_dio_aio	*csi_aio;
 };
 
+
+/* direct IO pages */
+struct ll_dio_pages {
+	struct cl_dio_aio	*ldp_aio;
+	/*
+	 * page array to be written. we don't support
+	 * partial pages except the last one.
+	 */
+	struct page		**ldp_pages;
+	/* # of pages in the array. */
+	size_t			ldp_count;
+	/* the file offset of the first page. */
+	loff_t			ldp_file_offset;
+};
+
 /* To support Direct AIO */
 struct cl_dio_aio {
 	struct cl_sync_io	cda_sync;
@@ -2628,10 +2643,13 @@ struct cl_dio_aio {
 	struct kiocb		*cda_iocb;
 	ssize_t			cda_bytes;
 	struct cl_dio_aio	*cda_ll_aio;
+	struct ll_dio_pages	cda_dio_pages;
 	unsigned int		cda_no_aio_complete:1,
 				cda_no_aio_free:1;
 };
 
+void ll_release_user_pages(struct page **pages, int npages);
+
 /** @} cl_sync_io */
 
 /** \defgroup cl_env cl_env
diff --git a/fs/lustre/llite/rw26.c b/fs/lustre/llite/rw26.c
index 16cccfa..a5cdb01 100644
--- a/fs/lustre/llite/rw26.c
+++ b/fs/lustre/llite/rw26.c
@@ -150,22 +150,6 @@ static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
 	return result;
 }
 
-/*
- * ll_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- */
-static void ll_free_user_pages(struct page **pages, int npages)
-{
-	int i;
-
-	for (i = 0; i < npages; i++) {
-		if (!pages[i])
-			break;
-		put_page(pages[i]);
-	}
-	kvfree(pages);
-}
-
 static ssize_t ll_get_user_pages(int rw, struct iov_iter *iter,
 				struct page ***pages, ssize_t *npages,
 				size_t maxsize)
@@ -209,28 +193,15 @@ static unsigned long ll_iov_iter_alignment(struct iov_iter *i)
 	return res;
 }
 
-/* direct IO pages */
-struct ll_dio_pages {
-	struct cl_dio_aio	*ldp_aio;
-	/*
-	 * page array to be written. we don't support
-	 * partial pages except the last one.
-	 */
-	struct page		**ldp_pages;
-	/* # of pages in the array. */
-	size_t			ldp_count;
-	/* the file offset of the first page. */
-	loff_t			ldp_file_offset;
-};
-
 static int
 ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, size_t size,
-		   int rw, struct inode *inode, struct ll_dio_pages *pv)
+		   int rw, struct inode *inode, struct cl_dio_aio *aio)
 {
+	struct ll_dio_pages *pv = &aio->cda_dio_pages;
 	struct cl_page *page;
 	struct cl_2queue *queue = &io->ci_queue;
 	struct cl_object *obj = io->ci_obj;
-	struct cl_sync_io *anchor = &pv->ldp_aio->cda_sync;
+	struct cl_sync_io *anchor = &aio->cda_sync;
 	loff_t offset = pv->ldp_file_offset;
 	int io_pages = 0;
 	size_t page_size = cl_page_size(obj);
@@ -290,8 +261,7 @@ struct ll_dio_pages {
 		smp_mb();
 		rc = cl_io_submit_rw(env, io, iot, queue);
 		if (rc == 0) {
-			cl_page_list_splice(&queue->c2_qout,
-					&pv->ldp_aio->cda_pages);
+			cl_page_list_splice(&queue->c2_qout, &aio->cda_pages);
 		} else {
 			atomic_add(-queue->c2_qin.pl_nr,
 				   &anchor->csi_sync_nr);
@@ -371,7 +341,7 @@ static ssize_t ll_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	LASSERT(ll_aio->cda_iocb == iocb);
 
 	while (iov_iter_count(iter)) {
-		struct ll_dio_pages pvec = {};
+		struct ll_dio_pages *pvec;
 		struct page **pages;
 
 		count = min_t(size_t, iov_iter_count(iter), MAX_DIO_SIZE);
@@ -392,26 +362,26 @@ static ssize_t ll_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 			result = -ENOMEM;
 			goto out;
 		}
-		pvec.ldp_aio = ldp_aio;
+
+		pvec = &ldp_aio->cda_dio_pages;
 
 		result = ll_get_user_pages(rw, iter, &pages,
-					   &pvec.ldp_count, count);
+					   &pvec->ldp_count, count);
 		if (unlikely(result <= 0)) {
 			cl_sync_io_note(env, &ldp_aio->cda_sync, result);
 			goto out;
 		}
 
 		count = result;
-		pvec.ldp_file_offset = file_offset;
-		pvec.ldp_pages = pages;
+		pvec->ldp_file_offset = file_offset;
+		pvec->ldp_pages = pages;
 
 		result = ll_direct_rw_pages(env, io, count,
-					    rw, inode, &pvec);
+					    rw, inode, ldp_aio);
 		/* We've submitted pages and can now remove the extra
 		 * reference for that
 		 */
 		cl_sync_io_note(env, &ldp_aio->cda_sync, result);
-		ll_free_user_pages(pages, pvec.ldp_count);
 
 		if (unlikely(result < 0))
 			goto out;
diff --git a/fs/lustre/obdclass/cl_io.c b/fs/lustre/obdclass/cl_io.c
index 038ab4c..e4fc795 100644
--- a/fs/lustre/obdclass/cl_io.c
+++ b/fs/lustre/obdclass/cl_io.c
@@ -1139,8 +1139,11 @@ static void cl_aio_end(const struct lu_env *env, struct cl_sync_io *anchor)
 		aio->cda_iocb->ki_complete(aio->cda_iocb,
 					   ret ?: aio->cda_bytes, 0);
 
-	if (aio->cda_ll_aio)
+	if (aio->cda_ll_aio) {
+		ll_release_user_pages(aio->cda_dio_pages.ldp_pages,
+				      aio->cda_dio_pages.ldp_count);
 		cl_sync_io_note(env, &aio->cda_ll_aio->cda_sync, ret);
+	}
 }
 
 struct cl_dio_aio *cl_aio_alloc(struct kiocb *iocb, struct cl_object *obj,
@@ -1195,6 +1198,22 @@ void cl_aio_free(const struct lu_env *env, struct cl_dio_aio *aio)
 }
 EXPORT_SYMBOL(cl_aio_free);
 
+/*
+ * ll_release_user_pages - tear down page struct array
+ * @pages: array of page struct pointers underlying target buffer
+ */
+void ll_release_user_pages(struct page **pages, int npages)
+{
+	int i;
+
+	for (i = 0; i < npages; i++) {
+		if (!pages[i])
+			break;
+		put_page(pages[i]);
+	}
+	kvfree(pages);
+}
+
 /**
  * Indicate that transfer of a single page completed.
  */
-- 
1.8.3.1

_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org

  parent reply	other threads:[~2022-03-20 13:33 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-20 13:30 [lustre-devel] [PATCH 00/50] lustre: update to OpenSFS tree as of March 20, 2022 James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 01/50] lustre: type cleanups and remove debug statements James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 02/50] lustre: osc: Fix grant test for ARM James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 03/50] lnet: extend nids in struct lnet_msg James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 04/50] lnet: Change lnet_send() to take large-addr nids James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 05/50] lnet: use large nids in struct lnet_event James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 06/50] lnet: socklnd: prepare for new KSOCK_MSG type James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 07/50] lnet: socklnd: don't deref lnet_hdr in LNDs James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 08/50] lustre: sec: make client encryption compatible with ext4 James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 09/50] lustre: sec: allow subdir mount of encrypted dir James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 10/50] lustre: fld: repeat rpc in fld_client_rpc after EAGAIN James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 11/50] lustre: fld: don't obtain a slot for fld request James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 12/50] lustre: update version to 2.14.57 James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 13/50] lustre: llite: deadlock in ll_new_node() James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 14/50] lnet: o2iblnd: avoid static allocation for msg tx James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 15/50] lnet: separate lnet_hdr in msg from that in lnd James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 16/50] lnet: change lnet_hdr to store large nids James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 17/50] lnet: change lnet_prep_send to take net_processid James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 18/50] lnet: convert to struct lnet_process_id in lib-move James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 19/50] lnet: convert LNetGetID to return an large-addr pid James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 20/50] lnet: alter lnd_notify_peer_down() to take lnet_nid James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 21/50] lnet: socklnd: move lnet_hdr unpack into ->pro_unpack James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 22/50] lnet: socklnd: Change ksock_hello_msg to struct lnet_nid James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 23/50] lnet: socklnd: add hello message version 4 James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 24/50] lnet: Convert ping to support 16-bytes address James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 25/50] lnet: convert nids in lnet_parse to lnet_nid James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 26/50] lnet: change src_nid arg to lnet_parse() to 16byte James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 27/50] lnet: Fix NULL-deref in lnet_nidstr_r() James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 28/50] lnet: change lnet_del_route() to take lnet_nid James Simmons
2022-03-20 13:30 ` James Simmons [this message]
2022-03-20 13:30 ` [lustre-devel] [PATCH 30/50] lustre: llite: Do not get/put DIO pages James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 31/50] lustre: llite: Remove unnecessary page get/put James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 32/50] lustre: llite: LL_IOC_LMV_GETSTRIPE 'default' shows inherit layout James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 33/50] lustre: hsm: update size upon completion of data version James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 34/50] lustre: llite: Delay dput in ll_dirty_page_discard_warn James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 35/50] lnet: libcfs: Use FAIL_CHECK_QUIET for fake i/o James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 36/50] lnet: Avoid peer NI recovery for local interface James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 37/50] lustre: osc: add OBD_IOC_GETATTR support for osc James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 38/50] lustre: sec: present .fscrypt in subdir mount James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 39/50] lnet: improve hash distribution across CPTs James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 40/50] lustre: osc: osc_extent_wait() deadlock James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 41/50] lustre: quota: delete unused quota ID James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 42/50] lnet: Check LNET_NID_IS_ANY in LNET_NID_NET James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 43/50] lustre: llite: clear async errors on write commit sync James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 44/50] lnet: lnet_peer_data_present() memory leak James Simmons
2022-03-20 13:30 ` [lustre-devel] [PATCH 45/50] lnet: Don't use pref NI for reserved portal James Simmons
2022-03-20 13:31 ` [lustre-devel] [PATCH 46/50] lnet: o2iblnd: avoid memory copy for short msg James Simmons
2022-03-20 13:31 ` [lustre-devel] [PATCH 47/50] lustre: llite: set default LMV hash type with 2.12 MDS James Simmons
2022-03-20 13:31 ` [lustre-devel] [PATCH 48/50] lnet: Stop discovery on deleted peer NI James Simmons
2022-03-20 13:31 ` [lustre-devel] [PATCH 49/50] lustre: sec: fix DIO for encrypted files James Simmons
2022-03-20 13:31 ` [lustre-devel] [PATCH 50/50] lustre: ptlrpc: Use after free of 'conn' in rhashtable retry James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1647783064-20688-30-git-send-email-jsimmons@infradead.org \
    --to=jsimmons@infradead.org \
    --cc=adilger@whamcloud.com \
    --cc=green@whamcloud.com \
    --cc=lustre-devel@lists.lustre.org \
    --cc=neilb@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.