All of lore.kernel.org
 help / color / mirror / Atom feed
From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
	Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 01/15] lustre: osc: Notify server if cache discard takes a long time
Date: Wed,  7 Jul 2021 15:11:02 -0400	[thread overview]
Message-ID: <1625685076-1964-2-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1625685076-1964-1-git-send-email-jsimmons@infradead.org>

From: Oleg Drokin <green@whamcloud.com>

Discarding a large number of pages from a mapping under a
single lock can take a really long time (750GB takes over 170s).
Unlike read or write, a discard sends no stream of RPCs to the
server to prolong the DLM lock timeout, so the server may evict
the client because it does not see that progress is being made.

As such, send periodic "empty" RPCs to the server to show that the
client is still alive and working on the pages under the lock.

For compatibility reasons, the RPC is formed as a one-byte
OST_READ request with a special flag set to avoid doing
actual IO; older servers, however, still do the one-byte read.

WC-bug-id: https://jira.whamcloud.com/browse/LU-14711
Lustre-commit: 564070343ac4ccf4 ("LU-14711 osc: Notify server if cache discard takes a long time")
Signed-off-by: Oleg Drokin <green@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/43857
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Patrick Farrell <farr0186@gmail.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 fs/lustre/include/cl_object.h |  3 +++
 fs/lustre/osc/osc_cache.c     | 11 +++++++++
 fs/lustre/osc/osc_internal.h  |  1 +
 fs/lustre/osc/osc_request.c   | 54 +++++++++++++++++++++++++++++++++----------
 4 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/fs/lustre/include/cl_object.h b/fs/lustre/include/cl_object.h
index c615091..1495949 100644
--- a/fs/lustre/include/cl_object.h
+++ b/fs/lustre/include/cl_object.h
@@ -1919,6 +1919,9 @@ struct cl_io {
 			loff_t			ls_result;
 			int			ls_whence;
 		} ci_lseek;
+		struct cl_misc_io {
+			time64_t		lm_next_rpc_time;
+		} ci_misc;
 	} u;
 	struct cl_2queue	ci_queue;
 	size_t			ci_nob;
diff --git a/fs/lustre/osc/osc_cache.c b/fs/lustre/osc/osc_cache.c
index 8dd12b1..321e9d9 100644
--- a/fs/lustre/osc/osc_cache.c
+++ b/fs/lustre/osc/osc_cache.c
@@ -3186,6 +3186,15 @@ bool osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
 
 		if (!res)
 			break;
+
+		if (io->ci_type == CIT_MISC &&
+		    io->u.ci_misc.lm_next_rpc_time &&
+		    ktime_get_seconds() > io->u.ci_misc.lm_next_rpc_time) {
+			osc_send_empty_rpc(osc, idx << PAGE_SHIFT);
+			io->u.ci_misc.lm_next_rpc_time = ktime_get_seconds() +
+							 5 * obd_timeout / 16;
+		}
+
 		if (need_resched())
 			cond_resched();
 
@@ -3320,6 +3329,8 @@ int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
 
 	io->ci_obj = cl_object_top(osc2cl(osc));
 	io->ci_ignore_layout = 1;
+	io->u.ci_misc.lm_next_rpc_time = ktime_get_seconds() +
+					 5 * obd_timeout / 16;
 	result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
 	if (result != 0)
 		goto out;
diff --git a/fs/lustre/osc/osc_internal.h b/fs/lustre/osc/osc_internal.h
index 3b65f2d..d174691 100644
--- a/fs/lustre/osc/osc_internal.h
+++ b/fs/lustre/osc/osc_internal.h
@@ -87,6 +87,7 @@ int osc_ladvise_base(struct obd_export *exp, struct obdo *oa,
 int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg);
 int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		  struct list_head *ext_list, int cmd);
+void osc_send_empty_rpc(struct osc_object *osc, pgoff_t start);
 unsigned long osc_lru_reserve(struct client_obd *cli, unsigned long npages);
 void osc_lru_unreserve(struct client_obd *cli, unsigned long npages);
 
diff --git a/fs/lustre/osc/osc_request.c b/fs/lustre/osc/osc_request.c
index 0d590ed..2b2ee83 100644
--- a/fs/lustre/osc/osc_request.c
+++ b/fs/lustre/osc/osc_request.c
@@ -1399,21 +1399,23 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,
 	struct brw_page *pg_prev;
 	void *short_io_buf;
 	const char *obd_name = cli->cl_import->imp_obd->obd_name;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	bool directio = false;
 
-	inode = page2inode(pga[0]->pg);
-	if (!inode) {
-		/* Try to get reference to inode from cl_page if we are
-		 * dealing with direct IO, as handled pages are not
-		 * actual page cache pages.
-		 */
-		struct osc_async_page *oap = brw_page2oap(pga[0]);
-		struct cl_page *clpage = oap2cl_page(oap);
+	if (pga[0]->pg) {
+		inode = page2inode(pga[0]->pg);
+		if (!inode) {
+			/* Try to get reference to inode from cl_page if we are
+			 * dealing with direct IO, as handled pages are not
+			 * actual page cache pages.
+			 */
+			struct osc_async_page *oap = brw_page2oap(pga[0]);
+			struct cl_page *clpage = oap2cl_page(oap);
 
-		inode = clpage->cp_inode;
-		if (inode)
-			directio = true;
+			inode = clpage->cp_inode;
+			if (inode)
+				directio = true;
+		}
 	}
 	if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ))
 		return -ENOMEM; /* Recoverable */
@@ -2666,6 +2668,34 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 	return rc;
 }
 
+/* This is to refresh our lock in face of no RPCs. */
+void osc_send_empty_rpc(struct osc_object *osc, pgoff_t start)
+{
+	struct ptlrpc_request *req;
+	struct obdo oa;
+	struct brw_page bpg = { .off = start, .count = 1};
+	struct brw_page *pga = &bpg;
+	int rc;
+
+	memset(&oa, 0, sizeof(oa));
+	oa.o_oi = osc->oo_oinfo->loi_oi;
+	oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLFLAGS;
+	/* For updated servers - don't do a read */
+	oa.o_flags = OBD_FL_NORPC;
+
+	rc = osc_brw_prep_request(OBD_BRW_READ, osc_cli(osc), &oa, 1, &pga,
+				  &req, 0);
+
+	/* If we succeeded we ship it off, if not there's no point in doing
+	 * anything. Also no resends.
+	 * No interpret callback, no commit callback.
+	 */
+	if (!rc) {
+		req->rq_no_resend = 1;
+		ptlrpcd_add_req(req);
+	}
+}
+
 static int osc_set_lock_data(struct ldlm_lock *lock, void *data)
 {
 	int set = 0;
-- 
1.8.3.1

_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org

  reply	other threads:[~2021-07-07 19:11 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-07 19:11 [lustre-devel] [PATCH 00/15] lustre: updates to OpenSFS tree as of July 7 2021 James Simmons
2021-07-07 19:11 ` James Simmons [this message]
2021-07-07 19:11 ` [lustre-devel] [PATCH 02/15] lustre: osc: Move shrink update to per-write James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 03/15] lustre: client: don't panic for mgs evictions James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 04/15] lnet: Add health ping stats James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 05/15] lnet: Ensure ref taken when queueing for discovery James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 06/15] lnet: Correct distance calculation of local NIDs James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 07/15] lnet: socklnd: detect link state to set fatal error on ni James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 08/15] lustre: mdt: New connect flag for non-open-by-fid lock request James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 09/15] lustre: obdclass: Wake up entire queue of requests on close completion James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 10/15] lnet: add netlink infrastructure James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 11/15] lustre: llite: parallelize direct i/o issuance James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 12/15] lustre: osc: Don't get time for each page James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 13/15] lustre: clio: Implement real list splice James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 14/15] lustre: osc: Simplify clipping for transient pages James Simmons
2021-07-07 19:11 ` [lustre-devel] [PATCH 15/15] lustre: mgc: configurable wait-to-reprocess time James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1625685076-1964-2-git-send-email-jsimmons@infradead.org \
    --to=jsimmons@infradead.org \
    --cc=adilger@whamcloud.com \
    --cc=green@whamcloud.com \
    --cc=lustre-devel@lists.lustre.org \
    --cc=neilb@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.