lustre-devel-lustre.org archive mirror
 help / color / mirror / Atom feed
From: James Simmons <jsimmons@infradead.org>
To: Andreas Dilger <adilger@whamcloud.com>,
	Oleg Drokin <green@whamcloud.com>, NeilBrown <neilb@suse.de>
Cc: Lustre Development List <lustre-devel@lists.lustre.org>
Subject: [lustre-devel] [PATCH 22/41] lustre: obdclass: try to skip corrupted llog records
Date: Sun,  4 Apr 2021 20:50:51 -0400	[thread overview]
Message-ID: <1617583870-32029-23-git-send-email-jsimmons@infradead.org> (raw)
In-Reply-To: <1617583870-32029-1-git-send-email-jsimmons@infradead.org>

From: Alex Zhuravlev <bzzz@whamcloud.com>

if llog's header or record is found corrupted, then
ignore the remaining records and try with the next one.

WC-bug-id: https://jira.whamcloud.com/browse/LU-14098
Lustre-commit: 910eb97c1b43a44 ("LU-14098 obdclass: try to skip corrupted llog records")
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/40754
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
---
 fs/lustre/obdclass/llog.c          | 76 ++++++++++++++++++++++++++++++--------
 fs/lustre/obdclass/llog_cat.c      | 14 +++----
 fs/lustre/obdclass/llog_internal.h |  5 +++
 3 files changed, 72 insertions(+), 23 deletions(-)

diff --git a/fs/lustre/obdclass/llog.c b/fs/lustre/obdclass/llog.c
index e172ebc..7668d51 100644
--- a/fs/lustre/obdclass/llog.c
+++ b/fs/lustre/obdclass/llog.c
@@ -184,7 +184,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
 			     (llh->llh_flags & LLOG_F_IS_CAT &&
 			      flags & LLOG_F_IS_PLAIN))) {
 			CERROR("%s: llog type is %s but initializing %s\n",
-			       handle->lgh_ctxt->loc_obd->obd_name,
+			       loghandle2name(handle),
 			       llh->llh_flags & LLOG_F_IS_CAT ?
 			       "catalog" : "plain",
 			       flags & LLOG_F_IS_CAT ? "catalog" : "plain");
@@ -206,7 +206,7 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
 		if (unlikely(uuid &&
 			     !obd_uuid_equals(uuid, &llh->llh_tgtuuid))) {
 			CERROR("%s: llog uuid mismatch: %s/%s\n",
-			       handle->lgh_ctxt->loc_obd->obd_name,
+			       loghandle2name(handle),
 			       (char *)uuid->uuid,
 			       (char *)llh->llh_tgtuuid.uuid);
 			rc = -EEXIST;
@@ -220,8 +220,8 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
 		llh->llh_flags |= LLOG_F_IS_FIXSIZE;
 	} else if (!(flags & LLOG_F_IS_PLAIN)) {
 		CERROR("%s: unknown flags: %#x (expected %#x or %#x)\n",
-		       handle->lgh_ctxt->loc_obd->obd_name,
-		       flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
+		       loghandle2name(handle), flags, LLOG_F_IS_CAT,
+		       LLOG_F_IS_PLAIN);
 		rc = -EINVAL;
 	}
 	llh->llh_flags |= fmt;
@@ -234,6 +234,29 @@ int llog_init_handle(const struct lu_env *env, struct llog_handle *handle,
 }
 EXPORT_SYMBOL(llog_init_handle);
 
+int llog_verify_record(const struct llog_handle *llh, struct llog_rec_hdr *rec)
+{
+	int chunk_size = llh->lgh_hdr->llh_hdr.lrh_len;
+
+	if (rec->lrh_len == 0 || rec->lrh_len > chunk_size) {
+		CERROR("%s: record is too large: %d > %d\n",
+		       loghandle2name(llh), rec->lrh_len, chunk_size);
+		return -EINVAL;
+	}
+	if (rec->lrh_index >= LLOG_HDR_BITMAP_SIZE(llh->lgh_hdr)) {
+		CERROR("%s: index is too high: %d\n",
+		       loghandle2name(llh), rec->lrh_index);
+		return -EINVAL;
+	}
+	if ((rec->lrh_type & LLOG_OP_MASK) != LLOG_OP_MAGIC) {
+		CERROR("%s: magic %x is bad\n",
+		       loghandle2name(llh), rec->lrh_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int llog_process_thread(void *arg)
 {
 	struct llog_process_info *lpi = arg;
@@ -247,6 +270,7 @@ static int llog_process_thread(void *arg)
 	int saved_index = 0;
 	int last_called_index = 0;
 	bool repeated = false;
+	bool refresh_idx = false;
 
 	if (!llh)
 		return -EINVAL;
@@ -380,12 +404,21 @@ static int llog_process_thread(void *arg)
 
 			repeated = false;
 
-			if (!rec->lrh_len || rec->lrh_len > chunk_size) {
-				CWARN("invalid length %d in llog record for index %d/%d\n",
-				      rec->lrh_len,
-				      rec->lrh_index, index);
-				rc = -EINVAL;
-				goto out;
+			rc = llog_verify_record(loghandle, rec);
+			if (rc) {
+				CERROR("%s: invalid record in llog "DFID" record for index %d/%d: rc = %d\n",
+				       loghandle2name(loghandle),
+                                       PFID(&loghandle->lgh_id.lgl_oi.oi_fid),
+				       rec->lrh_len, index, rc);
+				/*
+				 * the block seem to be corrupted, let's try
+				 * with the next one. reset rc to go to the
+				 * next chunk.
+				 */
+				refresh_idx = true;
+				index = 0;
+				rc = 0;
+				goto repeat;
 			}
 
 			if (rec->lrh_index < index) {
@@ -395,11 +428,22 @@ static int llog_process_thread(void *arg)
 			}
 
 			if (rec->lrh_index != index) {
-				CERROR("%s: Invalid record: index %u but expected %u\n",
-				       loghandle->lgh_ctxt->loc_obd->obd_name,
-				       rec->lrh_index, index);
-				rc = -ERANGE;
-				goto out;
+				/*
+				 * the last time we couldn't parse the block due
+				 * to corruption, thus has no idea about the
+				 * next index, take it from the block, once.
+				 */
+				if (refresh_idx) {
+					refresh_idx = false;
+					index = rec->lrh_index;
+				} else {
+					CERROR("%s: "DFID" Invalid record: index %u but expected %u\n",
+					       loghandle2name(loghandle),
+					       PFID(&loghandle->lgh_id.lgl_oi.oi_fid),
+					       rec->lrh_index, index);
+					rc = -ERANGE;
+					goto out;
+				}
 			}
 
 			CDEBUG(D_OTHER,
@@ -501,7 +545,7 @@ int llog_process_or_fork(const struct lu_env *env,
 		if (IS_ERR(task)) {
 			rc = PTR_ERR(task);
 			CERROR("%s: cannot start thread: rc = %d\n",
-			       loghandle->lgh_ctxt->loc_obd->obd_name, rc);
+			       loghandle2name(loghandle), rc);
 			goto out_lpi;
 		}
 		wait_for_completion(&lpi->lpi_completion);
diff --git a/fs/lustre/obdclass/llog_cat.c b/fs/lustre/obdclass/llog_cat.c
index 9298808..b67e7a2b 100644
--- a/fs/lustre/obdclass/llog_cat.c
+++ b/fs/lustre/obdclass/llog_cat.c
@@ -80,7 +80,7 @@ static int llog_cat_id2handle(const struct lu_env *env,
 		    ostid_seq(&cgl->lgl_oi) == ostid_seq(&logid->lgl_oi)) {
 			if (cgl->lgl_ogen != logid->lgl_ogen) {
 				CWARN("%s: log " DFID " generation %x != %x\n",
-				      loghandle->lgh_ctxt->loc_obd->obd_name,
+				      loghandle2name(loghandle),
 				      PFID(&logid->lgl_oi.oi_fid),
 				      cgl->lgl_ogen, logid->lgl_ogen);
 				continue;
@@ -88,7 +88,7 @@ static int llog_cat_id2handle(const struct lu_env *env,
 			*res = llog_handle_get(loghandle);
 			if (!*res) {
 				CERROR("%s: log "DFID" refcount is zero!\n",
-				       loghandle->lgh_ctxt->loc_obd->obd_name,
+				       loghandle2name(loghandle),
 				       PFID(&logid->lgl_oi.oi_fid));
 				continue;
 			}
@@ -103,8 +103,8 @@ static int llog_cat_id2handle(const struct lu_env *env,
 		       LLOG_OPEN_EXISTS);
 	if (rc < 0) {
 		CERROR("%s: error opening log id " DFID ":%x: rc = %d\n",
-		       cathandle->lgh_ctxt->loc_obd->obd_name,
-		       PFID(&logid->lgl_oi.oi_fid), logid->lgl_ogen, rc);
+		       loghandle2name(cathandle), PFID(&logid->lgl_oi.oi_fid),
+		       logid->lgl_ogen, rc);
 		return rc;
 	}
 
@@ -155,7 +155,7 @@ static int llog_cat_process_common(const struct lu_env *env,
 	if (rec->lrh_type != le32_to_cpu(LLOG_LOGID_MAGIC)) {
 		rc = -EINVAL;
 		CWARN("%s: invalid record in catalog " DFID ":%x: rc = %d\n",
-		      cat_llh->lgh_ctxt->loc_obd->obd_name,
+		      loghandle2name(cat_llh),
 		      PFID(&cat_llh->lgh_id.lgl_oi.oi_fid),
 		      cat_llh->lgh_id.lgl_ogen, rc);
 
@@ -170,7 +170,7 @@ static int llog_cat_process_common(const struct lu_env *env,
 	rc = llog_cat_id2handle(env, cat_llh, llhp, &lir->lid_id);
 	if (rc) {
 		CWARN("%s: can't find llog handle " DFID ":%x: rc = %d\n",
-		      cat_llh->lgh_ctxt->loc_obd->obd_name,
+		      loghandle2name(cat_llh),
 		      PFID(&lir->lid_id.lgl_oi.oi_fid),
 		      lir->lid_id.lgl_ogen, rc);
 
@@ -235,7 +235,7 @@ static int llog_cat_process_or_fork(const struct lu_env *env,
 		struct llog_process_cat_data cd;
 
 		CWARN("%s: catlog " DFID " crosses index zero\n",
-		      cat_llh->lgh_ctxt->loc_obd->obd_name,
+		      loghandle2name(cat_llh),
 		      PFID(&cat_llh->lgh_id.lgl_oi.oi_fid));
 		/*startcat = 0 is default value for general processing */
 		if ((startcat != LLOG_CAT_FIRST &&
diff --git a/fs/lustre/obdclass/llog_internal.h b/fs/lustre/obdclass/llog_internal.h
index c34adfe..41ac4f0 100644
--- a/fs/lustre/obdclass/llog_internal.h
+++ b/fs/lustre/obdclass/llog_internal.h
@@ -74,4 +74,9 @@ static inline struct llog_rec_hdr *llog_rec_hdr_next(struct llog_rec_hdr *rec)
 {
 	return (struct llog_rec_hdr *)((char *)rec + rec->lrh_len);
 }
+
+static inline char *loghandle2name(const struct llog_handle *lgh)
+{
+	return lgh->lgh_ctxt->loc_obd->obd_name;
+}
 #endif
-- 
1.8.3.1

_______________________________________________
lustre-devel mailing list
lustre-devel@lists.lustre.org
http://lists.lustre.org/listinfo.cgi/lustre-devel-lustre.org

  parent reply	other threads:[~2021-04-05  0:52 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-05  0:50 [lustre-devel] [PATCH 00/41] lustre: sync to OpenSFS branch as of March 1 James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 01/41] lustre: llite: data corruption due to RPC reordering James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 02/41] lustre: llite: make readahead aware of hints James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 03/41] lustre: lov: avoid NULL dereference in cleanup James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 04/41] lustre: llite: quiet spurious ioctl warning James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 05/41] lustre: ptlrpc: do not output error when imp_sec is freed James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 06/41] lustre: update version to 2.14.0 James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 07/41] lnet: UDSP storage and marshalled structs James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 08/41] lnet: foundation patch for selection mod James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 09/41] lnet: Preferred gateway selection James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 10/41] lnet: Select NI/peer NI with highest prio James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 11/41] lnet: select best peer and local net James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 12/41] lnet: UDSP handling James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 13/41] lnet: Apply UDSP on local and remote NIs James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 14/41] lnet: Add the kernel level Marshalling API James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 15/41] lnet: Add the kernel level De-Marshalling API James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 16/41] lnet: Add the ioctl handler for "add policy" James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 17/41] lnet: ioctl handler for "delete policy" James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 18/41] lnet: ioctl handler for get policy info James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 19/41] lustre: update version to 2.14.50 James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 20/41] lustre: gss: handle empty reqmsg in sptlrpc_req_ctx_switch James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 21/41] lustre: sec: file ioctls to handle encryption policies James Simmons
2021-04-05  0:50 ` James Simmons [this message]
2021-04-05  0:50 ` [lustre-devel] [PATCH 23/41] lustre: lov: fix layout generation inc for mirror split James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 24/41] lnet: modify assertion in lnet_post_send_locked James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 25/41] lustre: lov: fixes bitfield in lod qos code James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 26/41] lustre: lov: grant deadlock if same OSC in two components James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 27/41] lustre: change EWOULDBLOCK to EAGAIN James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 28/41] lsutre: ldlm: return error from ldlm_namespace_new() James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 29/41] lustre: llite: remove unused ll_teardown_mmaps() James Simmons
2021-04-05  0:50 ` [lustre-devel] [PATCH 30/41] lustre: lov: style cleanups in lov_set_osc_active() James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 31/41] lustre: change various operations structs to const James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 32/41] lustre: mark strings in char arrays as const James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 33/41] lustre: convert snprintf to scnprintf as appropriate James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 34/41] lustre: remove non-static 'inline' markings James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 35/41] lustre: llite: use is_root_inode() James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 36/41] lnet: libcfs: discard cfs_firststr James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 37/41] lnet: place wire protocol data int own headers James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 38/41] lnet: libcfs: use wait_event_timeout() in tracefiled() James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 39/41] lnet: use init_wait() rather than init_waitqueue_entry() James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 40/41] lnet: discard LNET_MD_PHYS James Simmons
2021-04-05  0:51 ` [lustre-devel] [PATCH 41/41] lnet: o2iblnd: convert peers hash table to hashtable.h James Simmons

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1617583870-32029-23-git-send-email-jsimmons@infradead.org \
    --to=jsimmons@infradead.org \
    --cc=adilger@whamcloud.com \
    --cc=green@whamcloud.com \
    --cc=lustre-devel@lists.lustre.org \
    --cc=neilb@suse.de \
    --subject='Re: [lustre-devel] [PATCH 22/41] lustre: obdclass: try to skip corrupted llog records' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).