From mboxrd@z Thu Jan 1 00:00:00 1970 From: Bob Peterson Date: Thu, 26 Mar 2020 13:40:17 -0500 Subject: [Cluster-devel] [GFS2 PATCH 2/5] gfs2: instrumentation wrt ail1 stuck In-Reply-To: <20200326184020.123544-1-rpeterso@redhat.com> References: <20200326184020.123544-1-rpeterso@redhat.com> Message-ID: <20200326184020.123544-3-rpeterso@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Before this patch, if the ail1 flush got stuck for some reason, there were no clues as to why. This patch introduces a check for gfs2_ail1_flush having been running for more than a minute; if that happens, it dumps the items still remaining on the ail1 list and stops restarting the flush. Signed-off-by: Bob Peterson --- fs/gfs2/log.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 87f3e892be3e..2abec43ae898 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -139,6 +139,41 @@ __acquires(&sdp->sd_ail_lock) return ret; } +static void dump_ail_list(struct gfs2_sbd *sdp) +{ + struct gfs2_trans *tr; + struct gfs2_bufdata *bd; + struct buffer_head *bh; + + fs_err(sdp, "Error: In gfs2_ail1_flush for a minute! t=%d\n", + current->journal_info ? 
1 : 0); + + list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { + list_for_each_entry_reverse(bd, &tr->tr_ail1_list, + bd_ail_st_list) { + bh = bd->bd_bh; + fs_err(sdp, "bd %p: blk:0x%llx bh=%p ", bd, + (unsigned long long)bd->bd_blkno, bh); + if (!bh) { + fs_err(sdp, "\n"); + continue; + } + fs_err(sdp, "0x%llx up2:%d dirt:%d lkd:%d req:%d " + "map:%d new:%d ar:%d aw:%d delay:%d " + "io err:%d unwritten:%d dfr:%d pin:%d esc:%d\n", + (unsigned long long)bh->b_blocknr, + buffer_uptodate(bh), buffer_dirty(bh), + buffer_locked(bh), buffer_req(bh), + buffer_mapped(bh), buffer_new(bh), + buffer_async_read(bh), buffer_async_write(bh), + buffer_delay(bh), buffer_write_io_error(bh), + buffer_unwritten(bh), + buffer_defer_completion(bh), + buffer_pinned(bh), buffer_escaped(bh)); + fs_err(sdp, "\n"); + } + } +} /** * gfs2_ail1_flush - start writeback of some ail1 entries @@ -155,11 +190,16 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) struct gfs2_trans *tr; struct blk_plug plug; int ret = 0; + unsigned long flush_start = jiffies; trace_gfs2_ail_flush(sdp, wbc, 1); blk_start_plug(&plug); spin_lock(&sdp->sd_ail_lock); restart: + if (time_after(jiffies, flush_start + (HZ * 60))) { + dump_ail_list(sdp); + goto out; + } list_for_each_entry_reverse(tr, head, tr_list) { if (wbc->nr_to_write <= 0) break; @@ -170,6 +210,7 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) break; } } +out: spin_unlock(&sdp->sd_ail_lock); blk_finish_plug(&plug); if (ret) { -- 2.25.1