From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andreas Gruenbacher Date: Tue, 9 Apr 2019 16:00:37 +0200 Subject: [Cluster-devel] [PATCH 02/19] gfs2: Introduce concept of a pending withdraw In-Reply-To: <20190327123532.27131-3-rpeterso@redhat.com> References: <20190327123532.27131-1-rpeterso@redhat.com> <20190327123532.27131-3-rpeterso@redhat.com> Message-ID: List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Hi Bob, On Wed, 27 Mar 2019 at 13:35, Bob Peterson wrote: > File system withdraws can be delayed when inconsistencies are > discovered when we cannot withdraw immediately, for example, when > critical spin_locks are held. But delaying the withdraw can cause > gfs2 to ignore the error and keep running for a short period of time. > For example, an rgrp glock may be dequeued and demoted while there > are still buffers that haven't been properly revoked, due to io > errors writing to the journal. > > This patch introduces a new concept of a delayed withdraw, which > means an inconsistency has been discovered and we need to withdraw > at the earliest possible opportunity. In these cases, we aren't > quite withdrawn yet, but we still need to not dequeue glocks and > other critical things. If we dequeue the glocks and the withdraw > results in our journal being replayed, the replay could overwrite > data that's been modified by a different node that acquired the > glock in the meantime. > > Signed-off-by: Bob Peterson > --- > fs/gfs2/aops.c | 4 ++-- > fs/gfs2/file.c | 2 +- > fs/gfs2/glock.c | 7 +++---- > fs/gfs2/glops.c | 2 +- > fs/gfs2/incore.h | 1 + > fs/gfs2/log.c | 20 ++++++++------------ > fs/gfs2/meta_io.c | 6 +++--- > fs/gfs2/ops_fstype.c | 3 +-- > fs/gfs2/quota.c | 2 +- > fs/gfs2/super.c | 6 +++--- > fs/gfs2/sys.c | 2 +- > fs/gfs2/util.c | 1 + > fs/gfs2/util.h | 8 ++++++++ > 13 files changed, 34 insertions(+), 30 deletions(-) > > diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c > index 05dd78f4b2b3..0d3cde8a61cd 100644 > --- a/fs/gfs2/aops.c > +++ b/fs/gfs2/aops.c > @@ -521,7 +521,7 @@ static int __gfs2_readpage(void *file, struct page *page) > error = mpage_readpage(page, gfs2_block_map); > } > > - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) > + if (unlikely(withdrawn(sdp))) > return -EIO; > > return error; > @@ -638,7 +638,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, > gfs2_glock_dq(&gh); > out_uninit: > gfs2_holder_uninit(&gh); > - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) > + if (unlikely(withdrawn(sdp))) > ret = -EIO; > return ret; > } > diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c > index 58a768e59712..2a3ac9747d0d 100644 > --- a/fs/gfs2/file.c > +++ b/fs/gfs2/file.c > @@ -1169,7 +1169,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) > cmd = F_SETLK; > fl->fl_type = F_UNLCK; > } > - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { > + if (unlikely(withdrawn(sdp))) { > if (fl->fl_type == F_UNLCK) > locks_lock_file_wait(file, fl); > return -EIO; > diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c > index d32964cd1117..4330164de8bd 100644 > --- a/fs/gfs2/glock.c > +++ b/fs/gfs2/glock.c > @@ -542,7 +542,7 @@ __acquires(&gl->gl_lockref.lock) > unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); > int ret; > > - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) && > + if (unlikely(withdrawn(sdp)) && > target != LM_ST_UNLOCKED) > return; > lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | > @@ -579,8 +579,7 @@ __acquires(&gl->gl_lockref.lock) > } > else if (ret) { > fs_err(sdp, "lm_lock ret %d\n", ret); > - GLOCK_BUG_ON(gl, !test_bit(SDF_SHUTDOWN, > - &sdp->sd_flags)); > + GLOCK_BUG_ON(gl, !withdrawn(sdp)); > } > } else { /* lock_nolock */ > finish_xmote(gl, target); > @@ -1092,7 +1091,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) > struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; > int error = 0; > > - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) > + if (unlikely(withdrawn(sdp))) > return -EIO; > > if (test_bit(GLF_LRU, &gl->gl_flags)) > diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c > index 78510ab91835..719961b5a511 100644 > --- a/fs/gfs2/glops.c > +++ b/fs/gfs2/glops.c > @@ -538,7 +538,7 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) > gfs2_consist(sdp); > > /* Initialize some head of the log stuff */ > - if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { > + if (!withdrawn(sdp)) { > sdp->sd_log_sequence = head.lh_sequence + 1; > gfs2_log_pointers_init(sdp, head.lh_blkno); > } > diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h > index 76336b592030..c6984265807f 100644 > --- a/fs/gfs2/incore.h > +++ b/fs/gfs2/incore.h > @@ -620,6 +620,7 @@ enum { > SDF_RORECOVERY = 7, /* read only recovery */ > SDF_SKIP_DLM_UNLOCK = 8, > SDF_FORCE_AIL_FLUSH = 9, > + SDF_PENDING_WITHDRAW = 10, /* Will withdraw eventually */ > }; > > enum gfs2_freeze_state { > diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c > index 0717b4d4828b..c79279ef03b8 100644 > --- a/fs/gfs2/log.c > +++ b/fs/gfs2/log.c > @@ -92,8 +92,7 @@ static void gfs2_remove_from_ail(struct gfs2_bufdata *bd) > > static int gfs2_ail1_start_one(struct gfs2_sbd *sdp, > struct writeback_control *wbc, > - struct gfs2_trans *tr, > - bool *withdraw) > + struct gfs2_trans *tr) > __releases(&sdp->sd_ail_lock) > __acquires(&sdp->sd_ail_lock) > { > @@ -112,7 +111,7 @@ __acquires(&sdp->sd_ail_lock) > atomic_add_return(1, &sdp->sd_log_errors) == 1) { > sdp->sd_log_error = -EIO; > gfs2_io_error_bh(sdp, bh); > - *withdraw = true; > + set_bit(SDF_PENDING_WITHDRAW, &sdp->sd_flags); > } > list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); > continue; > @@ -153,7 +152,6 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) > struct list_head *head = &sdp->sd_ail1_list; > struct gfs2_trans *tr; > struct blk_plug plug; > - bool withdraw = false; > > trace_gfs2_ail_flush(sdp, wbc, 1); > blk_start_plug(&plug); > @@ -162,12 +160,12 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) > list_for_each_entry_reverse(tr, head, tr_list) { > if (wbc->nr_to_write <= 0) > break; > - if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw)) > + if (gfs2_ail1_start_one(sdp, wbc, tr)) > goto restart; > } > spin_unlock(&sdp->sd_ail_lock); > blk_finish_plug(&plug); > - if (withdraw) > + if (test_bit(SDF_PENDING_WITHDRAW, &sdp->sd_flags)) > gfs2_lm_withdraw(sdp, NULL); > trace_gfs2_ail_flush(sdp, wbc, 0); > } > @@ -196,8 +194,7 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp) > * > */ > > -static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, > - bool *withdraw) > +static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) > { > struct gfs2_bufdata *bd, *s; > struct buffer_head *bh; > @@ -212,7 +209,7 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, > atomic_add_return(1, &sdp->sd_log_errors) == 1) { > sdp->sd_log_error = -EIO; > gfs2_io_error_bh(sdp, bh); > - *withdraw = true; > + set_bit(SDF_PENDING_WITHDRAW, &sdp->sd_flags); > } > list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); > } > @@ -230,11 +227,10 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) > struct gfs2_trans *tr, *s; > int oldest_tr = 1; > int ret; > - bool withdraw = false; > > spin_lock(&sdp->sd_ail_lock); > list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { > - gfs2_ail1_empty_one(sdp, tr, &withdraw); > + gfs2_ail1_empty_one(sdp, tr); > if (list_empty(&tr->tr_ail1_list) && oldest_tr) > list_move(&tr->tr_list, &sdp->sd_ail2_list); > else > @@ -243,7 +239,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) > ret = list_empty(&sdp->sd_ail1_list); > spin_unlock(&sdp->sd_ail_lock); > > - if (withdraw) > + if (test_bit(SDF_PENDING_WITHDRAW, &sdp->sd_flags)) > gfs2_lm_withdraw(sdp, "fatal: I/O error(s)\n"); > > return ret; > diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c > index 3201342404a7..b02c7eb2ded3 100644 > --- a/fs/gfs2/meta_io.c > +++ b/fs/gfs2/meta_io.c > @@ -255,7 +255,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, > struct buffer_head *bh, *bhs[2]; > int num = 0; > > - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { > + if (unlikely(withdrawn(sdp))) { > *bhp = NULL; > return -EIO; > } > @@ -313,7 +313,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, > > int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) > { > - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) > + if (unlikely(withdrawn(sdp))) > return -EIO; > > wait_on_buffer(bh); > @@ -324,7 +324,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) > gfs2_io_error_bh_wd(sdp, bh); > return -EIO; > } > - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) > + if (unlikely(withdrawn(sdp))) > return -EIO; > > return 0; > diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c > index 77610be6c918..de328b9a5648 100644 > --- a/fs/gfs2/ops_fstype.c > +++ b/fs/gfs2/ops_fstype.c > @@ -999,8 +999,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) > void gfs2_lm_unmount(struct gfs2_sbd *sdp) > { > const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops; > - if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) && > - lm->lm_unmount) > + if (likely(!withdrawn(sdp)) && lm->lm_unmount) > lm->lm_unmount(sdp); > } > > diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c > index 009172ef4dfe..5a1a95fb57ca 100644 > --- a/fs/gfs2/quota.c > +++ b/fs/gfs2/quota.c > @@ -1478,7 +1478,7 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) > { > if (error == 0 || error == -EROFS) > return; > - if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { > + if (!withdrawn(sdp)) { > if (atomic_add_return(1, &sdp->sd_log_errors) == 1) { > sdp->sd_log_error = error; > fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); > diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c > index ca71163ff7cf..5db590d57564 100644 > --- a/fs/gfs2/super.c > +++ b/fs/gfs2/super.c > @@ -802,7 +802,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) > > if (!(flags & I_DIRTY_INODE)) > return; > - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) > + if (unlikely(withdrawn(sdp))) > return; > if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { > ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); > @@ -851,7 +851,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) > > error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE, > &freeze_gh); > - if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) > + if (error && !withdrawn(sdp)) > return error; > > flush_workqueue(gfs2_delete_workqueue); > @@ -1005,7 +1005,7 @@ static int gfs2_freeze(struct super_block *sb) > if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) > goto out; > > - if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { > + if (withdrawn(sdp)) { > error = -EINVAL; > goto out; > } > diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c > index 1787d295834e..a2419e92a64e 100644 > --- a/fs/gfs2/sys.c > +++ b/fs/gfs2/sys.c > @@ -121,7 +121,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) > > static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) > { > - unsigned int b = test_bit(SDF_SHUTDOWN, &sdp->sd_flags); > + unsigned int b = withdrawn(sdp); > return snprintf(buf, PAGE_SIZE, "%u\n", b); > } > > diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c > index 0a814ccac41d..717aef772c60 100644 > --- a/fs/gfs2/util.c > +++ b/fs/gfs2/util.c > @@ -47,6 +47,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) > test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) > return 0; > > + clear_bit(SDF_PENDING_WITHDRAW, &sdp->sd_flags); > if (fmt) { > va_start(args, fmt); > > diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h > index 9278fecba632..16e087da3bd3 100644 > --- a/fs/gfs2/util.h > +++ b/fs/gfs2/util.h > @@ -167,6 +167,14 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, > return x; > } > > +static inline bool withdrawn(struct gfs2_sbd *sdp) > +{ > + if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags) || > + test_bit(SDF_PENDING_WITHDRAW, &sdp->sd_flags)) > + return true; > + return false; > +} > + > #define gfs2_tune_get(sdp, field) \ > gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) > > -- > 2.20.1 > this patch looks good, I'm not sure why this isn't the way things are done already. I'm not fully happy about how things are called though. What do you think of renaming SDF_PENDING_WITHDRAW -> SDF_WITHDRAW, withdrawn -> gfs2_withdraw? Is there actually a need to clear the SDF_PENDING_WITHDRAW flag in gfs2_lm_withdraw? It's not that a mounted filesystem can ever become "unwithdrawn". Thanks, Andreas