* [Cluster-devel] [PATCH] gfs2: fix GL_SKIP node_scope problems
@ 2021-09-29 13:21 Bob Peterson
2021-09-29 13:27 ` Bob Peterson
2021-09-29 15:35 ` Andreas Gruenbacher
0 siblings, 2 replies; 4+ messages in thread
From: Bob Peterson @ 2021-09-29 13:21 UTC (permalink / raw)
To: cluster-devel.redhat.com
Before this patch, when a glock was locked, the very first holder on the
queue would unlock the lockref and call the go_lock glops function (if
one existed), unless GL_SKIP was specified. When we introduced the new
node-scope concept, we allowed multiple holders to lock glocks in EX mode
and share the lock.
But node-scope introduced a new problem: if the first holder has GL_SKIP
and the next one does NOT, since it is not the first holder on the queue,
the go_lock op was not called. Eventually the GL_SKIP holder may call the
go_lock sub-function (e.g. gfs2_rgrp_bh_get) but there was still a
window of time in which another non-GL_SKIP holder assumes the go_lock
function had been called by the first holder. In the case of rgrp glocks,
this led to a NULL pointer dereference on the buffer_heads.
This patch tries to fix the problem by introducing two new glock flags:
GLF_GO_LOCK_NEEDED, which keeps track of when the go_lock function needs
to be called to "fill in" or "read" the object before it is referenced.
GLF_GO_LOCK_IN_PROG which is used to determine when a process is
in the process of reading in the object. Whenever a function needs to
reference the object, it checks the GLF_GO_LOCK_NEEDED flag, and if
set, it sets GLF_GO_LOCK_IN_PROG and calls the glops "go_lock" function.
As before, the gl_lockref spin_lock is unlocked during the IO operation,
which may take a relatively long amount of time to complete. While
unlocked, if another process determines go_lock is still needed, it sees
the GLF_GO_LOCK_IN_PROG flag is set, and waits for the go_lock glop
operation to be completed. Once GLF_GO_LOCK_IN_PROG is cleared, it needs
to check GLF_GO_LOCK_NEEDED again because the other process's go_lock
operation may not have been successful.
To faciliate this change, the go_lock section of function do_promote
was extracted to its own new function, gfs2_go_lock. The reason we do
this is because GL_SKIP callers often read in the object later.
Before this patch, those GL_SKIP callers (like gfs2_inode_lookup and
update_rgrp_lvb) called directly into the object-read functions
(gfs2_inode_refresh and gfs2_rgrp_bh_get respectively), but that never
cleared the new GLF_GO_LOCK_NEEDED flag. This patch changes those
functions so they call into the new gfs2_go_lock directly, which takes
care of all that.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
fs/gfs2/glock.c | 136 +++++++++++++++++++++++++++++++++--------------
fs/gfs2/glock.h | 1 +
fs/gfs2/glops.c | 21 ++++----
fs/gfs2/incore.h | 3 +-
fs/gfs2/inode.c | 4 +-
fs/gfs2/rgrp.c | 12 ++---
fs/gfs2/super.c | 6 ++-
7 files changed, 121 insertions(+), 62 deletions(-)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 4fcf340603e7..750ea07b4173 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -380,6 +380,66 @@ static void do_error(struct gfs2_glock *gl, const int ret)
}
}
+/**
+ * gfs2_go_lock - Call the go_lock glops function
+ * @gl: The glock
+ *
+ * Note: the GLF bits used throughout are protected by the lockref spinlock.
+ *
+ * Returns: 0 if go_lock was successful, 2 if type specific operation is
+ * underway, -EAGAIN if secondary go_lock was waiting for another caller of
+ * go_lock, or error.
+ */
+int gfs2_go_lock(struct gfs2_holder *gh)
+__releases(&gl->gl_lockref.lock)
+__acquires(&gl->gl_lockref.lock)
+{
+ struct gfs2_glock *gl = gh->gh_gl;
+ const struct gfs2_glock_operations *glops = gl->gl_ops;
+ int ret;
+
+ /*
+ * Since we unlock the lockref lock, we set a flag to indicate
+ * go_lock is in progress.
+ */
+ if (test_bit(GLF_GO_LOCK_IN_PROG, &gl->gl_flags)) {
+ wait_on_bit(&gl->gl_flags, GLF_GO_LOCK_IN_PROG,
+ TASK_UNINTERRUPTIBLE);
+ /*
+ * Here we just waited for a first go_lock to finish.
+ * But that go_lock may not have been successful, as when
+ * a process locks an inode glock _before_ it has an actual
+ * inode to populate. So we check again. This secondary waiter
+ * _might_ have an inode to populate.
+ */
+ return -EAGAIN;
+ }
+
+ set_bit(GLF_GO_LOCK_IN_PROG, &gl->gl_flags);
+
+ /* FIXME: eliminate this eventually */
+ ret = glops->go_lock(gh);
+
+ switch(ret) {
+ case 0:
+ clear_bit(GLF_GO_LOCK_NEEDED, &gl->gl_flags);
+ break;
+ case 1:
+ ret = 2;
+ break;
+ default:
+ gh->gh_error = ret;
+ list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
+ gfs2_holder_wake(gh);
+ break;
+ }
+ clear_bit(GLF_GO_LOCK_IN_PROG, &gl->gl_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&gl->gl_flags, GLF_GO_LOCK_IN_PROG);
+ return ret;
+}
+
/**
* do_promote - promote as many requests as possible on the current queue
* @gl: The glock
@@ -389,54 +449,50 @@ static void do_error(struct gfs2_glock *gl, const int ret)
*/
static int do_promote(struct gfs2_glock *gl)
-__releases(&gl->gl_lockref.lock)
-__acquires(&gl->gl_lockref.lock)
{
- const struct gfs2_glock_operations *glops = gl->gl_ops;
struct gfs2_holder *gh, *tmp;
int first;
+ bool lock_released;
int ret;
restart:
list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
+ lock_released = false;
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
continue;
- if (may_grant(gl, gh)) {
- first = gfs2_first_holder(gh);
- if (first && glops->go_lock) {
- if (!(gh->gh_flags & GL_SKIP)) {
- spin_unlock(&gl->gl_lockref.lock);
- /* FIXME: eliminate this eventually */
- ret = glops->go_lock(gh);
- spin_lock(&gl->gl_lockref.lock);
- if (ret) {
- if (ret == 1)
- return 2;
- gh->gh_error = ret;
- list_del_init(&gh->gh_list);
- trace_gfs2_glock_queue(gh, 0);
- gfs2_holder_wake(gh);
- goto restart;
- }
- }
- }
- set_bit(HIF_HOLDER, &gh->gh_iflags);
- trace_gfs2_promote(gh, first);
- gfs2_holder_wake(gh);
- /*
- * If this was the first holder, we may have released
- * the gl_lockref.lock, so the holders list may have
- * changed. For that reason, we start again at the
- * start of the holders queue.
- */
- if (first)
- goto restart;
- continue;
+ if (!may_grant(gl, gh)) {
+ if (gh->gh_list.prev == &gl->gl_holders)
+ return 1;
+ do_error(gl, 0);
+ break;
}
- if (gh->gh_list.prev == &gl->gl_holders)
- return 1;
- do_error(gl, 0);
- break;
+
+ first = gfs2_first_holder(gh);
+check_go_lock:
+ if (test_bit(GLF_GO_LOCK_NEEDED, &gl->gl_flags) &&
+ !(gh->gh_flags & GL_SKIP)) {
+ spin_unlock(&gl->gl_lockref.lock);
+ ret = gfs2_go_lock(gh);
+ spin_lock(&gl->gl_lockref.lock);
+ if (ret) {
+ if (ret == -EAGAIN)
+ goto check_go_lock;
+ else
+ goto restart;
+ } else
+ lock_released = true;
+ }
+
+ set_bit(HIF_HOLDER, &gh->gh_iflags);
+ trace_gfs2_promote(gh, first);
+ gfs2_holder_wake(gh);
+ /*
+ * If we released the gl_lockref.lock the holders list may have
+ * changed. For that reason, we start again at the start of
+ * the holders queue.
+ */
+ if (lock_released)
+ goto restart;
}
return 0;
}
@@ -1064,7 +1120,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
atomic_inc(&sdp->sd_glock_disposal);
gl->gl_node.next = NULL;
- gl->gl_flags = 0;
+ gl->gl_flags = glops->go_lock ? BIT(GLF_GO_LOCK_NEEDED) : 0;
gl->gl_name = name;
lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
gl->gl_lockref.count = 1;
@@ -2153,6 +2209,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*p++ = 'P';
if (test_bit(GLF_FREEING, gflags))
*p++ = 'x';
+ if (test_bit(GLF_GO_LOCK_NEEDED, gflags))
+ *p++ = 'g';
*p = 0;
return buf;
}
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 8a09379dbf66..2e348e250fd2 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -195,6 +195,7 @@ extern void gfs2_holder_reinit(unsigned int state, u16 flags,
extern void gfs2_holder_uninit(struct gfs2_holder *gh);
extern int gfs2_glock_nq(struct gfs2_holder *gh);
extern int gfs2_glock_poll(struct gfs2_holder *gh);
+extern int gfs2_go_lock(struct gfs2_holder *gh);
extern int gfs2_glock_wait(struct gfs2_holder *gh);
extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs);
extern void gfs2_glock_dq(struct gfs2_holder *gh);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 4b19f513570f..4823ba63c3bf 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -356,7 +356,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
struct address_space *mapping = gfs2_glock2aspace(gl);
truncate_inode_pages(mapping, 0);
if (ip) {
- set_bit(GIF_INVALID, &ip->i_flags);
+ set_bit(GLF_GO_LOCK_NEEDED, &gl->gl_flags);
forget_all_cached_acls(&ip->i_inode);
security_inode_invalidate_secctx(&ip->i_inode);
gfs2_dir_hash_inval(ip);
@@ -476,8 +476,6 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
error = gfs2_dinode_in(ip, dibh->b_data);
brelse(dibh);
- clear_bit(GIF_INVALID, &ip->i_flags);
-
return error;
}
@@ -493,16 +491,14 @@ static int inode_go_lock(struct gfs2_holder *gh)
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_inode *ip = gl->gl_object;
- int error = 0;
+ int error = 1;
- if (!ip)
- return 0;
+ if (!ip) /* no inode to populate - read it in later */
+ goto out;
- if (test_bit(GIF_INVALID, &ip->i_flags)) {
- error = gfs2_inode_refresh(ip);
- if (error)
- return error;
- }
+ error = gfs2_inode_refresh(ip);
+ if (error)
+ goto out;
if (gh->gh_state != LM_ST_DEFERRED)
inode_dio_wait(&ip->i_inode);
@@ -515,9 +511,10 @@ static int inode_go_lock(struct gfs2_holder *gh)
list_add(&ip->i_trunc_list, &sdp->sd_trunc_list);
spin_unlock(&sdp->sd_trunc_lock);
wake_up(&sdp->sd_quota_wait);
- return 1;
+ error = 1;
}
+out:
return error;
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 0fe49770166e..c98b37545f2c 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -315,6 +315,7 @@ struct gfs2_alloc_parms {
enum {
GLF_LOCK = 1,
+ GLF_GO_LOCK_NEEDED = 2, /* needs go_lock */
GLF_DEMOTE = 3,
GLF_PENDING_DEMOTE = 4,
GLF_DEMOTE_IN_PROGRESS = 5,
@@ -324,6 +325,7 @@ enum {
GLF_REPLY_PENDING = 9,
GLF_INITIAL = 10,
GLF_FROZEN = 11,
+ GLF_GO_LOCK_IN_PROG = 12, /* go_lock happening now */
GLF_LRU = 13,
GLF_OBJECT = 14, /* Used only for tracing */
GLF_BLOCKING = 15,
@@ -370,7 +372,6 @@ struct gfs2_glock {
};
enum {
- GIF_INVALID = 0,
GIF_QD_LOCKED = 1,
GIF_ALLOC_FAILED = 2,
GIF_SW_PAGED = 3,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6e15434b23ac..4e583e9a380c 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -182,7 +182,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
}
glock_set_object(ip->i_gl, ip);
- set_bit(GIF_INVALID, &ip->i_flags);
+ set_bit(GLF_GO_LOCK_NEEDED, &ip->i_gl->gl_flags);
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
goto fail;
@@ -196,7 +196,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
if (type == DT_UNKNOWN) {
/* Inode glock must be locked already */
- error = gfs2_inode_refresh(GFS2_I(inode));
+ error = gfs2_go_lock(&i_gh);
if (error)
goto fail;
} else {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 96b2fbed6bf1..796f9e015218 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1261,7 +1261,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
return error;
}
-static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
+static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh)
{
u32 rl_flags;
@@ -1269,7 +1269,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
return 0;
if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
- return gfs2_rgrp_bh_get(rgd);
+ gfs2_go_lock(gh);
rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
rl_flags &= ~GFS2_RDF_MASK;
@@ -1312,6 +1312,7 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
bi->bi_bh = NULL;
}
}
+ set_bit(GLF_GO_LOCK_NEEDED, &rgd->rd_gl->gl_flags);
}
int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
@@ -2110,7 +2111,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
gfs2_rgrp_congested(rs->rs_rgd, loops))
goto skip_rgrp;
if (sdp->sd_args.ar_rgrplvb) {
- error = update_rgrp_lvb(rs->rs_rgd);
+ error = update_rgrp_lvb(rs->rs_rgd,
+ &ip->i_rgd_gh);
if (unlikely(error)) {
rgrp_unlock_local(rs->rs_rgd);
gfs2_glock_dq_uninit(&ip->i_rgd_gh);
@@ -2126,7 +2128,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
goto skip_rgrp;
if (sdp->sd_args.ar_rgrplvb)
- gfs2_rgrp_bh_get(rs->rs_rgd);
+ gfs2_go_lock(&ip->i_rgd_gh);
/* Get a reservation if we don't already have one */
if (!gfs2_rs_active(rs))
@@ -2762,8 +2764,6 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
void rgrp_lock_local(struct gfs2_rgrpd *rgd)
{
- GLOCK_BUG_ON(rgd->rd_gl, !gfs2_glock_is_held_excl(rgd->rd_gl) &&
- !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags));
mutex_lock(&rgd->rd_mutex);
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 6e00d15ef0a8..20698232774a 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1244,8 +1244,10 @@ static enum dinode_demise evict_should_delete(struct inode *inode,
if (ret)
return SHOULD_NOT_DELETE_DINODE;
- if (test_bit(GIF_INVALID, &ip->i_flags)) {
- ret = gfs2_inode_refresh(ip);
+ if (test_bit(GLF_GO_LOCK_NEEDED, &ip->i_gl->gl_flags)) {
+ do {
+ ret = gfs2_go_lock(gh);
+ } while (ret == -EAGAIN);
if (ret)
return SHOULD_NOT_DELETE_DINODE;
}
--
2.31.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [Cluster-devel] [PATCH] gfs2: fix GL_SKIP node_scope problems
2021-09-29 13:21 [Cluster-devel] [PATCH] gfs2: fix GL_SKIP node_scope problems Bob Peterson
@ 2021-09-29 13:27 ` Bob Peterson
2021-09-29 15:35 ` Andreas Gruenbacher
1 sibling, 0 replies; 4+ messages in thread
From: Bob Peterson @ 2021-09-29 13:27 UTC (permalink / raw)
To: cluster-devel.redhat.com
On 9/29/21 8:21 AM, Bob Peterson wrote:
> Before this patch, when a glock was locked, the very first holder on the
> queue would unlock the lockref and call the go_lock glops function (if
> one existed), unless GL_SKIP was specified. When we introduced the new
> node-scope concept, we allowed multiple holders to lock glocks in EX mode
> and share the lock.
>
> But node-scope introduced a new problem: if the first holder has GL_SKIP
> and the next one does NOT, since it is not the first holder on the queue,
> the go_lock op was not called. Eventually the GL_SKIP holder may call the
> go_lock sub-function (e.g. gfs2_rgrp_bh_get) but there was still a
> window of time in which another non-GL_SKIP holder assumes the go_lock
> function had been called by the first holder. In the case of rgrp glocks,
> this led to a NULL pointer dereference on the buffer_heads.
>
> This patch tries to fix the problem by introducing two new glock flags:
>
> GLF_GO_LOCK_NEEDED, which keeps track of when the go_lock function needs
> to be called to "fill in" or "read" the object before it is referenced.
>
> GLF_GO_LOCK_IN_PROG which is used to determine when a process is
> in the process of reading in the object. Whenever a function needs to
> reference the object, it checks the GLF_GO_LOCK_NEEDED flag, and if
> set, it sets GLF_GO_LOCK_IN_PROG and calls the glops "go_lock" function.
>
> As before, the gl_lockref spin_lock is unlocked during the IO operation,
> which may take a relatively long amount of time to complete. While
> unlocked, if another process determines go_lock is still needed, it sees
> the GLF_GO_LOCK_IN_PROG flag is set, and waits for the go_lock glop
> operation to be completed. Once GLF_GO_LOCK_IN_PROG is cleared, it needs
> to check GLF_GO_LOCK_NEEDED again because the other process's go_lock
> operation may not have been successful.
>
> To faciliate this change, the go_lock section of function do_promote
> was extracted to its own new function, gfs2_go_lock. The reason we do
> this is because GL_SKIP callers often read in the object later.
> Before this patch, those GL_SKIP callers (like gfs2_inode_lookup and
> update_rgrp_lvb) called directly into the object-read functions
> (gfs2_inode_refresh and gfs2_rgrp_bh_get respectively), but that never
> cleared the new GLF_GO_LOCK_NEEDED flag. This patch changes those
> functions so they call into the new gfs2_go_lock directly, which takes
> care of all that.
>
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
> fs/gfs2/glock.c | 136 +++++++++++++++++++++++++++++++++--------------
> fs/gfs2/glock.h | 1 +
> fs/gfs2/glops.c | 21 ++++----
> fs/gfs2/incore.h | 3 +-
> fs/gfs2/inode.c | 4 +-
> fs/gfs2/rgrp.c | 12 ++---
> fs/gfs2/super.c | 6 ++-
> 7 files changed, 121 insertions(+), 62 deletions(-)
(snip)
> @@ -2153,6 +2209,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
> *p++ = 'P';
> if (test_bit(GLF_FREEING, gflags))
> *p++ = 'x';
> + if (test_bit(GLF_GO_LOCK_NEEDED, gflags))
> + *p++ = 'g';
> *p = 0;
> return buf;
> }
Hi,
As soon as I sent this patch out I realized I forgot to add the second
new GLF bit, GLF_GO_LOCK_IN_PROG, to gflags2str. So the final version
will include it.
Also, this version passed 500 iterations of the failing test case and
a full run of xfstests.
Regards,
Bob Peterson
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Cluster-devel] [PATCH] gfs2: fix GL_SKIP node_scope problems
2021-09-29 13:21 [Cluster-devel] [PATCH] gfs2: fix GL_SKIP node_scope problems Bob Peterson
2021-09-29 13:27 ` Bob Peterson
@ 2021-09-29 15:35 ` Andreas Gruenbacher
2021-09-29 16:42 ` Andreas Gruenbacher
1 sibling, 1 reply; 4+ messages in thread
From: Andreas Gruenbacher @ 2021-09-29 15:35 UTC (permalink / raw)
To: cluster-devel.redhat.com
Hi Bob,
On Wed, Sep 29, 2021 at 3:21 PM Bob Peterson <rpeterso@redhat.com> wrote:
> Before this patch, when a glock was locked, the very first holder on the
> queue would unlock the lockref and call the go_lock glops function (if
> one existed), unless GL_SKIP was specified. When we introduced the new
> node-scope concept, we allowed multiple holders to lock glocks in EX mode
> and share the lock.
>
> But node-scope introduced a new problem: if the first holder has GL_SKIP
> and the next one does NOT, since it is not the first holder on the queue,
> the go_lock op was not called. Eventually the GL_SKIP holder may call the
> go_lock sub-function (e.g. gfs2_rgrp_bh_get) but there was still a
> window of time in which another non-GL_SKIP holder assumes the go_lock
> function had been called by the first holder. In the case of rgrp glocks,
> this led to a NULL pointer dereference on the buffer_heads.
>
> This patch tries to fix the problem by introducing two new glock flags:
>
> GLF_GO_LOCK_NEEDED, which keeps track of when the go_lock function needs
> to be called to "fill in" or "read" the object before it is referenced.
>
> GLF_GO_LOCK_IN_PROG which is used to determine when a process is
> in the process of reading in the object. Whenever a function needs to
> reference the object, it checks the GLF_GO_LOCK_NEEDED flag, and if
> set, it sets GLF_GO_LOCK_IN_PROG and calls the glops "go_lock" function.
>
> As before, the gl_lockref spin_lock is unlocked during the IO operation,
> which may take a relatively long amount of time to complete. While
> unlocked, if another process determines go_lock is still needed, it sees
> the GLF_GO_LOCK_IN_PROG flag is set, and waits for the go_lock glop
> operation to be completed. Once GLF_GO_LOCK_IN_PROG is cleared, it needs
> to check GLF_GO_LOCK_NEEDED again because the other process's go_lock
> operation may not have been successful.
>
> To faciliate this change, the go_lock section of function do_promote
> was extracted to its own new function, gfs2_go_lock. The reason we do
> this is because GL_SKIP callers often read in the object later.
> Before this patch, those GL_SKIP callers (like gfs2_inode_lookup and
> update_rgrp_lvb) called directly into the object-read functions
> (gfs2_inode_refresh and gfs2_rgrp_bh_get respectively), but that never
> cleared the new GLF_GO_LOCK_NEEDED flag. This patch changes those
> functions so they call into the new gfs2_go_lock directly, which takes
> care of all that.
this is looking much better now. The naming of the go_lock callback
and consequently the new flags is poor though; go_lock doesn't
actually do any locking. Something like revalidate would make a lot
more sense.
> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
> fs/gfs2/glock.c | 136 +++++++++++++++++++++++++++++++++--------------
> fs/gfs2/glock.h | 1 +
> fs/gfs2/glops.c | 21 ++++----
> fs/gfs2/incore.h | 3 +-
> fs/gfs2/inode.c | 4 +-
> fs/gfs2/rgrp.c | 12 ++---
> fs/gfs2/super.c | 6 ++-
> 7 files changed, 121 insertions(+), 62 deletions(-)
>
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index 4fcf340603e7..750ea07b4173 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -380,6 +380,66 @@ static void do_error(struct gfs2_glock *gl, const int ret)
> }
> }
>
> +/**
> + * gfs2_go_lock - Call the go_lock glops function
> + * @gl: The glock
> + *
> + * Note: the GLF bits used throughout are protected by the lockref spinlock.
They are? How?
> + *
> + * Returns: 0 if go_lock was successful, 2 if type specific operation is
> + * underway, -EAGAIN if secondary go_lock was waiting for another caller of
> + * go_lock, or error.
> + */
> +int gfs2_go_lock(struct gfs2_holder *gh)
> +__releases(&gl->gl_lockref.lock)
> +__acquires(&gl->gl_lockref.lock)
These annotations seem wrong.
> +{
> + struct gfs2_glock *gl = gh->gh_gl;
> + const struct gfs2_glock_operations *glops = gl->gl_ops;
> + int ret;
> +
> + /*
> + * Since we unlock the lockref lock, we set a flag to indicate
> + * go_lock is in progress.
> + */
> + if (test_bit(GLF_GO_LOCK_IN_PROG, &gl->gl_flags)) {
> + wait_on_bit(&gl->gl_flags, GLF_GO_LOCK_IN_PROG,
> + TASK_UNINTERRUPTIBLE);
> + /*
> + * Here we just waited for a st go_lock to finish.
> + * But that go_lock may not have been successful, as when
> + * a process locks an inode glock _before_ it has an actual
> + * inode to populate. So we check again. This secondary waiter
> + * _might_ have an inode to populate.
> + */
> + return -EAGAIN;
I'm not sure why the retrying needs to be handled by the caller. Also,
there are callers now that don't handle -EAGAIN; what about those?
> + }
> +
> + set_bit(GLF_GO_LOCK_IN_PROG, &gl->gl_flags);
> +
> + /* FIXME: eliminate this eventually */
> + ret = glops->go_lock(gh);
> +
> + switch(ret) {
> + case 0:
> + clear_bit(GLF_GO_LOCK_NEEDED, &gl->gl_flags);
> + break;
> + case 1:
> + ret = 2;
> + break;
> + default:
> + gh->gh_error = ret;
> + list_del_init(&gh->gh_list);
> + trace_gfs2_glock_queue(gh, 0);
> + gfs2_holder_wake(gh);
> + break;
> + }
> + clear_bit(GLF_GO_LOCK_IN_PROG, &gl->gl_flags);
> + smp_mb__after_atomic();
> + wake_up_bit(&gl->gl_flags, GLF_GO_LOCK_IN_PROG);
> + return ret;
> +}
> +
> /**
> * do_promote - promote as many requests as possible on the current queue
> * @gl: The glock
> @@ -389,54 +449,50 @@ static void do_error(struct gfs2_glock *gl, const int ret)
> */
>
> static int do_promote(struct gfs2_glock *gl)
> -__releases(&gl->gl_lockref.lock)
> -__acquires(&gl->gl_lockref.lock)
> {
> - const struct gfs2_glock_operations *glops = gl->gl_ops;
> struct gfs2_holder *gh, *tmp;
> int first;
>
> + bool lock_released;
> int ret;
>
> restart:
> list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
> + lock_released = false;
> if (test_bit(HIF_HOLDER, &gh->gh_iflags))
> continue;
> - if (may_grant(gl, gh)) {
> - first = gfs2_first_holder(gh);
> - if (first && glops->go_lock) {
> - if (!(gh->gh_flags & GL_SKIP)) {
> - spin_unlock(&gl->gl_lockref.lock);
> - /* FIXME: eliminate this eventually */
> - ret = glops->go_lock(gh);
> - spin_lock(&gl->gl_lockref.lock);
> - if (ret) {
> - if (ret == 1)
> - return 2;
> - gh->gh_error = ret;
> - list_del_init(&gh->gh_list);
> - trace_gfs2_glock_queue(gh, 0);
> - gfs2_holder_wake(gh);
> - goto restart;
> - }
> - }
> - }
> - set_bit(HIF_HOLDER, &gh->gh_iflags);
> - trace_gfs2_promote(gh, first);
> - gfs2_holder_wake(gh);
> - /*
> - * If this was the first holder, we may have released
> - * the gl_lockref.lock, so the holders list may have
> - * changed. For that reason, we start again at the
> - * start of the holders queue.
> - */
> - if (first)
> - goto restart;
> - continue;
> + if (!may_grant(gl, gh)) {
> + if (gh->gh_list.prev == &gl->gl_holders)
> + return 1;
> + do_error(gl, 0);
> + break;
> }
> - if (gh->gh_list.prev == &gl->gl_holders)
> - return 1;
> - do_error(gl, 0);
> - break;
> +
> + first = gfs2_first_holder(gh);
> +check_go_lock:
> + if (test_bit(GLF_GO_LOCK_NEEDED, &gl->gl_flags) &&
> + !(gh->gh_flags & GL_SKIP)) {
> + spin_unlock(&gl->gl_lockref.lock);
> + ret = gfs2_go_lock(gh);
> + spin_lock(&gl->gl_lockref.lock);
I'm wondering if we can change go_lock to take a glock argument now.
After that, we should be able to move the go_lock call to when we're
done with the glock, when we've dropped the lockref spin lock already.
Also, maybe we can move waiting for the flag to gfs2_glock_nq
eventually.
> + if (ret) {
> + if (ret == -EAGAIN)
> + goto check_go_lock;
> + else
> + goto restart;
> + } else
> + lock_released = true;
> + }
> +
> + set_bit(HIF_HOLDER, &gh->gh_iflags);
> + trace_gfs2_promote(gh, first);
I think 'first' can be killed now.
> + gfs2_holder_wake(gh);
> + /*
> + * If we released the gl_lockref.lock the holders list may have
> + * changed. For that reason, we start again at the start of
> + * the holders queue.
> + */
> + if (lock_released)
> + goto restart;
> }
> return 0;
> }
> @@ -1064,7 +1120,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
>
> atomic_inc(&sdp->sd_glock_disposal);
> gl->gl_node.next = NULL;
> - gl->gl_flags = 0;
> + gl->gl_flags = glops->go_lock ? BIT(GLF_GO_LOCK_NEEDED) : 0;
> gl->gl_name = name;
> lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
> gl->gl_lockref.count = 1;
> @@ -2153,6 +2209,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
> *p++ = 'P';
> if (test_bit(GLF_FREEING, gflags))
> *p++ = 'x';
> + if (test_bit(GLF_GO_LOCK_NEEDED, gflags))
> + *p++ = 'g';
> *p = 0;
> return buf;
> }
> diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
> index 8a09379dbf66..2e348e250fd2 100644
> --- a/fs/gfs2/glock.h
> +++ b/fs/gfs2/glock.h
> @@ -195,6 +195,7 @@ extern void gfs2_holder_reinit(unsigned int state, u16 flags,
> extern void gfs2_holder_uninit(struct gfs2_holder *gh);
> extern int gfs2_glock_nq(struct gfs2_holder *gh);
> extern int gfs2_glock_poll(struct gfs2_holder *gh);
> +extern int gfs2_go_lock(struct gfs2_holder *gh);
> extern int gfs2_glock_wait(struct gfs2_holder *gh);
> extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs);
> extern void gfs2_glock_dq(struct gfs2_holder *gh);
> diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
> index 4b19f513570f..4823ba63c3bf 100644
> --- a/fs/gfs2/glops.c
> +++ b/fs/gfs2/glops.c
> @@ -356,7 +356,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
> struct address_space *mapping = gfs2_glock2aspace(gl);
> truncate_inode_pages(mapping, 0);
> if (ip) {
> - set_bit(GIF_INVALID, &ip->i_flags);
> + set_bit(GLF_GO_LOCK_NEEDED, &gl->gl_flags);
> forget_all_cached_acls(&ip->i_inode);
> security_inode_invalidate_secctx(&ip->i_inode);
> gfs2_dir_hash_inval(ip);
> @@ -476,8 +476,6 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
>
> error = gfs2_dinode_in(ip, dibh->b_data);
> brelse(dibh);
> - clear_bit(GIF_INVALID, &ip->i_flags);
Yikes. Can replacing GIF_INVALID with GLF_GO_LOCK_NEEDED go in a separate patch?
> -
> return error;
> }
>
> @@ -493,16 +491,14 @@ static int inode_go_lock(struct gfs2_holder *gh)
> struct gfs2_glock *gl = gh->gh_gl;
> struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> struct gfs2_inode *ip = gl->gl_object;
> - int error = 0;
> + int error = 1;
>
> - if (!ip)
> - return 0;
> + if (!ip) /* no inode to populate - read it in later */
> + goto out;
>
> - if (test_bit(GIF_INVALID, &ip->i_flags)) {
> - error = gfs2_inode_refresh(ip);
> - if (error)
> - return error;
> - }
> + error = gfs2_inode_refresh(ip);
> + if (error)
> + goto out;
>
> if (gh->gh_state != LM_ST_DEFERRED)
> inode_dio_wait(&ip->i_inode);
> @@ -515,9 +511,10 @@ static int inode_go_lock(struct gfs2_holder *gh)
> list_add(&ip->i_trunc_list, &sdp->sd_trunc_list);
> spin_unlock(&sdp->sd_trunc_lock);
> wake_up(&sdp->sd_quota_wait);
> - return 1;
> + error = 1;
> }
>
> +out:
> return error;
> }
>
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 0fe49770166e..c98b37545f2c 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -315,6 +315,7 @@ struct gfs2_alloc_parms {
>
> enum {
> GLF_LOCK = 1,
> + GLF_GO_LOCK_NEEDED = 2, /* needs go_lock */
> GLF_DEMOTE = 3,
> GLF_PENDING_DEMOTE = 4,
> GLF_DEMOTE_IN_PROGRESS = 5,
> @@ -324,6 +325,7 @@ enum {
> GLF_REPLY_PENDING = 9,
> GLF_INITIAL = 10,
> GLF_FROZEN = 11,
> + GLF_GO_LOCK_IN_PROG = 12, /* go_lock happening now */
> GLF_LRU = 13,
> GLF_OBJECT = 14, /* Used only for tracing */
> GLF_BLOCKING = 15,
> @@ -370,7 +372,6 @@ struct gfs2_glock {
> };
>
> enum {
> - GIF_INVALID = 0,
> GIF_QD_LOCKED = 1,
> GIF_ALLOC_FAILED = 2,
> GIF_SW_PAGED = 3,
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index 6e15434b23ac..4e583e9a380c 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -182,7 +182,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
> }
>
> glock_set_object(ip->i_gl, ip);
> - set_bit(GIF_INVALID, &ip->i_flags);
> + set_bit(GLF_GO_LOCK_NEEDED, &ip->i_gl->gl_flags);
> error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
> if (unlikely(error))
> goto fail;
> @@ -196,7 +196,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
>
> if (type == DT_UNKNOWN) {
> /* Inode glock must be locked already */
> - error = gfs2_inode_refresh(GFS2_I(inode));
> + error = gfs2_go_lock(&i_gh);
> if (error)
> goto fail;
> } else {
> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> index 96b2fbed6bf1..796f9e015218 100644
> --- a/fs/gfs2/rgrp.c
> +++ b/fs/gfs2/rgrp.c
> @@ -1261,7 +1261,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
> return error;
> }
>
> -static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
> +static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh)
> {
> u32 rl_flags;
>
> @@ -1269,7 +1269,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
> return 0;
>
> if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
> - return gfs2_rgrp_bh_get(rgd);
> + gfs2_go_lock(gh);
>
> rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
> rl_flags &= ~GFS2_RDF_MASK;
> @@ -1312,6 +1312,7 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
> bi->bi_bh = NULL;
> }
> }
> + set_bit(GLF_GO_LOCK_NEEDED, &rgd->rd_gl->gl_flags);
> }
>
> int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
> @@ -2110,7 +2111,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
> gfs2_rgrp_congested(rs->rs_rgd, loops))
> goto skip_rgrp;
> if (sdp->sd_args.ar_rgrplvb) {
> - error = update_rgrp_lvb(rs->rs_rgd);
> + error = update_rgrp_lvb(rs->rs_rgd,
> + &ip->i_rgd_gh);
> if (unlikely(error)) {
> rgrp_unlock_local(rs->rs_rgd);
> gfs2_glock_dq_uninit(&ip->i_rgd_gh);
> @@ -2126,7 +2128,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
> goto skip_rgrp;
>
> if (sdp->sd_args.ar_rgrplvb)
> - gfs2_rgrp_bh_get(rs->rs_rgd);
> + gfs2_go_lock(&ip->i_rgd_gh);
>
> /* Get a reservation if we don't already have one */
> if (!gfs2_rs_active(rs))
> @@ -2762,8 +2764,6 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
>
> void rgrp_lock_local(struct gfs2_rgrpd *rgd)
> {
> - GLOCK_BUG_ON(rgd->rd_gl, !gfs2_glock_is_held_excl(rgd->rd_gl) &&
> - !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags));
> mutex_lock(&rgd->rd_mutex);
> }
>
> diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
> index 6e00d15ef0a8..20698232774a 100644
> --- a/fs/gfs2/super.c
> +++ b/fs/gfs2/super.c
> @@ -1244,8 +1244,10 @@ static enum dinode_demise evict_should_delete(struct inode *inode,
> if (ret)
> return SHOULD_NOT_DELETE_DINODE;
>
> - if (test_bit(GIF_INVALID, &ip->i_flags)) {
> - ret = gfs2_inode_refresh(ip);
> + if (test_bit(GLF_GO_LOCK_NEEDED, &ip->i_gl->gl_flags)) {
> + do {
> + ret = gfs2_go_lock(gh);
> + } while (ret == -EAGAIN);
> if (ret)
> return SHOULD_NOT_DELETE_DINODE;
> }
> --
> 2.31.1
>
Thanks,
Andreas
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Cluster-devel] [PATCH] gfs2: fix GL_SKIP node_scope problems
2021-09-29 15:35 ` Andreas Gruenbacher
@ 2021-09-29 16:42 ` Andreas Gruenbacher
0 siblings, 0 replies; 4+ messages in thread
From: Andreas Gruenbacher @ 2021-09-29 16:42 UTC (permalink / raw)
To: cluster-devel.redhat.com
On Wed, Sep 29, 2021 at 5:35 PM Andreas Gruenbacher <agruenba@redhat.com> wrote:
>
> Hi Bob,
>
> On Wed, Sep 29, 2021 at 3:21 PM Bob Peterson <rpeterso@redhat.com> wrote:
> > Before this patch, when a glock was locked, the very first holder on the
> > queue would unlock the lockref and call the go_lock glops function (if
> > one existed), unless GL_SKIP was specified. When we introduced the new
> > node-scope concept, we allowed multiple holders to lock glocks in EX mode
> > and share the lock.
> >
> > But node-scope introduced a new problem: if the first holder has GL_SKIP
> > and the next one does NOT, since it is not the first holder on the queue,
> > the go_lock op was not called. Eventually the GL_SKIP holder may call the
> > go_lock sub-function (e.g. gfs2_rgrp_bh_get) but there was still a
> > window of time in which another non-GL_SKIP holder assumes the go_lock
> > function had been called by the first holder. In the case of rgrp glocks,
> > this led to a NULL pointer dereference on the buffer_heads.
> >
> > This patch tries to fix the problem by introducing two new glock flags:
> >
> > GLF_GO_LOCK_NEEDED, which keeps track of when the go_lock function needs
> > to be called to "fill in" or "read" the object before it is referenced.
> >
> > GLF_GO_LOCK_IN_PROG which is used to determine when a process is
> > in the process of reading in the object. Whenever a function needs to
> > reference the object, it checks the GLF_GO_LOCK_NEEDED flag, and if
> > set, it sets GLF_GO_LOCK_IN_PROG and calls the glops "go_lock" function.
> >
> > As before, the gl_lockref spin_lock is unlocked during the IO operation,
> > which may take a relatively long amount of time to complete. While
> > unlocked, if another process determines go_lock is still needed, it sees
> > the GLF_GO_LOCK_IN_PROG flag is set, and waits for the go_lock glop
> > operation to be completed. Once GLF_GO_LOCK_IN_PROG is cleared, it needs
> > to check GLF_GO_LOCK_NEEDED again because the other process's go_lock
> > operation may not have been successful.
> >
> > To faciliate this change, the go_lock section of function do_promote
> > was extracted to its own new function, gfs2_go_lock. The reason we do
> > this is because GL_SKIP callers often read in the object later.
> > Before this patch, those GL_SKIP callers (like gfs2_inode_lookup and
> > update_rgrp_lvb) called directly into the object-read functions
> > (gfs2_inode_refresh and gfs2_rgrp_bh_get respectively), but that never
> > cleared the new GLF_GO_LOCK_NEEDED flag. This patch changes those
> > functions so they call into the new gfs2_go_lock directly, which takes
> > care of all that.
>
> this is looking much better now. The naming of the go_lock callback
> and consequently the new flags is poor though; go_lock doesn't
> actually do any locking. Something like revalidate would make a lot
> more sense.
>
> > Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> > ---
> > fs/gfs2/glock.c | 136 +++++++++++++++++++++++++++++++++--------------
> > fs/gfs2/glock.h | 1 +
> > fs/gfs2/glops.c | 21 ++++----
> > fs/gfs2/incore.h | 3 +-
> > fs/gfs2/inode.c | 4 +-
> > fs/gfs2/rgrp.c | 12 ++---
> > fs/gfs2/super.c | 6 ++-
> > 7 files changed, 121 insertions(+), 62 deletions(-)
> >
> > diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> > index 4fcf340603e7..750ea07b4173 100644
> > --- a/fs/gfs2/glock.c
> > +++ b/fs/gfs2/glock.c
> > @@ -380,6 +380,66 @@ static void do_error(struct gfs2_glock *gl, const int ret)
> > }
> > }
> >
> > +/**
> > + * gfs2_go_lock - Call the go_lock glops function
> > + * @gl: The glock
> > + *
> > + * Note: the GLF bits used throughout are protected by the lockref spinlock.
>
> They are? How?
>
> > + *
> > + * Returns: 0 if go_lock was successful, 2 if type specific operation is
> > + * underway, -EAGAIN if secondary go_lock was waiting for another caller of
> > + * go_lock, or error.
> > + */
> > +int gfs2_go_lock(struct gfs2_holder *gh)
> > +__releases(&gl->gl_lockref.lock)
> > +__acquires(&gl->gl_lockref.lock)
>
> These annotations seem wrong.
>
> > +{
> > + struct gfs2_glock *gl = gh->gh_gl;
> > + const struct gfs2_glock_operations *glops = gl->gl_ops;
> > + int ret;
> > +
> > + /*
> > + * Since we unlock the lockref lock, we set a flag to indicate
> > + * go_lock is in progress.
> > + */
> > + if (test_bit(GLF_GO_LOCK_IN_PROG, &gl->gl_flags)) {
> > + wait_on_bit(&gl->gl_flags, GLF_GO_LOCK_IN_PROG,
> > + TASK_UNINTERRUPTIBLE);
> > + /*
> > + * Here we just waited for a st go_lock to finish.
> > + * But that go_lock may not have been successful, as when
> > + * a process locks an inode glock _before_ it has an actual
> > + * inode to populate. So we check again. This secondary waiter
> > + * _might_ have an inode to populate.
> > + */
> > + return -EAGAIN;
>
> I'm not sure why the retrying needs to be handled by the caller. Also,
> there are callers now that don't handle -EAGAIN; what about those?
>
> > + }
> > +
> > + set_bit(GLF_GO_LOCK_IN_PROG, &gl->gl_flags);
> > +
> > + /* FIXME: eliminate this eventually */
> > + ret = glops->go_lock(gh);
> > +
> > + switch(ret) {
> > + case 0:
> > + clear_bit(GLF_GO_LOCK_NEEDED, &gl->gl_flags);
> > + break;
> > + case 1:
> > + ret = 2;
> > + break;
> > + default:
> > + gh->gh_error = ret;
> > + list_del_init(&gh->gh_list);
> > + trace_gfs2_glock_queue(gh, 0);
> > + gfs2_holder_wake(gh);
> > + break;
> > + }
> > + clear_bit(GLF_GO_LOCK_IN_PROG, &gl->gl_flags);
> > + smp_mb__after_atomic();
> > + wake_up_bit(&gl->gl_flags, GLF_GO_LOCK_IN_PROG);
> > + return ret;
> > +}
> > +
> > /**
> > * do_promote - promote as many requests as possible on the current queue
> > * @gl: The glock
> > @@ -389,54 +449,50 @@ static void do_error(struct gfs2_glock *gl, const int ret)
> > */
> >
> > static int do_promote(struct gfs2_glock *gl)
> > -__releases(&gl->gl_lockref.lock)
> > -__acquires(&gl->gl_lockref.lock)
> > {
> > - const struct gfs2_glock_operations *glops = gl->gl_ops;
> > struct gfs2_holder *gh, *tmp;
> > int first;
> >
> > + bool lock_released;
> > int ret;
> >
> > restart:
> > list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
> > + lock_released = false;
> > if (test_bit(HIF_HOLDER, &gh->gh_iflags))
> > continue;
> > - if (may_grant(gl, gh)) {
> > - first = gfs2_first_holder(gh);
> > - if (first && glops->go_lock) {
> > - if (!(gh->gh_flags & GL_SKIP)) {
> > - spin_unlock(&gl->gl_lockref.lock);
> > - /* FIXME: eliminate this eventually */
> > - ret = glops->go_lock(gh);
> > - spin_lock(&gl->gl_lockref.lock);
> > - if (ret) {
> > - if (ret == 1)
> > - return 2;
> > - gh->gh_error = ret;
> > - list_del_init(&gh->gh_list);
> > - trace_gfs2_glock_queue(gh, 0);
> > - gfs2_holder_wake(gh);
> > - goto restart;
> > - }
> > - }
> > - }
> > - set_bit(HIF_HOLDER, &gh->gh_iflags);
> > - trace_gfs2_promote(gh, first);
> > - gfs2_holder_wake(gh);
> > - /*
> > - * If this was the first holder, we may have released
> > - * the gl_lockref.lock, so the holders list may have
> > - * changed. For that reason, we start again at the
> > - * start of the holders queue.
> > - */
> > - if (first)
> > - goto restart;
> > - continue;
> > + if (!may_grant(gl, gh)) {
> > + if (gh->gh_list.prev == &gl->gl_holders)
> > + return 1;
> > + do_error(gl, 0);
> > + break;
> > }
> > - if (gh->gh_list.prev == &gl->gl_holders)
> > - return 1;
> > - do_error(gl, 0);
> > - break;
> > +
> > + first = gfs2_first_holder(gh);
> > +check_go_lock:
> > + if (test_bit(GLF_GO_LOCK_NEEDED, &gl->gl_flags) &&
> > + !(gh->gh_flags & GL_SKIP)) {
> > + spin_unlock(&gl->gl_lockref.lock);
> > + ret = gfs2_go_lock(gh);
> > + spin_lock(&gl->gl_lockref.lock);
>
> I'm wondering if we can change go_lock to take a glock argument now.
> After that, we should be able to move the go_lock call to when we're
> done with the glock, when we've dropped the lockref spin lock already.
> Also, maybe we can move waiting for the flag to gfs2_glock_nq
> eventually.
Actually, I think this entire if can go into gfs2_glock_nq.
Andreas
> > + if (ret) {
> > + if (ret == -EAGAIN)
> > + goto check_go_lock;
> > + else
> > + goto restart;
> > + } else
> > + lock_released = true;
> > + }
> > +
> > + set_bit(HIF_HOLDER, &gh->gh_iflags);
> > + trace_gfs2_promote(gh, first);
>
> I think 'first' can be killed now.
>
> > + gfs2_holder_wake(gh);
> > + /*
> > + * If we released the gl_lockref.lock the holders list may have
> > + * changed. For that reason, we start again at the start of
> > + * the holders queue.
> > + */
> > + if (lock_released)
> > + goto restart;
> > }
> > return 0;
> > }
> > @@ -1064,7 +1120,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
> >
> > atomic_inc(&sdp->sd_glock_disposal);
> > gl->gl_node.next = NULL;
> > - gl->gl_flags = 0;
> > + gl->gl_flags = glops->go_lock ? BIT(GLF_GO_LOCK_NEEDED) : 0;
> > gl->gl_name = name;
> > lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
> > gl->gl_lockref.count = 1;
> > @@ -2153,6 +2209,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
> > *p++ = 'P';
> > if (test_bit(GLF_FREEING, gflags))
> > *p++ = 'x';
> > + if (test_bit(GLF_GO_LOCK_NEEDED, gflags))
> > + *p++ = 'g';
> > *p = 0;
> > return buf;
> > }
> > diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
> > index 8a09379dbf66..2e348e250fd2 100644
> > --- a/fs/gfs2/glock.h
> > +++ b/fs/gfs2/glock.h
> > @@ -195,6 +195,7 @@ extern void gfs2_holder_reinit(unsigned int state, u16 flags,
> > extern void gfs2_holder_uninit(struct gfs2_holder *gh);
> > extern int gfs2_glock_nq(struct gfs2_holder *gh);
> > extern int gfs2_glock_poll(struct gfs2_holder *gh);
> > +extern int gfs2_go_lock(struct gfs2_holder *gh);
> > extern int gfs2_glock_wait(struct gfs2_holder *gh);
> > extern int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs);
> > extern void gfs2_glock_dq(struct gfs2_holder *gh);
> > diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
> > index 4b19f513570f..4823ba63c3bf 100644
> > --- a/fs/gfs2/glops.c
> > +++ b/fs/gfs2/glops.c
> > @@ -356,7 +356,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
> > struct address_space *mapping = gfs2_glock2aspace(gl);
> > truncate_inode_pages(mapping, 0);
> > if (ip) {
> > - set_bit(GIF_INVALID, &ip->i_flags);
> > + set_bit(GLF_GO_LOCK_NEEDED, &gl->gl_flags);
> > forget_all_cached_acls(&ip->i_inode);
> > security_inode_invalidate_secctx(&ip->i_inode);
> > gfs2_dir_hash_inval(ip);
> > @@ -476,8 +476,6 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
> >
> > error = gfs2_dinode_in(ip, dibh->b_data);
> > brelse(dibh);
> > - clear_bit(GIF_INVALID, &ip->i_flags);
>
> Yikes. Can replacing GIF_INVALID with GLF_GO_LOCK_NEEDED go in a separate patch?
>
> > -
> > return error;
> > }
> >
> > @@ -493,16 +491,14 @@ static int inode_go_lock(struct gfs2_holder *gh)
> > struct gfs2_glock *gl = gh->gh_gl;
> > struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
> > struct gfs2_inode *ip = gl->gl_object;
> > - int error = 0;
> > + int error = 1;
> >
> > - if (!ip)
> > - return 0;
> > + if (!ip) /* no inode to populate - read it in later */
> > + goto out;
> >
> > - if (test_bit(GIF_INVALID, &ip->i_flags)) {
> > - error = gfs2_inode_refresh(ip);
> > - if (error)
> > - return error;
> > - }
> > + error = gfs2_inode_refresh(ip);
> > + if (error)
> > + goto out;
> >
> > if (gh->gh_state != LM_ST_DEFERRED)
> > inode_dio_wait(&ip->i_inode);
> > @@ -515,9 +511,10 @@ static int inode_go_lock(struct gfs2_holder *gh)
> > list_add(&ip->i_trunc_list, &sdp->sd_trunc_list);
> > spin_unlock(&sdp->sd_trunc_lock);
> > wake_up(&sdp->sd_quota_wait);
> > - return 1;
> > + error = 1;
> > }
> >
> > +out:
> > return error;
> > }
> >
> > diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> > index 0fe49770166e..c98b37545f2c 100644
> > --- a/fs/gfs2/incore.h
> > +++ b/fs/gfs2/incore.h
> > @@ -315,6 +315,7 @@ struct gfs2_alloc_parms {
> >
> > enum {
> > GLF_LOCK = 1,
> > + GLF_GO_LOCK_NEEDED = 2, /* needs go_lock */
> > GLF_DEMOTE = 3,
> > GLF_PENDING_DEMOTE = 4,
> > GLF_DEMOTE_IN_PROGRESS = 5,
> > @@ -324,6 +325,7 @@ enum {
> > GLF_REPLY_PENDING = 9,
> > GLF_INITIAL = 10,
> > GLF_FROZEN = 11,
> > + GLF_GO_LOCK_IN_PROG = 12, /* go_lock happening now */
> > GLF_LRU = 13,
> > GLF_OBJECT = 14, /* Used only for tracing */
> > GLF_BLOCKING = 15,
> > @@ -370,7 +372,6 @@ struct gfs2_glock {
> > };
> >
> > enum {
> > - GIF_INVALID = 0,
> > GIF_QD_LOCKED = 1,
> > GIF_ALLOC_FAILED = 2,
> > GIF_SW_PAGED = 3,
> > diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> > index 6e15434b23ac..4e583e9a380c 100644
> > --- a/fs/gfs2/inode.c
> > +++ b/fs/gfs2/inode.c
> > @@ -182,7 +182,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
> > }
> >
> > glock_set_object(ip->i_gl, ip);
> > - set_bit(GIF_INVALID, &ip->i_flags);
> > + set_bit(GLF_GO_LOCK_NEEDED, &ip->i_gl->gl_flags);
> > error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
> > if (unlikely(error))
> > goto fail;
> > @@ -196,7 +196,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
> >
> > if (type == DT_UNKNOWN) {
> > /* Inode glock must be locked already */
> > - error = gfs2_inode_refresh(GFS2_I(inode));
> > + error = gfs2_go_lock(&i_gh);
> > if (error)
> > goto fail;
> > } else {
> > diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> > index 96b2fbed6bf1..796f9e015218 100644
> > --- a/fs/gfs2/rgrp.c
> > +++ b/fs/gfs2/rgrp.c
> > @@ -1261,7 +1261,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
> > return error;
> > }
> >
> > -static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
> > +static int update_rgrp_lvb(struct gfs2_rgrpd *rgd, struct gfs2_holder *gh)
> > {
> > u32 rl_flags;
> >
> > @@ -1269,7 +1269,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
> > return 0;
> >
> > if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
> > - return gfs2_rgrp_bh_get(rgd);
> > + gfs2_go_lock(gh);
> >
> > rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
> > rl_flags &= ~GFS2_RDF_MASK;
> > @@ -1312,6 +1312,7 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
> > bi->bi_bh = NULL;
> > }
> > }
> > + set_bit(GLF_GO_LOCK_NEEDED, &rgd->rd_gl->gl_flags);
> > }
> >
> > int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
> > @@ -2110,7 +2111,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
> > gfs2_rgrp_congested(rs->rs_rgd, loops))
> > goto skip_rgrp;
> > if (sdp->sd_args.ar_rgrplvb) {
> > - error = update_rgrp_lvb(rs->rs_rgd);
> > + error = update_rgrp_lvb(rs->rs_rgd,
> > + &ip->i_rgd_gh);
> > if (unlikely(error)) {
> > rgrp_unlock_local(rs->rs_rgd);
> > gfs2_glock_dq_uninit(&ip->i_rgd_gh);
> > @@ -2126,7 +2128,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
> > goto skip_rgrp;
> >
> > if (sdp->sd_args.ar_rgrplvb)
> > - gfs2_rgrp_bh_get(rs->rs_rgd);
> > + gfs2_go_lock(&ip->i_rgd_gh);
> >
> > /* Get a reservation if we don't already have one */
> > if (!gfs2_rs_active(rs))
> > @@ -2762,8 +2764,6 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
> >
> > void rgrp_lock_local(struct gfs2_rgrpd *rgd)
> > {
> > - GLOCK_BUG_ON(rgd->rd_gl, !gfs2_glock_is_held_excl(rgd->rd_gl) &&
> > - !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags));
> > mutex_lock(&rgd->rd_mutex);
> > }
> >
> > diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
> > index 6e00d15ef0a8..20698232774a 100644
> > --- a/fs/gfs2/super.c
> > +++ b/fs/gfs2/super.c
> > @@ -1244,8 +1244,10 @@ static enum dinode_demise evict_should_delete(struct inode *inode,
> > if (ret)
> > return SHOULD_NOT_DELETE_DINODE;
> >
> > - if (test_bit(GIF_INVALID, &ip->i_flags)) {
> > - ret = gfs2_inode_refresh(ip);
> > + if (test_bit(GLF_GO_LOCK_NEEDED, &ip->i_gl->gl_flags)) {
> > + do {
> > + ret = gfs2_go_lock(gh);
> > + } while (ret == -EAGAIN);
> > if (ret)
> > return SHOULD_NOT_DELETE_DINODE;
> > }
> > --
> > 2.31.1
> >
>
> Thanks,
> Andreas
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-09-29 16:42 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-29 13:21 [Cluster-devel] [PATCH] gfs2: fix GL_SKIP node_scope problems Bob Peterson
2021-09-29 13:27 ` Bob Peterson
2021-09-29 15:35 ` Andreas Gruenbacher
2021-09-29 16:42 ` Andreas Gruenbacher
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.