* [PATCH v3] ceph: dump info about cap flushes when we're waiting too long for them
From: Jeff Layton @ 2021-07-29 18:04 UTC (permalink / raw)
To: ceph-devel; +Cc: pdonnell, idryomov
We've had some cases of hung umounts in teuthology testing. It looks
like the client is waiting for cap flushes to complete, but they aren't.
Add a field to the inode to track the highest cap flush tid seen for
that inode. Also, add a backpointer to the inode to the ceph_cap_flush
struct.
Change wait_caps_flush to wait 60s, and then dump info about the
condition of the list.
Also, print pr_info messages if we end up dropping a FLUSH_ACK for an
inode onto the floor.
Reported-by: Patrick Donnelly <pdonnell@redhat.com>
URL: https://tracker.ceph.com/issues/51279
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
fs/ceph/caps.c | 17 +++++++++++++++--
fs/ceph/inode.c | 1 +
fs/ceph/mds_client.c | 31 +++++++++++++++++++++++++++++--
fs/ceph/super.h | 2 ++
4 files changed, 47 insertions(+), 4 deletions(-)
v3: more debugging has shown the client waiting on FLUSH_ACK messages
that seem to never have come. Add some new printks if we end up
dropping a FLUSH_ACK onto the floor.
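The new wait loop leans on wait_event_timeout()'s return convention, which
is easy to misread, so here is the hunk below restated with comments
(reference only, not an additional change):

        /* inside wait_caps_flush(); mdsc and want_flush_tid as in the hunk below */
        long ret;

        do {
                /*
                 * wait_event_timeout() returns 0 if the 60s elapsed with the
                 * condition still false, 1 if the condition became true only
                 * after the timeout had already expired, and the remaining
                 * jiffies (> 1) if it became true in time.
                 */
                ret = wait_event_timeout(mdsc->cap_flushing_wq,
                                         check_caps_flush(mdsc, want_flush_tid),
                                         60 * HZ);
                if (ret == 0)
                        dump_cap_flushes(mdsc, want_flush_tid); /* stuck: dump and retry */
                else if (ret == 1)
                        pr_info("%s: condition evaluated to true after timeout!\n",
                                __func__);
        } while (ret == 0);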
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7ae83d06d48c..cb551c9e5867 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1829,6 +1829,7 @@ static u64 __mark_caps_flushing(struct inode *inode,
swap(cf, ci->i_prealloc_cap_flush);
cf->caps = flushing;
cf->wake = wake;
+ cf->ci = ci;
spin_lock(&mdsc->cap_dirty_lock);
list_del_init(&ci->i_dirty_item);
@@ -3588,6 +3589,10 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
bool wake_ci = false;
bool wake_mdsc = false;
+ /* track latest cap flush ack seen for this inode */
+ if (flush_tid > ci->i_last_cap_flush_ack)
+ ci->i_last_cap_flush_ack = flush_tid;
+
list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
/* Is this the one that was flushed? */
if (cf->tid == flush_tid)
@@ -4116,7 +4121,11 @@ void ceph_handle_caps(struct ceph_mds_session *session,
(unsigned)seq);
if (!inode) {
- dout(" i don't have ino %llx\n", vino.ino);
+ if (op == CEPH_CAP_OP_FLUSH_ACK)
+ pr_info("%s: can't find ino %llx:%llx for flush_ack!\n",
+ __func__, vino.ino, vino.snap);
+ else
+ dout(" i don't have ino %llx\n", vino.ino);
if (op == CEPH_CAP_OP_IMPORT) {
cap = ceph_get_cap(mdsc, NULL);
@@ -4169,10 +4178,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
spin_lock(&ci->i_ceph_lock);
cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds);
if (!cap) {
- dout(" no cap on %p ino %llx.%llx from mds%d\n",
+ dout(" no cap on %p ino %llx:%llx from mds%d\n",
inode, ceph_ino(inode), ceph_snap(inode),
session->s_mds);
spin_unlock(&ci->i_ceph_lock);
+ if (op == CEPH_CAP_OP_FLUSH_ACK)
+ pr_info("%s: no cap on %p ino %llx:%llx from mds%d for flush_ack!\n",
+ __func__, inode, ceph_ino(inode),
+ ceph_snap(inode), session->s_mds);
goto flush_cap_releases;
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 1bd2cc015913..84e4f112fc45 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -499,6 +499,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
INIT_LIST_HEAD(&ci->i_cap_snaps);
ci->i_head_snapc = NULL;
ci->i_snap_caps = 0;
+ ci->i_last_cap_flush_ack = 0;
ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ;
for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c3fa0c0e4f6c..fc26527b8059 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2064,6 +2064,24 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
return ret;
}
+static void dump_cap_flushes(struct ceph_mds_client *mdsc, u64 want_tid)
+{
+ struct ceph_cap_flush *cf;
+
+ pr_info("%s: still waiting for cap flushes through %llu\n:\n",
+ __func__, want_tid);
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_for_each_entry(cf, &mdsc->cap_flush_list, g_list) {
+ if (cf->tid > want_tid)
+ break;
+ pr_info("%llx:%llx %s %llu %llu %d\n",
+ ceph_vinop(&cf->ci->vfs_inode),
+ ceph_cap_string(cf->caps), cf->tid,
+ cf->ci->i_last_cap_flush_ack, cf->wake);
+ }
+ spin_unlock(&mdsc->cap_dirty_lock);
+}
+
/*
* flush all dirty inode data to disk.
*
@@ -2072,10 +2090,19 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
static void wait_caps_flush(struct ceph_mds_client *mdsc,
u64 want_flush_tid)
{
+ long ret;
+
dout("check_caps_flush want %llu\n", want_flush_tid);
- wait_event(mdsc->cap_flushing_wq,
- check_caps_flush(mdsc, want_flush_tid));
+ do {
+ ret = wait_event_timeout(mdsc->cap_flushing_wq,
+ check_caps_flush(mdsc, want_flush_tid), 60 * HZ);
+ if (ret == 0)
+ dump_cap_flushes(mdsc, want_flush_tid);
+ else if (ret == 1)
+ pr_info("%s: condition evaluated to true after timeout!\n",
+ __func__);
+ } while (ret == 0);
dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid);
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 07eb542efa1d..d51d42a00f33 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -189,6 +189,7 @@ struct ceph_cap_flush {
bool wake; /* wake up flush waiters when finish ? */
struct list_head g_list; // global
struct list_head i_list; // per inode
+ struct ceph_inode_info *ci;
};
/*
@@ -388,6 +389,7 @@ struct ceph_inode_info {
struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
dirty|flushing caps */
unsigned i_snap_caps; /* cap bits for snapped files */
+ u64 i_last_cap_flush_ack; /* latest cap flush_ack tid for this inode */
unsigned long i_last_rd;
unsigned long i_last_wr;
--
2.31.1
* Re: [PATCH v3] ceph: dump info about cap flushes when we're waiting too long for them
From: Luis Henriques @ 2021-07-30 10:09 UTC (permalink / raw)
To: Jeff Layton; +Cc: ceph-devel, pdonnell, idryomov
Jeff Layton <jlayton@kernel.org> writes:
> We've had some cases of hung umounts in teuthology testing. It looks
> like the client is waiting for cap flushes to complete, but they aren't.
>
> Add a field to the inode to track the highest cap flush tid seen for
> that inode. Also, add a backpointer to the inode to the ceph_cap_flush
> struct.
>
> Change wait_caps_flush to wait 60s, and then dump info about the
> condition of the list.
>
> Also, print pr_info messages if we end up dropping a FLUSH_ACK for an
> inode onto the floor.
>
> Reported-by: Patrick Donnelly <pdonnell@redhat.com>
> URL: https://tracker.ceph.com/issues/51279
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
> fs/ceph/caps.c | 17 +++++++++++++++--
> fs/ceph/inode.c | 1 +
> fs/ceph/mds_client.c | 31 +++++++++++++++++++++++++++++--
> fs/ceph/super.h | 2 ++
> 4 files changed, 47 insertions(+), 4 deletions(-)
>
> v3: more debugging has shown the client waiting on FLUSH_ACK messages
> that seem to never have come. Add some new printks if we end up
> dropping a FLUSH_ACK onto the floor.
Since you're adding debug printks, would it be worth also adding one in
mds_dispatch(), when __verify_registered_session(mdsc, s) < 0?
It's a wild guess, but the FLUSH_ACK could be dropped in that case too.
Not that I could spot any issue there, but since this seems to be
happening during umount...
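Something along these lines, perhaps (just a sketch against the existing
unregistered-session check in mds_dispatch(); the exact message and what it
prints are placeholders):

        /* in mds_dispatch(); s, mdsc and msg are already in scope there */
        mutex_lock(&mdsc->mutex);
        if (__verify_registered_session(mdsc, s) < 0) {
                /* anything dropped here, FLUSH_ACK included, vanishes silently today */
                pr_info("%s: dropping msg type %d from mds%d on unregistered session\n",
                        __func__, le16_to_cpu(msg->hdr.type), s->s_mds);
                mutex_unlock(&mdsc->mutex);
                goto out;
        }
        mutex_unlock(&mdsc->mutex);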
Cheers,
--
Luis
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 7ae83d06d48c..cb551c9e5867 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -1829,6 +1829,7 @@ static u64 __mark_caps_flushing(struct inode *inode,
> swap(cf, ci->i_prealloc_cap_flush);
> cf->caps = flushing;
> cf->wake = wake;
> + cf->ci = ci;
>
> spin_lock(&mdsc->cap_dirty_lock);
> list_del_init(&ci->i_dirty_item);
> @@ -3588,6 +3589,10 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
> bool wake_ci = false;
> bool wake_mdsc = false;
>
> + /* track latest cap flush ack seen for this inode */
> + if (flush_tid > ci->i_last_cap_flush_ack)
> + ci->i_last_cap_flush_ack = flush_tid;
> +
> list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
> /* Is this the one that was flushed? */
> if (cf->tid == flush_tid)
> @@ -4116,7 +4121,11 @@ void ceph_handle_caps(struct ceph_mds_session *session,
> (unsigned)seq);
>
> if (!inode) {
> - dout(" i don't have ino %llx\n", vino.ino);
> + if (op == CEPH_CAP_OP_FLUSH_ACK)
> + pr_info("%s: can't find ino %llx:%llx for flush_ack!\n",
> + __func__, vino.ino, vino.snap);
> + else
> + dout(" i don't have ino %llx\n", vino.ino);
>
> if (op == CEPH_CAP_OP_IMPORT) {
> cap = ceph_get_cap(mdsc, NULL);
> @@ -4169,10 +4178,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
> spin_lock(&ci->i_ceph_lock);
> cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds);
> if (!cap) {
> - dout(" no cap on %p ino %llx.%llx from mds%d\n",
> + dout(" no cap on %p ino %llx:%llx from mds%d\n",
> inode, ceph_ino(inode), ceph_snap(inode),
> session->s_mds);
> spin_unlock(&ci->i_ceph_lock);
> + if (op == CEPH_CAP_OP_FLUSH_ACK)
> + pr_info("%s: no cap on %p ino %llx:%llx from mds%d for flush_ack!\n",
> + __func__, inode, ceph_ino(inode),
> + ceph_snap(inode), session->s_mds);
> goto flush_cap_releases;
> }
>
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 1bd2cc015913..84e4f112fc45 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -499,6 +499,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
> INIT_LIST_HEAD(&ci->i_cap_snaps);
> ci->i_head_snapc = NULL;
> ci->i_snap_caps = 0;
> + ci->i_last_cap_flush_ack = 0;
>
> ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ;
> for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index c3fa0c0e4f6c..fc26527b8059 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -2064,6 +2064,24 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
> return ret;
> }
>
> +static void dump_cap_flushes(struct ceph_mds_client *mdsc, u64 want_tid)
> +{
> + struct ceph_cap_flush *cf;
> +
> + pr_info("%s: still waiting for cap flushes through %llu\n:\n",
> + __func__, want_tid);
> + spin_lock(&mdsc->cap_dirty_lock);
> + list_for_each_entry(cf, &mdsc->cap_flush_list, g_list) {
> + if (cf->tid > want_tid)
> + break;
> + pr_info("%llx:%llx %s %llu %llu %d\n",
> + ceph_vinop(&cf->ci->vfs_inode),
> + ceph_cap_string(cf->caps), cf->tid,
> + cf->ci->i_last_cap_flush_ack, cf->wake);
> + }
> + spin_unlock(&mdsc->cap_dirty_lock);
> +}
> +
> /*
> * flush all dirty inode data to disk.
> *
> @@ -2072,10 +2090,19 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
> static void wait_caps_flush(struct ceph_mds_client *mdsc,
> u64 want_flush_tid)
> {
> + long ret;
> +
> dout("check_caps_flush want %llu\n", want_flush_tid);
>
> - wait_event(mdsc->cap_flushing_wq,
> - check_caps_flush(mdsc, want_flush_tid));
> + do {
> + ret = wait_event_timeout(mdsc->cap_flushing_wq,
> + check_caps_flush(mdsc, want_flush_tid), 60 * HZ);
> + if (ret == 0)
> + dump_cap_flushes(mdsc, want_flush_tid);
> + else if (ret == 1)
> + pr_info("%s: condition evaluated to true after timeout!\n",
> + __func__);
> + } while (ret == 0);
>
> dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid);
> }
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 07eb542efa1d..d51d42a00f33 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -189,6 +189,7 @@ struct ceph_cap_flush {
> bool wake; /* wake up flush waiters when finish ? */
> struct list_head g_list; // global
> struct list_head i_list; // per inode
> + struct ceph_inode_info *ci;
> };
>
> /*
> @@ -388,6 +389,7 @@ struct ceph_inode_info {
> struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or
> dirty|flushing caps */
> unsigned i_snap_caps; /* cap bits for snapped files */
> + u64 i_last_cap_flush_ack; /* latest cap flush_ack tid for this inode */
>
> unsigned long i_last_rd;
> unsigned long i_last_wr;
> --
>
> 2.31.1
>
* Re: [PATCH v3] ceph: dump info about cap flushes when we're waiting too long for them
From: Jeff Layton @ 2021-07-30 13:32 UTC (permalink / raw)
To: Luis Henriques; +Cc: ceph-devel, pdonnell, idryomov
On Fri, 2021-07-30 at 11:09 +0100, Luis Henriques wrote:
> Jeff Layton <jlayton@kernel.org> writes:
>
> > We've had some cases of hung umounts in teuthology testing. It looks
> > like the client is waiting for cap flushes to complete, but they aren't.
> >
> > Add a field to the inode to track the highest cap flush tid seen for
> > that inode. Also, add a backpointer to the inode to the ceph_cap_flush
> > struct.
> >
> > Change wait_caps_flush to wait 60s, and then dump info about the
> > condition of the list.
> >
> > Also, print pr_info messages if we end up dropping a FLUSH_ACK for an
> > inode onto the floor.
> >
> > Reported-by: Patrick Donnelly <pdonnell@redhat.com>
> > URL: https://tracker.ceph.com/issues/51279
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> > fs/ceph/caps.c | 17 +++++++++++++++--
> > fs/ceph/inode.c | 1 +
> > fs/ceph/mds_client.c | 31 +++++++++++++++++++++++++++++--
> > fs/ceph/super.h | 2 ++
> > 4 files changed, 47 insertions(+), 4 deletions(-)
> >
> > v3: more debugging has shown the client waiting on FLUSH_ACK messages
> > that seem to never have come. Add some new printks if we end up
> > dropping a FLUSH_ACK onto the floor.
>
> Since you're adding debug printks, would it be worth to also add one in
> mds_dispatch(), when __verify_registered_session(mdsc, s) < 0?
>
> It's a wild guess, but the FLUSH_ACK could be dropped in that case too.
> Not that I could spot any issue there, but since this seems to be
> happening during umount...
>
> Cheers,
Good point. I had looked at that case and had sort of dismissed it in
this situation, but you're probably right. I've added a similar pr_info
for that case and pushed it to the repo after a little testing here. I
won't bother re-posting it though since the change is trivial.
Thanks,
--
Jeff Layton <jlayton@kernel.org>