ceph-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Luis Henriques <lhenriques@suse.de>
To: Jeff Layton <jlayton@kernel.org>
Cc: ceph-devel@vger.kernel.org, pdonnell@redhat.com, idryomov@gmail.com
Subject: Re: [PATCH v3] ceph: dump info about cap flushes when we're waiting too long for them
Date: Fri, 30 Jul 2021 11:09:04 +0100	[thread overview]
Message-ID: <87zgu4m7un.fsf@suse.de> (raw)
In-Reply-To: <20210729180442.177399-1-jlayton@kernel.org> (Jeff Layton's message of "Thu, 29 Jul 2021 14:04:42 -0400")

Jeff Layton <jlayton@kernel.org> writes:

> We've had some cases of hung umounts in teuthology testing. It looks
> like client is waiting for cap flushes to complete, but they aren't.
>
> Add a field to the inode to track the highest cap flush tid seen for
> that inode. Also, add a backpointer to the inode to the ceph_cap_flush
> struct.
>
> Change wait_caps_flush to wait 60s, and then dump info about the
> condition of the list.
>
> Also, print pr_info messages if we end up dropping a FLUSH_ACK for an
> inode onto the floor.
>
> Reported-by: Patrick Donnelly <pdonnell@redhat.com>
> URL: https://tracker.ceph.com/issues/51279
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/ceph/caps.c       | 17 +++++++++++++++--
>  fs/ceph/inode.c      |  1 +
>  fs/ceph/mds_client.c | 31 +++++++++++++++++++++++++++++--
>  fs/ceph/super.h      |  2 ++
>  4 files changed, 47 insertions(+), 4 deletions(-)
>
> v3: more debugging has shown the client waiting on FLUSH_ACK messages
>     that seem to never have come. Add some new printks if we end up
>     dropping a FLUSH_ACK onto the floor.

Since you're adding debug printks, would it be worth to also add one in
mds_dispatch(), when __verify_registered_session(mdsc, s) < 0?

It's a wild guess, but the FLUSH_ACK could be dropped in that case too.
Not that I could spot any issue there, but since this seems to be
happening during umount...

Cheers,
-- 
Luis


>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 7ae83d06d48c..cb551c9e5867 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -1829,6 +1829,7 @@ static u64 __mark_caps_flushing(struct inode *inode,
>  	swap(cf, ci->i_prealloc_cap_flush);
>  	cf->caps = flushing;
>  	cf->wake = wake;
> +	cf->ci = ci;
>  
>  	spin_lock(&mdsc->cap_dirty_lock);
>  	list_del_init(&ci->i_dirty_item);
> @@ -3588,6 +3589,10 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
>  	bool wake_ci = false;
>  	bool wake_mdsc = false;
>  
> +	/* track latest cap flush ack seen for this inode */
> +	if (flush_tid > ci->i_last_cap_flush_ack)
> +		ci->i_last_cap_flush_ack = flush_tid;
> +
>  	list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
>  		/* Is this the one that was flushed? */
>  		if (cf->tid == flush_tid)
> @@ -4116,7 +4121,11 @@ void ceph_handle_caps(struct ceph_mds_session *session,
>  	     (unsigned)seq);
>  
>  	if (!inode) {
> -		dout(" i don't have ino %llx\n", vino.ino);
> +		if (op == CEPH_CAP_OP_FLUSH_ACK)
> +			pr_info("%s: can't find ino %llx:%llx for flush_ack!\n",
> +				__func__, vino.snap, vino.ino);
> +		else
> +			dout(" i don't have ino %llx\n", vino.ino);
>  
>  		if (op == CEPH_CAP_OP_IMPORT) {
>  			cap = ceph_get_cap(mdsc, NULL);
> @@ -4169,10 +4178,14 @@ void ceph_handle_caps(struct ceph_mds_session *session,
>  	spin_lock(&ci->i_ceph_lock);
>  	cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds);
>  	if (!cap) {
> -		dout(" no cap on %p ino %llx.%llx from mds%d\n",
> +		dout(" no cap on %p ino %llx:%llx from mds%d\n",
>  		     inode, ceph_ino(inode), ceph_snap(inode),
>  		     session->s_mds);
>  		spin_unlock(&ci->i_ceph_lock);
> +		if (op == CEPH_CAP_OP_FLUSH_ACK)
> +			pr_info("%s: no cap on %p ino %llx:%llx from mds%d for flush_ack!\n",
> +				__func__, inode, ceph_ino(inode),
> +				ceph_snap(inode), session->s_mds);
>  		goto flush_cap_releases;
>  	}
>  
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 1bd2cc015913..84e4f112fc45 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -499,6 +499,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>  	INIT_LIST_HEAD(&ci->i_cap_snaps);
>  	ci->i_head_snapc = NULL;
>  	ci->i_snap_caps = 0;
> +	ci->i_last_cap_flush_ack = 0;
>  
>  	ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ;
>  	for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index c3fa0c0e4f6c..fc26527b8059 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -2064,6 +2064,24 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
>  	return ret;
>  }
>  
> +static void dump_cap_flushes(struct ceph_mds_client *mdsc, u64 want_tid)
> +{
> +	struct ceph_cap_flush *cf;
> +
> +	pr_info("%s: still waiting for cap flushes through %llu\n:\n",
> +		__func__, want_tid);
> +	spin_lock(&mdsc->cap_dirty_lock);
> +	list_for_each_entry(cf, &mdsc->cap_flush_list, g_list) {
> +		if (cf->tid > want_tid)
> +			break;
> +		pr_info("%llx:%llx %s %llu %llu %d\n",
> +			ceph_vinop(&cf->ci->vfs_inode),
> +			ceph_cap_string(cf->caps), cf->tid,
> +			cf->ci->i_last_cap_flush_ack, cf->wake);
> +	}
> +	spin_unlock(&mdsc->cap_dirty_lock);
> +}
> +
>  /*
>   * flush all dirty inode data to disk.
>   *
> @@ -2072,10 +2090,19 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
>  static void wait_caps_flush(struct ceph_mds_client *mdsc,
>  			    u64 want_flush_tid)
>  {
> +	long ret;
> +
>  	dout("check_caps_flush want %llu\n", want_flush_tid);
>  
> -	wait_event(mdsc->cap_flushing_wq,
> -		   check_caps_flush(mdsc, want_flush_tid));
> +	do {
> +		ret = wait_event_timeout(mdsc->cap_flushing_wq,
> +			   check_caps_flush(mdsc, want_flush_tid), 60 * HZ);
> +		if (ret == 0)
> +			dump_cap_flushes(mdsc, want_flush_tid);
> +		else if (ret == 1)
> +			pr_info("%s: condition evaluated to true after timeout!\n",
> +				  __func__);
> +	} while (ret == 0);
>  
>  	dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid);
>  }
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 07eb542efa1d..d51d42a00f33 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -189,6 +189,7 @@ struct ceph_cap_flush {
>  	bool wake; /* wake up flush waiters when finish ? */
>  	struct list_head g_list; // global
>  	struct list_head i_list; // per inode
> +	struct ceph_inode_info *ci;
>  };
>  
>  /*
> @@ -388,6 +389,7 @@ struct ceph_inode_info {
>  	struct ceph_snap_context *i_head_snapc;  /* set if wr_buffer_head > 0 or
>  						    dirty|flushing caps */
>  	unsigned i_snap_caps;           /* cap bits for snapped files */
> +	u64 i_last_cap_flush_ack;		/* latest cap flush_ack tid for this inode */
>  
>  	unsigned long i_last_rd;
>  	unsigned long i_last_wr;
> -- 
>
> 2.31.1
>

  reply	other threads:[~2021-07-30 10:09 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-29 18:04 Jeff Layton
2021-07-30 10:09 ` Luis Henriques [this message]
2021-07-30 13:32   ` Jeff Layton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87zgu4m7un.fsf@suse.de \
    --to=lhenriques@suse.de \
    --cc=ceph-devel@vger.kernel.org \
    --cc=idryomov@gmail.com \
    --cc=jlayton@kernel.org \
    --cc=pdonnell@redhat.com \
    --subject='Re: [PATCH v3] ceph: dump info about cap flushes when we'\''re waiting too long for them' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).