From: Xiubo Li <xiubli@redhat.com>
To: Jeff Layton <jlayton@kernel.org>
Cc: idryomov@gmail.com, pdonnell@redhat.com, ceph-devel@vger.kernel.org
Subject: Re: [PATCH v3] ceph: defer flushing the capsnap if the Fb is used
Date: Mon, 18 Jan 2021 17:10:47 +0800
Message-ID: <376245cf-a60d-6ddb-6ab3-894a491b854e@redhat.com>
In-Reply-To: <f698d039251d444eec334b119b5ae0b0dd101a21.camel@kernel.org>

On 2021/1/13 5:48, Jeff Layton wrote:
> On Sun, 2021-01-10 at 10:01 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> If the Fb cap is in use, the current inode is still flushing dirty
>> data to the OSDs, so just defer flushing the capsnap.
>>
>> URL: https://tracker.ceph.com/issues/48679
>> URL: https://tracker.ceph.com/issues/48640
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>
>> V3:
>> - Add more comments about putting the inode ref
>> - A small change about the code style
>>
>> V2:
>> - Fix inode reference leak bug
>>
>>   fs/ceph/caps.c | 32 +++++++++++++++++++-------------
>>   fs/ceph/snap.c |  6 +++---
>>   2 files changed, 22 insertions(+), 16 deletions(-)
>>
> Hi Xiubo,
>
> This patch seems to cause hangs in some xfstests (generic/013, in
> particular). I'll take a closer look when I have a chance, but I'm
> dropping this for now.

Okay.

BTW, what are the test commands you use to reproduce it? I will take a
look when I get some free time in the coming days.

BRs

>
> -- Jeff
>
>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index abbf48fc6230..b00234cf3b04 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -3047,6 +3047,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>>   {
>>   	struct inode *inode = &ci->vfs_inode;
>>   	int last = 0, put = 0, flushsnaps = 0, wake = 0;
>> +	bool check_flushsnaps = false;
>>
>>   	spin_lock(&ci->i_ceph_lock);
>>   	if (had & CEPH_CAP_PIN)
>> @@ -3063,26 +3064,17 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>>   	if (had & CEPH_CAP_FILE_BUFFER) {
>>   		if (--ci->i_wb_ref == 0) {
>>   			last++;
>> +			/* put the ref held by ceph_take_cap_refs() */
>>   			put++;
>> +			check_flushsnaps = true;
>>   		}
>>   		dout("put_cap_refs %p wb %d -> %d (?)\n",
>>   		     inode, ci->i_wb_ref+1, ci->i_wb_ref);
>>   	}
>> -	if (had & CEPH_CAP_FILE_WR)
>> +	if (had & CEPH_CAP_FILE_WR) {
>>   		if (--ci->i_wr_ref == 0) {
>>   			last++;
>> -			if (__ceph_have_pending_cap_snap(ci)) {
>> -				struct ceph_cap_snap *capsnap =
>> -					list_last_entry(&ci->i_cap_snaps,
>> -							struct ceph_cap_snap,
>> -							ci_item);
>> -				capsnap->writing = 0;
>> -				if (ceph_try_drop_cap_snap(ci, capsnap))
>> -					put++;
>> -				else if (__ceph_finish_cap_snap(ci, capsnap))
>> -					flushsnaps = 1;
>> -				wake = 1;
>> -			}
>> +			check_flushsnaps = true;
>>   			if (ci->i_wrbuffer_ref_head == 0 &&
>>   			    ci->i_dirty_caps == 0 &&
>>   			    ci->i_flushing_caps == 0) {
>> @@ -3094,6 +3086,20 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>>   			if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
>>   				drop_inode_snap_realm(ci);
>>   		}
>> +	}
>> +	if (check_flushsnaps && __ceph_have_pending_cap_snap(ci)) {
>> +		struct ceph_cap_snap *capsnap =
>> +			list_last_entry(&ci->i_cap_snaps,
>> +					struct ceph_cap_snap,
>> +					ci_item);
>> +		capsnap->writing = 0;
>> +		if (ceph_try_drop_cap_snap(ci, capsnap))
>> +		        /* put the ref held by ceph_queue_cap_snap() */
>> +			put++;
>> +		else if (__ceph_finish_cap_snap(ci, capsnap))
>> +			flushsnaps = 1;
>> +		wake = 1;
>> +	}
>>   	spin_unlock(&ci->i_ceph_lock);
>>
>>   	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
>> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
>> index b611f829cb61..639fb91cc9db 100644
>> --- a/fs/ceph/snap.c
>> +++ b/fs/ceph/snap.c
>> @@ -561,10 +561,10 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
>>   	capsnap->context = old_snapc;
>>   	list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
>>
>> -	if (used & CEPH_CAP_FILE_WR) {
>> +	if (used & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER)) {
>>   		dout("queue_cap_snap %p cap_snap %p snapc %p"
>> -		     " seq %llu used WR, now pending\n", inode,
>> -		     capsnap, old_snapc, old_snapc->seq);
>> +		     " seq %llu used WR | BUFFER, now pending\n",
>> +		     inode, capsnap, old_snapc, old_snapc->seq);
>>   		capsnap->writing = 1;
>>   	} else {
>>   		/* note mtime, size NOW. */
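
For reference, the behavioural change in the quoted patch boils down to this:
__ceph_put_cap_refs() now rechecks the pending cap_snap when the last Fb
(CEPH_CAP_FILE_BUFFER) reference is dropped, not only when the last Fw
(CEPH_CAP_FILE_WR) reference goes away, and ceph_queue_cap_snap() keeps the
capsnap marked as "writing" while either cap is in use. The snippet below is a
minimal userspace model of just that decision, not the kernel code: the struct,
constants, field names and put_cap_refs_model() are simplified stand-ins for
ceph_inode_info, the CEPH_CAP_* bits and __ceph_put_cap_refs().

    /* Compile with: cc -o capsnap_model capsnap_model.c */
    #include <stdbool.h>
    #include <stdio.h>

    #define CAP_FILE_BUFFER 0x1   /* stand-in for CEPH_CAP_FILE_BUFFER (Fb) */
    #define CAP_FILE_WR     0x2   /* stand-in for CEPH_CAP_FILE_WR (Fw) */

    struct inode_refs_model {
            int wb_ref;   /* outstanding Fb references */
            int wr_ref;   /* outstanding Fw references */
    };

    /* Returns true when the pending capsnap should be (re)checked. */
    static bool put_cap_refs_model(struct inode_refs_model *ci, int had,
                                   bool patched)
    {
            bool check_flushsnaps = false;

            if ((had & CAP_FILE_BUFFER) && --ci->wb_ref == 0) {
                    /* With the patch the last Fb put also triggers the check. */
                    if (patched)
                            check_flushsnaps = true;
            }
            if ((had & CAP_FILE_WR) && --ci->wr_ref == 0)
                    check_flushsnaps = true;   /* old and new behaviour */

            return check_flushsnaps;
    }

    int main(void)
    {
            /* Snapshot queued while only Fb is still held. */
            struct inode_refs_model old_ci = { .wb_ref = 1, .wr_ref = 0 };
            struct inode_refs_model new_ci = { .wb_ref = 1, .wr_ref = 0 };

            printf("old: last Fb put triggers capsnap check? %d\n",
                   put_cap_refs_model(&old_ci, CAP_FILE_BUFFER, false)); /* 0 */
            printf("new: last Fb put triggers capsnap check? %d\n",
                   put_cap_refs_model(&new_ci, CAP_FILE_BUFFER, true));  /* 1 */
            return 0;
    }

In the unpatched flow the first case prints 0, i.e. a capsnap queued while only
Fb was held would sit until something else dropped the last Fw reference; with
the change it is checked (and dropped or finished) as soon as writeback
completes and the last Fb reference is put.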



Thread overview: 8+ messages
2021-01-10  2:01 [PATCH v3] ceph: defer flushing the capsnap if the Fb is used xiubli
2021-01-12 21:48 ` Jeff Layton
2021-01-18  9:10   ` Xiubo Li [this message]
2021-01-18 11:08     ` Jeff Layton
2021-01-20  0:56       ` Xiubo Li
2021-01-20 20:04         ` Jeff Layton
2021-01-21 14:28     ` Jeff Layton
2021-01-22 10:07       ` Xiubo Li
