All of lore.kernel.org
 help / color / mirror / Atom feed
* [Ocfs2-devel] [PATCH 1/2] Ocfs2: Journaling i_flags and i_orphaned_slot when adding inode to orphan dir.
@ 2010-02-21  8:29 Tristan Ye
  2010-02-21  8:29 ` [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly Tristan Ye
  0 siblings, 1 reply; 13+ messages in thread
From: Tristan Ye @ 2010-02-21  8:29 UTC (permalink / raw)
  To: ocfs2-devel

Currently, some callers were missing to journal the dirty inode after
adding it to orphan dir.

Now we're going to journal such modifications within the ocfs2_orphan_add()
itself, It's safe to do so, though some existing caller may duplicate this,
and it makes the logic look more straightforward anyway.

Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
---
 fs/ocfs2/namei.c |   32 +++++++++++++++++++++++++++-----
 1 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 50fb26a..e501adf 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -84,7 +84,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
 static int ocfs2_orphan_add(struct ocfs2_super *osb,
 			    handle_t *handle,
 			    struct inode *inode,
-			    struct ocfs2_dinode *fe,
+			    struct buffer_head *fe_bh,
 			    char *name,
 			    struct ocfs2_dir_lookup_result *lookup,
 			    struct inode *orphan_dir_inode);
@@ -877,7 +877,7 @@ static int ocfs2_unlink(struct inode *dir,
 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
 
 	if (inode_is_unlinkable(inode)) {
-		status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
+		status = ocfs2_orphan_add(osb, handle, inode, fe_bh, orphan_name,
 					  &orphan_insert, orphan_dir);
 		if (status < 0) {
 			mlog_errno(status);
@@ -1295,7 +1295,7 @@ static int ocfs2_rename(struct inode *old_dir,
 		if (S_ISDIR(new_inode->i_mode) ||
 		    (ocfs2_read_links_count(newfe) == 1)) {
 			status = ocfs2_orphan_add(osb, handle, new_inode,
-						  newfe, orphan_name,
+						  newfe_bh, orphan_name,
 						  &orphan_insert, orphan_dir);
 			if (status < 0) {
 				mlog_errno(status);
@@ -1909,7 +1909,7 @@ leave:
 static int ocfs2_orphan_add(struct ocfs2_super *osb,
 			    handle_t *handle,
 			    struct inode *inode,
-			    struct ocfs2_dinode *fe,
+			    struct buffer_head *fe_bh,
 			    char *name,
 			    struct ocfs2_dir_lookup_result *lookup,
 			    struct inode *orphan_dir_inode)
@@ -1917,6 +1917,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 	struct buffer_head *orphan_dir_bh = NULL;
 	int status = 0;
 	struct ocfs2_dinode *orphan_fe;
+	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
 
 	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
 
@@ -1957,6 +1958,21 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 		goto leave;
 	}
 
+	/* 
+	 * We're going to journal the change of i_flags and i_orphaned_slot.
+	 * It's safe anyway, though some callers may duplicate the journaling.
+	 * Journaling within the func just make the logic look more
+	 * straightforward.
+	 */
+	status = ocfs2_journal_access_di(handle,
+					 INODE_CACHE(inode),
+					 fe_bh,
+					 OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
 	le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
 
 	/* Record which orphan dir our inode now resides
@@ -1964,6 +1980,12 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
 	 * dir to lock. */
 	fe->i_orphaned_slot = cpu_to_le16(osb->slot_num);
 
+	status = ocfs2_journal_dirty(handle, fe_bh);
+	if (status < 0) {
+		mlog_errno(status);
+		goto leave;
+	}
+
 	mlog(0, "Inode %llu orphaned in slot %d\n",
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
 
@@ -2125,7 +2147,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
 	}
 
 	di = (struct ocfs2_dinode *)new_di_bh->b_data;
-	status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name,
+	status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name,
 				  &orphan_insert, orphan_dir);
 	if (status < 0) {
 		mlog_errno(status);
-- 
1.5.5

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly.
  2010-02-21  8:29 [Ocfs2-devel] [PATCH 1/2] Ocfs2: Journaling i_flags and i_orphaned_slot when adding inode to orphan dir Tristan Ye
@ 2010-02-21  8:29 ` Tristan Ye
  2010-02-21  8:55   ` Tao Ma
  2010-02-26 23:28   ` Joel Becker
  0 siblings, 2 replies; 13+ messages in thread
From: Tristan Ye @ 2010-02-21  8:29 UTC (permalink / raw)
  To: ocfs2-devel

Current ocfs2 semantic for reflinking a file firstly create a
new orphan_inode in orphan_dir, then remove it to target dir
after refcounting operation done, these 2 steps makes logic
straightfoward, and guarantee a crash during reflinking can
be replayed(half-refcounted inode can be removed), while it
brings us another issue cause these 2 steps is acquiring the
orphan_dir lock respectively, the problem is, orphan_scan()
may detect the half-refcounted inode in orphan_dir as its
proper candidates to wipe off in a later time. actually it's
not of course, we'd handle this correctly.

Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
---
 fs/ocfs2/inode.c |   32 ++++++++++++++++++++++++--------
 1 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 88459bd..61fb546 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -892,14 +892,30 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
 	di = (struct ocfs2_dinode *) di_bh->b_data;
 	if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
 		/* for lack of a better error? */
-		status = -EEXIST;
-		mlog(ML_ERROR,
-		     "Inode %llu (on-disk %llu) not orphaned! "
-		     "Disk flags  0x%x, inode flags 0x%x\n",
-		     (unsigned long long)oi->ip_blkno,
-		     (unsigned long long)le64_to_cpu(di->i_blkno),
-		     le32_to_cpu(di->i_flags), oi->ip_flags);
-		goto bail;
+		if (!(di->i_dyn_features &
+		      cpu_to_le16(OCFS2_HAS_REFCOUNT_FL))) {
+			status = -EEXIST;
+			mlog(ML_ERROR,
+			     "Inode %llu (on-disk %llu) not orphaned! "
+			     "Disk flags  0x%x, inode flags 0x%x\n",
+			     (unsigned long long)oi->ip_blkno,
+			     (unsigned long long)le64_to_cpu(di->i_blkno),
+			     le32_to_cpu(di->i_flags), oi->ip_flags);
+			goto bail;
+		} else {
+			/*
+			 * It did happen to us, though it's a rare case:
+			 * orphan_scan() detects the half-refcounted inode
+			 * in orphan_dir, and delete_inode() attempts to
+			 * wipe it after reflink operation done later. now
+			 * we're not allowed to delete such a valid inode,
+			 * instead, just bail out.
+			 */
+			mlog(0, "Skipping delete of %llu because it's a "
+			     "reflinked inode\n",
+			     (unsigned long long)oi->ip_blkno);
+			goto bail;
+		}
 	}
 
 	/* has someone already deleted us?! baaad... */
-- 
1.5.5

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly.
  2010-02-21  8:29 ` [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly Tristan Ye
@ 2010-02-21  8:55   ` Tao Ma
  2010-02-21  8:58     ` tristan
  2010-02-26 23:28   ` Joel Becker
  1 sibling, 1 reply; 13+ messages in thread
From: Tao Ma @ 2010-02-21  8:55 UTC (permalink / raw)
  To: ocfs2-devel

Hi tristan,

Tristan Ye wrote:
> Current ocfs2 semantic for reflinking a file firstly create a
> new orphan_inode in orphan_dir, then remove it to target dir
> after refcounting operation done, these 2 steps makes logic
> straightfoward, and guarantee a crash during reflinking can
> be replayed(half-refcounted inode can be removed), while it
> brings us another issue cause these 2 steps is acquiring the
> orphan_dir lock respectively, the problem is, orphan_scan()
> may detect the half-refcounted inode in orphan_dir as its
> proper candidates to wipe off in a later time. actually it's
> not of course, we'd handle this correctly.
> 
> Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
> ---
>  fs/ocfs2/inode.c |   32 ++++++++++++++++++++++++--------
>  1 files changed, 24 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
> index 88459bd..61fb546 100644
> --- a/fs/ocfs2/inode.c
> +++ b/fs/ocfs2/inode.c
> @@ -892,14 +892,30 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
>  	di = (struct ocfs2_dinode *) di_bh->b_data;
>  	if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
>  		/* for lack of a better error? */
> -		status = -EEXIST;
> -		mlog(ML_ERROR,
> -		     "Inode %llu (on-disk %llu) not orphaned! "
> -		     "Disk flags  0x%x, inode flags 0x%x\n",
> -		     (unsigned long long)oi->ip_blkno,
> -		     (unsigned long long)le64_to_cpu(di->i_blkno),
> -		     le32_to_cpu(di->i_flags), oi->ip_flags);
> -		goto bail;
> +		if (!(di->i_dyn_features &
> +		      cpu_to_le16(OCFS2_HAS_REFCOUNT_FL))) {
> +			status = -EEXIST;
> +			mlog(ML_ERROR,
> +			     "Inode %llu (on-disk %llu) not orphaned! "
> +			     "Disk flags  0x%x, inode flags 0x%x\n",
> +			     (unsigned long long)oi->ip_blkno,
> +			     (unsigned long long)le64_to_cpu(di->i_blkno),
> +			     le32_to_cpu(di->i_flags), oi->ip_flags);
> +			goto bail;
> +		} else {
> +			/*
> +			 * It did happen to us, though it's a rare case:
> +			 * orphan_scan() detects the half-refcounted inode
> +			 * in orphan_dir, and delete_inode() attempts to
> +			 * wipe it after reflink operation done later. now
> +			 * we're not allowed to delete such a valid inode,
> +			 * instead, just bail out.
> +			 */
> +			mlog(0, "Skipping delete of %llu because it's a "
> +			     "reflinked inode\n",
> +			     (unsigned long long)oi->ip_blkno);
> +			goto bail;
> +		}
We set i_dyn_features when when attach the tree to the file. This is 
very early. So I am curious why i_dyn_features can tell you whether this 
isn't a crashed reflink inode? Oh, you mean you will never delete a 
reflinked inode in orphan scan?

Regards,
Tao

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly.
  2010-02-21  8:55   ` Tao Ma
@ 2010-02-21  8:58     ` tristan
  2010-02-21  9:04       ` Tao Ma
  0 siblings, 1 reply; 13+ messages in thread
From: tristan @ 2010-02-21  8:58 UTC (permalink / raw)
  To: ocfs2-devel

Tao Ma wrote:
> Hi tristan,
>
> Tristan Ye wrote:
>> Current ocfs2 semantic for reflinking a file firstly create a
>> new orphan_inode in orphan_dir, then remove it to target dir
>> after refcounting operation done, these 2 steps makes logic
>> straightfoward, and guarantee a crash during reflinking can
>> be replayed(half-refcounted inode can be removed), while it
>> brings us another issue cause these 2 steps is acquiring the
>> orphan_dir lock respectively, the problem is, orphan_scan()
>> may detect the half-refcounted inode in orphan_dir as its
>> proper candidates to wipe off in a later time. actually it's
>> not of course, we'd handle this correctly.
>>
>> Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
>> ---
>>  fs/ocfs2/inode.c |   32 ++++++++++++++++++++++++--------
>>  1 files changed, 24 insertions(+), 8 deletions(-)
>>
>> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
>> index 88459bd..61fb546 100644
>> --- a/fs/ocfs2/inode.c
>> +++ b/fs/ocfs2/inode.c
>> @@ -892,14 +892,30 @@ static int ocfs2_query_inode_wipe(struct inode 
>> *inode,
>>      di = (struct ocfs2_dinode *) di_bh->b_data;
>>      if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
>>          /* for lack of a better error? */
>> -        status = -EEXIST;
>> -        mlog(ML_ERROR,
>> -             "Inode %llu (on-disk %llu) not orphaned! "
>> -             "Disk flags  0x%x, inode flags 0x%x\n",
>> -             (unsigned long long)oi->ip_blkno,
>> -             (unsigned long long)le64_to_cpu(di->i_blkno),
>> -             le32_to_cpu(di->i_flags), oi->ip_flags);
>> -        goto bail;
>> +        if (!(di->i_dyn_features &
>> +              cpu_to_le16(OCFS2_HAS_REFCOUNT_FL))) {
>> +            status = -EEXIST;
>> +            mlog(ML_ERROR,
>> +                 "Inode %llu (on-disk %llu) not orphaned! "
>> +                 "Disk flags  0x%x, inode flags 0x%x\n",
>> +                 (unsigned long long)oi->ip_blkno,
>> +                 (unsigned long long)le64_to_cpu(di->i_blkno),
>> +                 le32_to_cpu(di->i_flags), oi->ip_flags);
>> +            goto bail;
>> +        } else {
>> +            /*
>> +             * It did happen to us, though it's a rare case:
>> +             * orphan_scan() detects the half-refcounted inode
>> +             * in orphan_dir, and delete_inode() attempts to
>> +             * wipe it after reflink operation done later. now
>> +             * we're not allowed to delete such a valid inode,
>> +             * instead, just bail out.
>> +             */
>> +            mlog(0, "Skipping delete of %llu because it's a "
>> +                 "reflinked inode\n",
>> +                 (unsigned long long)oi->ip_blkno);
>> +            goto bail;
>> +        }
> We set i_dyn_features when when attach the tree to the file. This is 
> very early. So I am curious why i_dyn_features can tell you whether 
> this isn't a crashed reflink inode? Oh, you mean you will never delete 
> a reflinked inode in orphan scan?

Tao,

Not exactly, if it's reflink operation was crashed somehow, it's 
OCFS2_ORPHANED_FL must be set:), and if it was the case, we then will 
never skip the deletion which is really needed.


Regards,
Tristan.

>
> Regards,
> Tao

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly.
  2010-02-21  8:58     ` tristan
@ 2010-02-21  9:04       ` Tao Ma
  2010-02-23  0:31         ` Sunil Mushran
  0 siblings, 1 reply; 13+ messages in thread
From: Tao Ma @ 2010-02-21  9:04 UTC (permalink / raw)
  To: ocfs2-devel



tristan wrote:
> Tao Ma wrote:
>> Hi tristan,
>>
>> Tristan Ye wrote:
>>> Current ocfs2 semantic for reflinking a file firstly create a
>>> new orphan_inode in orphan_dir, then remove it to target dir
>>> after refcounting operation done, these 2 steps makes logic
>>> straightfoward, and guarantee a crash during reflinking can
>>> be replayed(half-refcounted inode can be removed), while it
>>> brings us another issue cause these 2 steps is acquiring the
>>> orphan_dir lock respectively, the problem is, orphan_scan()
>>> may detect the half-refcounted inode in orphan_dir as its
>>> proper candidates to wipe off in a later time. actually it's
>>> not of course, we'd handle this correctly.
>>>
>>> Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
>>> ---
>>>  fs/ocfs2/inode.c |   32 ++++++++++++++++++++++++--------
>>>  1 files changed, 24 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
>>> index 88459bd..61fb546 100644
>>> --- a/fs/ocfs2/inode.c
>>> +++ b/fs/ocfs2/inode.c
>>> @@ -892,14 +892,30 @@ static int ocfs2_query_inode_wipe(struct inode 
>>> *inode,
>>>      di = (struct ocfs2_dinode *) di_bh->b_data;
>>>      if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
>>>          /* for lack of a better error? */
>>> -        status = -EEXIST;
>>> -        mlog(ML_ERROR,
>>> -             "Inode %llu (on-disk %llu) not orphaned! "
>>> -             "Disk flags  0x%x, inode flags 0x%x\n",
>>> -             (unsigned long long)oi->ip_blkno,
>>> -             (unsigned long long)le64_to_cpu(di->i_blkno),
>>> -             le32_to_cpu(di->i_flags), oi->ip_flags);
>>> -        goto bail;
>>> +        if (!(di->i_dyn_features &
>>> +              cpu_to_le16(OCFS2_HAS_REFCOUNT_FL))) {
>>> +            status = -EEXIST;
>>> +            mlog(ML_ERROR,
>>> +                 "Inode %llu (on-disk %llu) not orphaned! "
>>> +                 "Disk flags  0x%x, inode flags 0x%x\n",
>>> +                 (unsigned long long)oi->ip_blkno,
>>> +                 (unsigned long long)le64_to_cpu(di->i_blkno),
>>> +                 le32_to_cpu(di->i_flags), oi->ip_flags);
>>> +            goto bail;
>>> +        } else {
>>> +            /*
>>> +             * It did happen to us, though it's a rare case:
>>> +             * orphan_scan() detects the half-refcounted inode
>>> +             * in orphan_dir, and delete_inode() attempts to
>>> +             * wipe it after reflink operation done later. now
>>> +             * we're not allowed to delete such a valid inode,
>>> +             * instead, just bail out.
>>> +             */
>>> +            mlog(0, "Skipping delete of %llu because it's a "
>>> +                 "reflinked inode\n",
>>> +                 (unsigned long long)oi->ip_blkno);
>>> +            goto bail;
>>> +        }
>> We set i_dyn_features when when attach the tree to the file. This is 
>> very early. So I am curious why i_dyn_features can tell you whether 
>> this isn't a crashed reflink inode? Oh, you mean you will never delete 
>> a reflinked inode in orphan scan?
> 
> Tao,
> 
> Not exactly, if it's reflink operation was crashed somehow, it's 
> OCFS2_ORPHANED_FL must be set:), and if it was the case, we then will 
> never skip the deletion which is really needed.
oh, so this is really a hack for reflink. I am not sure whether it is 
appropriate. So let Joel decide whether it is OK or not. ;)

Regards,
Tao

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly.
  2010-02-21  9:04       ` Tao Ma
@ 2010-02-23  0:31         ` Sunil Mushran
  2010-02-23  0:51           ` Tao Ma
  2010-02-23  1:34           ` tristan
  0 siblings, 2 replies; 13+ messages in thread
From: Sunil Mushran @ 2010-02-23  0:31 UTC (permalink / raw)
  To: ocfs2-devel

Tao Ma wrote:
>
> tristan wrote:
>> Tao Ma wrote:
>>> Hi tristan,
>>>
>>> Tristan Ye wrote:
>>>> Current ocfs2 semantic for reflinking a file firstly create a
>>>> new orphan_inode in orphan_dir, then remove it to target dir
>>>> after refcounting operation done, these 2 steps makes logic
>>>> straightfoward, and guarantee a crash during reflinking can
>>>> be replayed(half-refcounted inode can be removed), while it
>>>> brings us another issue cause these 2 steps is acquiring the
>>>> orphan_dir lock respectively, the problem is, orphan_scan()
>>>> may detect the half-refcounted inode in orphan_dir as its
>>>> proper candidates to wipe off in a later time. actually it's
>>>> not of course, we'd handle this correctly.
>>>>
>>>> Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
>>>> ---
>>>>  fs/ocfs2/inode.c |   32 ++++++++++++++++++++++++--------
>>>>  1 files changed, 24 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
>>>> index 88459bd..61fb546 100644
>>>> --- a/fs/ocfs2/inode.c
>>>> +++ b/fs/ocfs2/inode.c
>>>> @@ -892,14 +892,30 @@ static int ocfs2_query_inode_wipe(struct inode 
>>>> *inode,
>>>>      di = (struct ocfs2_dinode *) di_bh->b_data;
>>>>      if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
>>>>          /* for lack of a better error? */
>>>> -        status = -EEXIST;
>>>> -        mlog(ML_ERROR,
>>>> -             "Inode %llu (on-disk %llu) not orphaned! "
>>>> -             "Disk flags  0x%x, inode flags 0x%x\n",
>>>> -             (unsigned long long)oi->ip_blkno,
>>>> -             (unsigned long long)le64_to_cpu(di->i_blkno),
>>>> -             le32_to_cpu(di->i_flags), oi->ip_flags);
>>>> -        goto bail;
>>>> +        if (!(di->i_dyn_features &
>>>> +              cpu_to_le16(OCFS2_HAS_REFCOUNT_FL))) {
>>>> +            status = -EEXIST;
>>>> +            mlog(ML_ERROR,
>>>> +                 "Inode %llu (on-disk %llu) not orphaned! "
>>>> +                 "Disk flags  0x%x, inode flags 0x%x\n",
>>>> +                 (unsigned long long)oi->ip_blkno,
>>>> +                 (unsigned long long)le64_to_cpu(di->i_blkno),
>>>> +                 le32_to_cpu(di->i_flags), oi->ip_flags);
>>>> +            goto bail;
>>>> +        } else {
>>>> +            /*
>>>> +             * It did happen to us, though it's a rare case:
>>>> +             * orphan_scan() detects the half-refcounted inode
>>>> +             * in orphan_dir, and delete_inode() attempts to
>>>> +             * wipe it after reflink operation done later. now
>>>> +             * we're not allowed to delete such a valid inode,
>>>> +             * instead, just bail out.
>>>> +             */
>>>> +            mlog(0, "Skipping delete of %llu because it's a "
>>>> +                 "reflinked inode\n",
>>>> +                 (unsigned long long)oi->ip_blkno);
>>>> +            goto bail;
>>>> +        }
>>> We set i_dyn_features when when attach the tree to the file. This is 
>>> very early. So I am curious why i_dyn_features can tell you whether 
>>> this isn't a crashed reflink inode? Oh, you mean you will never delete 
>>> a reflinked inode in orphan scan?
>> Tao,
>>
>> Not exactly, if it's reflink operation was crashed somehow, it's 
>> OCFS2_ORPHANED_FL must be set:), and if it was the case, we then will 
>> never skip the deletion which is really needed.
> oh, so this is really a hack for reflink. I am not sure whether it is 
> appropriate. So let Joel decide whether it is OK or not. ;)

I'm confused. How will the OCFS2_ORPHANED_FL be set for reflinked inodes
if the node crashed?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly.
  2010-02-23  0:31         ` Sunil Mushran
@ 2010-02-23  0:51           ` Tao Ma
  2010-02-23  1:34           ` tristan
  1 sibling, 0 replies; 13+ messages in thread
From: Tao Ma @ 2010-02-23  0:51 UTC (permalink / raw)
  To: ocfs2-devel

Hi Sunil,

Sunil Mushran wrote:
> Tao Ma wrote:
>>
>> tristan wrote:
>>> Tao Ma wrote:
>>>> Hi tristan,
>>>>
>>>> Tristan Ye wrote:
>>>>> Current ocfs2 semantic for reflinking a file firstly create a
>>>>> new orphan_inode in orphan_dir, then remove it to target dir
>>>>> after refcounting operation done, these 2 steps makes logic
>>>>> straightfoward, and guarantee a crash during reflinking can
>>>>> be replayed(half-refcounted inode can be removed), while it
>>>>> brings us another issue cause these 2 steps is acquiring the
>>>>> orphan_dir lock respectively, the problem is, orphan_scan()
>>>>> may detect the half-refcounted inode in orphan_dir as its
>>>>> proper candidates to wipe off in a later time. actually it's
>>>>> not of course, we'd handle this correctly.
>>>>>
>>>>> Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
>>>>> ---
>>>>>  fs/ocfs2/inode.c |   32 ++++++++++++++++++++++++--------
>>>>>  1 files changed, 24 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
>>>>> index 88459bd..61fb546 100644
>>>>> --- a/fs/ocfs2/inode.c
>>>>> +++ b/fs/ocfs2/inode.c
>>>>> @@ -892,14 +892,30 @@ static int ocfs2_query_inode_wipe(struct 
>>>>> inode *inode,
>>>>>      di = (struct ocfs2_dinode *) di_bh->b_data;
>>>>>      if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
>>>>>          /* for lack of a better error? */
>>>>> -        status = -EEXIST;
>>>>> -        mlog(ML_ERROR,
>>>>> -             "Inode %llu (on-disk %llu) not orphaned! "
>>>>> -             "Disk flags  0x%x, inode flags 0x%x\n",
>>>>> -             (unsigned long long)oi->ip_blkno,
>>>>> -             (unsigned long long)le64_to_cpu(di->i_blkno),
>>>>> -             le32_to_cpu(di->i_flags), oi->ip_flags);
>>>>> -        goto bail;
>>>>> +        if (!(di->i_dyn_features &
>>>>> +              cpu_to_le16(OCFS2_HAS_REFCOUNT_FL))) {
>>>>> +            status = -EEXIST;
>>>>> +            mlog(ML_ERROR,
>>>>> +                 "Inode %llu (on-disk %llu) not orphaned! "
>>>>> +                 "Disk flags  0x%x, inode flags 0x%x\n",
>>>>> +                 (unsigned long long)oi->ip_blkno,
>>>>> +                 (unsigned long long)le64_to_cpu(di->i_blkno),
>>>>> +                 le32_to_cpu(di->i_flags), oi->ip_flags);
>>>>> +            goto bail;
>>>>> +        } else {
>>>>> +            /*
>>>>> +             * It did happen to us, though it's a rare case:
>>>>> +             * orphan_scan() detects the half-refcounted inode
>>>>> +             * in orphan_dir, and delete_inode() attempts to
>>>>> +             * wipe it after reflink operation done later. now
>>>>> +             * we're not allowed to delete such a valid inode,
>>>>> +             * instead, just bail out.
>>>>> +             */
>>>>> +            mlog(0, "Skipping delete of %llu because it's a "
>>>>> +                 "reflinked inode\n",
>>>>> +                 (unsigned long long)oi->ip_blkno);
>>>>> +            goto bail;
>>>>> +        }
>>>> We set i_dyn_features when when attach the tree to the file. This is 
>>>> very early. So I am curious why i_dyn_features can tell you whether 
>>>> this isn't a crashed reflink inode? Oh, you mean you will never 
>>>> delete a reflinked inode in orphan scan?
>>> Tao,
>>>
>>> Not exactly, if it's reflink operation was crashed somehow, it's 
>>> OCFS2_ORPHANED_FL must be set:), and if it was the case, we then will 
>>> never skip the deletion which is really needed.
>> oh, so this is really a hack for reflink. I am not sure whether it is 
>> appropriate. So let Joel decide whether it is OK or not. ;)
> 
> I'm confused. How will the OCFS2_ORPHANED_FL be set for reflinked inodes
> if the node crashed?
when we create the reflinked inodes in orphan dir at the very beginning, 
we put OCFS2_ORPHANED_FL in i_flags. And when all the work is done, this 
flag is removed together with its moving to the dest directory. So if 
the node crash between these 2 steps, the reflinked file in the orphan 
dir can be replayed and deleted successfully.

Regards,
Tao
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly.
  2010-02-23  0:31         ` Sunil Mushran
  2010-02-23  0:51           ` Tao Ma
@ 2010-02-23  1:34           ` tristan
  1 sibling, 0 replies; 13+ messages in thread
From: tristan @ 2010-02-23  1:34 UTC (permalink / raw)
  To: ocfs2-devel

Sunil Mushran wrote:
> Tao Ma wrote:
>>
>> tristan wrote:
>>> Tao Ma wrote:
>>>> Hi tristan,
>>>>
>>>> Tristan Ye wrote:
>>>>> Current ocfs2 semantic for reflinking a file firstly create a
>>>>> new orphan_inode in orphan_dir, then remove it to target dir
>>>>> after refcounting operation done, these 2 steps makes logic
>>>>> straightfoward, and guarantee a crash during reflinking can
>>>>> be replayed(half-refcounted inode can be removed), while it
>>>>> brings us another issue cause these 2 steps is acquiring the
>>>>> orphan_dir lock respectively, the problem is, orphan_scan()
>>>>> may detect the half-refcounted inode in orphan_dir as its
>>>>> proper candidates to wipe off in a later time. actually it's
>>>>> not of course, we'd handle this correctly.
>>>>>
>>>>> Signed-off-by: Tristan Ye <tristan.ye@oracle.com>
>>>>> ---
>>>>>  fs/ocfs2/inode.c |   32 ++++++++++++++++++++++++--------
>>>>>  1 files changed, 24 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
>>>>> index 88459bd..61fb546 100644
>>>>> --- a/fs/ocfs2/inode.c
>>>>> +++ b/fs/ocfs2/inode.c
>>>>> @@ -892,14 +892,30 @@ static int ocfs2_query_inode_wipe(struct 
>>>>> inode *inode,
>>>>>      di = (struct ocfs2_dinode *) di_bh->b_data;
>>>>>      if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
>>>>>          /* for lack of a better error? */
>>>>> -        status = -EEXIST;
>>>>> -        mlog(ML_ERROR,
>>>>> -             "Inode %llu (on-disk %llu) not orphaned! "
>>>>> -             "Disk flags  0x%x, inode flags 0x%x\n",
>>>>> -             (unsigned long long)oi->ip_blkno,
>>>>> -             (unsigned long long)le64_to_cpu(di->i_blkno),
>>>>> -             le32_to_cpu(di->i_flags), oi->ip_flags);
>>>>> -        goto bail;
>>>>> +        if (!(di->i_dyn_features &
>>>>> +              cpu_to_le16(OCFS2_HAS_REFCOUNT_FL))) {
>>>>> +            status = -EEXIST;
>>>>> +            mlog(ML_ERROR,
>>>>> +                 "Inode %llu (on-disk %llu) not orphaned! "
>>>>> +                 "Disk flags  0x%x, inode flags 0x%x\n",
>>>>> +                 (unsigned long long)oi->ip_blkno,
>>>>> +                 (unsigned long long)le64_to_cpu(di->i_blkno),
>>>>> +                 le32_to_cpu(di->i_flags), oi->ip_flags);
>>>>> +            goto bail;
>>>>> +        } else {
>>>>> +            /*
>>>>> +             * It did happen to us, though it's a rare case:
>>>>> +             * orphan_scan() detects the half-refcounted inode
>>>>> +             * in orphan_dir, and delete_inode() attempts to
>>>>> +             * wipe it after reflink operation done later. now
>>>>> +             * we're not allowed to delete such a valid inode,
>>>>> +             * instead, just bail out.
>>>>> +             */
>>>>> +            mlog(0, "Skipping delete of %llu because it's a "
>>>>> +                 "reflinked inode\n",
>>>>> +                 (unsigned long long)oi->ip_blkno);
>>>>> +            goto bail;
>>>>> +        }
>>>> We set i_dyn_features when when attach the tree to the file. This 
>>>> is very early. So I am curious why i_dyn_features can tell you 
>>>> whether this isn't a crashed reflink inode? Oh, you mean you will 
>>>> never delete a reflinked inode in orphan scan?
>>> Tao,
>>>
>>> Not exactly, if it's reflink operation was crashed somehow, it's 
>>> OCFS2_ORPHANED_FL must be set:), and if it was the case, we then 
>>> will never skip the deletion which is really needed.
>> oh, so this is really a hack for reflink. I am not sure whether it is 
>> appropriate. So let Joel decide whether it is OK or not. ;)
>
> I'm confused. How will the OCFS2_ORPHANED_FL be set for reflinked inodes
> if the node crashed?

Sunil,

Reflink separates all of its operation into 3 parts.

1. Create target inode in orphan_dir.

2. Do refcounting thing.

3. Move target inode from orphan_dir into desired place.

Once reflink operation crashed at step 1, no OCFS2_ORPHANED_FL set, and 
also there is no entry in orphan_dir, and if nodes crashed at step2,3, 
the reflinked inode still in orphan_dir with OCFS2_ORPHANED_FL set, so 
it can be used for replaying when doing recovery.

The reason why I used OCFS2_ORPHANED_FL for checking is that,

As you know, our orphan_scan thread scans the orphan_dir at a fixed 
interval, and put the oprhan inodes into a queue, which will be 
iput()'ed in a later time. As a result, our half-refcounted inodes in 
orphan_dir will be taken into account in that case, and 
ocfs2_delete_inode() may be invoked as we set i_nlink=0 in reflink 
codes, that's the root cause of bz1215 I guess. so OCFS2_ORPHANED_FL can 
be used for determining if we're deleting the refcounted inode(in crash 
case) at the recovery stage or we're trying to delete a alive 
half-refcounted inode.


Tao,

How do you think about this?


Tristan.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly.
  2010-02-21  8:29 ` [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly Tristan Ye
  2010-02-21  8:55   ` Tao Ma
@ 2010-02-26 23:28   ` Joel Becker
  2010-02-27 10:34     ` tristan ye
  1 sibling, 1 reply; 13+ messages in thread
From: Joel Becker @ 2010-02-26 23:28 UTC (permalink / raw)
  To: ocfs2-devel

On Sun, Feb 21, 2010 at 04:29:37PM +0800, Tristan Ye wrote:
> Current ocfs2 semantic for reflinking a file firstly create a
> new orphan_inode in orphan_dir, then remove it to target dir
> after refcounting operation done, these 2 steps makes logic
> straightfoward, and guarantee a crash during reflinking can
> be replayed(half-refcounted inode can be removed), while it
> brings us another issue cause these 2 steps is acquiring the
> orphan_dir lock respectively, the problem is, orphan_scan()
> may detect the half-refcounted inode in orphan_dir as its
> proper candidates to wipe off in a later time. actually it's
> not of course, we'd handle this correctly.

	Why is this necessary?  Don't we have the open lock on the
reflink target?  That should keep an orphan scan from wiping a life
refount target.  Tao, do we not have the open lock?

> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
> index 88459bd..61fb546 100644
> --- a/fs/ocfs2/inode.c
> +++ b/fs/ocfs2/inode.c
> @@ -892,14 +892,30 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
>  	di = (struct ocfs2_dinode *) di_bh->b_data;
>  	if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
>  		/* for lack of a better error? */
> -		status = -EEXIST;
> -		mlog(ML_ERROR,
> -		     "Inode %llu (on-disk %llu) not orphaned! "
> -		     "Disk flags  0x%x, inode flags 0x%x\n",
> -		     (unsigned long long)oi->ip_blkno,
> -		     (unsigned long long)le64_to_cpu(di->i_blkno),
> -		     le32_to_cpu(di->i_flags), oi->ip_flags);
> -		goto bail;

	This should not be inside the ORPHANED_FL check.  Every inode in
the orphan_dir, whether a reflink target or an unlinked inode, should
have ORPHANED_FL set.  I think this is why others were confused.
	Tao, were you removing the ORPHANED_FL from reflink targets?

> +		if (!(di->i_dyn_features &
> +		      cpu_to_le16(OCFS2_HAS_REFCOUNT_FL))) {
> +			status = -EEXIST;
> +			mlog(ML_ERROR,
> +			     "Inode %llu (on-disk %llu) not orphaned! "
> +			     "Disk flags  0x%x, inode flags 0x%x\n",
> +			     (unsigned long long)oi->ip_blkno,
> +			     (unsigned long long)le64_to_cpu(di->i_blkno),
> +			     le32_to_cpu(di->i_flags), oi->ip_flags);
> +			goto bail;
> +		} else {
> +			/*
> +			 * It did happen to us, though it's a rare case:
> +			 * orphan_scan() detects the half-refcounted inode
> +			 * in orphan_dir, and delete_inode() attempts to
> +			 * wipe it after reflink operation done later. now
> +			 * we're not allowed to delete such a valid inode,
> +			 * instead, just bail out.
> +			 */
> +			mlog(0, "Skipping delete of %llu because it's a "
> +			     "reflinked inode\n",
> +			     (unsigned long long)oi->ip_blkno);
> +			goto bail;

	Second, this looks like it skips all reflink targets.  That's
not OK.  Sometimes they do need to be deleted.

Joel

-- 

Life's Little Instruction Book #306

	"Take a nap on Sunday afternoons."

Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly.
  2010-02-26 23:28   ` Joel Becker
@ 2010-02-27 10:34     ` tristan ye
  2010-02-27 10:36       ` tristan ye
  2010-02-28  3:06       ` Joel Becker
  0 siblings, 2 replies; 13+ messages in thread
From: tristan ye @ 2010-02-27 10:34 UTC (permalink / raw)
  To: ocfs2-devel



Joel Becker wrote:
> On Sun, Feb 21, 2010 at 04:29:37PM +0800, Tristan Ye wrote:
>   
>> Current ocfs2 semantic for reflinking a file firstly create a
>> new orphan_inode in orphan_dir, then remove it to target dir
>> after refcounting operation done, these 2 steps makes logic
>> straightfoward, and guarantee a crash during reflinking can
>> be replayed(half-refcounted inode can be removed), while it
>> brings us another issue cause these 2 steps is acquiring the
>> orphan_dir lock respectively, the problem is, orphan_scan()
>> may detect the half-refcounted inode in orphan_dir as its
>> proper candidates to wipe off in a later time. actually it's
>> not of course, we'd handle this correctly.
>>     
>
> 	Why is this necessary?  Don't we have the open lock on the
> reflink target?  That should keep an orphan scan from wiping a life
> refount target.  Tao, do we not have the open lock?
>   

Yes, we have the open lock all time during reflink operation, but we 
didn't hold the orphan_dir's lock during this period, which means 
orphan_scan would have a chance to add our half-refcounted target into 
its working queue, which will be deferred to invoke ocfs2_inode_delete 
after reflink operation done(actually it always be invoked after reflink 
done since we hold the open lock of target inode all the time), in this 
case, we definitely failed at  ocfs2_query_inode_wipe() as bug 1215 
described, since the ORPHAN_FLAG here have been cleared by reflink 
operation. that's not so good to me, we shouldn't have treated this as a 
'ML_ERROR'.

>   
>> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
>> index 88459bd..61fb546 100644
>> --- a/fs/ocfs2/inode.c
>> +++ b/fs/ocfs2/inode.c
>> @@ -892,14 +892,30 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
>>  	di = (struct ocfs2_dinode *) di_bh->b_data;
>>  	if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
>>  		/* for lack of a better error? */
>> -		status = -EEXIST;
>> -		mlog(ML_ERROR,
>> -		     "Inode %llu (on-disk %llu) not orphaned! "
>> -		     "Disk flags  0x%x, inode flags 0x%x\n",
>> -		     (unsigned long long)oi->ip_blkno,
>> -		     (unsigned long long)le64_to_cpu(di->i_blkno),
>> -		     le32_to_cpu(di->i_flags), oi->ip_flags);
>> -		goto bail;
>>     
>
> 	This should not be inside the ORPHANED_FL check.  Every inode in
> the orphan_dir, whether a reflink target or an unlinked inode, should
> have ORPHANED_FL set.  I think this is why others were confused.
>   

Not exactly, after reflink operation, the flag will be unset, and then 
ocfs2_delete_inode() was invoked immediately, we did meet bug 1215.

> 	Tao, were you removing the ORPHANED_FL from reflink targets?
>
>   
>> +		if (!(di->i_dyn_features &
>> +		      cpu_to_le16(OCFS2_HAS_REFCOUNT_FL))) {
>> +			status = -EEXIST;
>> +			mlog(ML_ERROR,
>> +			     "Inode %llu (on-disk %llu) not orphaned! "
>> +			     "Disk flags  0x%x, inode flags 0x%x\n",
>> +			     (unsigned long long)oi->ip_blkno,
>> +			     (unsigned long long)le64_to_cpu(di->i_blkno),
>> +			     le32_to_cpu(di->i_flags), oi->ip_flags);
>> +			goto bail;
>> +		} else {
>> +			/*
>> +			 * It did happen to us, though it's a rare case:
>> +			 * orphan_scan() detects the half-refcounted inode
>> +			 * in orphan_dir, and delete_inode() attempts to
>> +			 * wipe it after reflink operation done later. now
>> +			 * we're not allowed to delete such a valid inode,
>> +			 * instead, just bail out.
>> +			 */
>> +			mlog(0, "Skipping delete of %llu because it's a "
>> +			     "reflinked inode\n",
>> +			     (unsigned long long)oi->ip_blkno);
>> +			goto bail;
>>     
>
> 	Second, this looks like it skips all reflink targets.  That's
> not OK.  Sometimes they do need to be deleted.
>   
Not exactly, it will not skip a failed reflink target which is due to a 
machine crash or other fatal failure.

Look,  if reflink operation aborted in a abnormal way, it's 
OCFS2_ORPHANED_FL on disk always there, that can be judged for us to 
determine if it's a alive half-refcounted inode or a failed reflink target.

So I guess this patch will not skip the real orphan inodes created 
during a reflink failure.



Regards,
Tristan.

> Joel
>
>   

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly.
  2010-02-27 10:34     ` tristan ye
@ 2010-02-27 10:36       ` tristan ye
  2010-02-28  3:06       ` Joel Becker
  1 sibling, 0 replies; 13+ messages in thread
From: tristan ye @ 2010-02-27 10:36 UTC (permalink / raw)
  To: ocfs2-devel



tristan ye wrote:
>
>
> Joel Becker wrote:
>> On Sun, Feb 21, 2010 at 04:29:37PM +0800, Tristan Ye wrote:
>>  
>>> Current ocfs2 semantic for reflinking a file firstly create a
>>> new orphan_inode in orphan_dir, then remove it to target dir
>>> after refcounting operation done, these 2 steps makes logic
>>> straightfoward, and guarantee a crash during reflinking can
>>> be replayed(half-refcounted inode can be removed), while it
>>> brings us another issue cause these 2 steps is acquiring the
>>> orphan_dir lock respectively, the problem is, orphan_scan()
>>> may detect the half-refcounted inode in orphan_dir as its
>>> proper candidates to wipe off in a later time. actually it's
>>> not of course, we'd handle this correctly.
>>>     
>>
>>     Why is this necessary?  Don't we have the open lock on the
>> reflink target?  That should keep an orphan scan from wiping a life
>> refount target.  Tao, do we not have the open lock?
>>   
>
> Yes, we have the open lock all time during reflink operation, but we 
> didn't hold the orphan_dir's lock during this period, which means 
> orphan_scan would have a chance to add our half-refcounted target

Oh, sorry, there is a typo, I meant:

'but we didn't hold the orphan_dir's lock all the time during this period.'


> into its working queue, which will be deferred to invoke 
> ocfs2_inode_delete after reflink operation done(actually it always be 
> invoked after reflink done since we hold the open lock of target inode 
> all the time), in this case, we definitely failed at  
> ocfs2_query_inode_wipe() as bug 1215 described, since the ORPHAN_FLAG 
> here have been cleared by reflink operation. that's not so good to me, 
> we shouldn't have treated this as a 'ML_ERROR'.
>
>>  
>>> diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
>>> index 88459bd..61fb546 100644
>>> --- a/fs/ocfs2/inode.c
>>> +++ b/fs/ocfs2/inode.c
>>> @@ -892,14 +892,30 @@ static int ocfs2_query_inode_wipe(struct inode 
>>> *inode,
>>>      di = (struct ocfs2_dinode *) di_bh->b_data;
>>>      if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
>>>          /* for lack of a better error? */
>>> -        status = -EEXIST;
>>> -        mlog(ML_ERROR,
>>> -             "Inode %llu (on-disk %llu) not orphaned! "
>>> -             "Disk flags  0x%x, inode flags 0x%x\n",
>>> -             (unsigned long long)oi->ip_blkno,
>>> -             (unsigned long long)le64_to_cpu(di->i_blkno),
>>> -             le32_to_cpu(di->i_flags), oi->ip_flags);
>>> -        goto bail;
>>>     
>>
>>     This should not be inside the ORPHANED_FL check.  Every inode in
>> the orphan_dir, whether a reflink target or an unlinked inode, should
>> have ORPHANED_FL set.  I think this is why others were confused.
>>   
>
> Not exactly, after reflink operation, the flag will be unset, and then 
> ocfs2_delete_inode() was invoked immediately, we did meet bug 1215.
>
>>     Tao, were you removing the ORPHANED_FL from reflink targets?
>>
>>  
>>> +        if (!(di->i_dyn_features &
>>> +              cpu_to_le16(OCFS2_HAS_REFCOUNT_FL))) {
>>> +            status = -EEXIST;
>>> +            mlog(ML_ERROR,
>>> +                 "Inode %llu (on-disk %llu) not orphaned! "
>>> +                 "Disk flags  0x%x, inode flags 0x%x\n",
>>> +                 (unsigned long long)oi->ip_blkno,
>>> +                 (unsigned long long)le64_to_cpu(di->i_blkno),
>>> +                 le32_to_cpu(di->i_flags), oi->ip_flags);
>>> +            goto bail;
>>> +        } else {
>>> +            /*
>>> +             * It did happen to us, though it's a rare case:
>>> +             * orphan_scan() detects the half-refcounted inode
>>> +             * in orphan_dir, and delete_inode() attempts to
>>> +             * wipe it after reflink operation done later. now
>>> +             * we're not allowed to delete such a valid inode,
>>> +             * instead, just bail out.
>>> +             */
>>> +            mlog(0, "Skipping delete of %llu because it's a "
>>> +                 "reflinked inode\n",
>>> +                 (unsigned long long)oi->ip_blkno);
>>> +            goto bail;
>>>     
>>
>>     Second, this looks like it skips all reflink targets.  That's
>> not OK.  Sometimes they do need to be deleted.
>>   
> Not exactly, it will not skip a failed reflink target which is due to 
> a machine crash or other fatal failure.
>
> Look,  if reflink operation aborted in a abnormal way, it's 
> OCFS2_ORPHANED_FL on disk always there, that can be judged for us to 
> determine if it's a alive half-refcounted inode or a failed reflink 
> target.
>
> So I guess this patch will not skip the real orphan inodes created 
> during a reflink failure.
>
>
>
> Regards,
> Tristan.
>
>> Joel
>>
>>   

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly.
  2010-02-27 10:34     ` tristan ye
  2010-02-27 10:36       ` tristan ye
@ 2010-02-28  3:06       ` Joel Becker
  2010-03-01  2:21         ` tristan
  1 sibling, 1 reply; 13+ messages in thread
From: Joel Becker @ 2010-02-28  3:06 UTC (permalink / raw)
  To: ocfs2-devel

On Sat, Feb 27, 2010 at 06:34:02PM +0800, tristan ye wrote:
> > 	Why is this necessary?  Don't we have the open lock on the
> > reflink target?  That should keep an orphan scan from wiping a life
> > refount target.  Tao, do we not have the open lock?
> >   
> 
> Yes, we have the open lock all time during reflink operation, but we 
> didn't hold the orphan_dir's lock during this period, which means 
> orphan_scan would have a chance to add our half-refcounted target into 
> its working queue, which will be deferred to invoke ocfs2_inode_delete 
> after reflink operation done(actually it always be invoked after reflink 
> done since we hold the open lock of target inode all the time), in this 
> case, we definitely failed at  ocfs2_query_inode_wipe() as bug 1215 
> described, since the ORPHAN_FLAG here have been cleared by reflink 
> operation. that's not so good to me, we shouldn't have treated this as a 
> 'ML_ERROR'.

	Aha!  I think this is what we're all missing.  The rule of the
orphan dir is that all inodes in the orphan dir have ORPHANED_FL.  But
you're right, we queue the orphans.  A reflink inode can be moved out of
the orphan dir and have its ORPHANED_FL cleared before the queue is run.
Thus, the queue finds an inode without ORPHANED_FL.  This is correct,
because the inode is no longer orphaned.
	The comment didn't help me.  Let's try reversing the order and
changing the comment.

-----------------------------------------------------------------
        if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
+		/*
+		 * Inodes in the orphan dir must have ORPHANED_FL.  The
+		 * only inodes that come back out of the orphan dir are
+		 * reflink targets.  A reflink target may be moved out
+		 * of the orphan dir between the time we scan the
+		 * directory and the time we process it.  This would
+		 * lead to HAS_REFCOUNT_FL being set but ORPHANED_FL
+		 * not.
+		 */
+               if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) {
+                       mlog(0, "Reflinked inode %llu is no longer orphaned.  "
+                            "it must not be deleted\n",
+                            (unsigned long long)oi->ip_blkno);
+                       goto bail;
+		}
+
                /* for lack of a better error? */
                status = -EEXIST;
                mlog(ML_ERROR,
                     "Inode %llu (on-disk %llu) not orphaned! "
                     "Disk flags  0x%x, inode flags 0x%x\n",
                     (unsigned long long)oi->ip_blkno,
                     (unsigned long long)le64_to_cpu(di->i_blkno),
                     le32_to_cpu(di->i_flags), oi->ip_flags);
                goto bail;
	}
-----------------------------------------------------------------

	I find this more readable.  We're just special casing the
reflink behavior.
	Now let's look at our cases.  First, orphan scans.  If the
orphan flag is not set, we know it was moved out.  If ORPHANED_FL is
set, we continue on to wiping.  This checks the open lock.  If the open
lock is held, query_inode_wipe knows it is in use, and it is skipped.
	What about crash recovery?  Well, if it is in the orphan dir, it
will have ORPHANED_FL.  We'll process it like normal.  If it was already
moved out, recovery won't see it.
	I think this covers everything.

Joel

-- 

"I have never let my schooling interfere with my education."
        - Mark Twain

Joel Becker
Principal Software Developer
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly.
  2010-02-28  3:06       ` Joel Becker
@ 2010-03-01  2:21         ` tristan
  0 siblings, 0 replies; 13+ messages in thread
From: tristan @ 2010-03-01  2:21 UTC (permalink / raw)
  To: ocfs2-devel

Joel Becker wrote:
> On Sat, Feb 27, 2010 at 06:34:02PM +0800, tristan ye wrote:
>   
>>> 	Why is this necessary?  Don't we have the open lock on the
>>> reflink target?  That should keep an orphan scan from wiping a life
>>> refount target.  Tao, do we not have the open lock?
>>>   
>>>       
>> Yes, we have the open lock all time during reflink operation, but we 
>> didn't hold the orphan_dir's lock during this period, which means 
>> orphan_scan would have a chance to add our half-refcounted target into 
>> its working queue, which will be deferred to invoke ocfs2_inode_delete 
>> after reflink operation done(actually it always be invoked after reflink 
>> done since we hold the open lock of target inode all the time), in this 
>> case, we definitely failed at  ocfs2_query_inode_wipe() as bug 1215 
>> described, since the ORPHAN_FLAG here have been cleared by reflink 
>> operation. that's not so good to me, we shouldn't have treated this as a 
>> 'ML_ERROR'.
>>     
>
> 	Aha!  I think this is what we're all missing.  The rule of the
> orphan dir is that all inodes in the orphan dir have ORPHANED_FL.  But
> you're right, we queue the orphans.  A reflink inode can be moved out of
> the orphan dir and have its ORPHANED_FL cleared before the queue is run.
> Thus, the queue finds an inode without ORPHANED_FL.  This is correct,
> because the inode is no longer orphaned.
>   

Yes, that's exactly what I meant:)

> 	The comment didn't help me.  Let's try reversing the order and
> changing the comment.
>   

Yes, comment here is expected to be more descriptive.

> -----------------------------------------------------------------
>         if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
> +		/*
> +		 * Inodes in the orphan dir must have ORPHANED_FL.  The
> +		 * only inodes that come back out of the orphan dir are
> +		 * reflink targets.  A reflink target may be moved out
> +		 * of the orphan dir between the time we scan the
> +		 * directory and the time we process it.  This would
> +		 * lead to HAS_REFCOUNT_FL being set but ORPHANED_FL
> +		 * not.
> +		 */
> +               if (di->i_dyn_features & cpu_to_le16(OCFS2_HAS_REFCOUNT_FL)) {
> +                       mlog(0, "Reflinked inode %llu is no longer orphaned.  "
> +                            "it must not be deleted\n",
> +                            (unsigned long long)oi->ip_blkno);
> +                       goto bail;
> +		}
> +
>                 /* for lack of a better error? */
>   

Oh, Joel,

I loved your comments...it's more straightforward and makes much more sense.


>                 status = -EEXIST;
>                 mlog(ML_ERROR,
>                      "Inode %llu (on-disk %llu) not orphaned! "
>                      "Disk flags  0x%x, inode flags 0x%x\n",
>                      (unsigned long long)oi->ip_blkno,
>                      (unsigned long long)le64_to_cpu(di->i_blkno),
>                      le32_to_cpu(di->i_flags), oi->ip_flags);
>                 goto bail;
> 	}
> -----------------------------------------------------------------
>
> 	I find this more readable.  We're just special casing the
> reflink behavior.
> 	Now let's look at our cases.  First, orphan scans.  If the
> orphan flag is not set, we know it was moved out.  If ORPHANED_FL is
> set, we continue on to wiping.  This checks the open lock.  If the open
> lock is held, query_inode_wipe knows it is in use, and it is skipped.
>   

Exactly, if it hasn't the open lock, we then know it's a failed reflink 
target inode from a crash or something else, which is expected to be 
wiped off!

> 	What about crash recovery?  Well, if it is in the orphan dir, it
> will have ORPHANED_FL.  We'll process it like normal.  If it was already
> moved out, recovery won't see it.
> 	I think this covers everything.
>   

Definitely.

> Joel
>
>   

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2010-03-01  2:21 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-21  8:29 [Ocfs2-devel] [PATCH 1/2] Ocfs2: Journaling i_flags and i_orphaned_slot when adding inode to orphan dir Tristan Ye
2010-02-21  8:29 ` [Ocfs2-devel] [PATCH 2/2] Ocfs2: Handle deletion of refcounted oprhan_inode correctly Tristan Ye
2010-02-21  8:55   ` Tao Ma
2010-02-21  8:58     ` tristan
2010-02-21  9:04       ` Tao Ma
2010-02-23  0:31         ` Sunil Mushran
2010-02-23  0:51           ` Tao Ma
2010-02-23  1:34           ` tristan
2010-02-26 23:28   ` Joel Becker
2010-02-27 10:34     ` tristan ye
2010-02-27 10:36       ` tristan ye
2010-02-28  3:06       ` Joel Becker
2010-03-01  2:21         ` tristan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.