All of lore.kernel.org
 help / color / mirror / Atom feed
* [Ocfs2-devel] [PATCH] ocfs2: Try to free truncate log when meeting ENOSPC in write.
@ 2010-10-26  7:29 Tao Ma
  2010-10-26  8:28 ` tristan
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Tao Ma @ 2010-10-26  7:29 UTC (permalink / raw)
  To: ocfs2-devel

Recently, one of our colleagues met with a problem: if we
write/delete a 32MB file repeatedly, we will eventually get an
ENOSPC. The corresponding bug is 1288.
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288

The real problem is that although we have freed the clusters,
they are still in the truncate log, where they are accumulated
so that we can free them all at once.

So this patch tries to resolve it. In case we see -ENOSPC
in ocfs2_write_begin_nolock, we will check whether the truncate
log has enough clusters for our need; if yes, we will try to
flush the truncate log at that point and try again. This method
was inspired by Mark Fasheh <mfasheh@suse.com>. Thanks.

Cc: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c |    3 ++
 fs/ocfs2/aops.c  |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ocfs2/ocfs2.h |    2 +
 3 files changed, 63 insertions(+), 1 deletions(-)

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 592fae5..8ec418d 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5858,6 +5858,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 
 	ocfs2_journal_dirty(handle, tl_bh);
 
+	osb->truncated_clusters += num_clusters;
 bail:
 	mlog_exit(status);
 	return status;
@@ -5929,6 +5930,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
 		i--;
 	}
 
+	osb->truncated_clusters = 0;
+
 bail:
 	mlog_exit(status);
 	return status;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 5cfeee1..79adc67 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1642,6 +1642,43 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
 	return ret;
 }
 
+/*
+ * Try to flush truncate log if we can free enough clusters from it.
+ * As for return value, "< 0" means error, "0" no space and "1" means
+ * we have freed enough spaces and let the caller try to allocate again.
+ */
+static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
+					  unsigned int needed)
+{
+	tid_t target;
+	int ret = 0;
+	unsigned int truncated_clusters;
+
+	mutex_lock(&osb->osb_tl_inode->i_mutex);
+	truncated_clusters = osb->truncated_clusters;
+	mutex_unlock(&osb->osb_tl_inode->i_mutex);
+
+	/*
+	 * Check whether we can succeed in allocating if we free
+	 * the truncate log.
+	 */
+	if (truncated_clusters < needed)
+		goto out;
+
+	ret = ocfs2_flush_truncate_log(osb);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
+		jbd2_log_wait_commit(osb->journal->j_journal, target);
+		ret = 1;
+	}
+out:
+	return ret;
+}
+
 int ocfs2_write_begin_nolock(struct file *filp,
 			     struct address_space *mapping,
 			     loff_t pos, unsigned len, unsigned flags,
@@ -1649,7 +1686,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
 			     struct buffer_head *di_bh, struct page *mmap_page)
 {
 	int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
-	unsigned int clusters_to_alloc, extents_to_split;
+	unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
 	struct ocfs2_write_ctxt *wc;
 	struct inode *inode = mapping->host;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -1658,7 +1695,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
 	struct ocfs2_alloc_context *meta_ac = NULL;
 	handle_t *handle;
 	struct ocfs2_extent_tree et;
+	int try_free = 0, ret1;
 
+try_again:
 	ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
 	if (ret) {
 		mlog_errno(ret);
@@ -1693,6 +1732,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
 		mlog_errno(ret);
 		goto out;
 	} else if (ret == 1) {
+		clusters_need = wc->w_clen;
 		ret = ocfs2_refcount_cow(inode, filp, di_bh,
 					 wc->w_cpos, wc->w_clen, UINT_MAX);
 		if (ret) {
@@ -1707,6 +1747,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
 		mlog_errno(ret);
 		goto out;
 	}
+	clusters_need += clusters_to_alloc;
 
 	di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
 
@@ -1829,6 +1870,22 @@ out:
 		ocfs2_free_alloc_context(data_ac);
 	if (meta_ac)
 		ocfs2_free_alloc_context(meta_ac);
+
+	if (ret == -ENOSPC && !try_free) {
+		/*
+		 * Try to free some truncate log so that we can have enough
+		 * clusters to allocate.
+		 */
+		try_free = 1;
+
+		ret1 = ocfs2_try_to_free_truncate_log(osb, clusters_need);
+		if (ret1 == 1)
+			goto try_again;
+
+		if (ret1 < 0)
+			mlog_errno(ret1);
+	}
+
 	return ret;
 }
 
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d840821..de06558 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -439,6 +439,8 @@ struct ocfs2_super
 	/* rb tree root for refcount lock. */
 	struct rb_root	osb_rf_lock_tree;
 	struct ocfs2_refcount_tree *osb_ref_tree_lru;
+
+	unsigned int truncated_clusters;
 };
 
 #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)
-- 
1.7.1.571.gba4d01

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: Try to free truncate log when meeting ENOSPC in write.
  2010-10-26  7:29 [Ocfs2-devel] [PATCH] ocfs2: Try to free truncate log when meeting ENOSPC in write Tao Ma
@ 2010-10-26  8:28 ` tristan
  2010-10-26  8:54   ` Tao Ma
  2010-11-04  1:46 ` Joel Becker
  2010-11-04  7:14 ` [Ocfs2-devel] [PATCH v2] " Tao Ma
  2 siblings, 1 reply; 11+ messages in thread
From: tristan @ 2010-10-26  8:28 UTC (permalink / raw)
  To: ocfs2-devel

Hi Tao,

Just some tiny comments;)

Tao Ma wrote:
> Recently, one of our colleagues meet with a problem that if we
> write/delete a 32mb files repeatly, we will get an ENOSPC in
> the end. And the corresponding bug is 1288.
> http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288
>
> The real problem is that although we have freed the clusters,
> they are in truncate log and they will be summed up so that
> we can free them once in a whole.
>
> So this patch just try to resolve it. In case we see -ENOSPC
> in ocfs2_write_begin_no_lock, we will check whether the truncate
> log has enough clusters for our need, if yes, we will try to
> flush the truncate log at that point and try again. This method
> is inspired by Mark Fasheh <mfasheh@suse.com>. Thanks.
>
> Cc: Mark Fasheh <mfasheh@suse.com>
> Signed-off-by: Tao Ma <tao.ma@oracle.com>
> ---
>  fs/ocfs2/alloc.c |    3 ++
>  fs/ocfs2/aops.c  |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  fs/ocfs2/ocfs2.h |    2 +
>  3 files changed, 63 insertions(+), 1 deletions(-)
>
> diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
> index 592fae5..8ec418d 100644
> --- a/fs/ocfs2/alloc.c
> +++ b/fs/ocfs2/alloc.c
> @@ -5858,6 +5858,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
>  
>  	ocfs2_journal_dirty(handle, tl_bh);
>  
> +	osb->truncated_clusters += num_clusters;
>  bail:
>  	mlog_exit(status);
>  	return status;
> @@ -5929,6 +5930,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
>  		i--;
>  	}
>  
> +	osb->truncated_clusters = 0;
> +
>  bail:
>  	mlog_exit(status);
>  	return status;
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index 5cfeee1..79adc67 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -1642,6 +1642,43 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
>  	return ret;
>  }
>  
> +/*
> + * Try to flush truncate log if we can free enough clusters from it.
> + * As for return value, "< 0" means error, "0" no space and "1" means
> + * we have freed enough spaces and let the caller try to allocate again.
> + */
> +static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
> +					  unsigned int needed)
Why not use 'unsigned int *needed' and return the actual number of
clusters being freed?
> +{
> +	tid_t target;
> +	int ret = 0;
> +	unsigned int truncated_clusters;
> +
> +	mutex_lock(&osb->osb_tl_inode->i_mutex);
> +	truncated_clusters = osb->truncated_clusters;
> +	mutex_unlock(&osb->osb_tl_inode->i_mutex);
> +
> +	/*
> +	 * Check whether we can succeed in allocating if we free
> +	 * the truncate log.
> +	 */
> +	if (truncated_clusters < needed)
> +		goto out;
> +
> +	ret = ocfs2_flush_truncate_log(osb);
> +	if (ret) {
> +		mlog_errno(ret);
> +		goto out;
> +	}
> +
> +	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
> +		jbd2_log_wait_commit(osb->journal->j_journal, target);
> +		ret = 1;
> +	}
> +out:
> +	return ret;
> +}
> +
>  int ocfs2_write_begin_nolock(struct file *filp,
>  			     struct address_space *mapping,
>  			     loff_t pos, unsigned len, unsigned flags,
> @@ -1649,7 +1686,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
>  			     struct buffer_head *di_bh, struct page *mmap_page)
>  {
>  	int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
> -	unsigned int clusters_to_alloc, extents_to_split;
> +	unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
>  	struct ocfs2_write_ctxt *wc;
>  	struct inode *inode = mapping->host;
>  	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
> @@ -1658,7 +1695,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
>  	struct ocfs2_alloc_context *meta_ac = NULL;
>  	handle_t *handle;
>  	struct ocfs2_extent_tree et;
> +	int try_free = 0, ret1;
>  
> +try_again:
>  	ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
>  	if (ret) {
>  		mlog_errno(ret);
> @@ -1693,6 +1732,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
>  		mlog_errno(ret);
>  		goto out;
>  	} else if (ret == 1) {
> +		clusters_need = wc->w_clen;
>  		ret = ocfs2_refcount_cow(inode, filp, di_bh,
>  					 wc->w_cpos, wc->w_clen, UINT_MAX);
>  		if (ret) {
> @@ -1707,6 +1747,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
>  		mlog_errno(ret);
>  		goto out;
>  	}
> +	clusters_need += clusters_to_alloc;
>  
>  	di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
>  
> @@ -1829,6 +1870,22 @@ out:
>  		ocfs2_free_alloc_context(data_ac);
>  	if (meta_ac)
>  		ocfs2_free_alloc_context(meta_ac);
> +
> +	if (ret == -ENOSPC && !try_free) {
Literally, if (ret == -ENOSPC && try_free) makes more sense here for
better readability;-)

You can set try_free to a fixed value at the very beginning; in other
words, it sets the number of retries we allow after an allocation
failure.

> +		/*
> +		 * Try to free some truncate log so that we can have enough
> +		 * clusters to allocate.
> +		 */
> +		try_free = 1;

try_free--;

> +
> +		ret1 = ocfs2_try_to_free_truncate_log(osb, clusters_need);
> +		if (ret1 == 1)
> +			goto try_again;
> +
> +		if (ret1 < 0)
> +			mlog_errno(ret1);
> +	}
> +
>  	return ret;
>  }
>  
> diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
> index d840821..de06558 100644
> --- a/fs/ocfs2/ocfs2.h
> +++ b/fs/ocfs2/ocfs2.h
> @@ -439,6 +439,8 @@ struct ocfs2_super
>  	/* rb tree root for refcount lock. */
>  	struct rb_root	osb_rf_lock_tree;
>  	struct ocfs2_refcount_tree *osb_ref_tree_lru;
> +
> +	unsigned int truncated_clusters;
>  };
>  
>  #define OCFS2_SB(sb)	    ((struct ocfs2_super *)(sb)->s_fs_info)

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: Try to free truncate log when meeting ENOSPC in write.
  2010-10-26  8:28 ` tristan
@ 2010-10-26  8:54   ` Tao Ma
  2010-10-26  9:05     ` tristan
  0 siblings, 1 reply; 11+ messages in thread
From: Tao Ma @ 2010-10-26  8:54 UTC (permalink / raw)
  To: ocfs2-devel



On 10/26/2010 04:28 PM, tristan wrote:
> Hi Tao,
>
> Just some tiny comments;)
>
> Tao Ma wrote:
>> Recently, one of our colleagues meet with a problem that if we
>> write/delete a 32mb files repeatly, we will get an ENOSPC in
>> the end. And the corresponding bug is 1288.
>> http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288
>>
>> The real problem is that although we have freed the clusters,
>> they are in truncate log and they will be summed up so that
>> we can free them once in a whole.
>>
>> So this patch just try to resolve it. In case we see -ENOSPC
>> in ocfs2_write_begin_no_lock, we will check whether the truncate
>> log has enough clusters for our need, if yes, we will try to
>> flush the truncate log at that point and try again. This method
>> is inspired by Mark Fasheh <mfasheh@suse.com>. Thanks.
>>
>> Cc: Mark Fasheh <mfasheh@suse.com>
>> Signed-off-by: Tao Ma <tao.ma@oracle.com>
>> ---
>> fs/ocfs2/alloc.c | 3 ++
>> fs/ocfs2/aops.c | 59
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> fs/ocfs2/ocfs2.h | 2 +
>> 3 files changed, 63 insertions(+), 1 deletions(-)
>>
>> diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
>> index 592fae5..8ec418d 100644
>> --- a/fs/ocfs2/alloc.c
>> +++ b/fs/ocfs2/alloc.c
>> @@ -5858,6 +5858,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super
>> *osb,
>>
>> ocfs2_journal_dirty(handle, tl_bh);
>>
>> + osb->truncated_clusters += num_clusters;
>> bail:
>> mlog_exit(status);
>> return status;
>> @@ -5929,6 +5930,8 @@ static int ocfs2_replay_truncate_records(struct
>> ocfs2_super *osb,
>> i--;
>> }
>>
>> + osb->truncated_clusters = 0;
>> +
>> bail:
>> mlog_exit(status);
>> return status;
>> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
>> index 5cfeee1..79adc67 100644
>> --- a/fs/ocfs2/aops.c
>> +++ b/fs/ocfs2/aops.c
>> @@ -1642,6 +1642,43 @@ static int ocfs2_zero_tail(struct inode *inode,
>> struct buffer_head *di_bh,
>> return ret;
>> }
>>
>> +/*
>> + * Try to flush truncate log if we can free enough clusters from it.
>> + * As for return value, "< 0" means error, "0" no space and "1" means
>> + * we have freed enough spaces and let the caller try to allocate again.
>> + */
>> +static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
>> + unsigned int needed)
> why not use 'unsigned int *needed, and return the actual cluster being
> freed.
I don't think we need to return 'freed clusters' here (which implies we
will flush the truncate log no matter what 'needed' is). What I want is
that if we can free 'needed', just do it. If not, just exit, because even
if we free some clusters, that can't meet our need and the allocation
would still fail. 'Free some clusters' here means that we have to flush
the truncate log and wait for the journal commit. It is a bit
time-consuming, so why make the user wait for some time (for freeing some
clusters in the truncate log) when eventually he will get an ENOSPC anyway?
>> +{
>> + tid_t target;
>> + int ret = 0;
>> + unsigned int truncated_clusters;
>> +
>> + mutex_lock(&osb->osb_tl_inode->i_mutex);
>> + truncated_clusters = osb->truncated_clusters;
>> + mutex_unlock(&osb->osb_tl_inode->i_mutex);
>> +
>> + /*
>> + * Check whether we can succeed in allocating if we free
>> + * the truncate log.
>> + */
>> + if (truncated_clusters < needed)
>> + goto out;
>> +
>> + ret = ocfs2_flush_truncate_log(osb);
>> + if (ret) {
>> + mlog_errno(ret);
>> + goto out;
>> + }
>> +
>> + if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
>> + jbd2_log_wait_commit(osb->journal->j_journal, target);
>> + ret = 1;
>> + }
>> +out:
>> + return ret;
>> +}
>> +
>> int ocfs2_write_begin_nolock(struct file *filp,
>> struct address_space *mapping,
>> loff_t pos, unsigned len, unsigned flags,
>> @@ -1649,7 +1686,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
>> struct buffer_head *di_bh, struct page *mmap_page)
>> {
>> int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
>> - unsigned int clusters_to_alloc, extents_to_split;
>> + unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
>> struct ocfs2_write_ctxt *wc;
>> struct inode *inode = mapping->host;
>> struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>> @@ -1658,7 +1695,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
>> struct ocfs2_alloc_context *meta_ac = NULL;
>> handle_t *handle;
>> struct ocfs2_extent_tree et;
>> + int try_free = 0, ret1;
>>
>> +try_again:
>> ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
>> if (ret) {
>> mlog_errno(ret);
>> @@ -1693,6 +1732,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
>> mlog_errno(ret);
>> goto out;
>> } else if (ret == 1) {
>> + clusters_need = wc->w_clen;
>> ret = ocfs2_refcount_cow(inode, filp, di_bh,
>> wc->w_cpos, wc->w_clen, UINT_MAX);
>> if (ret) {
>> @@ -1707,6 +1747,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
>> mlog_errno(ret);
>> goto out;
>> }
>> + clusters_need += clusters_to_alloc;
>>
>> di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
>>
>> @@ -1829,6 +1870,22 @@ out:
>> ocfs2_free_alloc_context(data_ac);
>> if (meta_ac)
>> ocfs2_free_alloc_context(meta_ac);
>> +
>> + if (ret == -ENOSPC && !try_free) {
> Literally, if (ret == -ENOSPC && try_free) make more sense here for a
> better readability;-)
>
> You can set the try_free with a fixed value at the very beginning, which
> in other words, means set the
> retry times we're allowing to perform after the allocation failure.
Does the user really need to try several times? I am not sure.
Yes, we can try several times, but if the first try doesn't work, do you
think there is a real chance that some other process just happens to
truncate and fill in the truncate log for us between 2 tries?

If yes, it is hard for us to tell how many tries are appropriate.
If the system is in this state (nearly full and needing a truncate log
flush to allocate clusters), I guess the right step is to let the user
know that -ENOSPC did happen (if flushing the truncate log doesn't help
either) so that they can do something about it.

Regards,
Tao

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: Try to free truncate log when meeting ENOSPC in write.
  2010-10-26  8:54   ` Tao Ma
@ 2010-10-26  9:05     ` tristan
  0 siblings, 0 replies; 11+ messages in thread
From: tristan @ 2010-10-26  9:05 UTC (permalink / raw)
  To: ocfs2-devel

Tao Ma wrote:
>
>
> On 10/26/2010 04:28 PM, tristan wrote:
>> Hi Tao,
>>
>> Just some tiny comments;)
>>
>> Tao Ma wrote:
>>> Recently, one of our colleagues meet with a problem that if we
>>> write/delete a 32mb files repeatly, we will get an ENOSPC in
>>> the end. And the corresponding bug is 1288.
>>> http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288
>>>
>>> The real problem is that although we have freed the clusters,
>>> they are in truncate log and they will be summed up so that
>>> we can free them once in a whole.
>>>
>>> So this patch just try to resolve it. In case we see -ENOSPC
>>> in ocfs2_write_begin_no_lock, we will check whether the truncate
>>> log has enough clusters for our need, if yes, we will try to
>>> flush the truncate log at that point and try again. This method
>>> is inspired by Mark Fasheh <mfasheh@suse.com>. Thanks.
>>>
>>> Cc: Mark Fasheh <mfasheh@suse.com>
>>> Signed-off-by: Tao Ma <tao.ma@oracle.com>
>>> ---
>>> fs/ocfs2/alloc.c | 3 ++
>>> fs/ocfs2/aops.c | 59
>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>> fs/ocfs2/ocfs2.h | 2 +
>>> 3 files changed, 63 insertions(+), 1 deletions(-)
>>>
>>> diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
>>> index 592fae5..8ec418d 100644
>>> --- a/fs/ocfs2/alloc.c
>>> +++ b/fs/ocfs2/alloc.c
>>> @@ -5858,6 +5858,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super
>>> *osb,
>>>
>>> ocfs2_journal_dirty(handle, tl_bh);
>>>
>>> + osb->truncated_clusters += num_clusters;
>>> bail:
>>> mlog_exit(status);
>>> return status;
>>> @@ -5929,6 +5930,8 @@ static int ocfs2_replay_truncate_records(struct
>>> ocfs2_super *osb,
>>> i--;
>>> }
>>>
>>> + osb->truncated_clusters = 0;
>>> +
>>> bail:
>>> mlog_exit(status);
>>> return status;
>>> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
>>> index 5cfeee1..79adc67 100644
>>> --- a/fs/ocfs2/aops.c
>>> +++ b/fs/ocfs2/aops.c
>>> @@ -1642,6 +1642,43 @@ static int ocfs2_zero_tail(struct inode *inode,
>>> struct buffer_head *di_bh,
>>> return ret;
>>> }
>>>
>>> +/*
>>> + * Try to flush truncate log if we can free enough clusters from it.
>>> + * As for return value, "< 0" means error, "0" no space and "1" means
>>> + * we have freed enough spaces and let the caller try to allocate 
>>> again.
>>> + */
>>> +static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
>>> + unsigned int needed)
>> why not use 'unsigned int *needed, and return the actual cluster being
>> freed.
> I don't think we need to return 'freed clusters' here(which indicates 
> we will flush the truncate log no matter 'needed' is). what I want is 
> that if we can free 'needed', just do it. If not, go 'exit' because 
> even if we free some clusters, it can't fit our need and the 
> allocation would still fail. 'Free some clusters' here means that we 
> have to flush the truncate log and wait for the journal commit. It is 
> a bit time-consuming, so why let the user wait for some time(for 
> freeing some clusters in truncate log) while eventually he will get an 
> ENOSPC?

Alright.

>>> +{
>>> + tid_t target;
>>> + int ret = 0;
>>> + unsigned int truncated_clusters;
>>> +
>>> + mutex_lock(&osb->osb_tl_inode->i_mutex);
>>> + truncated_clusters = osb->truncated_clusters;
>>> + mutex_unlock(&osb->osb_tl_inode->i_mutex);
>>> +
>>> + /*
>>> + * Check whether we can succeed in allocating if we free
>>> + * the truncate log.
>>> + */
>>> + if (truncated_clusters < needed)
>>> + goto out;
>>> +
>>> + ret = ocfs2_flush_truncate_log(osb);
>>> + if (ret) {
>>> + mlog_errno(ret);
>>> + goto out;
>>> + }
>>> +
>>> + if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
>>> + jbd2_log_wait_commit(osb->journal->j_journal, target);
>>> + ret = 1;
>>> + }
>>> +out:
>>> + return ret;
>>> +}
>>> +
>>> int ocfs2_write_begin_nolock(struct file *filp,
>>> struct address_space *mapping,
>>> loff_t pos, unsigned len, unsigned flags,
>>> @@ -1649,7 +1686,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
>>> struct buffer_head *di_bh, struct page *mmap_page)
>>> {
>>> int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
>>> - unsigned int clusters_to_alloc, extents_to_split;
>>> + unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
>>> struct ocfs2_write_ctxt *wc;
>>> struct inode *inode = mapping->host;
>>> struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
>>> @@ -1658,7 +1695,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
>>> struct ocfs2_alloc_context *meta_ac = NULL;
>>> handle_t *handle;
>>> struct ocfs2_extent_tree et;
>>> + int try_free = 0, ret1;
>>>
>>> +try_again:
>>> ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
>>> if (ret) {
>>> mlog_errno(ret);
>>> @@ -1693,6 +1732,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
>>> mlog_errno(ret);
>>> goto out;
>>> } else if (ret == 1) {
>>> + clusters_need = wc->w_clen;
>>> ret = ocfs2_refcount_cow(inode, filp, di_bh,
>>> wc->w_cpos, wc->w_clen, UINT_MAX);
>>> if (ret) {
>>> @@ -1707,6 +1747,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
>>> mlog_errno(ret);
>>> goto out;
>>> }
>>> + clusters_need += clusters_to_alloc;
>>>
>>> di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
>>>
>>> @@ -1829,6 +1870,22 @@ out:
>>> ocfs2_free_alloc_context(data_ac);
>>> if (meta_ac)
>>> ocfs2_free_alloc_context(meta_ac);
>>> +
>>> + if (ret == -ENOSPC && !try_free) {
>> Literally, if (ret == -ENOSPC && try_free) make more sense here for a
>> better readability;-)
>>
>> You can set the try_free with a fixed value at the very beginning, which
>> in other words, means set the
>> retry times we're allowing to perform after the allocation failure.
> Is it really needed for the user to try several times? I am not sure. 
> Yes, we can try several times, but if the first try doesn't work, do 
> you think we can have another chance that some other process just 
> happen to truncate and fill in the truncate log for us between 2 tries?
>
> If yes, it is hard for us to tell how many times is appropriate to 
> try. If the system is in this stage(nearly full and needs a truncate 
> log flush to allocate clusters), I guess the right step is let the 
> user know -ENOSPC does happen(if flush truncate log doesn't help 
> either) and do something instead.


Yep, the number of retries is not that easy to evaluate. My original
intention was to use 'try_free' instead of '!try_free' to decide whether
we perform truncate log flushing or not, just for better readability;)


>
> Regards,
> Tao

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: Try to free truncate log when meeting ENOSPC in write.
  2010-10-26  7:29 [Ocfs2-devel] [PATCH] ocfs2: Try to free truncate log when meeting ENOSPC in write Tao Ma
  2010-10-26  8:28 ` tristan
@ 2010-11-04  1:46 ` Joel Becker
  2010-11-04  5:38   ` Tao Ma
  2010-11-04  7:14 ` [Ocfs2-devel] [PATCH v2] " Tao Ma
  2 siblings, 1 reply; 11+ messages in thread
From: Joel Becker @ 2010-11-04  1:46 UTC (permalink / raw)
  To: ocfs2-devel

On Tue, Oct 26, 2010 at 03:29:08PM +0800, Tao Ma wrote:
> Recently, one of our colleagues meet with a problem that if we
> write/delete a 32mb files repeatly, we will get an ENOSPC in
> the end. And the corresponding bug is 1288.
> http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288
> 
> The real problem is that although we have freed the clusters,
> they are in truncate log and they will be summed up so that
> we can free them once in a whole.
> 
> So this patch just try to resolve it. In case we see -ENOSPC
> in ocfs2_write_begin_no_lock, we will check whether the truncate
> log has enough clusters for our need, if yes, we will try to
> flush the truncate log at that point and try again. This method
> is inspired by Mark Fasheh <mfasheh@suse.com>. Thanks.

	Ok, this is more like what I was thinking about.  I guess I wish
we could do it inside the allocation routines, but I think we have too
many locks at that point.  This patch doesn't help any place that
allocates other than write() and mmap().  Is this something that could
be added to extending truncate and unwritten allocation?
	I agree that we only care to have one pass.  Have you run it
against the test case from
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288?  If so, modulo
any cleanups you did after others commented, I'm happy ;-)

Joel
  
> diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
> index d840821..de06558 100644
> --- a/fs/ocfs2/ocfs2.h
> +++ b/fs/ocfs2/ocfs2.h
> @@ -439,6 +439,8 @@ struct ocfs2_super
>  	/* rb tree root for refcount lock. */
>  	struct rb_root	osb_rf_lock_tree;
>  	struct ocfs2_refcount_tree *osb_ref_tree_lru;
> +
> +	unsigned int truncated_clusters;

	Make sure you comment what lock is needed to access
truncated_clusters.



-- 

Life's Little Instruction Book #232

	"Keep your promises."

Joel Becker
Senior Development Manager
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: Try to free truncate log when meeting ENOSPC in write.
  2010-11-04  1:46 ` Joel Becker
@ 2010-11-04  5:38   ` Tao Ma
  2010-11-04  6:19     ` Joel Becker
  0 siblings, 1 reply; 11+ messages in thread
From: Tao Ma @ 2010-11-04  5:38 UTC (permalink / raw)
  To: ocfs2-devel



On 11/04/2010 09:46 AM, Joel Becker wrote:
> On Tue, Oct 26, 2010 at 03:29:08PM +0800, Tao Ma wrote:
>> Recently, one of our colleagues meet with a problem that if we
>> write/delete a 32mb files repeatly, we will get an ENOSPC in
>> the end. And the corresponding bug is 1288.
>> http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288
>>
>> The real problem is that although we have freed the clusters,
>> they are in truncate log and they will be summed up so that
>> we can free them once in a whole.
>>
>> So this patch just try to resolve it. In case we see -ENOSPC
>> in ocfs2_write_begin_no_lock, we will check whether the truncate
>> log has enough clusters for our need, if yes, we will try to
>> flush the truncate log at that point and try again. This method
>> is inspired by Mark Fasheh<mfasheh@suse.com>. Thanks.
>
> 	Ok, this is more like what I was thinking about.  I guess I wish
> we could do it inside the allocation routines, but I think we have too
> many locks at that point.
yeah, the locks are really the biggest problem for us.
>  This patch doesn't help any place that
> allocates other than write() and mmap().  Is this something that could
> be added to extending truncate and unwritten allocation?
I haven't tried it in other cases, but in general it should be possible
since all the work is done in ocfs2_try_to_free_truncate_log. We may try
it in other places after thinking carefully about the locking there. But I
guess we can get it in first to see how it works. If it is right and
robust, I can generate another patch for them later. :)
> 	I agree that we only care to have one pass.  Have you run it
> against the test case from
> http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288?  If so, modulo
> any cleanups you did after others commented, I'm happy ;-)
Yes, I have run the test case overnight and didn't hit ENOSPC any more.
So would you consider accepting it? If yes, I will regenerate the patch
for your merge.
>
>> diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
>> index d840821..de06558 100644
>> --- a/fs/ocfs2/ocfs2.h
>> +++ b/fs/ocfs2/ocfs2.h
>> @@ -439,6 +439,8 @@ struct ocfs2_super
>>   	/* rb tree root for refcount lock. */
>>   	struct rb_root	osb_rf_lock_tree;
>>   	struct ocfs2_refcount_tree *osb_ref_tree_lru;
>> +
>> +	unsigned int truncated_clusters;
>
> 	Make sure you comment what lock is needed to access
> truncated_clusters.
OK, I will add comment for it.

Regards,
Tao

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: Try to free truncate log when meeting ENOSPC in write.
  2010-11-04  5:38   ` Tao Ma
@ 2010-11-04  6:19     ` Joel Becker
  0 siblings, 0 replies; 11+ messages in thread
From: Joel Becker @ 2010-11-04  6:19 UTC (permalink / raw)
  To: ocfs2-devel

On Thu, Nov 04, 2010 at 01:38:41PM +0800, Tao Ma wrote:
> yes, I have run the test case overnight and didn't hit ENOSPC any
> more. So do you think of accepting it? If yes, I will regenerate the
> patch for your merge.

	Please do.  Thanks.

Joel

-- 

"The question of whether computers can think is just like the question
 of whether submarines can swim."
	- Edsger W. Dijkstra

Joel Becker
Senior Development Manager
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: Try to free truncate log when meeting ENOSPC in write.
  2010-10-26  7:29 [Ocfs2-devel] [PATCH] ocfs2: Try to free truncate log when meeting ENOSPC in write Tao Ma
  2010-10-26  8:28 ` tristan
  2010-11-04  1:46 ` Joel Becker
@ 2010-11-04  7:14 ` Tao Ma
  2010-12-08  1:58   ` Joel Becker
  2010-12-16  8:51   ` Joel Becker
  2 siblings, 2 replies; 11+ messages in thread
From: Tao Ma @ 2010-11-04  7:14 UTC (permalink / raw)
  To: ocfs2-devel

Recently, one of our colleagues met with a problem: if we
write/delete a 32MB file repeatedly, we will eventually get an
ENOSPC. The corresponding bug is 1288.
http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288

The real problem is that although we have freed the clusters,
they are still in the truncate log, where they are accumulated
so that we can free them all at once.

So this patch tries to resolve it. In case we see -ENOSPC
in ocfs2_write_begin_nolock, we will check whether the truncate
log has enough clusters for our need; if yes, we will try to
flush the truncate log at that point and try again. This method
was inspired by Mark Fasheh <mfasheh@suse.com>. Thanks.

Cc: Mark Fasheh <mfasheh@suse.com>
Signed-off-by: Tao Ma <tao.ma@oracle.com>
---
 fs/ocfs2/alloc.c |    3 ++
 fs/ocfs2/aops.c  |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ocfs2/ocfs2.h |    5 ++++
 3 files changed, 66 insertions(+), 1 deletions(-)

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 592fae5..8ec418d 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5858,6 +5858,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 
 	ocfs2_journal_dirty(handle, tl_bh);
 
+	osb->truncated_clusters += num_clusters;
 bail:
 	mlog_exit(status);
 	return status;
@@ -5929,6 +5930,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
 		i--;
 	}
 
+	osb->truncated_clusters = 0;
+
 bail:
 	mlog_exit(status);
 	return status;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 5cfeee1..108743f 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1642,6 +1642,43 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
 	return ret;
 }
 
+/*
+ * Try to flush truncate logs if we can free enough clusters from it.
+ * As for return value, "< 0" means error, "0" no space and "1" means
+ * we have freed enough spaces and let the caller try to allocate again.
+ */
+static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
+					  unsigned int needed)
+{
+	tid_t target;
+	int ret = 0;
+	unsigned int truncated_clusters;
+
+	mutex_lock(&osb->osb_tl_inode->i_mutex);
+	truncated_clusters = osb->truncated_clusters;
+	mutex_unlock(&osb->osb_tl_inode->i_mutex);
+
+	/*
+	 * Check whether we can succeed in allocating if we free
+	 * the truncate log.
+	 */
+	if (truncated_clusters < needed)
+		goto out;
+
+	ret = ocfs2_flush_truncate_log(osb);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
+		jbd2_log_wait_commit(osb->journal->j_journal, target);
+		ret = 1;
+	}
+out:
+	return ret;
+}
+
 int ocfs2_write_begin_nolock(struct file *filp,
 			     struct address_space *mapping,
 			     loff_t pos, unsigned len, unsigned flags,
@@ -1649,7 +1686,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
 			     struct buffer_head *di_bh, struct page *mmap_page)
 {
 	int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
-	unsigned int clusters_to_alloc, extents_to_split;
+	unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
 	struct ocfs2_write_ctxt *wc;
 	struct inode *inode = mapping->host;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -1658,7 +1695,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
 	struct ocfs2_alloc_context *meta_ac = NULL;
 	handle_t *handle;
 	struct ocfs2_extent_tree et;
+	int try_free = 1, ret1;
 
+try_again:
 	ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
 	if (ret) {
 		mlog_errno(ret);
@@ -1693,6 +1732,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
 		mlog_errno(ret);
 		goto out;
 	} else if (ret == 1) {
+		clusters_need = wc->w_clen;
 		ret = ocfs2_refcount_cow(inode, filp, di_bh,
 					 wc->w_cpos, wc->w_clen, UINT_MAX);
 		if (ret) {
@@ -1707,6 +1747,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
 		mlog_errno(ret);
 		goto out;
 	}
+	clusters_need += clusters_to_alloc;
 
 	di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
 
@@ -1829,6 +1870,22 @@ out:
 		ocfs2_free_alloc_context(data_ac);
 	if (meta_ac)
 		ocfs2_free_alloc_context(meta_ac);
+
+	if (ret == -ENOSPC && try_free) {
+		/*
+		 * Try to free some truncate log so that we can have enough
+		 * clusters to allocate.
+		 */
+		try_free = 0;
+
+		ret1 = ocfs2_try_to_free_truncate_log(osb, clusters_need);
+		if (ret1 == 1)
+			goto try_again;
+
+		if (ret1 < 0)
+			mlog_errno(ret1);
+	}
+
 	return ret;
 }
 
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d840821..f641b27 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -420,6 +420,11 @@ struct ocfs2_super
 	struct inode			*osb_tl_inode;
 	struct buffer_head		*osb_tl_bh;
 	struct delayed_work		osb_truncate_log_wq;
+	/*
+	 * How many clusters in our truncate log.
+	 * It must be protected by osb_tl_inode->i_mutex.
+	 */
+	unsigned int truncated_clusters;
 
 	struct ocfs2_node_map		osb_recovering_orphan_dirs;
 	unsigned int			*osb_orphan_wipes;
-- 
1.7.1.571.gba4d01

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: Try to free truncate log when meeting ENOSPC in write.
  2010-11-04  7:14 ` [Ocfs2-devel] [PATCH v2] " Tao Ma
@ 2010-12-08  1:58   ` Joel Becker
  2010-12-08  2:15     ` Sunil Mushran
  2010-12-16  8:51   ` Joel Becker
  1 sibling, 1 reply; 11+ messages in thread
From: Joel Becker @ 2010-12-08  1:58 UTC (permalink / raw)
  To: ocfs2-devel

On Thu, Nov 04, 2010 at 03:14:11PM +0800, Tao Ma wrote:
> Recently, one of our colleagues meet with a problem that if we
> write/delete a 32mb files repeatly, we will get an ENOSPC in
> the end. And the corresponding bug is 1288.
> http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288
> 
> The real problem is that although we have freed the clusters,
> they are in truncate log and they will be summed up so that
> we can free them once in a whole.
> 
> So this patch just try to resolve it. In case we see -ENOSPC
> in ocfs2_write_begin_no_lock, we will check whether the truncate
> log has enough clusters for our need, if yes, we will try to
> flush the truncate log at that point and try again. This method
> is inspired by Mark Fasheh <mfasheh@suse.com>. Thanks.

	I think I like this.  Anyone have any objections?

Joel

-- 

"Every new beginning comes from some other beginning's end."

Joel Becker
Senior Development Manager
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: Try to free truncate log when meeting ENOSPC in write.
  2010-12-08  1:58   ` Joel Becker
@ 2010-12-08  2:15     ` Sunil Mushran
  0 siblings, 0 replies; 11+ messages in thread
From: Sunil Mushran @ 2010-12-08  2:15 UTC (permalink / raw)
  To: ocfs2-devel

On 12/07/2010 05:58 PM, Joel Becker wrote:
> On Thu, Nov 04, 2010 at 03:14:11PM +0800, Tao Ma wrote:
>> Recently, one of our colleagues meet with a problem that if we
>> write/delete a 32mb files repeatly, we will get an ENOSPC in
>> the end. And the corresponding bug is 1288.
>> http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288
>>
>> The real problem is that although we have freed the clusters,
>> they are in truncate log and they will be summed up so that
>> we can free them once in a whole.
>>
>> So this patch just try to resolve it. In case we see -ENOSPC
>> in ocfs2_write_begin_no_lock, we will check whether the truncate
>> log has enough clusters for our need, if yes, we will try to
>> flush the truncate log at that point and try again. This method
>> is inspired by Mark Fasheh<mfasheh@suse.com>. Thanks.
> 	I think I like this.  Anyone have any objections?

Looks good.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [Ocfs2-devel] [PATCH v2] ocfs2: Try to free truncate log when meeting ENOSPC in write.
  2010-11-04  7:14 ` [Ocfs2-devel] [PATCH v2] " Tao Ma
  2010-12-08  1:58   ` Joel Becker
@ 2010-12-16  8:51   ` Joel Becker
  1 sibling, 0 replies; 11+ messages in thread
From: Joel Becker @ 2010-12-16  8:51 UTC (permalink / raw)
  To: ocfs2-devel

On Thu, Nov 04, 2010 at 03:14:11PM +0800, Tao Ma wrote:
> Recently, one of our colleagues meet with a problem that if we
> write/delete a 32mb files repeatly, we will get an ENOSPC in
> the end. And the corresponding bug is 1288.
> http://oss.oracle.com/bugzilla/show_bug.cgi?id=1288
> 
> The real problem is that although we have freed the clusters,
> they are in truncate log and they will be summed up so that
> we can free them once in a whole.
> 
> So this patch just try to resolve it. In case we see -ENOSPC
> in ocfs2_write_begin_no_lock, we will check whether the truncate
> log has enough clusters for our need, if yes, we will try to
> flush the truncate log at that point and try again. This method
> is inspired by Mark Fasheh <mfasheh@suse.com>. Thanks.
> 
> Cc: Mark Fasheh <mfasheh@suse.com>
> Signed-off-by: Tao Ma <tao.ma@oracle.com>

This is now in the merge-window branch of ocfs2.git.

Joel

> ---
>  fs/ocfs2/alloc.c |    3 ++
>  fs/ocfs2/aops.c  |   59 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  fs/ocfs2/ocfs2.h |    5 ++++
>  3 files changed, 66 insertions(+), 1 deletions(-)
> 
> diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
> index 592fae5..8ec418d 100644
> --- a/fs/ocfs2/alloc.c
> +++ b/fs/ocfs2/alloc.c
> @@ -5858,6 +5858,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
>  
>  	ocfs2_journal_dirty(handle, tl_bh);
>  
> +	osb->truncated_clusters += num_clusters;
>  bail:
>  	mlog_exit(status);
>  	return status;
> @@ -5929,6 +5930,8 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
>  		i--;
>  	}
>  
> +	osb->truncated_clusters = 0;
> +
>  bail:
>  	mlog_exit(status);
>  	return status;
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index 5cfeee1..108743f 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -1642,6 +1642,43 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
>  	return ret;
>  }
>  
> +/*
> + * Try to flush truncate logs if we can free enough clusters from it.
> + * As for return value, "< 0" means error, "0" no space and "1" means
> + * we have freed enough spaces and let the caller try to allocate again.
> + */
> +static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
> +					  unsigned int needed)
> +{
> +	tid_t target;
> +	int ret = 0;
> +	unsigned int truncated_clusters;
> +
> +	mutex_lock(&osb->osb_tl_inode->i_mutex);
> +	truncated_clusters = osb->truncated_clusters;
> +	mutex_unlock(&osb->osb_tl_inode->i_mutex);
> +
> +	/*
> +	 * Check whether we can succeed in allocating if we free
> +	 * the truncate log.
> +	 */
> +	if (truncated_clusters < needed)
> +		goto out;
> +
> +	ret = ocfs2_flush_truncate_log(osb);
> +	if (ret) {
> +		mlog_errno(ret);
> +		goto out;
> +	}
> +
> +	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
> +		jbd2_log_wait_commit(osb->journal->j_journal, target);
> +		ret = 1;
> +	}
> +out:
> +	return ret;
> +}
> +
>  int ocfs2_write_begin_nolock(struct file *filp,
>  			     struct address_space *mapping,
>  			     loff_t pos, unsigned len, unsigned flags,
> @@ -1649,7 +1686,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
>  			     struct buffer_head *di_bh, struct page *mmap_page)
>  {
>  	int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
> -	unsigned int clusters_to_alloc, extents_to_split;
> +	unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
>  	struct ocfs2_write_ctxt *wc;
>  	struct inode *inode = mapping->host;
>  	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
> @@ -1658,7 +1695,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
>  	struct ocfs2_alloc_context *meta_ac = NULL;
>  	handle_t *handle;
>  	struct ocfs2_extent_tree et;
> +	int try_free = 1, ret1;
>  
> +try_again:
>  	ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
>  	if (ret) {
>  		mlog_errno(ret);
> @@ -1693,6 +1732,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
>  		mlog_errno(ret);
>  		goto out;
>  	} else if (ret == 1) {
> +		clusters_need = wc->w_clen;
>  		ret = ocfs2_refcount_cow(inode, filp, di_bh,
>  					 wc->w_cpos, wc->w_clen, UINT_MAX);
>  		if (ret) {
> @@ -1707,6 +1747,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
>  		mlog_errno(ret);
>  		goto out;
>  	}
> +	clusters_need += clusters_to_alloc;
>  
>  	di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
>  
> @@ -1829,6 +1870,22 @@ out:
>  		ocfs2_free_alloc_context(data_ac);
>  	if (meta_ac)
>  		ocfs2_free_alloc_context(meta_ac);
> +
> +	if (ret == -ENOSPC && try_free) {
> +		/*
> +		 * Try to free some truncate log so that we can have enough
> +		 * clusters to allocate.
> +		 */
> +		try_free = 0;
> +
> +		ret1 = ocfs2_try_to_free_truncate_log(osb, clusters_need);
> +		if (ret1 == 1)
> +			goto try_again;
> +
> +		if (ret1 < 0)
> +			mlog_errno(ret1);
> +	}
> +
>  	return ret;
>  }
>  
> diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
> index d840821..f641b27 100644
> --- a/fs/ocfs2/ocfs2.h
> +++ b/fs/ocfs2/ocfs2.h
> @@ -420,6 +420,11 @@ struct ocfs2_super
>  	struct inode			*osb_tl_inode;
>  	struct buffer_head		*osb_tl_bh;
>  	struct delayed_work		osb_truncate_log_wq;
> +	/*
> +	 * How many clusters in our truncate log.
> +	 * It must be protected by osb_tl_inode->i_mutex.
> +	 */
> +	unsigned int truncated_clusters;
>  
>  	struct ocfs2_node_map		osb_recovering_orphan_dirs;
>  	unsigned int			*osb_orphan_wipes;
> -- 
> 1.7.1.571.gba4d01
> 

-- 

"Egotist: a person more interested in himself than in me."
         - Ambrose Bierce 

Joel Becker
Senior Development Manager
Oracle
E-mail: joel.becker at oracle.com
Phone: (650) 506-8127

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2010-12-16  8:51 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-26  7:29 [Ocfs2-devel] [PATCH] ocfs2: Try to free truncate log when meeting ENOSPC in write Tao Ma
2010-10-26  8:28 ` tristan
2010-10-26  8:54   ` Tao Ma
2010-10-26  9:05     ` tristan
2010-11-04  1:46 ` Joel Becker
2010-11-04  5:38   ` Tao Ma
2010-11-04  6:19     ` Joel Becker
2010-11-04  7:14 ` [Ocfs2-devel] [PATCH v2] " Tao Ma
2010-12-08  1:58   ` Joel Becker
2010-12-08  2:15     ` Sunil Mushran
2010-12-16  8:51   ` Joel Becker

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.