All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] btrfs: introduce priority based delalloc shrink mechanism
@ 2016-10-12  9:03 Wang Xiaoguang
  2016-10-12  9:03 ` [PATCH 2/2] btrfs: try to satisfy metadata requests if wen can overcommit Wang Xiaoguang
  2016-10-12 17:20 ` [PATCH 1/2] btrfs: introduce priority based delalloc shrink mechanism Josef Bacik
  0 siblings, 2 replies; 8+ messages in thread
From: Wang Xiaoguang @ 2016-10-12  9:03 UTC (permalink / raw)
  To: linux-btrfs; +Cc: jbacik

Since commit b02441999efcc6152b87cd58e7970bb7843f76cf, we don't wait all
ordered extents, but I run into some enospc errors when doing large file
create and delete tests, it's because shrink_delalloc() does not write
enough delalloc bytes and wait them finished:
    From: Miao Xie <miaox@cn.fujitsu.com>
    Date: Mon, 4 Nov 2013 23:13:25 +0800
    Subject: [PATCH] Btrfs: don't wait for the completion of all the ordered extents

    It is very likely that there are lots of ordered extents in the filesytem,
    if we wait for the completion of all of them when we want to reclaim some
    space for the metadata space reservation, we would be blocked for a long
    time. The performance would drop down suddenly for a long time.

Here we introduce a simple reclaim_priority variable, the higher the
value, the higher the priority, 0 is the minimum priority. The core
idea is:
        if (reclaim_priority >= 3)
                to_reclaim = percpu_counter_sum_positive(&root->fs_info->delalloc_bytes);
        else
                to_reclaim = orig * (2 << (reclaim_priority + 1));
As the priority increases, we try wo write more delalloc bytes, here orig
is the number of metadata we want to reserve. Meanwhile if
"reclaim_priority >= 3" returns true, we'll also wait all current ordered
extents to finish.

Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
 fs/btrfs/extent-tree.c       | 49 ++++++++++++++++++++++++++++----------------
 include/trace/events/btrfs.h | 11 ++++------
 2 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 665da8f..661f039 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4690,12 +4690,13 @@ static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
 /*
  * shrink metadata reservation for delalloc
  */
-static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
-			    bool wait_ordered)
+static void shrink_delalloc(struct btrfs_root *root, u64 orig,
+			    bool wait_ordered, int reclaim_priority)
 {
 	struct btrfs_block_rsv *block_rsv;
 	struct btrfs_space_info *space_info;
 	struct btrfs_trans_handle *trans;
+	u64 to_reclaim;
 	u64 delalloc_bytes;
 	u64 max_reclaim;
 	long time_left;
@@ -4703,22 +4704,35 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	int loops;
 	int items;
 	enum btrfs_reserve_flush_enum flush;
+	int items_to_wait;
+
+	if (reclaim_priority < 0)
+		reclaim_priority = 0;
+
+	delalloc_bytes = percpu_counter_sum_positive(
+				&root->fs_info->delalloc_bytes);
+	if (reclaim_priority >= 3)
+		to_reclaim = delalloc_bytes;
+	else
+		to_reclaim = orig * (2 << (reclaim_priority + 1));
 
 	/* Calc the number of the pages we need flush for space reservation */
 	items = calc_reclaim_items_nr(root, to_reclaim);
 	to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM;
+	if (reclaim_priority >= 3)
+		items_to_wait = -1;
+	else
+		items_to_wait = items;
 
 	trans = (struct btrfs_trans_handle *)current->journal_info;
 	block_rsv = &root->fs_info->delalloc_block_rsv;
 	space_info = block_rsv->space_info;
 
-	delalloc_bytes = percpu_counter_sum_positive(
-						&root->fs_info->delalloc_bytes);
 	if (delalloc_bytes == 0) {
 		if (trans)
 			return;
 		if (wait_ordered)
-			btrfs_wait_ordered_roots(root->fs_info, items,
+			btrfs_wait_ordered_roots(root->fs_info, items_to_wait,
 						 0, (u64)-1);
 		return;
 	}
@@ -4763,7 +4777,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 
 		loops++;
 		if (wait_ordered && !trans) {
-			btrfs_wait_ordered_roots(root->fs_info, items,
+			btrfs_wait_ordered_roots(root->fs_info, items_to_wait,
 						 0, (u64)-1);
 		} else {
 			time_left = schedule_timeout_killable(1);
@@ -4836,7 +4850,7 @@ struct reserve_ticket {
 
 static int flush_space(struct btrfs_root *root,
 		       struct btrfs_space_info *space_info, u64 num_bytes,
-		       u64 orig_bytes, int state)
+		       int state, int reclaim_priority)
 {
 	struct btrfs_trans_handle *trans;
 	int nr;
@@ -4860,8 +4874,8 @@ static int flush_space(struct btrfs_root *root,
 		break;
 	case FLUSH_DELALLOC:
 	case FLUSH_DELALLOC_WAIT:
-		shrink_delalloc(root, num_bytes * 2, orig_bytes,
-				state == FLUSH_DELALLOC_WAIT);
+		shrink_delalloc(root, num_bytes, state == FLUSH_DELALLOC_WAIT,
+				reclaim_priority);
 		break;
 	case ALLOC_CHUNK:
 		trans = btrfs_join_transaction(root);
@@ -4877,7 +4891,7 @@ static int flush_space(struct btrfs_root *root,
 			ret = 0;
 		break;
 	case COMMIT_TRANS:
-		ret = may_commit_transaction(root, space_info, orig_bytes, 0);
+		ret = may_commit_transaction(root, space_info, num_bytes, 0);
 		break;
 	default:
 		ret = -ENOSPC;
@@ -4885,7 +4899,7 @@ static int flush_space(struct btrfs_root *root,
 	}
 
 	trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes,
-				orig_bytes, state, ret);
+				state, ret);
 	return ret;
 }
 
@@ -4967,7 +4981,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 	struct btrfs_space_info *space_info;
 	u64 to_reclaim;
 	int flush_state;
-	int commit_cycles = 0;
+	int reclaim_priority = 0;
 	u64 last_tickets_id;
 
 	fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
@@ -4990,7 +5004,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 		int ret;
 
 		ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
-			    to_reclaim, flush_state);
+				  flush_state, reclaim_priority);
 		spin_lock(&space_info->lock);
 		if (list_empty(&space_info->tickets)) {
 			space_info->flush = 0;
@@ -5006,13 +5020,12 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 		} else {
 			last_tickets_id = space_info->tickets_id;
 			flush_state = FLUSH_DELAYED_ITEMS_NR;
-			if (commit_cycles)
-				commit_cycles--;
+			reclaim_priority = 0;
 		}
 
 		if (flush_state > COMMIT_TRANS) {
-			commit_cycles++;
-			if (commit_cycles > 2) {
+			reclaim_priority++;
+			if (reclaim_priority > 3) {
 				wake_all_tickets(&space_info->tickets);
 				space_info->flush = 0;
 			} else {
@@ -5046,7 +5059,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
 
 	do {
 		flush_space(fs_info->fs_root, space_info, to_reclaim,
-			    to_reclaim, flush_state);
+			    flush_state, 1);
 		flush_state++;
 		spin_lock(&space_info->lock);
 		if (ticket->bytes == 0) {
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index e030d6f..7d953a6 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -857,15 +857,14 @@ TRACE_EVENT(btrfs_trigger_flush,
 TRACE_EVENT(btrfs_flush_space,
 
 	TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes,
-		 u64 orig_bytes, int state, int ret),
+		 int state, int ret),
 
-	TP_ARGS(fs_info, flags, num_bytes, orig_bytes, state, ret),
+	TP_ARGS(fs_info, flags, num_bytes, state, ret),
 
 	TP_STRUCT__entry(
 		__array(	u8,	fsid,	BTRFS_UUID_SIZE	)
 		__field(	u64,	flags			)
 		__field(	u64,	num_bytes		)
-		__field(	u64,	orig_bytes		)
 		__field(	int,	state			)
 		__field(	int,	ret			)
 	),
@@ -874,19 +873,17 @@ TRACE_EVENT(btrfs_flush_space,
 		memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
 		__entry->flags		=	flags;
 		__entry->num_bytes	=	num_bytes;
-		__entry->orig_bytes	=	orig_bytes;
 		__entry->state		=	state;
 		__entry->ret		=	ret;
 	),
 
 	TP_printk("%pU: state = %d(%s), flags = %llu(%s), num_bytes = %llu, "
-		  "orig_bytes = %llu, ret = %d", __entry->fsid, __entry->state,
+		  "ret = %d", __entry->fsid, __entry->state,
 		  show_flush_state(__entry->state),
 		  (unsigned long long)__entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
 				BTRFS_GROUP_FLAGS),
-		  (unsigned long long)__entry->num_bytes,
-		  (unsigned long long)__entry->orig_bytes, __entry->ret)
+		  (unsigned long long)__entry->num_bytes, __entry->ret)
 );
 
 DECLARE_EVENT_CLASS(btrfs__reserved_extent,
-- 
2.7.4




^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] btrfs: try to satisfy metadata requests if wen can overcommit
  2016-10-12  9:03 [PATCH 1/2] btrfs: introduce priority based delalloc shrink mechanism Wang Xiaoguang
@ 2016-10-12  9:03 ` Wang Xiaoguang
  2016-10-12 17:21   ` Josef Bacik
  2016-10-12 17:20 ` [PATCH 1/2] btrfs: introduce priority based delalloc shrink mechanism Josef Bacik
  1 sibling, 1 reply; 8+ messages in thread
From: Wang Xiaoguang @ 2016-10-12  9:03 UTC (permalink / raw)
  To: linux-btrfs; +Cc: jbacik

In shrink_delalloc(), if can_overcommit() returns true, shrink_delalloc()
will give up shrinking delalloc bytes, in this case we should check whether
some tickcts' requests can overcommit, if some can, we can satisfy them
timely and directly.

Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
 fs/btrfs/extent-tree.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 661f039..c2e6871 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4971,6 +4971,47 @@ static void wake_all_tickets(struct list_head *head)
 }
 
 /*
+ * This function must be protected by btrfs_space_info's lock.
+ * In shrink_delalloc(), if can_overcommit() returns true, shrink_delalloc()
+ * will not shrink delalloc bytes any more, in this case we should check
+ * whether some tickcts' requests can overcommit, if some can, we can satisfy
+ * them timely and directly.
+ */
+static void try_to_wake_tickets(struct btrfs_root *root,
+				struct btrfs_space_info *space_info)
+{
+	struct reserve_ticket *ticket;
+	struct list_head *head = &space_info->priority_tickets;
+	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
+	u64 used;
+
+again:
+	while (!list_empty(head)) {
+		ticket = list_first_entry(head, struct reserve_ticket,
+					  list);
+		used = space_info->bytes_used +
+			space_info->bytes_reserved + space_info->bytes_pinned +
+			space_info->bytes_readonly + space_info->bytes_may_use;
+
+		if (used + ticket->bytes <= space_info->total_bytes ||
+		    can_overcommit(root, space_info, ticket->bytes, flush)) {
+			space_info->bytes_may_use += ticket->bytes;
+			list_del_init(&ticket->list);
+			ticket->bytes = 0;
+			space_info->tickets_id++;
+			wake_up(&ticket->wait);
+		} else
+			return;
+	}
+
+	if (head == &space_info->priority_tickets) {
+		head = &space_info->tickets;
+		flush = BTRFS_RESERVE_FLUSH_ALL;
+		goto again;
+	}
+}
+
+/*
  * This is for normal flushers, we can wait all goddamned day if we want to.  We
  * will loop and continuously try to flush as long as we are making progress.
  * We count progress as clearing off tickets each time we have to loop.
@@ -5006,6 +5047,8 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 		ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
 				  flush_state, reclaim_priority);
 		spin_lock(&space_info->lock);
+		if (!ret)
+			try_to_wake_tickets(fs_info->fs_root, space_info);
 		if (list_empty(&space_info->tickets)) {
 			space_info->flush = 0;
 			spin_unlock(&space_info->lock);
-- 
2.7.4




^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] btrfs: introduce priority based delalloc shrink mechanism
  2016-10-12  9:03 [PATCH 1/2] btrfs: introduce priority based delalloc shrink mechanism Wang Xiaoguang
  2016-10-12  9:03 ` [PATCH 2/2] btrfs: try to satisfy metadata requests if wen can overcommit Wang Xiaoguang
@ 2016-10-12 17:20 ` Josef Bacik
  2016-10-13  5:40   ` Wang Xiaoguang
  1 sibling, 1 reply; 8+ messages in thread
From: Josef Bacik @ 2016-10-12 17:20 UTC (permalink / raw)
  To: Wang Xiaoguang, linux-btrfs

On 10/12/2016 05:03 AM, Wang Xiaoguang wrote:
> Since commit b02441999efcc6152b87cd58e7970bb7843f76cf, we don't wait all
> ordered extents, but I run into some enospc errors when doing large file
> create and delete tests, it's because shrink_delalloc() does not write
> enough delalloc bytes and wait them finished:
>     From: Miao Xie <miaox@cn.fujitsu.com>
>     Date: Mon, 4 Nov 2013 23:13:25 +0800
>     Subject: [PATCH] Btrfs: don't wait for the completion of all the ordered extents
>
>     It is very likely that there are lots of ordered extents in the filesytem,
>     if we wait for the completion of all of them when we want to reclaim some
>     space for the metadata space reservation, we would be blocked for a long
>     time. The performance would drop down suddenly for a long time.
>
> Here we introduce a simple reclaim_priority variable, the higher the
> value, the higher the priority, 0 is the minimum priority. The core
> idea is:
>         if (reclaim_priority >= 3)
>                 to_reclaim = percpu_counter_sum_positive(&root->fs_info->delalloc_bytes);
>         else
>                 to_reclaim = orig * (2 << (reclaim_priority + 1));
> As the priority increases, we try wo write more delalloc bytes, here orig
> is the number of metadata we want to reserve. Meanwhile if
> "reclaim_priority >= 3" returns true, we'll also wait all current ordered
> extents to finish.

Ok this is closer to what I want but not quite there.  I want to get rid of the 
magic numbers, so we start with

#define BTRFS_DEFAULT_FLUSH_PRIORITY 3

and once priority == 0 we know it's time to flush and wait for all the things. 
Then we use the priority as a multiplier, with DEFAULT being no multiplier, so 
it would look something like this

if (reclaim_priority)
	to_reclaim = orig * (2 << BTRFS_DEFAULT_FLUSH_PRIORITY - priority);
else
	to_reclaim = delalloc_bytes;
....
if (reclaim_priority)
	items_to_wait = calc_reclaim_items_nr(root, to_reclaim);
else
	items_to_wait = -1;


and then instead of

if (reclaim_priority > 3) {
	wake_all_tickets(&space_info->tickets);
...

we can change the while loop to be

while ((flush_state <= COMMIT_TRANS) || (--reclaim_priority >= 0));

and move the wake_all_tickets() to outside the loop, and add a

if (flush_state > COMMIT_TRANS)
	flush_state = FLUSH_DELAYED_ITEMS_NR;

to the start of the do loop.  Does that make sense?  Thanks,

Josef

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] btrfs: try to satisfy metadata requests if wen can overcommit
  2016-10-12  9:03 ` [PATCH 2/2] btrfs: try to satisfy metadata requests if wen can overcommit Wang Xiaoguang
@ 2016-10-12 17:21   ` Josef Bacik
  0 siblings, 0 replies; 8+ messages in thread
From: Josef Bacik @ 2016-10-12 17:21 UTC (permalink / raw)
  To: Wang Xiaoguang, linux-btrfs

On 10/12/2016 05:03 AM, Wang Xiaoguang wrote:
> In shrink_delalloc(), if can_overcommit() returns true, shrink_delalloc()
> will give up shrinking delalloc bytes, in this case we should check whether
> some tickcts' requests can overcommit, if some can, we can satisfy them
> timely and directly.
>
> Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>

I think I said this elsewhere but just in case you can add

Reviewed-by: Josef Bacik <jbacik@fb.com>

to this, thanks,

Josef

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] btrfs: introduce priority based delalloc shrink mechanism
  2016-10-12 17:20 ` [PATCH 1/2] btrfs: introduce priority based delalloc shrink mechanism Josef Bacik
@ 2016-10-13  5:40   ` Wang Xiaoguang
  2016-10-13  9:31     ` [PATCH v2] " Wang Xiaoguang
  0 siblings, 1 reply; 8+ messages in thread
From: Wang Xiaoguang @ 2016-10-13  5:40 UTC (permalink / raw)
  To: Josef Bacik, linux-btrfs

hi,

On 10/13/2016 01:20 AM, Josef Bacik wrote:
> On 10/12/2016 05:03 AM, Wang Xiaoguang wrote:
>> Since commit b02441999efcc6152b87cd58e7970bb7843f76cf, we don't wait all
>> ordered extents, but I run into some enospc errors when doing large file
>> create and delete tests, it's because shrink_delalloc() does not write
>> enough delalloc bytes and wait them finished:
>>     From: Miao Xie <miaox@cn.fujitsu.com>
>>     Date: Mon, 4 Nov 2013 23:13:25 +0800
>>     Subject: [PATCH] Btrfs: don't wait for the completion of all the 
>> ordered extents
>>
>>     It is very likely that there are lots of ordered extents in the 
>> filesytem,
>>     if we wait for the completion of all of them when we want to 
>> reclaim some
>>     space for the metadata space reservation, we would be blocked for 
>> a long
>>     time. The performance would drop down suddenly for a long time.
>>
>> Here we introduce a simple reclaim_priority variable, the higher the
>> value, the higher the priority, 0 is the minimum priority. The core
>> idea is:
>>         if (reclaim_priority >= 3)
>>                 to_reclaim = 
>> percpu_counter_sum_positive(&root->fs_info->delalloc_bytes);
>>         else
>>                 to_reclaim = orig * (2 << (reclaim_priority + 1));
>> As the priority increases, we try wo write more delalloc bytes, here 
>> orig
>> is the number of metadata we want to reserve. Meanwhile if
>> "reclaim_priority >= 3" returns true, we'll also wait all current 
>> ordered
>> extents to finish.
>
> Ok this is closer to what I want but not quite there.  I want to get 
> rid of the magic numbers, so we start with
>
> #define BTRFS_DEFAULT_FLUSH_PRIORITY 3
>
> and once priority == 0 we know it's time to flush and wait for all the 
> things. Then we use the priority as a multiplier, with DEFAULT being 
> no multiplier, so it would look something like this
>
> if (reclaim_priority)
>     to_reclaim = orig * (2 << BTRFS_DEFAULT_FLUSH_PRIORITY - priority);
> else
>     to_reclaim = delalloc_bytes;
> ....
> if (reclaim_priority)
>     items_to_wait = calc_reclaim_items_nr(root, to_reclaim);
> else
>     items_to_wait = -1;
>
>
> and then instead of
>
> if (reclaim_priority > 3) {
>     wake_all_tickets(&space_info->tickets);
> ...
>
> we can change the while loop to be
>
> while ((flush_state <= COMMIT_TRANS) || (--reclaim_priority >= 0));
>
> and move the wake_all_tickets() to outside the loop, and add a
>
> if (flush_state > COMMIT_TRANS)
>     flush_state = FLUSH_DELAYED_ITEMS_NR;
>
> to the start of the do loop.  Does that make sense?  Thanks,
Yes, thanks for your kindly help.
For "wake_all_tickets()", I think we still need to put it in the while loop,
If we move it outside, we need to relock btrfs_space_info's lock, but
here there maybe some race window, some new tickets maybe added in,
we should not wake up them directly and should also do some flush work
for them.

Regards,
Xiaoguang Wang

>
> Josef
>
>




^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2] btrfs: introduce priority based delalloc shrink mechanism
  2016-10-13  5:40   ` Wang Xiaoguang
@ 2016-10-13  9:31     ` Wang Xiaoguang
  2016-10-24 15:43       ` David Sterba
  0 siblings, 1 reply; 8+ messages in thread
From: Wang Xiaoguang @ 2016-10-13  9:31 UTC (permalink / raw)
  To: linux-btrfs

Since commit b02441999efcc6152b87cd58e7970bb7843f76cf, we don't wait all
ordered extents, but I run into some enospc errors when doing large file
create and delete tests, it's because shrink_delalloc() does not write
enough delalloc bytes and wait them finished:
    From: Miao Xie <miaox@cn.fujitsu.com>
    Date: Mon, 4 Nov 2013 23:13:25 +0800
    Subject: [PATCH] Btrfs: don't wait for the completion of all the ordered extents

    It is very likely that there are lots of ordered extents in the filesytem,
    if we wait for the completion of all of them when we want to reclaim some
    space for the metadata space reservation, we would be blocked for a long
    time. The performance would drop down suddenly for a long time.

Here we introduce a simple reclaim_priority variable, the lower the
value, the higher the priority, 0 is the maximum priority. The core
idea is:
    delalloc_bytes = percpu_counter_sum_positive(&root->fs_info->delalloc_bytes);
    if (reclaim_priority)
        to_reclaim = orig * (2 << (BTRFS_DEFAULT_FLUSH_PRIORITY - reclaim_priority));
    else
        to_reclaim = delalloc_bytes;

Here 'orig' is the number of metadata we want to reserve, and as the priority
increases, we will try wo write more delalloc bytes, meanwhile if
"reclaim_priority == 0" returns true, we'll also wait all current ordered
extents to finish.

Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
---
 fs/btrfs/extent-tree.c       | 63 ++++++++++++++++++++++++++------------------
 include/trace/events/btrfs.h | 11 +++-----
 2 files changed, 42 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e08791d..7477c25 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4686,16 +4686,18 @@ static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
 }
 
 #define EXTENT_SIZE_PER_ITEM	SZ_256K
+#define	BTRFS_DEFAULT_FLUSH_PRIORITY	3
 
 /*
  * shrink metadata reservation for delalloc
  */
-static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
-			    bool wait_ordered)
+static void shrink_delalloc(struct btrfs_root *root, u64 orig,
+			    bool wait_ordered, int reclaim_priority)
 {
 	struct btrfs_block_rsv *block_rsv;
 	struct btrfs_space_info *space_info;
 	struct btrfs_trans_handle *trans;
+	u64 to_reclaim;
 	u64 delalloc_bytes;
 	u64 max_reclaim;
 	long time_left;
@@ -4703,22 +4705,36 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 	int loops;
 	int items;
 	enum btrfs_reserve_flush_enum flush;
+	int items_to_wait;
+
+	delalloc_bytes = percpu_counter_sum_positive(
+				&root->fs_info->delalloc_bytes);
+	if (reclaim_priority < 0)
+		reclaim_priority = 0;
+
+	if (reclaim_priority)
+		to_reclaim = orig * (2 << (BTRFS_DEFAULT_FLUSH_PRIORITY -
+					reclaim_priority));
+	else
+		to_reclaim = delalloc_bytes;
 
 	/* Calc the number of the pages we need flush for space reservation */
 	items = calc_reclaim_items_nr(root, to_reclaim);
 	to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM;
+	if (reclaim_priority)
+		items_to_wait = items;
+	else
+		items_to_wait = -1;
 
 	trans = (struct btrfs_trans_handle *)current->journal_info;
 	block_rsv = &root->fs_info->delalloc_block_rsv;
 	space_info = block_rsv->space_info;
 
-	delalloc_bytes = percpu_counter_sum_positive(
-						&root->fs_info->delalloc_bytes);
 	if (delalloc_bytes == 0) {
 		if (trans)
 			return;
 		if (wait_ordered)
-			btrfs_wait_ordered_roots(root->fs_info, items,
+			btrfs_wait_ordered_roots(root->fs_info, items_to_wait,
 						 0, (u64)-1);
 		return;
 	}
@@ -4763,7 +4779,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
 
 		loops++;
 		if (wait_ordered && !trans) {
-			btrfs_wait_ordered_roots(root->fs_info, items,
+			btrfs_wait_ordered_roots(root->fs_info, items_to_wait,
 						 0, (u64)-1);
 		} else {
 			time_left = schedule_timeout_killable(1);
@@ -4836,7 +4852,7 @@ struct reserve_ticket {
 
 static int flush_space(struct btrfs_root *root,
 		       struct btrfs_space_info *space_info, u64 num_bytes,
-		       u64 orig_bytes, int state)
+		       int state, int reclaim_priority)
 {
 	struct btrfs_trans_handle *trans;
 	int nr;
@@ -4860,8 +4876,8 @@ static int flush_space(struct btrfs_root *root,
 		break;
 	case FLUSH_DELALLOC:
 	case FLUSH_DELALLOC_WAIT:
-		shrink_delalloc(root, num_bytes * 2, orig_bytes,
-				state == FLUSH_DELALLOC_WAIT);
+		shrink_delalloc(root, num_bytes, state == FLUSH_DELALLOC_WAIT,
+				reclaim_priority);
 		break;
 	case ALLOC_CHUNK:
 		trans = btrfs_join_transaction(root);
@@ -4877,7 +4893,7 @@ static int flush_space(struct btrfs_root *root,
 			ret = 0;
 		break;
 	case COMMIT_TRANS:
-		ret = may_commit_transaction(root, space_info, orig_bytes, 0);
+		ret = may_commit_transaction(root, space_info, num_bytes, 0);
 		break;
 	default:
 		ret = -ENOSPC;
@@ -4885,7 +4901,7 @@ static int flush_space(struct btrfs_root *root,
 	}
 
 	trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes,
-				orig_bytes, state, ret);
+				state, ret);
 	return ret;
 }
 
@@ -5008,8 +5024,8 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 	struct btrfs_space_info *space_info;
 	u64 to_reclaim;
 	int flush_state;
-	int commit_cycles = 0;
 	u64 last_tickets_id;
+	int reclaim_priority = BTRFS_DEFAULT_FLUSH_PRIORITY;
 
 	fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
 	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
@@ -5030,8 +5046,11 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 		struct reserve_ticket *ticket;
 		int ret;
 
+		if (flush_state > COMMIT_TRANS)
+			flush_state = FLUSH_DELAYED_ITEMS_NR;
 		ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
-			    to_reclaim, flush_state);
+				  flush_state, reclaim_priority);
+
 		spin_lock(&space_info->lock);
 		if (!ret)
 			try_to_wake_tickets(fs_info->fs_root, space_info);
@@ -5049,21 +5068,15 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 		} else {
 			last_tickets_id = space_info->tickets_id;
 			flush_state = FLUSH_DELAYED_ITEMS_NR;
-			if (commit_cycles)
-				commit_cycles--;
+			reclaim_priority = BTRFS_DEFAULT_FLUSH_PRIORITY;
 		}
 
-		if (flush_state > COMMIT_TRANS) {
-			commit_cycles++;
-			if (commit_cycles > 2) {
-				wake_all_tickets(&space_info->tickets);
-				space_info->flush = 0;
-			} else {
-				flush_state = FLUSH_DELAYED_ITEMS_NR;
-			}
+		if (flush_state > COMMIT_TRANS && reclaim_priority == 0) {
+			wake_all_tickets(&space_info->tickets);
+			space_info->flush = 0;
 		}
 		spin_unlock(&space_info->lock);
-	} while (flush_state <= COMMIT_TRANS);
+	} while ((flush_state <= COMMIT_TRANS) || (--reclaim_priority >= 0));
 }
 
 void btrfs_init_async_reclaim_work(struct work_struct *work)
@@ -5089,7 +5102,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
 
 	do {
 		flush_space(fs_info->fs_root, space_info, to_reclaim,
-			    to_reclaim, flush_state);
+			    flush_state, 1);
 		flush_state++;
 		spin_lock(&space_info->lock);
 		if (ticket->bytes == 0) {
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index e030d6f..7d953a6 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -857,15 +857,14 @@ TRACE_EVENT(btrfs_trigger_flush,
 TRACE_EVENT(btrfs_flush_space,
 
 	TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes,
-		 u64 orig_bytes, int state, int ret),
+		 int state, int ret),
 
-	TP_ARGS(fs_info, flags, num_bytes, orig_bytes, state, ret),
+	TP_ARGS(fs_info, flags, num_bytes, state, ret),
 
 	TP_STRUCT__entry(
 		__array(	u8,	fsid,	BTRFS_UUID_SIZE	)
 		__field(	u64,	flags			)
 		__field(	u64,	num_bytes		)
-		__field(	u64,	orig_bytes		)
 		__field(	int,	state			)
 		__field(	int,	ret			)
 	),
@@ -874,19 +873,17 @@ TRACE_EVENT(btrfs_flush_space,
 		memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
 		__entry->flags		=	flags;
 		__entry->num_bytes	=	num_bytes;
-		__entry->orig_bytes	=	orig_bytes;
 		__entry->state		=	state;
 		__entry->ret		=	ret;
 	),
 
 	TP_printk("%pU: state = %d(%s), flags = %llu(%s), num_bytes = %llu, "
-		  "orig_bytes = %llu, ret = %d", __entry->fsid, __entry->state,
+		  "ret = %d", __entry->fsid, __entry->state,
 		  show_flush_state(__entry->state),
 		  (unsigned long long)__entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
 				BTRFS_GROUP_FLAGS),
-		  (unsigned long long)__entry->num_bytes,
-		  (unsigned long long)__entry->orig_bytes, __entry->ret)
+		  (unsigned long long)__entry->num_bytes, __entry->ret)
 );
 
 DECLARE_EVENT_CLASS(btrfs__reserved_extent,
-- 
2.7.4




^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] btrfs: introduce priority based delalloc shrink mechanism
  2016-10-13  9:31     ` [PATCH v2] " Wang Xiaoguang
@ 2016-10-24 15:43       ` David Sterba
  2016-11-22 19:35         ` Chris Mason
  0 siblings, 1 reply; 8+ messages in thread
From: David Sterba @ 2016-10-24 15:43 UTC (permalink / raw)
  To: jbacik; +Cc: Wang Xiaoguang, linux-btrfs

Hi Josef,

are you fine with V2?

On Thu, Oct 13, 2016 at 05:31:25PM +0800, Wang Xiaoguang wrote:
> Since commit b02441999efcc6152b87cd58e7970bb7843f76cf, we don't wait all
> ordered extents, but I run into some enospc errors when doing large file
> create and delete tests, it's because shrink_delalloc() does not write
> enough delalloc bytes and wait them finished:
>     From: Miao Xie <miaox@cn.fujitsu.com>
>     Date: Mon, 4 Nov 2013 23:13:25 +0800
>     Subject: [PATCH] Btrfs: don't wait for the completion of all the ordered extents
> 
>     It is very likely that there are lots of ordered extents in the filesytem,
>     if we wait for the completion of all of them when we want to reclaim some
>     space for the metadata space reservation, we would be blocked for a long
>     time. The performance would drop down suddenly for a long time.
> 
> Here we introduce a simple reclaim_priority variable, the lower the
> value, the higher the priority, 0 is the maximum priority. The core
> idea is:
>     delalloc_bytes = percpu_counter_sum_positive(&root->fs_info->delalloc_bytes);
>     if (reclaim_priority)
>         to_reclaim = orig * (2 << (BTRFS_DEFAULT_FLUSH_PRIORITY - reclaim_priority));
>     else
>         to_reclaim = delalloc_bytes;
> 
> Here 'orig' is the number of metadata we want to reserve, and as the priority
> increases, we will try wo write more delalloc bytes, meanwhile if
> "reclaim_priority == 0" returns true, we'll also wait all current ordered
> extents to finish.
> 
> Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
> ---
>  fs/btrfs/extent-tree.c       | 63 ++++++++++++++++++++++++++------------------
>  include/trace/events/btrfs.h | 11 +++-----
>  2 files changed, 42 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index e08791d..7477c25 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -4686,16 +4686,18 @@ static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
>  }
>  
>  #define EXTENT_SIZE_PER_ITEM	SZ_256K
> +#define	BTRFS_DEFAULT_FLUSH_PRIORITY	3
>  
>  /*
>   * shrink metadata reservation for delalloc
>   */
> -static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
> -			    bool wait_ordered)
> +static void shrink_delalloc(struct btrfs_root *root, u64 orig,
> +			    bool wait_ordered, int reclaim_priority)
>  {
>  	struct btrfs_block_rsv *block_rsv;
>  	struct btrfs_space_info *space_info;
>  	struct btrfs_trans_handle *trans;
> +	u64 to_reclaim;
>  	u64 delalloc_bytes;
>  	u64 max_reclaim;
>  	long time_left;
> @@ -4703,22 +4705,36 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
>  	int loops;
>  	int items;
>  	enum btrfs_reserve_flush_enum flush;
> +	int items_to_wait;
> +
> +	delalloc_bytes = percpu_counter_sum_positive(
> +				&root->fs_info->delalloc_bytes);
> +	if (reclaim_priority < 0)
> +		reclaim_priority = 0;
> +
> +	if (reclaim_priority)
> +		to_reclaim = orig * (2 << (BTRFS_DEFAULT_FLUSH_PRIORITY -
> +					reclaim_priority));
> +	else
> +		to_reclaim = delalloc_bytes;
>  
>  	/* Calc the number of the pages we need flush for space reservation */
>  	items = calc_reclaim_items_nr(root, to_reclaim);
>  	to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM;
> +	if (reclaim_priority)
> +		items_to_wait = items;
> +	else
> +		items_to_wait = -1;
>  
>  	trans = (struct btrfs_trans_handle *)current->journal_info;
>  	block_rsv = &root->fs_info->delalloc_block_rsv;
>  	space_info = block_rsv->space_info;
>  
> -	delalloc_bytes = percpu_counter_sum_positive(
> -						&root->fs_info->delalloc_bytes);
>  	if (delalloc_bytes == 0) {
>  		if (trans)
>  			return;
>  		if (wait_ordered)
> -			btrfs_wait_ordered_roots(root->fs_info, items,
> +			btrfs_wait_ordered_roots(root->fs_info, items_to_wait,
>  						 0, (u64)-1);
>  		return;
>  	}
> @@ -4763,7 +4779,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
>  
>  		loops++;
>  		if (wait_ordered && !trans) {
> -			btrfs_wait_ordered_roots(root->fs_info, items,
> +			btrfs_wait_ordered_roots(root->fs_info, items_to_wait,
>  						 0, (u64)-1);
>  		} else {
>  			time_left = schedule_timeout_killable(1);
> @@ -4836,7 +4852,7 @@ struct reserve_ticket {
>  
>  static int flush_space(struct btrfs_root *root,
>  		       struct btrfs_space_info *space_info, u64 num_bytes,
> -		       u64 orig_bytes, int state)
> +		       int state, int reclaim_priority)
>  {
>  	struct btrfs_trans_handle *trans;
>  	int nr;
> @@ -4860,8 +4876,8 @@ static int flush_space(struct btrfs_root *root,
>  		break;
>  	case FLUSH_DELALLOC:
>  	case FLUSH_DELALLOC_WAIT:
> -		shrink_delalloc(root, num_bytes * 2, orig_bytes,
> -				state == FLUSH_DELALLOC_WAIT);
> +		shrink_delalloc(root, num_bytes, state == FLUSH_DELALLOC_WAIT,
> +				reclaim_priority);
>  		break;
>  	case ALLOC_CHUNK:
>  		trans = btrfs_join_transaction(root);
> @@ -4877,7 +4893,7 @@ static int flush_space(struct btrfs_root *root,
>  			ret = 0;
>  		break;
>  	case COMMIT_TRANS:
> -		ret = may_commit_transaction(root, space_info, orig_bytes, 0);
> +		ret = may_commit_transaction(root, space_info, num_bytes, 0);
>  		break;
>  	default:
>  		ret = -ENOSPC;
> @@ -4885,7 +4901,7 @@ static int flush_space(struct btrfs_root *root,
>  	}
>  
>  	trace_btrfs_flush_space(root->fs_info, space_info->flags, num_bytes,
> -				orig_bytes, state, ret);
> +				state, ret);
>  	return ret;
>  }
>  
> @@ -5008,8 +5024,8 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
>  	struct btrfs_space_info *space_info;
>  	u64 to_reclaim;
>  	int flush_state;
> -	int commit_cycles = 0;
>  	u64 last_tickets_id;
> +	int reclaim_priority = BTRFS_DEFAULT_FLUSH_PRIORITY;
>  
>  	fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
>  	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
> @@ -5030,8 +5046,11 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
>  		struct reserve_ticket *ticket;
>  		int ret;
>  
> +		if (flush_state > COMMIT_TRANS)
> +			flush_state = FLUSH_DELAYED_ITEMS_NR;
>  		ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
> -			    to_reclaim, flush_state);
> +				  flush_state, reclaim_priority);
> +
>  		spin_lock(&space_info->lock);
>  		if (!ret)
>  			try_to_wake_tickets(fs_info->fs_root, space_info);
> @@ -5049,21 +5068,15 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
>  		} else {
>  			last_tickets_id = space_info->tickets_id;
>  			flush_state = FLUSH_DELAYED_ITEMS_NR;
> -			if (commit_cycles)
> -				commit_cycles--;
> +			reclaim_priority = BTRFS_DEFAULT_FLUSH_PRIORITY;
>  		}
>  
> -		if (flush_state > COMMIT_TRANS) {
> -			commit_cycles++;
> -			if (commit_cycles > 2) {
> -				wake_all_tickets(&space_info->tickets);
> -				space_info->flush = 0;
> -			} else {
> -				flush_state = FLUSH_DELAYED_ITEMS_NR;
> -			}
> +		if (flush_state > COMMIT_TRANS && reclaim_priority == 0) {
> +			wake_all_tickets(&space_info->tickets);
> +			space_info->flush = 0;
>  		}
>  		spin_unlock(&space_info->lock);
> -	} while (flush_state <= COMMIT_TRANS);
> +	} while ((flush_state <= COMMIT_TRANS) || (--reclaim_priority >= 0));
>  }
>  
>  void btrfs_init_async_reclaim_work(struct work_struct *work)
> @@ -5089,7 +5102,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
>  
>  	do {
>  		flush_space(fs_info->fs_root, space_info, to_reclaim,
> -			    to_reclaim, flush_state);
> +			    flush_state, 1);
>  		flush_state++;
>  		spin_lock(&space_info->lock);
>  		if (ticket->bytes == 0) {
> diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
> index e030d6f..7d953a6 100644
> --- a/include/trace/events/btrfs.h
> +++ b/include/trace/events/btrfs.h
> @@ -857,15 +857,14 @@ TRACE_EVENT(btrfs_trigger_flush,
>  TRACE_EVENT(btrfs_flush_space,
>  
>  	TP_PROTO(struct btrfs_fs_info *fs_info, u64 flags, u64 num_bytes,
> -		 u64 orig_bytes, int state, int ret),
> +		 int state, int ret),
>  
> -	TP_ARGS(fs_info, flags, num_bytes, orig_bytes, state, ret),
> +	TP_ARGS(fs_info, flags, num_bytes, state, ret),
>  
>  	TP_STRUCT__entry(
>  		__array(	u8,	fsid,	BTRFS_UUID_SIZE	)
>  		__field(	u64,	flags			)
>  		__field(	u64,	num_bytes		)
> -		__field(	u64,	orig_bytes		)
>  		__field(	int,	state			)
>  		__field(	int,	ret			)
>  	),
> @@ -874,19 +873,17 @@ TRACE_EVENT(btrfs_flush_space,
>  		memcpy(__entry->fsid, fs_info->fsid, BTRFS_UUID_SIZE);
>  		__entry->flags		=	flags;
>  		__entry->num_bytes	=	num_bytes;
> -		__entry->orig_bytes	=	orig_bytes;
>  		__entry->state		=	state;
>  		__entry->ret		=	ret;
>  	),
>  
>  	TP_printk("%pU: state = %d(%s), flags = %llu(%s), num_bytes = %llu, "
> -		  "orig_bytes = %llu, ret = %d", __entry->fsid, __entry->state,
> +		  "ret = %d", __entry->fsid, __entry->state,
>  		  show_flush_state(__entry->state),
>  		  (unsigned long long)__entry->flags,
>  		  __print_flags((unsigned long)__entry->flags, "|",
>  				BTRFS_GROUP_FLAGS),
> -		  (unsigned long long)__entry->num_bytes,
> -		  (unsigned long long)__entry->orig_bytes, __entry->ret)
> +		  (unsigned long long)__entry->num_bytes, __entry->ret)
>  );
>  
>  DECLARE_EVENT_CLASS(btrfs__reserved_extent,
> -- 
> 2.7.4
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] btrfs: introduce priority based delalloc shrink mechanism
  2016-10-24 15:43       ` David Sterba
@ 2016-11-22 19:35         ` Chris Mason
  0 siblings, 0 replies; 8+ messages in thread
From: Chris Mason @ 2016-11-22 19:35 UTC (permalink / raw)
  To: dsterba, jbacik, Wang Xiaoguang, linux-btrfs

On 10/24/2016 11:43 AM, David Sterba wrote:
> Hi Josef,
>
> are you fine with V2?
>
> On Thu, Oct 13, 2016 at 05:31:25PM +0800, Wang Xiaoguang wrote:
>> Since commit b02441999efcc6152b87cd58e7970bb7843f76cf, we don't wait all
>> ordered extents, but I run into some enospc errors when doing large file
>> create and delete tests, it's because shrink_delalloc() does not write
>> enough delalloc bytes and wait them finished:
>>     From: Miao Xie <miaox@cn.fujitsu.com>
>>     Date: Mon, 4 Nov 2013 23:13:25 +0800
>>     Subject: [PATCH] Btrfs: don't wait for the completion of all the ordered extents
>>
>>     It is very likely that there are lots of ordered extents in the filesytem,
>>     if we wait for the completion of all of them when we want to reclaim some
>>     space for the metadata space reservation, we would be blocked for a long
>>     time. The performance would drop down suddenly for a long time.
>>
>> Here we introduce a simple reclaim_priority variable, the lower the
>> value, the higher the priority, 0 is the maximum priority. The core
>> idea is:
>>     delalloc_bytes = percpu_counter_sum_positive(&root->fs_info->delalloc_bytes);
>>     if (reclaim_priority)
>>         to_reclaim = orig * (2 << (BTRFS_DEFAULT_FLUSH_PRIORITY - reclaim_priority));
>>     else
>>         to_reclaim = delalloc_bytes;
>>
>> Here 'orig' is the number of metadata we want to reserve, and as the priority
>> increases, we will try wo write more delalloc bytes, meanwhile if
>> "reclaim_priority == 0" returns true, we'll also wait all current ordered
>> extents to finish.
>>
>> Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
>> ---
>>  fs/btrfs/extent-tree.c       | 63 ++++++++++++++++++++++++++------------------
>>  include/trace/events/btrfs.h | 11 +++-----
>>  2 files changed, 42 insertions(+), 32 deletions(-)
>>

[ ... ]

>> @@ -5008,8 +5024,8 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
>>  	struct btrfs_space_info *space_info;
>>  	u64 to_reclaim;
>>  	int flush_state;
>> -	int commit_cycles = 0;
>>  	u64 last_tickets_id;
>> +	int reclaim_priority = BTRFS_DEFAULT_FLUSH_PRIORITY;
>>
>>  	fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
>>  	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
>> @@ -5030,8 +5046,11 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
>>  		struct reserve_ticket *ticket;
>>  		int ret;
>>
>> +		if (flush_state > COMMIT_TRANS)
>> +			flush_state = FLUSH_DELAYED_ITEMS_NR;
>>  		ret = flush_space(fs_info->fs_root, space_info, to_reclaim,
>> -			    to_reclaim, flush_state);
>> +				  flush_state, reclaim_priority);
>> +
>>  		spin_lock(&space_info->lock);
>>  		if (!ret)
>>  			try_to_wake_tickets(fs_info->fs_root, space_info);
>> @@ -5049,21 +5068,15 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
>>  		} else {
>>  			last_tickets_id = space_info->tickets_id;
>>  			flush_state = FLUSH_DELAYED_ITEMS_NR;
>> -			if (commit_cycles)
>> -				commit_cycles--;
>> +			reclaim_priority = BTRFS_DEFAULT_FLUSH_PRIORITY;
>>  		}
>>
>> -		if (flush_state > COMMIT_TRANS) {
>> -			commit_cycles++;
>> -			if (commit_cycles > 2) {
>> -				wake_all_tickets(&space_info->tickets);
>> -				space_info->flush = 0;
>> -			} else {
>> -				flush_state = FLUSH_DELAYED_ITEMS_NR;
>> -			}
>> +		if (flush_state > COMMIT_TRANS && reclaim_priority == 0) {
>> +			wake_all_tickets(&space_info->tickets);
>> +			space_info->flush = 0;
>>  		}
>>  		spin_unlock(&space_info->lock);
>> -	} while (flush_state <= COMMIT_TRANS);
>> +	} while ((flush_state <= COMMIT_TRANS) || (--reclaim_priority >= 0));
>>  }

I really like the idea behind this patch, but it's very difficult to 
follow what is going on with flush_state and reclaim_priority in this 
loop.  It needs big flashing signs explaining how long the loop is going 
to continue and what the goals are for bumping or dropping each one.

-chris

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2016-11-22 19:35 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-12  9:03 [PATCH 1/2] btrfs: introduce priority based delalloc shrink mechanism Wang Xiaoguang
2016-10-12  9:03 ` [PATCH 2/2] btrfs: try to satisfy metadata requests if wen can overcommit Wang Xiaoguang
2016-10-12 17:21   ` Josef Bacik
2016-10-12 17:20 ` [PATCH 1/2] btrfs: introduce priority based delalloc shrink mechanism Josef Bacik
2016-10-13  5:40   ` Wang Xiaoguang
2016-10-13  9:31     ` [PATCH v2] " Wang Xiaoguang
2016-10-24 15:43       ` David Sterba
2016-11-22 19:35         ` Chris Mason

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.